6599 kstat queues should assert on both platforms
[unleashed.git] / usr / src / uts / sun4u / cpu / common_asm.s
blobaee74db44219cf4165a70c8e7805f10b086a838c
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
22 * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
25 #if !defined(lint)
26 #include "assym.h"
27 #endif /* !lint */
30 * General assembly language routines.
31 * It is the intent of this file to contain routines that are
32 * specific to cpu architecture.
36 * WARNING: If you add a fast trap handler which can be invoked by a
37 * non-privileged user, you may have to use the FAST_TRAP_DONE macro
38 * instead of "done" instruction to return back to the user mode. See
39 * comments for the "fast_trap_done" entry point for more information.
41 #define FAST_TRAP_DONE \
42 ba,a fast_trap_done
45 * Override GET_NATIVE_TIME for the cpu module code. This is not
46 * guaranteed to be exactly one instruction, be careful of using
47 * the macro in delay slots.
49 * Do not use any instruction that modifies condition codes as the
50 * caller may depend on these to remain unchanged across the macro.
52 #if defined(CHEETAH) || defined(OLYMPUS_C)
/* Read STICK into out; the scratch arguments are not used. */
54 #define GET_NATIVE_TIME(out, scr1, scr2) \
55 rd STICK, out
/* Add delta to STICK, using reg as the working register. */
56 #define DELTA_NATIVE_TIME(delta, reg, scr1, scr2, scr3) \
57 rd STICK, reg; \
58 add reg, delta, reg; \
59 wr reg, STICK
/* Read STICK_COMPARE into out; scr is not used. */
60 #define RD_TICKCMPR(out, scr) \
61 rd STICK_COMPARE, out
/* Write in to STICK_COMPARE; the scratch and label arguments are not used. */
62 #define WR_TICKCMPR(in, scr1, scr2, label) \
63 wr in, STICK_COMPARE
65 #elif defined(HUMMINGBIRD)
66 #include <sys/spitregs.h>
69 * the current hummingbird version of %stick and %stick_cmp
70 * were both implemented as (2) 32-bit locations in ASI_IO space;
71 * the hdwr should support atomic r/w; meanwhile: ugly alert! ...
73 * 64-bit opcodes are required, but move only 32-bits:
75 * ldxa [phys]ASI_IO, %dst reads the low 32-bits from phys into %dst
76 * stxa %src, [phys]ASI_IO writes the low 32-bits from %src into phys
78 * reg equivalent [phys]ASI_IO
79 * ------------------ ---------------
80 * %stick_cmp low-32 0x1FE.0000.F060
81 * %stick_cmp high-32 0x1FE.0000.F068
82 * %stick low-32 0x1FE.0000.F070
83 * %stick high-32 0x1FE.0000.F078
85 #define HSTC_LOW 0x60 /* stick_cmp low 32-bits */
86 #define HSTC_HIGH 0x68 /* stick_cmp high 32-bits */
87 #define HST_LOW 0x70 /* stick low 32-bits */
88 #define HST_HIGH 0x78 /* stick high 32-bits */
89 #define HST_DIFF 0x08 /* low<-->high diff */
92 * Any change in the number of instructions in SETL41()
93 * will affect SETL41_OFF
95 #define SETL41(reg, byte) \
96 sethi %hi(0x1FE00000), reg; /* 0000.0000.1FE0.0000 */ \
97 or reg, 0xF, reg; /* 0000.0000.1FE0.000F */ \
98 sllx reg, 12, reg; /* 0000.01FE.0000.F000 */ \
99 or reg, byte, reg; /* 0000.01FE.0000.F0xx */
102 * SETL41_OFF is used to calculate the relative PC value when a
103 * branch instruction needs to go over SETL41() macro
105 #define SETL41_OFF 16
108 * reading stick requires 2 loads, and there could be an intervening
109 * low-to-high 32-bit rollover resulting in a return value that is
110 * off by about (2 ^ 32); this rare case is prevented by re-reading
111 * the low-32 bits after the high-32 and verifying the "after" value
112 * is >= the "before" value; if not, increment the high-32 value.
114 * this method is limited to 1 rollover, and based on the fixed
115 * stick-frequency (5555555), requires the loads to complete within
116 * 773 seconds; incrementing the high-32 value will not overflow for
117 * about 52644 years.
119 * writing stick requires 2 stores; if the old/new low-32 value is
120 * near 0xffffffff, there could be another rollover (also rare).
121 * to prevent this, we first write a 0 to the low-32, then write
122 * new values to the high-32 then the low-32.
124 * When we detect a carry in the lower %stick register, we need to
125 * read HST_HIGH again. However at the point where we detect this,
126 * we need to rebuild the register address HST_HIGH. This involves more
127 * than one instruction and a branch is unavoidable. However, most of
128 * the time, there is no carry. So we take the penalty of a branch
129 * instruction only when there is carry (less frequent).
131 * For GET_NATIVE_TIME(), we start afresh and branch to SETL41().
132 * For DELTA_NATIVE_TIME(), we branch to just after SETL41() since
133 * addr already points to HST_LOW.
135 * NOTE: this method requires disabling interrupts before using
136 * DELTA_NATIVE_TIME.
/*
 * Read the 64-bit stick value into out with a low/high/low load sequence.
 * tmp receives (second low read - first low read); if that is negative the
 * brlz branches back to the first instruction of SETL41() and the whole
 * read is redone. The displacement is SETL41_OFF plus 24 bytes, i.e. the
 * six 4-byte instructions that sit between SETL41() and the brlz.
 * scr and tmp are clobbered.
 */
138 #define GET_NATIVE_TIME(out, scr, tmp) \
139 SETL41(scr, HST_LOW); \
140 ldxa [scr]ASI_IO, tmp; \
141 inc HST_DIFF, scr; \
142 ldxa [scr]ASI_IO, out; \
143 dec HST_DIFF, scr; \
144 ldxa [scr]ASI_IO, scr; \
145 sub scr, tmp, tmp; \
146 brlz,pn tmp, .-(SETL41_OFF+24); \
147 sllx out, 32, out; \
148 or out, scr, out
/*
 * Add delta to stick. The value is read with the same low/high/low
 * sequence, but the retry branch only goes back 24 bytes, to the first
 * ldxa after SETL41(), because addr holds the HST_LOW address again at
 * that point. The sum is then split into 32-bit halves and written back
 * in the order: zero to the low word, new high word, new low word.
 * addr, high, low and tmp are clobbered.
 */
149 #define DELTA_NATIVE_TIME(delta, addr, high, low, tmp) \
150 SETL41(addr, HST_LOW); \
151 ldxa [addr]ASI_IO, tmp; \
152 inc HST_DIFF, addr; \
153 ldxa [addr]ASI_IO, high; \
154 dec HST_DIFF, addr; \
155 ldxa [addr]ASI_IO, low; \
156 sub low, tmp, tmp; \
157 brlz,pn tmp, .-24; \
158 sllx high, 32, high; \
159 or high, low, high; \
160 add high, delta, high; \
161 srl high, 0, low; \
162 srlx high, 32, high; \
163 stxa %g0, [addr]ASI_IO; \
164 inc HST_DIFF, addr; \
165 stxa high, [addr]ASI_IO; \
166 dec HST_DIFF, addr; \
167 stxa low, [addr]ASI_IO
/*
 * Read stick_cmp into out: the low word is loaded from HSTC_LOW, the high
 * word from HSTC_LOW + HST_DIFF, and the two are combined. scr is
 * clobbered.
 */
168 #define RD_TICKCMPR(out, scr) \
169 SETL41(scr, HSTC_LOW); \
170 ldxa [scr]ASI_IO, out; \
171 inc HST_DIFF, scr; \
172 ldxa [scr]ASI_IO, scr; \
173 sllx scr, 32, scr; \
174 or scr, out, out
/*
 * Write in to stick_cmp: the upper 32 bits are stored to HSTC_HIGH first,
 * then in itself is stored to HSTC_HIGH - HST_DIFF. scra and scrd are
 * clobbered; the label argument is not used.
 */
175 #define WR_TICKCMPR(in, scra, scrd, label) \
176 SETL41(scra, HSTC_HIGH); \
177 srlx in, 32, scrd; \
178 stxa scrd, [scra]ASI_IO; \
179 dec HST_DIFF, scra; \
180 stxa in, [scra]ASI_IO
182 #else /* !CHEETAH && !OLYMPUS_C && !HUMMINGBIRD */
/* Read %tick into out; the scratch arguments are not used. */
184 #define GET_NATIVE_TIME(out, scr1, scr2) \
185 rdpr %tick, out
/* Add delta to %tick, using reg as the working register. */
186 #define DELTA_NATIVE_TIME(delta, reg, scr1, scr2, scr3) \
187 rdpr %tick, reg; \
188 add reg, delta, reg; \
189 wrpr reg, %tick
/* Read TICK_COMPARE into out; scr is not used. */
190 #define RD_TICKCMPR(out, scr) \
191 rd TICK_COMPARE, out
192 #ifdef BB_ERRATA_1 /* writes to TICK_COMPARE may fail */
194 * Writes to the TICK_COMPARE register sometimes fail on blackbird modules.
195 * The failure occurs only when the following instruction decodes to wr or
196 * wrpr. The workaround is to immediately follow writes to TICK_COMPARE
197 * with a read, thus stalling the pipe and keeping following instructions
198 * from causing data corruption. Aligning to a quadword will ensure these
199 * two instructions are not split due to i$ misses.
/* label is pasted into the local symbol name so each expansion is unique. */
201 #define WR_TICKCMPR(cmpr,scr1,scr2,label) \
202 ba,a .bb_errata_1.label ;\
203 .align 64 ;\
204 .bb_errata_1.label: ;\
205 wr cmpr, TICK_COMPARE ;\
206 rd TICK_COMPARE, %g0
207 #else /* BB_ERRATA_1 */
/* Write in to TICK_COMPARE; the scratch and label arguments are not used. */
208 #define WR_TICKCMPR(in,scr1,scr2,label) \
209 wr in, TICK_COMPARE
210 #endif /* BB_ERRATA_1 */
212 #endif /* !CHEETAH && !OLYMPUS_C && !HUMMINGBIRD */
214 #include <sys/clock.h>
216 #if defined(lint)
217 #include <sys/types.h>
218 #include <sys/scb.h>
219 #include <sys/systm.h>
220 #include <sys/regset.h>
221 #include <sys/sunddi.h>
222 #include <sys/lockstat.h>
223 #endif /* lint */
226 #include <sys/asm_linkage.h>
227 #include <sys/privregs.h>
228 #include <sys/machparam.h> /* To get SYSBASE and PAGESIZE */
229 #include <sys/machthread.h>
230 #include <sys/clock.h>
231 #include <sys/intreg.h>
232 #include <sys/psr_compat.h>
233 #include <sys/isa_defs.h>
234 #include <sys/dditypes.h>
235 #include <sys/intr.h>
237 #if !defined(lint)
238 #include "assym.h"
239 #endif /* !lint */
241 #if defined(lint)
243 uint_t
244 get_impl(void)
245 { return (0); }
247 #else /* lint */
249 ENTRY(get_impl)
250 GET_CPU_IMPL(%o0)
251 retl
253 SET_SIZE(get_impl)
255 #endif /* lint */
257 #if defined(lint)
259 * Softint generated when counter field of tick reg matches value field
260 * of tick_cmpr reg
262 /*ARGSUSED*/
263 void
264 tickcmpr_set(uint64_t clock_cycles)
267 #else /* lint */
/*
 * tickcmpr_set: program the tick-compare register with the value passed
 * in %o0. After each write the native time is re-read, bit 63 is shifted
 * out of it, and the result is compared with the value just written. If
 * the written value is not greater, the step in %o3 (initially 8, doubled
 * on every attempt) is added to the current time and the write is retried.
 */
269 ENTRY_NP(tickcmpr_set)
270 ! get 64-bit clock_cycles interval
271 mov %o0, %o2
272 mov 8, %o3 ! A reasonable initial step size
274 WR_TICKCMPR(%o2,%o4,%o5,__LINE__) ! Write to TICK_CMPR
276 GET_NATIVE_TIME(%o0, %o4, %o5) ! Read %tick to confirm the
277 sllx %o0, 1, %o0 ! value we wrote was in the future.
278 srlx %o0, 1, %o0
280 cmp %o2, %o0 ! If the value we wrote was in the
281 bg,pt %xcc, 2f ! future, then blow out of here.
282 sllx %o3, 1, %o3 ! If not, then double our step size,
283 ba,pt %xcc, 1b ! and take another lap.
284 add %o0, %o3, %o2 !
286 retl
288 SET_SIZE(tickcmpr_set)
290 #endif /* lint */
292 #if defined(lint)
294 void
295 tickcmpr_disable(void)
298 #else /* lint */
300 ENTRY_NP(tickcmpr_disable)
301 mov 1, %g1
302 sllx %g1, TICKINT_DIS_SHFT, %o0
303 WR_TICKCMPR(%o0,%o4,%o5,__LINE__) ! Write to TICK_CMPR
304 retl
306 SET_SIZE(tickcmpr_disable)
308 #endif /* lint */
310 #if defined(lint)
313 * tick_write_delta() increments %tick by the specified delta. This should
314 * only be called after a CPR event to assure that gethrtime() continues to
315 * increase monotonically. Obviously, writing %tick needs to be done very
316 * carefully to avoid introducing unnecessary %tick skew across CPUs. For
317 * this reason, we make sure we're i-cache hot before actually writing to
318 * %tick.
320 /*ARGSUSED*/
321 void
322 tick_write_delta(uint64_t delta)
325 #else /* lint */
327 #ifdef DEBUG
328 .seg ".text"
329 tick_write_panic:
330 .asciz "tick_write_delta: interrupts already disabled on entry"
331 #endif /* DEBUG */
/*
 * tick_write_delta: add the delta passed in %o0 to the native time
 * register with interrupts disabled. %g1 holds the caller's %pstate for
 * the whole routine and is written back in the delay slot of the return.
 * On DEBUG kernels the routine panics if PSTATE_IE is already clear on
 * entry. The branch to the 16-byte aligned label is there so that the
 * read/add/write sequence is i-cache hot when it executes.
 */
333 ENTRY_NP(tick_write_delta)
334 rdpr %pstate, %g1
335 #ifdef DEBUG
336 andcc %g1, PSTATE_IE, %g0 ! If DEBUG, check that interrupts
337 bnz 0f ! aren't already disabled.
338 sethi %hi(tick_write_panic), %o1
339 save %sp, -SA(MINFRAME), %sp ! get a new window to preserve caller
340 call panic
341 or %i1, %lo(tick_write_panic), %o0
342 #endif /* DEBUG */
343 0: wrpr %g1, PSTATE_IE, %pstate ! Disable interrupts
344 mov %o0, %o2
345 ba 0f ! Branch to cache line-aligned instr.
347 .align 16
348 0: nop ! The next 3 instructions are now hot.
349 DELTA_NATIVE_TIME(%o2, %o3, %o4, %o5, %g2) ! read/inc/write %tick
351 retl ! Return
352 wrpr %g0, %g1, %pstate ! delay: Re-enable interrupts
/* Record the symbol size, as every other routine in this file does. */
SET_SIZE(tick_write_delta)
353 #endif /* lint */
355 #if defined(lint)
357 * return 1 if disabled
361 tickcmpr_disabled(void)
362 { return (0); }
364 #else /* lint */
366 ENTRY_NP(tickcmpr_disabled)
367 RD_TICKCMPR(%g1, %o0)
368 retl
369 srlx %g1, TICKINT_DIS_SHFT, %o0
370 SET_SIZE(tickcmpr_disabled)
372 #endif /* lint */
375 * Get current tick
377 #if defined(lint)
379 u_longlong_t
380 gettick(void)
381 { return (0); }
383 u_longlong_t
384 randtick(void)
385 { return (0); }
387 #else /* lint */
389 ENTRY(gettick)
390 ALTENTRY(randtick)
391 GET_NATIVE_TIME(%o0, %o2, %o3)
392 retl
394 SET_SIZE(randtick)
395 SET_SIZE(gettick)
397 #endif /* lint */
401 * Return the counter portion of the tick register.
404 #if defined(lint)
406 uint64_t
407 gettick_counter(void)
408 { return(0); }
410 #else /* lint */
412 ENTRY_NP(gettick_counter)
413 rdpr %tick, %o0
414 sllx %o0, 1, %o0
415 retl
416 srlx %o0, 1, %o0 ! shake off npt bit
417 SET_SIZE(gettick_counter)
418 #endif /* lint */
421 * Provide a C callable interface to the trap that reads the hi-res timer.
422 * Returns 64-bit nanosecond timestamp in %o0 and %o1.
425 #if defined(lint)
427 hrtime_t
428 gethrtime(void)
430 return ((hrtime_t)0);
433 hrtime_t
434 gethrtime_unscaled(void)
436 return ((hrtime_t)0);
439 hrtime_t
440 gethrtime_max(void)
442 return ((hrtime_t)0);
445 void
446 scalehrtime(hrtime_t *hrt)
448 *hrt = 0;
451 void
452 gethrestime(timespec_t *tp)
454 tp->tv_sec = 0;
455 tp->tv_nsec = 0;
458 time_t
459 gethrestime_sec(void)
461 return (0);
464 void
465 gethrestime_lasttick(timespec_t *tp)
467 tp->tv_sec = 0;
468 tp->tv_nsec = 0;
471 /*ARGSUSED*/
472 void
473 hres_tick(void)
477 void
478 panic_hres_tick(void)
482 #else /* lint */
484 ENTRY_NP(gethrtime)
485 GET_HRTIME(%g1, %o0, %o1, %o2, %o3, %o4, %o5, %g2)
486 ! %g1 = hrtime
487 retl
488 mov %g1, %o0
489 SET_SIZE(gethrtime)
491 ENTRY_NP(gethrtime_unscaled)
492 GET_NATIVE_TIME(%g1, %o2, %o3) ! %g1 = native time
493 retl
494 mov %g1, %o0
495 SET_SIZE(gethrtime_unscaled)
497 ENTRY_NP(gethrtime_waitfree)
498 ALTENTRY(dtrace_gethrtime)
499 GET_NATIVE_TIME(%g1, %o2, %o3) ! %g1 = native time
500 NATIVE_TIME_TO_NSEC(%g1, %o2, %o3)
501 retl
502 mov %g1, %o0
503 SET_SIZE(dtrace_gethrtime)
504 SET_SIZE(gethrtime_waitfree)
506 ENTRY(gethrtime_max)
507 NATIVE_TIME_MAX(%g1)
508 NATIVE_TIME_TO_NSEC(%g1, %o0, %o1)
510 ! hrtime_t's are signed, max hrtime_t must be positive
511 mov -1, %o2
512 brlz,a %g1, 1f
513 srlx %o2, 1, %g1
515 retl
516 mov %g1, %o0
517 SET_SIZE(gethrtime_max)
519 ENTRY(scalehrtime)
520 ldx [%o0], %o1
521 NATIVE_TIME_TO_NSEC(%o1, %o2, %o3)
522 retl
523 stx %o1, [%o0]
524 SET_SIZE(scalehrtime)
527 * Fast trap to return a timestamp, uses trap window, leaves traps
528 * disabled. Returns a 64-bit nanosecond timestamp in %o0 and %o1.
530 * This is the handler for the ST_GETHRTIME trap.
533 ENTRY_NP(get_timestamp)
534 GET_HRTIME(%g1, %g2, %g3, %g4, %g5, %o0, %o1, %o2) ! %g1 = hrtime
535 srlx %g1, 32, %o0 ! %o0 = hi32(%g1)
536 srl %g1, 0, %o1 ! %o1 = lo32(%g1)
537 FAST_TRAP_DONE
538 SET_SIZE(get_timestamp)
541 * Macro to convert GET_HRESTIME() bits into a timestamp.
543 * We use two separate macros so that the platform-dependent GET_HRESTIME()
544 * can be as small as possible; CONV_HRESTIME() implements the generic part.
546 #define CONV_HRESTIME(hrestsec, hrestnsec, adj, nslt, nano) \
547 brz,pt adj, 3f; /* no adjustments, it's easy */ \
548 add hrestnsec, nslt, hrestnsec; /* hrest.tv_nsec += nslt */ \
549 brlz,pn adj, 2f; /* if hrestime_adj negative */ \
550 srlx nslt, ADJ_SHIFT, nslt; /* delay: nslt >>= 4 */ \
551 subcc adj, nslt, %g0; /* hrestime_adj - nslt/16 */ \
552 movg %xcc, nslt, adj; /* adj by min(adj, nslt/16) */ \
553 ba 3f; /* go convert to sec/nsec */ \
554 add hrestnsec, adj, hrestnsec; /* delay: apply adjustment */ \
555 2: addcc adj, nslt, %g0; /* hrestime_adj + nslt/16 */ \
556 bge,a,pt %xcc, 3f; /* is adj less negative? */ \
557 add hrestnsec, adj, hrestnsec; /* yes: hrest.nsec += adj */ \
558 sub hrestnsec, nslt, hrestnsec; /* no: hrest.nsec -= nslt/16 */ \
559 3: cmp hrestnsec, nano; /* more than a billion? */ \
560 bl,pt %xcc, 4f; /* if not, we're done */ \
561 nop; /* delay: do nothing :( */ \
562 add hrestsec, 1, hrestsec; /* hrest.tv_sec++; */ \
563 sub hrestnsec, nano, hrestnsec; /* hrest.tv_nsec -= NANOSEC; */ \
564 ba,a 3b; /* check >= billion again */ \
567 ENTRY_NP(gethrestime)
568 GET_HRESTIME(%o1, %o2, %o3, %o4, %o5, %g1, %g2, %g3, %g4)
569 CONV_HRESTIME(%o1, %o2, %o3, %o4, %o5)
570 stn %o1, [%o0]
571 retl
572 stn %o2, [%o0 + CLONGSIZE]
573 SET_SIZE(gethrestime)
576 * Similar to gethrestime(), but gethrestime_sec() returns current hrestime
577 * seconds.
579 ENTRY_NP(gethrestime_sec)
580 GET_HRESTIME(%o0, %o2, %o3, %o4, %o5, %g1, %g2, %g3, %g4)
581 CONV_HRESTIME(%o0, %o2, %o3, %o4, %o5)
582 retl ! %o0 current hrestime seconds
584 SET_SIZE(gethrestime_sec)
587 * Returns the hrestime on the last tick. This is simpler than gethrestime()
588 * and gethrestime_sec(): no conversion is required. gethrestime_lasttick()
589 * follows the same locking algorithm as GET_HRESTIME and GET_HRTIME,
590 * outlined in detail in clock.h. (Unlike GET_HRESTIME/GET_HRTIME, we don't
591 * rely on load dependencies to effect the membar #LoadLoad, instead declaring
592 * it explicitly.)
594 ENTRY_NP(gethrestime_lasttick)
595 sethi %hi(hres_lock), %o1
597 lduw [%o1 + %lo(hres_lock)], %o2 ! Load lock value
598 membar #LoadLoad ! Load of lock must complete
599 andn %o2, 1, %o2 ! Mask off lowest bit
600 ldn [%o1 + %lo(hrestime)], %g1 ! Seconds.
601 add %o1, %lo(hrestime), %o4
602 ldn [%o4 + CLONGSIZE], %g2 ! Nanoseconds.
603 membar #LoadLoad ! All loads must complete
604 lduw [%o1 + %lo(hres_lock)], %o3 ! Reload lock value
605 cmp %o3, %o2 ! If lock is locked or has
606 bne 0b ! changed, retry.
607 stn %g1, [%o0] ! Delay: store seconds
608 retl
609 stn %g2, [%o0 + CLONGSIZE] ! Delay: store nanoseconds
610 SET_SIZE(gethrestime_lasttick)
613 * Fast trap for gettimeofday(). Returns a timestruc_t in %o0 and %o1.
615 * This is the handler for the ST_GETHRESTIME trap.
618 ENTRY_NP(get_hrestime)
619 GET_HRESTIME(%o0, %o1, %g1, %g2, %g3, %g4, %g5, %o2, %o3)
620 CONV_HRESTIME(%o0, %o1, %g1, %g2, %g3)
621 FAST_TRAP_DONE
622 SET_SIZE(get_hrestime)
625 * Fast trap to return lwp virtual time, uses trap window, leaves traps
626 * disabled. Returns a 64-bit number in %o0:%o1, which is the number
627 * of nanoseconds consumed.
629 * This is the handler for the ST_GETHRVTIME trap.
631 * Register usage:
632 * %o0, %o1 = return lwp virtual time
633 * %o2 = CPU/thread
634 * %o3 = lwp
635 * %g1 = scratch
636 * %g5 = scratch
638 ENTRY_NP(get_virtime)
639 GET_NATIVE_TIME(%g5, %g1, %g2) ! %g5 = native time in ticks
640 CPU_ADDR(%g2, %g3) ! CPU struct ptr to %g2
641 ldn [%g2 + CPU_THREAD], %g2 ! thread pointer to %g2
642 ldn [%g2 + T_LWP], %g3 ! lwp pointer to %g3
645 * Subtract start time of current microstate from time
646 * of day to get increment for lwp virtual time.
648 ldx [%g3 + LWP_STATE_START], %g1 ! ms_state_start
649 sub %g5, %g1, %g5
652 * Add current value of ms_acct[LMS_USER]
654 ldx [%g3 + LWP_ACCT_USER], %g1 ! ms_acct[LMS_USER]
655 add %g5, %g1, %g5
656 NATIVE_TIME_TO_NSEC(%g5, %g1, %o0)
658 srl %g5, 0, %o1 ! %o1 = lo32(%g5)
659 srlx %g5, 32, %o0 ! %o0 = hi32(%g5)
661 FAST_TRAP_DONE
662 SET_SIZE(get_virtime)
666 .seg ".text"
667 hrtime_base_panic:
668 .asciz "hrtime_base stepping back"
671 ENTRY_NP(hres_tick)
672 save %sp, -SA(MINFRAME), %sp ! get a new window
674 sethi %hi(hrestime), %l4
675 ldstub [%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5 ! try locking
676 7: tst %l5
677 bz,pt %xcc, 8f ! if we got it, drive on
678 ld [%l4 + %lo(nsec_scale)], %l5 ! delay: %l5 = scaling factor
679 ldub [%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5
680 9: tst %l5
681 bz,a,pn %xcc, 7b
682 ldstub [%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5
683 ba,pt %xcc, 9b
684 ldub [%l4 + %lo(hres_lock + HRES_LOCK_OFFSET)], %l5
686 membar #StoreLoad|#StoreStore
689 ! update hres_last_tick. %l5 has the scaling factor (nsec_scale).
691 ldx [%l4 + %lo(hrtime_base)], %g1 ! load current hrtime_base
692 GET_NATIVE_TIME(%l0, %l3, %l6) ! current native time
693 stx %l0, [%l4 + %lo(hres_last_tick)]! prev = current
694 ! convert native time to nsecs
695 NATIVE_TIME_TO_NSEC_SCALE(%l0, %l5, %l2, NSEC_SHIFT)
697 sub %l0, %g1, %i1 ! get accurate nsec delta
699 ldx [%l4 + %lo(hrtime_base)], %l1
700 cmp %l1, %l0
701 bg,pn %xcc, 9f
704 stx %l0, [%l4 + %lo(hrtime_base)] ! update hrtime_base
707 ! apply adjustment, if any
709 ldx [%l4 + %lo(hrestime_adj)], %l0 ! %l0 = hrestime_adj
710 brz %l0, 2f
711 ! hrestime_adj == 0 ?
712 ! yes, skip adjustments
713 clr %l5 ! delay: set adj to zero
714 tst %l0 ! is hrestime_adj >= 0 ?
715 bge,pt %xcc, 1f ! yes, go handle positive case
716 srl %i1, ADJ_SHIFT, %l5 ! delay: %l5 = adj
718 addcc %l0, %l5, %g0 ! hrestime_adj < -adj ?
719 bl,pt %xcc, 2f ! yes, use current adj
720 neg %l5 ! delay: %l5 = -adj
721 ba,pt %xcc, 2f
722 mov %l0, %l5 ! no, so set adj = hrestime_adj
724 subcc %l0, %l5, %g0 ! hrestime_adj < adj ?
725 bl,a,pt %xcc, 2f ! yes, set adj = hrestime_adj
726 mov %l0, %l5 ! delay: adj = hrestime_adj
728 ldx [%l4 + %lo(timedelta)], %l0 ! %l0 = timedelta
729 sub %l0, %l5, %l0 ! timedelta -= adj
731 stx %l0, [%l4 + %lo(timedelta)] ! store new timedelta
732 stx %l0, [%l4 + %lo(hrestime_adj)] ! hrestime_adj = timedelta
734 or %l4, %lo(hrestime), %l2
735 ldn [%l2], %i2 ! %i2:%i3 = hrestime sec:nsec
736 ldn [%l2 + CLONGSIZE], %i3
737 add %i3, %l5, %i3 ! hrestime.nsec += adj
738 add %i3, %i1, %i3 ! hrestime.nsec += nslt
740 set NANOSEC, %l5 ! %l5 = NANOSEC
741 cmp %i3, %l5
742 bl,pt %xcc, 5f ! if hrestime.tv_nsec < NANOSEC
743 sethi %hi(one_sec), %i1 ! delay
744 add %i2, 0x1, %i2 ! hrestime.tv_sec++
745 sub %i3, %l5, %i3 ! hrestime.tv_nsec - NANOSEC
746 mov 0x1, %l5
747 st %l5, [%i1 + %lo(one_sec)]
749 stn %i2, [%l2]
750 stn %i3, [%l2 + CLONGSIZE] ! store the new hrestime
752 membar #StoreStore
754 ld [%l4 + %lo(hres_lock)], %i1
755 inc %i1 ! release lock
756 st %i1, [%l4 + %lo(hres_lock)] ! clear hres_lock
759 restore
763 ! release hres_lock
765 ld [%l4 + %lo(hres_lock)], %i1
766 inc %i1
767 st %i1, [%l4 + %lo(hres_lock)]
769 sethi %hi(hrtime_base_panic), %o0
770 call panic
771 or %o0, %lo(hrtime_base_panic), %o0
773 SET_SIZE(hres_tick)
775 #endif /* lint */
777 #if !defined(lint) && !defined(__lint)
779 .seg ".text"
780 kstat_q_panic_msg:
781 .asciz "kstat_q_exit: qlen == 0"
783 ENTRY(kstat_q_panic)
784 save %sp, -SA(MINFRAME), %sp
785 sethi %hi(kstat_q_panic_msg), %o0
786 call panic
787 or %o0, %lo(kstat_q_panic_msg), %o0
788 /*NOTREACHED*/
789 SET_SIZE(kstat_q_panic)
791 #define BRZPN brz,pn
792 #define BRZPT brz,pt
/*
 * KSTAT_Q_UPDATE: update one queue's statistics in the structure at %o0.
 * %g1 must hold the current native time on entry.
 *
 *   QOP     - add or sub, applied to the old queue length and 1
 *   QBR     - branch instruction tested against the old queue length
 *   QZERO   - target of QBR, taken when the old queue length is zero
 *   QRETURN - instruction (optionally preceded by a label) emitted just
 *             before the final store, which then sits in its delay slot
 *             when QRETURN is a retl
 *   QTYPE   - offset-name prefix pasted onto CNT, LASTUPDATE, TIME and
 *             LENTIME
 *
 * The new length is stored in the delay slot of QBR, so it is written
 * whether or not the branch is taken. %o1 - %o5 are clobbered.
 */
794 #define KSTAT_Q_UPDATE(QOP, QBR, QZERO, QRETURN, QTYPE) \
795 ld [%o0 + QTYPE/**/CNT], %o1; /* %o1 = old qlen */ \
796 QOP %o1, 1, %o2; /* %o2 = new qlen */ \
797 QBR %o1, QZERO; /* done if qlen == 0 */ \
798 st %o2, [%o0 + QTYPE/**/CNT]; /* delay: save qlen */ \
799 ldx [%o0 + QTYPE/**/LASTUPDATE], %o3; \
800 ldx [%o0 + QTYPE/**/TIME], %o4; /* %o4 = old time */ \
801 ldx [%o0 + QTYPE/**/LENTIME], %o5; /* %o5 = old lentime */ \
802 sub %g1, %o3, %o2; /* %o2 = time delta */ \
803 mulx %o1, %o2, %o3; /* %o3 = cur lentime */ \
804 add %o4, %o2, %o4; /* %o4 = new time */ \
805 add %o5, %o3, %o5; /* %o5 = new lentime */ \
806 stx %o4, [%o0 + QTYPE/**/TIME]; /* save time */ \
807 stx %o5, [%o0 + QTYPE/**/LENTIME]; /* save lentime */ \
808 QRETURN; \
809 stx %g1, [%o0 + QTYPE/**/LASTUPDATE]; /* lastupdate = now */
811 #if !defined(DEBUG)
813 * same as KSTAT_Q_UPDATE but without:
814 * QBR %o1, QZERO;
815 * to be used only with non-debug build. mimics ASSERT() behaviour.
/*
 * KSTAT_Q_UPDATE_ND: KSTAT_Q_UPDATE with the zero-length branch removed.
 * %g1 must hold the current native time on entry; %o1 - %o5 are clobbered.
 * Because there is no branch, the store of the new queue length is an
 * ordinary instruction here, not a delay slot.
 */
817 #define KSTAT_Q_UPDATE_ND(QOP, QRETURN, QTYPE) \
818 ld [%o0 + QTYPE/**/CNT], %o1; /* %o1 = old qlen */ \
819 QOP %o1, 1, %o2; /* %o2 = new qlen */ \
820 st %o2, [%o0 + QTYPE/**/CNT]; /* save qlen */ \
821 ldx [%o0 + QTYPE/**/LASTUPDATE], %o3; \
822 ldx [%o0 + QTYPE/**/TIME], %o4; /* %o4 = old time */ \
823 ldx [%o0 + QTYPE/**/LENTIME], %o5; /* %o5 = old lentime */ \
824 sub %g1, %o3, %o2; /* %o2 = time delta */ \
825 mulx %o1, %o2, %o3; /* %o3 = cur lentime */ \
826 add %o4, %o2, %o4; /* %o4 = new time */ \
827 add %o5, %o3, %o5; /* %o5 = new lentime */ \
828 stx %o4, [%o0 + QTYPE/**/TIME]; /* save time */ \
829 stx %o5, [%o0 + QTYPE/**/LENTIME]; /* save lentime */ \
830 QRETURN; \
831 stx %g1, [%o0 + QTYPE/**/LASTUPDATE]; /* lastupdate = now */
832 #endif
834 .align 16
/*
 * kstat_waitq_enter: increment the KSTAT_IO_W queue length of the
 * structure at %o0. When the old length was zero the branch skips the
 * time/lentime accumulation, so only the count and lastupdate are written.
 */
835 ENTRY(kstat_waitq_enter)
836 GET_NATIVE_TIME(%g1, %g2, %g3)
837 KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_W)
838 SET_SIZE(kstat_waitq_enter)
840 .align 16
/*
 * kstat_waitq_exit: decrement the KSTAT_IO_W queue length of the structure
 * at %o0 and accumulate time/lentime. DEBUG kernels branch to
 * kstat_q_panic when the old length was zero; non-DEBUG kernels do not
 * check.
 */
841 ENTRY(kstat_waitq_exit)
842 GET_NATIVE_TIME(%g1, %g2, %g3)
843 #if defined(DEBUG)
844 KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, retl, KSTAT_IO_W)
845 #else
846 KSTAT_Q_UPDATE_ND(sub, retl, KSTAT_IO_W)
847 #endif
848 SET_SIZE(kstat_waitq_exit)
850 .align 16
/*
 * kstat_runq_enter: increment the KSTAT_IO_R queue length of the
 * structure at %o0. When the old length was zero the branch skips the
 * time/lentime accumulation, so only the count and lastupdate are written.
 */
851 ENTRY(kstat_runq_enter)
852 GET_NATIVE_TIME(%g1, %g2, %g3)
853 KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_R)
854 SET_SIZE(kstat_runq_enter)
856 .align 16
/*
 * kstat_runq_exit: decrement the KSTAT_IO_R queue length of the structure
 * at %o0 and accumulate time/lentime. DEBUG kernels branch to
 * kstat_q_panic when the old length was zero; non-DEBUG kernels do not
 * check.
 */
857 ENTRY(kstat_runq_exit)
858 GET_NATIVE_TIME(%g1, %g2, %g3)
859 #if defined(DEBUG)
860 KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, retl, KSTAT_IO_R)
861 #else
862 KSTAT_Q_UPDATE_ND(sub, retl, KSTAT_IO_R)
863 #endif
864 SET_SIZE(kstat_runq_exit)
866 .align 16
/*
 * kstat_waitq_to_runq: decrement the KSTAT_IO_W queue, then increment the
 * KSTAT_IO_R queue, using one timestamp in %g1 for both updates. The first
 * expansion passes only a label as QRETURN, so it falls through into the
 * second expansion, which performs the retl.
 */
867 ENTRY(kstat_waitq_to_runq)
868 GET_NATIVE_TIME(%g1, %g2, %g3)
869 #if defined(DEBUG)
870 KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, 1:, KSTAT_IO_W)
871 #else
872 KSTAT_Q_UPDATE_ND(sub, 1:, KSTAT_IO_W)
873 #endif
874 KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_R)
875 SET_SIZE(kstat_waitq_to_runq)
877 .align 16
/*
 * kstat_runq_back_to_waitq: decrement the KSTAT_IO_R queue, then increment
 * the KSTAT_IO_W queue, using one timestamp in %g1 for both updates. The
 * first expansion passes only a label as QRETURN, so it falls through into
 * the second expansion, which performs the retl.
 */
878 ENTRY(kstat_runq_back_to_waitq)
879 GET_NATIVE_TIME(%g1, %g2, %g3)
880 #if defined(DEBUG)
881 KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, 1:, KSTAT_IO_R)
882 #else
883 KSTAT_Q_UPDATE_ND(sub, 1:, KSTAT_IO_R)
884 #endif
885 KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_W)
886 SET_SIZE(kstat_runq_back_to_waitq)
888 #endif /* !(lint || __lint) */
890 #ifdef lint
892 int64_t timedelta;
893 hrtime_t hres_last_tick;
894 volatile timestruc_t hrestime;
895 int64_t hrestime_adj;
896 volatile int hres_lock;
897 uint_t nsec_scale;
898 hrtime_t hrtime_base;
899 int traptrace_use_stick;
901 #else /* lint */
903 * -- WARNING --
905 * The following variables MUST be together on a 128-byte boundary.
906 * In addition to the primary performance motivation (having them all
907 * on the same cache line(s)), code here and in the GET*TIME() macros
908 * assumes that they all have the same high 22 address bits (so
909 * there's only one sethi).
911 .seg ".data"
912 .global timedelta, hres_last_tick, hrestime, hrestime_adj
913 .global hres_lock, nsec_scale, hrtime_base, traptrace_use_stick
914 .global nsec_shift, adj_shift
/*
 * XXX - the WARNING for these variables asks for a 128-byte boundary, but
 * only 64-byte alignment is requested here. hres_tick() reaches hres_lock,
 * nsec_scale, hrtime_base, hres_last_tick, hrestime_adj and timedelta with
 * %lo() offsets from a single sethi %hi(hrestime), so all of them must
 * share the same high address bits.
 * NOTE(review): confirm whether .align 64 is sufficient to guarantee that.
 */
917 .align 64
918 timedelta:
919 .word 0, 0 /* int64_t */
920 hres_last_tick:
921 .word 0, 0 /* hrtime_t */
922 hrestime:
923 .nword 0, 0 /* 2 longs */
924 hrestime_adj:
925 .word 0, 0 /* int64_t */
926 hres_lock:
927 .word 0 /* int */
928 nsec_scale:
929 .word 0 /* uint_t */
930 hrtime_base:
931 .word 0, 0 /* hrtime_t */
932 traptrace_use_stick:
933 .word 0 /* int */
934 nsec_shift:
935 .word NSEC_SHIFT
936 adj_shift:
937 .word ADJ_SHIFT
939 #endif /* lint */
943 * drv_usecwait(clock_t n) [DDI/DKI - section 9F]
944 * usec_delay(int n) [compatibility - should go one day]
945 * Delay by spinning.
947 * delay for n microseconds. numbers <= 0 delay 1 usec
949 * With UltraSPARC-III the combination of supporting mixed-speed CPUs
950 * and variable clock rate for power management requires that we
951 * use %stick to implement this routine.
953 * For OPL platforms that support the "sleep" instruction, we
954 * conditionally (ifdef'ed) insert a "sleep" instruction in
955 * the loop. Note that theoretically we should have moved (duplicated)
956 * the code down to spitfire/us3/opl specific asm files - but this
957 * is a lot of code duplication just to add one "sleep" instruction.
958 * We chose less code duplication for this.
961 #if defined(lint)
963 /*ARGSUSED*/
964 void
965 drv_usecwait(clock_t n)
968 /*ARGSUSED*/
969 void
970 usec_delay(int n)
973 #else /* lint */
975 ENTRY(drv_usecwait)
976 ALTENTRY(usec_delay)
977 brlez,a,pn %o0, 0f
978 mov 1, %o0
980 sethi %hi(sticks_per_usec), %o1
981 lduw [%o1 + %lo(sticks_per_usec)], %o1
982 mulx %o1, %o0, %o1 ! Scale usec to ticks
983 inc %o1 ! We don't start on a tick edge
984 GET_NATIVE_TIME(%o2, %o3, %o4)
985 add %o1, %o2, %o1
988 #ifdef _OPL
989 .word 0x81b01060 ! insert "sleep" instruction
990 #endif /* _OPL */ ! use byte code for now
991 cmp %o1, %o2
992 GET_NATIVE_TIME(%o2, %o3, %o4)
993 bgeu,pt %xcc, 1b
995 retl
997 SET_SIZE(usec_delay)
998 SET_SIZE(drv_usecwait)
999 #endif /* lint */
1001 #if defined(lint)
1003 /* ARGSUSED */
1004 void
1005 pil14_interrupt(int level)
1008 #else /* lint */
1011 * Level-14 interrupt prologue.
1013 ENTRY_NP(pil14_interrupt)
1014 CPU_ADDR(%g1, %g2)
1015 rdpr %pil, %g6 ! %g6 = interrupted PIL
1016 stn %g6, [%g1 + CPU_PROFILE_PIL] ! record interrupted PIL
1017 rdpr %tstate, %g6
1018 rdpr %tpc, %g5
1019 btst TSTATE_PRIV, %g6 ! trap from supervisor mode?
1020 bnz,a,pt %xcc, 1f
1021 stn %g5, [%g1 + CPU_PROFILE_PC] ! if so, record kernel PC
1022 stn %g5, [%g1 + CPU_PROFILE_UPC] ! if not, record user PC
1023 ba pil_interrupt_common ! must be large-disp branch
1024 stn %g0, [%g1 + CPU_PROFILE_PC] ! zero kernel PC
1025 1: ba pil_interrupt_common ! must be large-disp branch
1026 stn %g0, [%g1 + CPU_PROFILE_UPC] ! zero user PC
1027 SET_SIZE(pil14_interrupt)
/*
 * tick_rtt: if TICK_COMPARE is enabled, re-enqueue the level-14 request
 * through intr_enqueue_req with vector interrupts disabled. If TICKINT or
 * STICKINT is pending in SOFTINT it is cleared, and the compare value is
 * checked against the current time; a value that is not in the future is
 * reprogrammed as "now + step", doubling the step until the write lands in
 * the future. All paths leave through current_thread_complete.
 */
1029 ENTRY_NP(tick_rtt)
1031 ! Load TICK_COMPARE into %o5; if bit 63 is set, then TICK_COMPARE is
1032 ! disabled. If TICK_COMPARE is enabled, we know that we need to
1033 ! reenqueue the interrupt request structure. We'll then check TICKINT
1034 ! in SOFTINT; if it's set, then we know that we were in a TICK_COMPARE
1035 ! interrupt. In this case, TICK_COMPARE may have been rewritten
1036 ! recently; we'll compare %o5 to the current time to verify that it's
1037 ! in the future.
1039 ! Note that %o5 is live until after 1f.
1040 ! XXX - there is a subroutine call while %o5 is live!
1042 RD_TICKCMPR(%o5, %g1)
1043 srlx %o5, TICKINT_DIS_SHFT, %g1
1044 brnz,pt %g1, 2f
1047 rdpr %pstate, %g5
1048 andn %g5, PSTATE_IE, %g1
1049 wrpr %g0, %g1, %pstate ! Disable vec interrupts
1051 sethi %hi(cbe_level14_inum), %o1
1052 ldx [%o1 + %lo(cbe_level14_inum)], %o1
1053 call intr_enqueue_req ! preserves %o5 and %g5
1054 mov PIL_14, %o0
1056 ! Check SOFTINT for TICKINT/STICKINT
1057 rd SOFTINT, %o4
1058 set (TICK_INT_MASK | STICK_INT_MASK), %o0
1059 andcc %o4, %o0, %g0
1060 bz,a,pn %icc, 2f
1061 wrpr %g0, %g5, %pstate ! Enable vec interrupts
1063 ! clear TICKINT/STICKINT
1064 wr %o0, CLEAR_SOFTINT
1067 ! Now that we've cleared TICKINT, we can reread %tick and confirm
1068 ! that the value we programmed is still in the future. If it isn't,
1069 ! we need to reprogram TICK_COMPARE to fire as soon as possible.
1071 GET_NATIVE_TIME(%o0, %g1, %g2) ! %o0 = tick
1072 sllx %o0, 1, %o0 ! Clear the DIS bit
1073 srlx %o0, 1, %o0
1074 cmp %o5, %o0 ! In the future?
1075 bg,a,pt %xcc, 2f ! Yes, drive on.
1076 wrpr %g0, %g5, %pstate ! delay: enable vec intr
1079 ! If we're here, then we have programmed TICK_COMPARE with a %tick
1080 ! which is in the past; we'll now load an initial step size, and loop
1081 ! until we've managed to program TICK_COMPARE to fire in the future.
1083 mov 8, %o4 ! 8 = arbitrary initial step
1084 1: add %o0, %o4, %o5 ! Add the step
1085 WR_TICKCMPR(%o5,%g1,%g2,__LINE__) ! Write to TICK_CMPR
1086 GET_NATIVE_TIME(%o0, %g1, %g2) ! %o0 = tick
1087 sllx %o0, 1, %o0 ! Clear the DIS bit
1088 srlx %o0, 1, %o0
1089 cmp %o5, %o0 ! In the future?
1090 bg,a,pt %xcc, 2f ! Yes, drive on.
1091 wrpr %g0, %g5, %pstate ! delay: enable vec intr
1092 ba 1b ! No, try again.
1093 sllx %o4, 1, %o4 ! delay: double step size
1095 2: ba current_thread_complete
1097 SET_SIZE(tick_rtt)
1099 #endif /* lint */
1101 #if defined(lint)
1103 /* ARGSUSED */
1104 void
1105 pil15_interrupt(int level)
1108 #else /* lint */
1111 * Level-15 interrupt prologue.
1113 ENTRY_NP(pil15_interrupt)
1114 CPU_ADDR(%g1, %g2)
1115 rdpr %tstate, %g6
1116 rdpr %tpc, %g5
1117 btst TSTATE_PRIV, %g6 ! trap from supervisor mode?
1118 bnz,a,pt %xcc, 1f
1119 stn %g5, [%g1 + CPU_CPCPROFILE_PC] ! if so, record kernel PC
1120 stn %g5, [%g1 + CPU_CPCPROFILE_UPC] ! if not, record user PC
1121 ba pil15_epilogue ! must be large-disp branch
1122 stn %g0, [%g1 + CPU_CPCPROFILE_PC] ! zero kernel PC
1123 1: ba pil15_epilogue ! must be large-disp branch
1124 stn %g0, [%g1 + CPU_CPCPROFILE_UPC] ! zero user PC
1125 SET_SIZE(pil15_interrupt)
1127 #endif /* lint */
1129 #if defined(lint) || defined(__lint)
1131 /* ARGSUSED */
1132 uint64_t
1133 find_cpufrequency(volatile uchar_t *clock_ptr)
1135 return (0);
1138 #else /* lint */
1140 #ifdef DEBUG
1141 .seg ".text"
1142 find_cpufreq_panic:
1143 .asciz "find_cpufrequency: interrupts already disabled on entry"
1144 #endif /* DEBUG */
/*
 * find_cpufrequency: with interrupts disabled, poll the seconds byte at
 * [%o0], taking a native-time sample on each poll. The sample in %o3 is
 * the one taken around the first change of the seconds value and the
 * sample in %o5 the one around the next change; their difference is
 * returned in %o0. A seconds value of zero after either change restarts
 * the measurement. %g1 holds the caller's %pstate and is restored before
 * returning.
 */
1146 ENTRY_NP(find_cpufrequency)
1147 rdpr %pstate, %g1
1149 #ifdef DEBUG
1150 andcc %g1, PSTATE_IE, %g0 ! If DEBUG, check that interrupts
1151 bnz 0f ! are currently enabled
1152 sethi %hi(find_cpufreq_panic), %o1
1153 call panic
1154 or %o1, %lo(find_cpufreq_panic), %o0
1155 #endif /* DEBUG */
1158 wrpr %g1, PSTATE_IE, %pstate ! Disable interrupts
1160 ldub [%o0], %o1 ! Read the number of seconds
1161 mov %o1, %o2 ! remember initial value in %o2
1163 GET_NATIVE_TIME(%o3, %g4, %g5)
1164 cmp %o1, %o2 ! did the seconds register roll over?
1165 be,pt %icc, 1b ! branch back if unchanged
1166 ldub [%o0], %o2 ! delay: load the new seconds val
1168 brz,pn %o2, 3b ! if the minutes just rolled over,
1169 ! the last second could have been
1170 ! inaccurate; try again.
1171 mov %o2, %o4 ! delay: store init. val. in %o4
1173 GET_NATIVE_TIME(%o5, %g4, %g5)
1174 cmp %o2, %o4 ! did the seconds register roll over?
1175 be,pt %icc, 2b ! branch back if unchanged
1176 ldub [%o0], %o4 ! delay: load the new seconds val
1178 brz,pn %o4, 0b ! if the minutes just rolled over,
1179 ! the last second could have been
1180 ! inaccurate; try again.
1181 wrpr %g0, %g1, %pstate ! delay: re-enable interrupts
1183 retl
1184 sub %o5, %o3, %o0 ! return the difference in ticks
1185 SET_SIZE(find_cpufrequency)
1187 #endif /* lint */
1189 #if defined(lint)
/*
 * Prefetch a page_t for write or read; this assumes a linear
 * scan of sequential page_t's.
 *
 * These are the lint-only stand-ins for the assembly routines; both
 * ignore pp and do nothing.
 */
/*ARGSUSED*/
void
prefetch_page_w(void *pp)
{}

/*ARGSUSED*/
void
prefetch_page_r(void *pp)
{}
1203 #else /* lint */
1205 #if defined(CHEETAH) || defined(CHEETAH_PLUS) || defined(JALAPENO) || \
1206 defined(SERRANO)
/*
 * prefetch_page_w(pp) / prefetch_page_r(pp) for the CPU types selected
 * by the enclosing #if.  Each routine issues two prefetches, at
 * pp + STRIDE1 and pp + STRIDE2, and returns; the second prefetch sits
 * in the delay slot of retl.
 */
	!
	! On US-III, the prefetch instruction queue is 8 entries deep.
	! Also, prefetches for write put data in the E$, which has
	! lines of 512 bytes for an 8MB cache. Each E$ line is further
	! subblocked into 64 byte chunks.
	!
	! Since prefetch can only bring in 64 bytes at a time (See Sparc
	! v9 Architecture Manual pp.204) and a page_t is 128 bytes,
	! then 2 prefetches are required in order to bring an entire
	! page into the E$.
	!
	! Since the prefetch queue is 8 entries deep, we currently can
	! only have 4 prefetches for page_t's outstanding. Thus, we
	! prefetch n+4 ahead of where we are now:
	!
	!      4 * sizeof(page_t)     -> 512
	!      4 * sizeof(page_t) +64 -> 576
	!
	! Example
	! =======
	! contiguous page array in memory...
	!
	! |AAA1|AAA2|BBB1|BBB2|CCC1|CCC2|DDD1|DDD2|XXX1|XXX2|YYY1|YYY2|...
	! ^         ^         ^         ^         ^    ^
	! pp                                      |    pp+4*sizeof(page)+64
	!                                         pp+4*sizeof(page)
	!
	!  Prefetch
	!   Queue
	! +-------+<--- In this iteration, we're working with pp (AAA1),
	! |Preftch|     but we enqueue prefetch for addr = XXX1
	! | XXX1  |
	! +-------+<--- this queue slot will be a prefetch instruction
	! |Preftch|     for addr = pp + 4*sizeof(page_t) + 64 (or second
	! | XXX2  |     half of page XXX)
	! +-------+
	! |Preftch|<-+- The next time around this function, we'll be
	! | YYY1  |  |  working with pp = BBB1, but will be enqueueing
	! +-------+  |  prefetches for both halves of page YYY,
	! |Preftch|  |  while both halves of page XXX are in transit,
	! | YYY2  |<-+  making their way into the E$.
	! +-------+
	! |Preftch|
	! | ZZZ1  |
	! +-------+
	! .       .
	! :       :
	!
	!  E$
	! +============================================...
	! | XXX1 | XXX2 | YYY1 | YYY2 | ZZZ1 | ZZZ2 |
	! +============================================...
	! |      |      |      |      |      |      |
	! +============================================...
	!
	! So we should expect the first four page accesses to stall
	! while we warm up the cache, after which most of the pages
	! will have their pp ready in the E$.
	!
	! Also note that if sizeof(page_t) grows beyond 128, then
	! we'll need an additional prefetch to get an entire page
	! into the E$, thus reducing the number of outstanding page
	! prefetches to 2 (ie. 3 prefetches/page = 6 queue slots)
	! etc.
	!
	! Cheetah+
	! ========
	! On Cheetah+ we use "#n_writes" prefetches as these avoid
	! an unnecessary RTS->RTO bus transaction state change, and
	! just issue an RTO transaction. (See pp.77 of Cheetah+ Delta
	! PRM). On Cheetah, #n_writes prefetches are reflected with
	! RTS->RTO state transition regardless.
	!
#define	STRIDE1 512
#define	STRIDE2 576

	! Build-time check that STRIDE1 still equals four times PAGE_SIZE
	! (PAGE_SIZE is defined outside this file section; the comments
	! above equate it with sizeof (page_t) -- confirm if it changes).
#if	STRIDE1 != (PAGE_SIZE * 4)
#error	"STRIDE1 != (PAGE_SIZE * 4)"
#endif	/* STRIDE1 != (PAGE_SIZE * 4) */

	ENTRY(prefetch_page_w)
	prefetch	[%o0+STRIDE1], #n_writes
	retl
	prefetch	[%o0+STRIDE2], #n_writes
	SET_SIZE(prefetch_page_w)

	!
	! Note on CHEETAH to prefetch for read, we really use #one_write.
	! This fetches to E$ (general use) rather than P$ (floating point use).
	!
	ENTRY(prefetch_page_r)
	prefetch	[%o0+STRIDE1], #one_write
	retl
	prefetch	[%o0+STRIDE2], #one_write
	SET_SIZE(prefetch_page_r)
1306 #elif defined(SPITFIRE) || defined(HUMMINGBIRD)
/*
 * prefetch_page_w(pp) / prefetch_page_r(pp) for the CPU types selected
 * by the enclosing #elif.  Each routine issues two prefetches, at
 * pp + STRIDE1 and pp + STRIDE2, and returns; the second prefetch sits
 * in the delay slot of retl.  The write variant uses #n_writes and the
 * read variant uses #n_reads.
 */
	!
	! UltraSparcII can have up to 3 prefetches outstanding.
	! A page_t is 128 bytes (2 prefetches of 64 bytes each)
	! So prefetch for pp + 1, which is
	!
	!       pp + sizeof(page_t)
	! and
	!       pp + sizeof(page_t) + 64
	!
#define	STRIDE1 128
#define	STRIDE2 192

	! Build-time check that STRIDE1 still equals PAGE_SIZE.
#if	STRIDE1 != PAGE_SIZE
#error	"STRIDE1 != PAGE_SIZE"
#endif	/* STRIDE1 != PAGE_SIZE */

	ENTRY(prefetch_page_w)
	prefetch	[%o0+STRIDE1], #n_writes
	retl
	prefetch	[%o0+STRIDE2], #n_writes
	SET_SIZE(prefetch_page_w)

	ENTRY(prefetch_page_r)
	prefetch	[%o0+STRIDE1], #n_reads
	retl
	prefetch	[%o0+STRIDE2], #n_reads
	SET_SIZE(prefetch_page_r)
1336 #elif defined(OLYMPUS_C)
/*
 * prefetch_page_w(pp) / prefetch_page_r(pp) for Olympus-C.  Each routine
 * issues two prefetches, at pp + STRIDE1 and pp + STRIDE2, and returns;
 * the second prefetch sits in the delay slot of retl.
 *
 * NOTE(review): unlike the other CPU variants, the read routine here
 * also uses #n_writes, and there is no build-time check tying the
 * strides to PAGE_SIZE.  Both are preserved as found -- confirm they
 * are intentional before changing.
 */
	!
	! Prefetch strides for Olympus-C
	!
#define	STRIDE1 0x440
#define	STRIDE2 0x640

	ENTRY(prefetch_page_w)
	prefetch	[%o0+STRIDE1], #n_writes
	retl
	prefetch	[%o0+STRIDE2], #n_writes
	SET_SIZE(prefetch_page_w)

	ENTRY(prefetch_page_r)
	prefetch	[%o0+STRIDE1], #n_writes
	retl
	prefetch	[%o0+STRIDE2], #n_writes
	SET_SIZE(prefetch_page_r)
1355 #else /* OLYMPUS_C */
1357 #error "You need to fix this for your new cpu type."
1359 #endif /* OLYMPUS_C */
1361 #endif /* lint */
1363 #if defined(lint)
/*
 * Prefetch struct smap for write.
 *
 * lint-only stand-in for the assembly routine; it ignores smp and does
 * nothing.
 */
/*ARGSUSED*/
void
prefetch_smap_w(void *smp)
{}
1371 #else /* lint */
#if defined(CHEETAH) || defined(CHEETAH_PLUS) || defined(JALAPENO) || \
	defined(SERRANO)

#define	PREFETCH_Q_LEN 8

#elif defined(SPITFIRE) || defined(HUMMINGBIRD)

#define	PREFETCH_Q_LEN 3

#elif defined(OLYMPUS_C)
	!
	! Use length of one for now.
	!
#define	PREFETCH_Q_LEN	1

#else	/* OLYMPUS_C */

#error You need to fix this for your new cpu type.

#endif	/* OLYMPUS_C */

#include <vm/kpm.h>

/*
 * SMAP_STRIDE is how far behind smp (in bytes) prefetch_smap_w reaches.
 * Evaluating the definitions below with integer arithmetic gives:
 *
 *	PREFETCH_Q_LEN		SEGKPM_SUPPORT		otherwise
 *		8		     448		   384
 *		3		     128		   144
 *		1		       0		    48
 *
 * NOTE(review): with SEGKPM_SUPPORT and PREFETCH_Q_LEN == 1 the stride
 * works out to 0, i.e. the prefetch targets smp itself -- confirm this
 * is intended.
 */
#ifdef	SEGKPM_SUPPORT

#define	SMAP_SIZE	72
#define	SMAP_STRIDE	(((PREFETCH_Q_LEN * 64) / SMAP_SIZE) * 64)

#else	/* SEGKPM_SUPPORT */

	!
	! The hardware will prefetch the 64 byte cache aligned block
	! that contains the address specified in the prefetch instruction.
	! Since the size of the smap struct is 48 bytes, issuing 1 prefetch
	! per pass will suffice as long as we prefetch far enough ahead to
	! make sure we don't stall for the cases where the smap object
	! spans multiple hardware prefetch blocks.  Let's prefetch as far
	! ahead as the hardware will allow.
	!
	! The smap array is processed with decreasing address pointers.
	!
#define	SMAP_SIZE	48
#define	SMAP_STRIDE	(PREFETCH_Q_LEN * SMAP_SIZE)

#endif	/* SEGKPM_SUPPORT */

/*
 * prefetch_smap_w(smp): issue a single #n_writes prefetch for the
 * address smp - SMAP_STRIDE (in the delay slot of retl) and return.
 */
	ENTRY(prefetch_smap_w)
	retl
	prefetch [%o0-SMAP_STRIDE], #n_writes
	SET_SIZE(prefetch_smap_w)
1424 #endif /* lint */
1426 #if defined(lint) || defined(__lint)
/*
 * lint-only stand-in for the assembly routine getidsr; always returns 0.
 */
/* ARGSUSED */
uint64_t
getidsr(void)
{
	return (0);
}
1433 #else /* lint */
/*
 * uint64_t getidsr(void)
 *
 * Returns the 64-bit value loaded from address 0 (%g0) in the
 * ASI_INTR_DISPATCH_STATUS address space.  The load executes in the
 * delay slot of retl, so the result is in %o0 when the caller resumes.
 */
	ENTRY_NP(getidsr)
	retl
	ldxa	[%g0]ASI_INTR_DISPATCH_STATUS, %o0
	SET_SIZE(getidsr)
1440 #endif /* lint */