/*
 * Copyright (c) 2003 Peter Wemm.
 * Copyright (c) 1993 The Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: src/sys/amd64/include/cpufunc.h,v 1.139 2004/01/28 23:53:04 peter Exp $
 * $DragonFly: src/sys/cpu/amd64/include/cpufunc.h,v 1.2 2007/09/23 04:29:30 yanyh Exp $
 */

/*
 * Functions to provide access to special i386 instructions.
 * This is included in sys/systm.h, and that file should be
 * used in preference to this.
 */
44 #ifndef _CPU_CPUFUNC_H_
45 #define _CPU_CPUFUNC_H_
47 #include <sys/cdefs.h>
48 #include <machine/psl.h>
/* Forward declaration; full definition lives in <machine/segments.h>. */
struct region_descriptor;
/*
 * Memory-mapped I/O accessors.  The volatile-qualified pointer forces the
 * compiler to perform exactly one access of the stated width per use.
 */
#define	readb(va)	(*(volatile u_int8_t *) (va))
#define	readw(va)	(*(volatile u_int16_t *) (va))
#define	readl(va)	(*(volatile u_int32_t *) (va))
#define	readq(va)	(*(volatile u_int64_t *) (va))

#define	writeb(va, d)	(*(volatile u_int8_t *) (va) = (d))
#define	writew(va, d)	(*(volatile u_int16_t *) (va) = (d))
#define	writel(va, d)	(*(volatile u_int32_t *) (va) = (d))
#define	writeq(va, d)	(*(volatile u_int64_t *) (va) = (d))
/*
 * breakpoint() - trap into the debugger (int $3).
 */
static __inline void
breakpoint(void)
{
	__asm __volatile("int $3");
}

/*
 * cpu_pause() - spin-wait hint; reduces power and improves performance of
 * busy-wait loops on hyperthreaded cpus.
 */
static __inline void
cpu_pause(void)
{
	__asm __volatile("pause");
}
83 __asm
__volatile("bsfl %1,%0" : "=r" (result
) : "rm" (mask
));
87 static __inline u_long
92 __asm
__volatile("bsfq %1,%0" : "=r" (result
) : "rm" (mask
));
101 __asm
__volatile("bsrl %1,%0" : "=r" (result
) : "rm" (mask
));
105 static __inline u_long
110 __asm
__volatile("bsrq %1,%0" : "=r" (result
) : "rm" (mask
));
117 __asm
__volatile("cli" : : : "memory");
121 do_cpuid(u_int ax
, u_int
*p
)
123 __asm
__volatile("cpuid"
124 : "=a" (p
[0]), "=b" (p
[1]), "=c" (p
[2]), "=d" (p
[3])
129 cpuid_count(u_int ax
, u_int cx
, u_int
*p
)
131 __asm
__volatile("cpuid"
132 : "=a" (p
[0]), "=b" (p
[1]), "=c" (p
[2]), "=d" (p
[3])
133 : "0" (ax
), "c" (cx
));
137 cpu_enable_intr(void)
139 __asm
__volatile("sti");
/*
 * Cpu and compiler memory ordering fence.  mfence ensures strong read and
 * write ordering.
 *
 * A serializing or fence instruction is required here.  A locked bus
 * cycle on data for which we already own cache mastership is the most
 * portable.
 *
 * NOTE(review): the original used "(%%esp)", an i386 leftover; in 64 bit
 * mode the stack pointer is %rsp, so the locked dummy op targets (%%rsp).
 */
static __inline void
cpu_mfence(void)
{
#ifdef SMP
	__asm __volatile("lock; addl $0,(%%rsp)" : : : "memory");
#else
	__asm __volatile("" : : : "memory");
#endif
}

/*
 * cpu_lfence() ensures strong read ordering for reads issued prior
 * to the instruction verses reads issued afterwords.
 *
 * A serializing or fence instruction is required here.  A locked bus
 * cycle on data for which we already own cache mastership is the most
 * portable.
 */
static __inline void
cpu_lfence(void)
{
#ifdef SMP
	__asm __volatile("lock; addl $0,(%%rsp)" : : : "memory");
#else
	__asm __volatile("" : : : "memory");
#endif
}

/*
 * cpu_sfence() ensures strong write ordering for writes issued prior
 * to the instruction verses writes issued afterwords.  Writes are
 * ordered on intel cpus so we do not actually have to do anything.
 */
static __inline void
cpu_sfence(void)
{
	__asm __volatile("" : : : "memory");
}

/*
 * cpu_ccfence() prevents the compiler from reordering instructions, in
 * particular stores, relative to the current cpu.  Use cpu_sfence() if
 * you need to guarentee ordering by both the compiler and by the cpu.
 *
 * This also prevents the compiler from caching memory loads into local
 * variables across the routine.
 */
static __inline void
cpu_ccfence(void)
{
	__asm __volatile("" : : : "memory");
}
205 #define HAVE_INLINE_FFS
212 * Note that gcc-2's builtin ffs would be used if we didn't declare
213 * this inline or turn off the builtin. The builtin is faster but
214 * broken in gcc-2.4.5 and slower but working in gcc-2.5 and later
217 return (mask
== 0 ? mask
: (int)bsfl((u_int
)mask
) + 1);
219 /* Actually, the above is way out of date. The builtins use cmov etc */
220 return (__builtin_ffs(mask
));
224 #define HAVE_INLINE_FFSL
229 return (mask
== 0 ? mask
: (int)bsfq((u_long
)mask
) + 1);
232 #define HAVE_INLINE_FLS
237 return (mask
== 0 ? mask
: (int)bsrl((u_int
)mask
) + 1);
240 #define HAVE_INLINE_FLSL
245 return (mask
== 0 ? mask
: (int)bsrq((u_long
)mask
) + 1);
/*
 * halt() - stop the cpu until the next interrupt arrives.
 */
static __inline void
halt(void)
{
	__asm __volatile("hlt");
}
/*
 * The following complications are to get around gcc not having a
 * constraint letter for the range 0..255.  We still put "d" in the
 * constraint because "i" isn't a valid constraint when the port
 * isn't constant.  This only matters for -O0 because otherwise
 * the non-working version gets optimized away.
 *
 * Use an expression-statement instead of a conditional expression
 * because gcc-2.6.0 would promote the operands of the conditional
 * and produce poor code for "if ((inb(var) & const1) == const2)".
 *
 * The unnecessary test `(port) < 0x10000' is to generate a warning if
 * the `port' has type u_short or smaller.  Such types are pessimal.
 * This actually only works for signed types.  The range check is
 * careful to avoid generating warnings.
 */
#define	inb(port) __extension__ ({					\
	u_char	_data;							\
	if (__builtin_constant_p(port) && ((port) & 0xffff) < 0x100	\
	    && (port) < 0x10000)					\
		_data = inbc(port);					\
	else								\
		_data = inbv(port);					\
	_data; })

#define	outb(port, data) (						\
	__builtin_constant_p(port) && ((port) & 0xffff) < 0x100		\
	    && (port) < 0x10000						\
	? outbc(port, data) : outbv(port, data))
286 static __inline u_char
291 __asm
__volatile("inb %1,%0" : "=a" (data
) : "id" ((u_short
)(port
)));
296 outbc(u_int port
, u_char data
)
298 __asm
__volatile("outb %0,%1" : : "a" (data
), "id" ((u_short
)(port
)));
301 static __inline u_char
306 * We use %%dx and not %1 here because i/o is done at %dx and not at
307 * %edx, while gcc generates inferior code (movw instead of movl)
308 * if we tell it to load (u_short) port.
310 __asm
__volatile("inb %%dx,%0" : "=a" (data
) : "d" (port
));
314 static __inline u_int
319 __asm
__volatile("inl %%dx,%0" : "=a" (data
) : "d" (port
));
324 insb(u_int port
, void *addr
, size_t cnt
)
326 __asm
__volatile("cld; rep; insb"
327 : "+D" (addr
), "+c" (cnt
)
333 insw(u_int port
, void *addr
, size_t cnt
)
335 __asm
__volatile("cld; rep; insw"
336 : "+D" (addr
), "+c" (cnt
)
342 insl(u_int port
, void *addr
, size_t cnt
)
344 __asm
__volatile("cld; rep; insl"
345 : "+D" (addr
), "+c" (cnt
)
353 __asm
__volatile("invd");
356 static __inline u_short
361 __asm
__volatile("inw %%dx,%0" : "=a" (data
) : "d" (port
));
365 static __inline u_int
366 loadandclear(volatile u_int
*addr
)
370 __asm
__volatile("xorl %0,%0; xchgl %1,%0"
371 : "=&r" (result
) : "m" (*addr
));
376 outbv(u_int port
, u_char data
)
380 * Use an unnecessary assignment to help gcc's register allocator.
381 * This make a large difference for gcc-1.40 and a tiny difference
382 * for gcc-2.6.0. For gcc-1.40, al had to be ``asm("ax")'' for
383 * best results. gcc-2.6.0 can't handle this.
386 __asm
__volatile("outb %0,%%dx" : : "a" (al
), "d" (port
));
390 outl(u_int port
, u_int data
)
393 * outl() and outw() aren't used much so we haven't looked at
394 * possible micro-optimizations such as the unnecessary
395 * assignment for them.
397 __asm
__volatile("outl %0,%%dx" : : "a" (data
), "d" (port
));
401 outsb(u_int port
, const void *addr
, size_t cnt
)
403 __asm
__volatile("cld; rep; outsb"
404 : "+S" (addr
), "+c" (cnt
)
409 outsw(u_int port
, const void *addr
, size_t cnt
)
411 __asm
__volatile("cld; rep; outsw"
412 : "+S" (addr
), "+c" (cnt
)
417 outsl(u_int port
, const void *addr
, size_t cnt
)
419 __asm
__volatile("cld; rep; outsl"
420 : "+S" (addr
), "+c" (cnt
)
425 outw(u_int port
, u_short data
)
427 __asm
__volatile("outw %0,%%dx" : : "a" (data
), "d" (port
));
433 __asm
__volatile("pause");
436 static __inline u_long
441 __asm
__volatile("pushfq; popq %0" : "=r" (rf
));
445 static __inline u_int64_t
450 __asm
__volatile("rdmsr" : "=a" (low
), "=d" (high
) : "c" (msr
));
451 return (low
| ((u_int64_t
)high
<< 32));
454 static __inline u_int64_t
459 __asm
__volatile("rdpmc" : "=a" (low
), "=d" (high
) : "c" (pmc
));
460 return (low
| ((u_int64_t
)high
<< 32));
463 static __inline u_int64_t
468 __asm
__volatile("rdtsc" : "=a" (low
), "=d" (high
));
469 return (low
| ((u_int64_t
)high
<< 32));
475 __asm
__volatile("wbinvd");
479 write_rflags(u_long rf
)
481 __asm
__volatile("pushq %0; popfq" : : "r" (rf
));
485 wrmsr(u_int msr
, u_int64_t newval
)
491 __asm
__volatile("wrmsr" : : "a" (low
), "d" (high
), "c" (msr
));
495 load_cr0(u_long data
)
498 __asm
__volatile("movq %0,%%cr0" : : "r" (data
));
501 static __inline u_long
506 __asm
__volatile("movq %%cr0,%0" : "=r" (data
));
510 static __inline u_long
515 __asm
__volatile("movq %%cr2,%0" : "=r" (data
));
520 load_cr3(u_long data
)
523 __asm
__volatile("movq %0,%%cr3" : : "r" (data
) : "memory");
526 static __inline u_long
531 __asm
__volatile("movq %%cr3,%0" : "=r" (data
));
536 load_cr4(u_long data
)
538 __asm
__volatile("movq %0,%%cr4" : : "r" (data
));
541 static __inline u_long
546 __asm
__volatile("movq %%cr4,%0" : "=r" (data
));
/*
 * Global TLB flush (except for those for pages marked PG_G)
 */
static __inline void
cpu_invltlb(void)
{
	load_cr3(rcr3());
}

/*
 * TLB flush for an individual page (even if it has PG_G).
 * Only works on 486+ CPUs (i386 does not have PG_G).
 */
static __inline void
cpu_invlpg(void *addr)
{
	__asm __volatile("invlpg %0" : : "m" (*(char *)addr) : "memory");
}
571 static __inline u_int
575 __asm
__volatile("movl %%fs,%0" : "=rm" (sel
));
579 static __inline u_int
583 __asm
__volatile("movl %%gs,%0" : "=rm" (sel
));
590 __asm
__volatile("movl %0,%%ds" : : "rm" (sel
));
596 __asm
__volatile("movl %0,%%es" : : "rm" (sel
));
600 /* This is defined in <machine/specialreg.h> but is too painful to get to */
602 #define MSR_FSBASE 0xc0000100
607 register u_int32_t fsbase
__asm("ecx");
609 /* Preserve the fsbase value across the selector load */
611 __asm
__volatile("rdmsr; movl %0,%%fs; wrmsr"
612 : : "rm" (sel
), "c" (fsbase
) : "eax", "edx");
616 #define MSR_GSBASE 0xc0000101
621 register u_int32_t gsbase
__asm("ecx");
624 * Preserve the gsbase value across the selector load.
625 * Note that we have to disable interrupts because the gsbase
626 * being trashed happens to be the kernel gsbase at the time.
629 __asm
__volatile("pushfq; cli; rdmsr; movl %0,%%gs; wrmsr; popfq"
630 : : "rm" (sel
), "c" (gsbase
) : "eax", "edx");
633 /* Usable by userland */
637 __asm
__volatile("movl %0,%%fs" : : "rm" (sel
));
643 __asm
__volatile("movl %0,%%gs" : : "rm" (sel
));
647 /* void lidt(struct region_descriptor *addr); */
649 lidt(struct region_descriptor
*addr
)
651 __asm
__volatile("lidt (%0)" : : "r" (addr
));
654 /* void lldt(u_short sel); */
658 __asm
__volatile("lldt %0" : : "r" (sel
));
661 /* void ltr(u_short sel); */
665 __asm
__volatile("ltr %0" : : "r" (sel
));
668 static __inline u_int64_t
672 __asm
__volatile("movq %%dr0,%0" : "=r" (data
));
677 load_dr0(u_int64_t dr0
)
679 __asm
__volatile("movq %0,%%dr0" : : "r" (dr0
));
682 static __inline u_int64_t
686 __asm
__volatile("movq %%dr1,%0" : "=r" (data
));
691 load_dr1(u_int64_t dr1
)
693 __asm
__volatile("movq %0,%%dr1" : : "r" (dr1
));
696 static __inline u_int64_t
700 __asm
__volatile("movq %%dr2,%0" : "=r" (data
));
705 load_dr2(u_int64_t dr2
)
707 __asm
__volatile("movq %0,%%dr2" : : "r" (dr2
));
710 static __inline u_int64_t
714 __asm
__volatile("movq %%dr3,%0" : "=r" (data
));
719 load_dr3(u_int64_t dr3
)
721 __asm
__volatile("movq %0,%%dr3" : : "r" (dr3
));
724 static __inline u_int64_t
728 __asm
__volatile("movq %%dr4,%0" : "=r" (data
));
733 load_dr4(u_int64_t dr4
)
735 __asm
__volatile("movq %0,%%dr4" : : "r" (dr4
));
738 static __inline u_int64_t
742 __asm
__volatile("movq %%dr5,%0" : "=r" (data
));
747 load_dr5(u_int64_t dr5
)
749 __asm
__volatile("movq %0,%%dr5" : : "r" (dr5
));
752 static __inline u_int64_t
756 __asm
__volatile("movq %%dr6,%0" : "=r" (data
));
761 load_dr6(u_int64_t dr6
)
763 __asm
__volatile("movq %0,%%dr6" : : "r" (dr6
));
766 static __inline u_int64_t
770 __asm
__volatile("movq %%dr7,%0" : "=r" (data
));
775 load_dr7(u_int64_t dr7
)
777 __asm
__volatile("movq %0,%%dr7" : : "r" (dr7
));
780 static __inline register_t
785 rflags
= read_rflags();
791 intr_restore(register_t rflags
)
793 write_rflags(rflags
);
796 #else /* !__GNUC__ */
798 int breakpoint(void);
799 void cpu_pause(void);
800 u_int
bsfl(u_int mask
);
801 u_int
bsrl(u_int mask
);
802 void cpu_invlpg(u_long addr
);
803 void cpu_invlpg_range(u_long start
, u_long end
);
804 void disable_intr(void);
805 void do_cpuid(u_int ax
, u_int
*p
);
806 void enable_intr(void);
808 u_char
inb(u_int port
);
809 u_int
inl(u_int port
);
810 void insb(u_int port
, void *addr
, size_t cnt
);
811 void insl(u_int port
, void *addr
, size_t cnt
);
812 void insw(u_int port
, void *addr
, size_t cnt
);
814 void invlpg(u_int addr
);
815 void invlpg_range(u_int start
, u_int end
);
817 u_short
inw(u_int port
);
818 void load_cr0(u_int cr0
);
819 void load_cr3(u_int cr3
);
820 void load_cr4(u_int cr4
);
821 void load_fs(u_int sel
);
822 void load_gs(u_int sel
);
823 struct region_descriptor
;
824 void lidt(struct region_descriptor
*addr
);
825 void lldt(u_short sel
);
826 void ltr(u_short sel
);
827 void outb(u_int port
, u_char data
);
828 void outl(u_int port
, u_int data
);
829 void outsb(u_int port
, void *addr
, size_t cnt
);
830 void outsl(u_int port
, void *addr
, size_t cnt
);
831 void outsw(u_int port
, void *addr
, size_t cnt
);
832 void outw(u_int port
, u_short data
);
833 void ia32_pause(void);
840 u_int64_t
rdmsr(u_int msr
);
841 u_int64_t
rdpmc(u_int pmc
);
842 u_int64_t
rdtsc(void);
843 u_int
read_rflags(void);
845 void write_rflags(u_int rf
);
846 void wrmsr(u_int msr
, u_int64_t newval
);
847 u_int64_t
rdr0(void);
848 void load_dr0(u_int64_t dr0
);
849 u_int64_t
rdr1(void);
850 void load_dr1(u_int64_t dr1
);
851 u_int64_t
rdr2(void);
852 void load_dr2(u_int64_t dr2
);
853 u_int64_t
rdr3(void);
854 void load_dr3(u_int64_t dr3
);
855 u_int64_t
rdr4(void);
856 void load_dr4(u_int64_t dr4
);
857 u_int64_t
rdr5(void);
858 void load_dr5(u_int64_t dr5
);
859 u_int64_t
rdr6(void);
860 void load_dr6(u_int64_t dr6
);
861 u_int64_t
rdr7(void);
862 void load_dr7(u_int64_t dr7
);
863 register_t
intr_disable(void);
864 void intr_restore(register_t rf
);
866 #endif /* __GNUC__ */
868 void reset_dbregs(void);
872 #endif /* !_CPU_CPUFUNC_H_ */