amd64 port: mainly on the pmap headers, identify_cpu and initcpu
[dragonfly/port-amd64.git] sys/cpu/amd64/include/cpufunc.h
/*-
 * Copyright (c) 2003 Peter Wemm.
 * Copyright (c) 1993 The Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: src/sys/amd64/include/cpufunc.h,v 1.139 2004/01/28 23:53:04 peter Exp $
 * $DragonFly: src/sys/cpu/amd64/include/cpufunc.h,v 1.2 2007/09/23 04:29:30 yanyh Exp $
 */
/*
 * Functions to provide access to special i386 instructions.
 * This is included in sys/systm.h, and that file should be
 * used in preference to this.
 */
#ifndef _CPU_CPUFUNC_H_
#define	_CPU_CPUFUNC_H_

#include <sys/cdefs.h>
#include <machine/psl.h>

struct thread;
struct region_descriptor;

__BEGIN_DECLS
#define readb(va)	(*(volatile u_int8_t *) (va))
#define readw(va)	(*(volatile u_int16_t *) (va))
#define readl(va)	(*(volatile u_int32_t *) (va))
#define readq(va)	(*(volatile u_int64_t *) (va))

#define writeb(va, d)	(*(volatile u_int8_t *) (va) = (d))
#define writew(va, d)	(*(volatile u_int16_t *) (va) = (d))
#define writel(va, d)	(*(volatile u_int32_t *) (va) = (d))
#define writeq(va, d)	(*(volatile u_int64_t *) (va) = (d))
#ifdef	__GNUC__

static __inline void
breakpoint(void)
{
	__asm __volatile("int $3");
}

static __inline void
cpu_pause(void)
{
	__asm __volatile("pause");
}

static __inline u_int
bsfl(u_int mask)
{
	u_int	result;

	__asm __volatile("bsfl %1,%0" : "=r" (result) : "rm" (mask));
	return (result);
}

static __inline u_long
bsfq(u_long mask)
{
	u_long	result;

	__asm __volatile("bsfq %1,%0" : "=r" (result) : "rm" (mask));
	return (result);
}

static __inline u_int
bsrl(u_int mask)
{
	u_int	result;

	__asm __volatile("bsrl %1,%0" : "=r" (result) : "rm" (mask));
	return (result);
}

static __inline u_long
bsrq(u_long mask)
{
	u_long	result;

	__asm __volatile("bsrq %1,%0" : "=r" (result) : "rm" (mask));
	return (result);
}

static __inline void
disable_intr(void)
{
	__asm __volatile("cli" : : : "memory");
}

static __inline void
do_cpuid(u_int ax, u_int *p)
{
	__asm __volatile("cpuid"
			 : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3])
			 :  "0" (ax));
}

static __inline void
cpuid_count(u_int ax, u_int cx, u_int *p)
{
	__asm __volatile("cpuid"
			 : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3])
			 :  "0" (ax), "c" (cx));
}

static __inline void
cpu_enable_intr(void)
{
	__asm __volatile("sti");
}
/*
 * Cpu and compiler memory ordering fence.  mfence ensures strong read and
 * write ordering.
 *
 * A serializing or fence instruction is required here.  A locked bus
 * cycle on data for which we already own cache mastership is the most
 * portable.
 */
static __inline void
cpu_mfence(void)
{
#ifdef SMP
	/* use %rsp, not %esp: a 32-bit address override would truncate
	 * the (high) kernel stack address on amd64 */
	__asm __volatile("lock; addl $0,(%%rsp)" : : : "memory");
#else
	__asm __volatile("" : : : "memory");
#endif
}
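/*
 * Illustrative sketch (not part of the original header): a producer
 * publishing data for another cpu.  The fence orders the data store
 * ahead of the ready-flag store; the helper name is hypothetical.
 */
static __inline void
example_publish(volatile u_int *data, volatile u_int *ready, u_int val)
{
	*data = val;
	cpu_mfence();		/* data must be visible before the flag */
	*ready = 1;
}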
/*
 * cpu_lfence() ensures strong read ordering for reads issued prior
 * to the instruction versus reads issued afterwards.
 *
 * A serializing or fence instruction is required here.  A locked bus
 * cycle on data for which we already own cache mastership is the most
 * portable.
 */
static __inline void
cpu_lfence(void)
{
#ifdef SMP
	__asm __volatile("lock; addl $0,(%%rsp)" : : : "memory");
#else
	__asm __volatile("" : : : "memory");
#endif
}
/*
 * cpu_sfence() ensures strong write ordering for writes issued prior
 * to the instruction versus writes issued afterwards.  Writes are
 * ordered on intel cpus so we do not actually have to do anything.
 */
static __inline void
cpu_sfence(void)
{
	__asm __volatile("" : : : "memory");
}
/*
 * cpu_ccfence() prevents the compiler from reordering instructions, in
 * particular stores, relative to the current cpu.  Use cpu_sfence() if
 * you need to guarantee ordering by both the compiler and by the cpu.
 *
 * This also prevents the compiler from caching memory loads into local
 * variables across the routine.
 */
static __inline void
cpu_ccfence(void)
{
	__asm __volatile("" : : : "memory");
}
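/*
 * Illustrative sketch (not part of the original header): busy-wait on
 * a flag written by an interrupt handler.  Without the compiler fence
 * gcc could cache *flag in a register and spin forever; the helper
 * name is hypothetical.
 */
static __inline void
example_spin_wait(int *flag)
{
	while (*flag == 0)
		cpu_ccfence();	/* force *flag to be reloaded each pass */
}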
#ifdef _KERNEL

#define	HAVE_INLINE_FFS

static __inline int
ffs(int mask)
{
#if 0
	/*
	 * Note that gcc-2's builtin ffs would be used if we didn't declare
	 * this inline or turn off the builtin.  The builtin is faster but
	 * broken in gcc-2.4.5 and slower but working in gcc-2.5 and later
	 * versions.
	 */
	return (mask == 0 ? mask : (int)bsfl((u_int)mask) + 1);
#else
	/* Actually, the above is way out of date.  The builtins use cmov etc */
	return (__builtin_ffs(mask));
#endif
}

#define	HAVE_INLINE_FFSL

static __inline int
ffsl(long mask)
{
	return (mask == 0 ? mask : (int)bsfq((u_long)mask) + 1);
}

#define	HAVE_INLINE_FLS

static __inline int
fls(int mask)
{
	return (mask == 0 ? mask : (int)bsrl((u_int)mask) + 1);
}

#define	HAVE_INLINE_FLSL

static __inline int
flsl(long mask)
{
	return (mask == 0 ? mask : (int)bsrq((u_long)mask) + 1);
}
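/*
 * Illustrative note (not part of the original header): these return
 * 1-based bit indices and 0 for an empty mask, so for mask 0x18
 * (binary 11000) ffsl(0x18) == 4 and flsl(0x18) == 5.
 */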
#endif	/* _KERNEL */
static __inline void
halt(void)
{
	__asm __volatile("hlt");
}
/*
 * The following complications are to get around gcc not having a
 * constraint letter for the range 0..255.  We still put "d" in the
 * constraint because "i" isn't a valid constraint when the port
 * isn't constant.  This only matters for -O0 because otherwise
 * the non-working version gets optimized away.
 *
 * Use an expression-statement instead of a conditional expression
 * because gcc-2.6.0 would promote the operands of the conditional
 * and produce poor code for "if ((inb(var) & const1) == const2)".
 *
 * The unnecessary test `(port) < 0x10000' is to generate a warning if
 * the `port' has type u_short or smaller.  Such types are pessimal.
 * This actually only works for signed types.  The range check is
 * careful to avoid generating warnings.
 */
#define	inb(port) __extension__ ({					\
	u_char	_data;							\
	if (__builtin_constant_p(port) && ((port) & 0xffff) < 0x100	\
	    && (port) < 0x10000)					\
		_data = inbc(port);					\
	else								\
		_data = inbv(port);					\
	_data; })
#define	outb(port, data) (						\
	__builtin_constant_p(port) && ((port) & 0xffff) < 0x100		\
	&& (port) < 0x10000						\
	? outbc(port, data) : outbv(port, data))
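/*
 * Illustrative sketch (not part of the original header): a constant
 * port selects the one-byte immediate forms at compile time, while a
 * variable port falls back to the %dx forms, e.g.:
 *
 *	u_char st = inb(0x64);		(expands to inbc(0x64))
 *	u_char v = inb(io_base + 4);	(expands to inbv(io_base + 4))
 */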
static __inline u_char
inbc(u_int port)
{
	u_char	data;

	__asm __volatile("inb %1,%0" : "=a" (data) : "id" ((u_short)(port)));
	return (data);
}

static __inline void
outbc(u_int port, u_char data)
{
	__asm __volatile("outb %0,%1" : : "a" (data), "id" ((u_short)(port)));
}

static __inline u_char
inbv(u_int port)
{
	u_char	data;
	/*
	 * We use %%dx and not %1 here because i/o is done at %dx and not at
	 * %edx, while gcc generates inferior code (movw instead of movl)
	 * if we tell it to load (u_short) port.
	 */
	__asm __volatile("inb %%dx,%0" : "=a" (data) : "d" (port));
	return (data);
}
static __inline u_int
inl(u_int port)
{
	u_int	data;

	__asm __volatile("inl %%dx,%0" : "=a" (data) : "d" (port));
	return (data);
}
static __inline void
insb(u_int port, void *addr, size_t cnt)
{
	__asm __volatile("cld; rep; insb"
			 : "+D" (addr), "+c" (cnt)
			 : "d" (port)
			 : "memory");
}

static __inline void
insw(u_int port, void *addr, size_t cnt)
{
	__asm __volatile("cld; rep; insw"
			 : "+D" (addr), "+c" (cnt)
			 : "d" (port)
			 : "memory");
}

static __inline void
insl(u_int port, void *addr, size_t cnt)
{
	__asm __volatile("cld; rep; insl"
			 : "+D" (addr), "+c" (cnt)
			 : "d" (port)
			 : "memory");
}
static __inline void
invd(void)
{
	__asm __volatile("invd");
}

static __inline u_short
inw(u_int port)
{
	u_short	data;

	__asm __volatile("inw %%dx,%0" : "=a" (data) : "d" (port));
	return (data);
}
static __inline u_int
loadandclear(volatile u_int *addr)
{
	u_int	result;

	__asm __volatile("xorl %0,%0; xchgl %1,%0"
			 : "=&r" (result) : "m" (*addr));
	return (result);
}
static __inline void
outbv(u_int port, u_char data)
{
	u_char	al;
	/*
	 * Use an unnecessary assignment to help gcc's register allocator.
	 * This makes a large difference for gcc-1.40 and a tiny difference
	 * for gcc-2.6.0.  For gcc-1.40, al had to be ``asm("ax")'' for
	 * best results.  gcc-2.6.0 can't handle this.
	 */
	al = data;
	__asm __volatile("outb %0,%%dx" : : "a" (al), "d" (port));
}

static __inline void
outl(u_int port, u_int data)
{
	/*
	 * outl() and outw() aren't used much so we haven't looked at
	 * possible micro-optimizations such as the unnecessary
	 * assignment for them.
	 */
	__asm __volatile("outl %0,%%dx" : : "a" (data), "d" (port));
}
static __inline void
outsb(u_int port, const void *addr, size_t cnt)
{
	__asm __volatile("cld; rep; outsb"
			 : "+S" (addr), "+c" (cnt)
			 : "d" (port));
}

static __inline void
outsw(u_int port, const void *addr, size_t cnt)
{
	__asm __volatile("cld; rep; outsw"
			 : "+S" (addr), "+c" (cnt)
			 : "d" (port));
}

static __inline void
outsl(u_int port, const void *addr, size_t cnt)
{
	__asm __volatile("cld; rep; outsl"
			 : "+S" (addr), "+c" (cnt)
			 : "d" (port));
}

static __inline void
outw(u_int port, u_short data)
{
	__asm __volatile("outw %0,%%dx" : : "a" (data), "d" (port));
}

static __inline void
ia32_pause(void)
{
	__asm __volatile("pause");
}
static __inline u_long
read_rflags(void)
{
	u_long	rf;

	__asm __volatile("pushfq; popq %0" : "=r" (rf));
	return (rf);
}

static __inline u_int64_t
rdmsr(u_int msr)
{
	u_int32_t low, high;

	__asm __volatile("rdmsr" : "=a" (low), "=d" (high) : "c" (msr));
	return (low | ((u_int64_t)high << 32));
}

static __inline u_int64_t
rdpmc(u_int pmc)
{
	u_int32_t low, high;

	__asm __volatile("rdpmc" : "=a" (low), "=d" (high) : "c" (pmc));
	return (low | ((u_int64_t)high << 32));
}

static __inline u_int64_t
rdtsc(void)
{
	u_int32_t low, high;

	__asm __volatile("rdtsc" : "=a" (low), "=d" (high));
	return (low | ((u_int64_t)high << 32));
}
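/*
 * Illustrative sketch (not part of the original header): timing a
 * short interval in TSC ticks.  rdtsc is not a serializing
 * instruction, so this is only approximate; the helper name is
 * hypothetical.
 */
static __inline u_int64_t
example_tsc_delta(void)
{
	u_int64_t t0;

	t0 = rdtsc();
	cpu_pause();		/* the work being timed goes here */
	return (rdtsc() - t0);
}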
static __inline void
wbinvd(void)
{
	__asm __volatile("wbinvd");
}

static __inline void
write_rflags(u_long rf)
{
	__asm __volatile("pushq %0; popfq" : : "r" (rf));
}

static __inline void
wrmsr(u_int msr, u_int64_t newval)
{
	u_int32_t low, high;

	low = newval;
	high = newval >> 32;
	__asm __volatile("wrmsr" : : "a" (low), "d" (high), "c" (msr));
}
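/*
 * Illustrative sketch (not part of the original header): the usual
 * read-modify-write pattern for an MSR, here setting EFER.NXE
 * (EFER is msr 0xc0000080, NXE is bit 11); the helper name is
 * hypothetical.
 */
static __inline void
example_enable_nxe(void)
{
	wrmsr(0xc0000080, rdmsr(0xc0000080) | ((u_int64_t)1 << 11));
}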
static __inline void
load_cr0(u_long data)
{
	__asm __volatile("movq %0,%%cr0" : : "r" (data));
}

static __inline u_long
rcr0(void)
{
	u_long	data;

	__asm __volatile("movq %%cr0,%0" : "=r" (data));
	return (data);
}

static __inline u_long
rcr2(void)
{
	u_long	data;

	__asm __volatile("movq %%cr2,%0" : "=r" (data));
	return (data);
}

static __inline void
load_cr3(u_long data)
{
	__asm __volatile("movq %0,%%cr3" : : "r" (data) : "memory");
}

static __inline u_long
rcr3(void)
{
	u_long	data;

	__asm __volatile("movq %%cr3,%0" : "=r" (data));
	return (data);
}

static __inline void
load_cr4(u_long data)
{
	__asm __volatile("movq %0,%%cr4" : : "r" (data));
}

static __inline u_long
rcr4(void)
{
	u_long	data;

	__asm __volatile("movq %%cr4,%0" : "=r" (data));
	return (data);
}
/*
 * Global TLB flush (except for those pages marked PG_G)
 */
static __inline void
invltlb(void)
{
	load_cr3(rcr3());
}
/*
 * TLB flush for an individual page (even if it has PG_G).
 * Only works on 486+ CPUs (i386 does not have PG_G).
 */
static __inline void
invlpg(u_long addr)
{
	__asm __volatile("invlpg %0" : : "m" (*(char *)addr) : "memory");
}
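/*
 * Illustrative note (not part of the original header): after changing
 * a single page table entry, invlpg(va) flushes only the stale TLB
 * entry for that page, while invltlb() (reloading %cr3) discards
 * everything except PG_G entries.
 */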
static __inline u_int
rfs(void)
{
	u_int sel;
	__asm __volatile("movl %%fs,%0" : "=rm" (sel));
	return (sel);
}

static __inline u_int
rgs(void)
{
	u_int sel;
	__asm __volatile("movl %%gs,%0" : "=rm" (sel));
	return (sel);
}

static __inline void
load_ds(u_int sel)
{
	__asm __volatile("movl %0,%%ds" : : "rm" (sel));
}

static __inline void
load_es(u_int sel)
{
	__asm __volatile("movl %0,%%es" : : "rm" (sel));
}
#ifdef _KERNEL
/* This is defined in <machine/specialreg.h> but is too painful to get to */
#ifndef	MSR_FSBASE
#define	MSR_FSBASE	0xc0000100
#endif
static __inline void
load_fs(u_int sel)
{
	register u_int32_t fsbase __asm("ecx");

	/* Preserve the fsbase value across the selector load */
	fsbase = MSR_FSBASE;
	__asm __volatile("rdmsr; movl %0,%%fs; wrmsr"
			 : : "rm" (sel), "c" (fsbase) : "eax", "edx");
}

#ifndef	MSR_GSBASE
#define	MSR_GSBASE	0xc0000101
#endif
static __inline void
load_gs(u_int sel)
{
	register u_int32_t gsbase __asm("ecx");

	/*
	 * Preserve the gsbase value across the selector load.
	 * Note that we have to disable interrupts because the gsbase
	 * being trashed happens to be the kernel gsbase at the time.
	 */
	gsbase = MSR_GSBASE;
	__asm __volatile("pushfq; cli; rdmsr; movl %0,%%gs; wrmsr; popfq"
			 : : "rm" (sel), "c" (gsbase) : "eax", "edx");
}
#else
/* Usable by userland */
static __inline void
load_fs(u_int sel)
{
	__asm __volatile("movl %0,%%fs" : : "rm" (sel));
}

static __inline void
load_gs(u_int sel)
{
	__asm __volatile("movl %0,%%gs" : : "rm" (sel));
}
#endif
/* void lidt(struct region_descriptor *addr); */
static __inline void
lidt(struct region_descriptor *addr)
{
	__asm __volatile("lidt (%0)" : : "r" (addr));
}

/* void lldt(u_short sel); */
static __inline void
lldt(u_short sel)
{
	__asm __volatile("lldt %0" : : "r" (sel));
}

/* void ltr(u_short sel); */
static __inline void
ltr(u_short sel)
{
	__asm __volatile("ltr %0" : : "r" (sel));
}
static __inline u_int64_t
rdr0(void)
{
	u_int64_t data;
	__asm __volatile("movq %%dr0,%0" : "=r" (data));
	return (data);
}

static __inline void
load_dr0(u_int64_t dr0)
{
	__asm __volatile("movq %0,%%dr0" : : "r" (dr0));
}

static __inline u_int64_t
rdr1(void)
{
	u_int64_t data;
	__asm __volatile("movq %%dr1,%0" : "=r" (data));
	return (data);
}

static __inline void
load_dr1(u_int64_t dr1)
{
	__asm __volatile("movq %0,%%dr1" : : "r" (dr1));
}

static __inline u_int64_t
rdr2(void)
{
	u_int64_t data;
	__asm __volatile("movq %%dr2,%0" : "=r" (data));
	return (data);
}

static __inline void
load_dr2(u_int64_t dr2)
{
	__asm __volatile("movq %0,%%dr2" : : "r" (dr2));
}

static __inline u_int64_t
rdr3(void)
{
	u_int64_t data;
	__asm __volatile("movq %%dr3,%0" : "=r" (data));
	return (data);
}

static __inline void
load_dr3(u_int64_t dr3)
{
	__asm __volatile("movq %0,%%dr3" : : "r" (dr3));
}

static __inline u_int64_t
rdr4(void)
{
	u_int64_t data;
	__asm __volatile("movq %%dr4,%0" : "=r" (data));
	return (data);
}

static __inline void
load_dr4(u_int64_t dr4)
{
	__asm __volatile("movq %0,%%dr4" : : "r" (dr4));
}

static __inline u_int64_t
rdr5(void)
{
	u_int64_t data;
	__asm __volatile("movq %%dr5,%0" : "=r" (data));
	return (data);
}

static __inline void
load_dr5(u_int64_t dr5)
{
	__asm __volatile("movq %0,%%dr5" : : "r" (dr5));
}

static __inline u_int64_t
rdr6(void)
{
	u_int64_t data;
	__asm __volatile("movq %%dr6,%0" : "=r" (data));
	return (data);
}

static __inline void
load_dr6(u_int64_t dr6)
{
	__asm __volatile("movq %0,%%dr6" : : "r" (dr6));
}

static __inline u_int64_t
rdr7(void)
{
	u_int64_t data;
	__asm __volatile("movq %%dr7,%0" : "=r" (data));
	return (data);
}

static __inline void
load_dr7(u_int64_t dr7)
{
	__asm __volatile("movq %0,%%dr7" : : "r" (dr7));
}
static __inline register_t
intr_disable(void)
{
	register_t rflags;

	rflags = read_rflags();
	disable_intr();
	return (rflags);
}

static __inline void
intr_restore(register_t rflags)
{
	write_rflags(rflags);
}
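/*
 * Illustrative sketch (not part of the original header): the usual
 * save/disable/restore pattern for a short critical section; the
 * helper name is hypothetical.
 */
static __inline void
example_critical_increment(volatile int *counter)
{
	register_t rflags;

	rflags = intr_disable();
	++*counter;		/* no interrupt can run on this cpu here */
	intr_restore(rflags);
}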
#else /* !__GNUC__ */

int	breakpoint(void);
void	cpu_pause(void);
u_int	bsfl(u_int mask);
u_int	bsrl(u_int mask);
void	cpu_invlpg(u_long addr);
void	cpu_invlpg_range(u_long start, u_long end);
void	disable_intr(void);
void	do_cpuid(u_int ax, u_int *p);
void	enable_intr(void);
void	halt(void);
u_char	inb(u_int port);
u_int	inl(u_int port);
void	insb(u_int port, void *addr, size_t cnt);
void	insl(u_int port, void *addr, size_t cnt);
void	insw(u_int port, void *addr, size_t cnt);
void	invd(void);
void	invlpg(u_int addr);
void	invlpg_range(u_int start, u_int end);
void	invltlb(void);
u_short	inw(u_int port);
void	load_cr0(u_int cr0);
void	load_cr3(u_int cr3);
void	load_cr4(u_int cr4);
void	load_fs(u_int sel);
void	load_gs(u_int sel);
struct region_descriptor;
void	lidt(struct region_descriptor *addr);
void	lldt(u_short sel);
void	ltr(u_short sel);
void	outb(u_int port, u_char data);
void	outl(u_int port, u_int data);
void	outsb(u_int port, void *addr, size_t cnt);
void	outsl(u_int port, void *addr, size_t cnt);
void	outsw(u_int port, void *addr, size_t cnt);
void	outw(u_int port, u_short data);
void	ia32_pause(void);
u_int	rcr0(void);
u_int	rcr2(void);
u_int	rcr3(void);
u_int	rcr4(void);
u_int	rfs(void);
u_int	rgs(void);
u_int64_t rdmsr(u_int msr);
u_int64_t rdpmc(u_int pmc);
u_int64_t rdtsc(void);
u_int	read_rflags(void);
void	wbinvd(void);
void	write_rflags(u_int rf);
void	wrmsr(u_int msr, u_int64_t newval);
u_int64_t rdr0(void);
void	load_dr0(u_int64_t dr0);
u_int64_t rdr1(void);
void	load_dr1(u_int64_t dr1);
u_int64_t rdr2(void);
void	load_dr2(u_int64_t dr2);
u_int64_t rdr3(void);
void	load_dr3(u_int64_t dr3);
u_int64_t rdr4(void);
void	load_dr4(u_int64_t dr4);
u_int64_t rdr5(void);
void	load_dr5(u_int64_t dr5);
u_int64_t rdr6(void);
void	load_dr6(u_int64_t dr6);
u_int64_t rdr7(void);
void	load_dr7(u_int64_t dr7);
register_t	intr_disable(void);
void	intr_restore(register_t rf);

#endif	/* __GNUC__ */
void	reset_dbregs(void);

__END_DECLS

#endif /* !_CPU_CPUFUNC_H_ */