From 6f6f1215f68e5ae15ad18373234815fe7b2acc9e Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Fri, 31 Jul 2009 11:53:35 -0700 Subject: [PATCH] Support multiarch for i686. This patch adds multiarch support when configured for i686. I modified some x86-64 functions to support 32bit. I will contribute 32bit SSE string and memory functions later. --- ChangeLog | 30 ++++++++++ sysdeps/i386/i686/Makefile | 4 ++ sysdeps/i386/i686/cacheinfo.c | 8 +++ sysdeps/i386/i686/multiarch/Makefile | 4 ++ sysdeps/i386/i686/multiarch/ifunc-defines.sym | 17 ++++++ sysdeps/i386/i686/multiarch/init-arch.c | 3 + sysdeps/i386/i686/multiarch/init-arch.h | 1 + sysdeps/i386/i686/multiarch/sched_cpucount.c | 1 + sysdeps/unix/sysv/linux/i386/i686/sysconf.c | 1 + sysdeps/x86_64/cacheinfo.c | 79 ++++++++++++++------------- sysdeps/x86_64/multiarch/init-arch.c | 18 +++--- sysdeps/x86_64/multiarch/init-arch.h | 6 ++ sysdeps/x86_64/multiarch/sched_cpucount.c | 2 +- 13 files changed, 125 insertions(+), 49 deletions(-) create mode 100644 sysdeps/i386/i686/cacheinfo.c create mode 100644 sysdeps/i386/i686/multiarch/Makefile create mode 100644 sysdeps/i386/i686/multiarch/ifunc-defines.sym create mode 100644 sysdeps/i386/i686/multiarch/init-arch.c create mode 100644 sysdeps/i386/i686/multiarch/init-arch.h create mode 100644 sysdeps/i386/i686/multiarch/sched_cpucount.c create mode 100644 sysdeps/unix/sysv/linux/i386/i686/sysconf.c diff --git a/ChangeLog b/ChangeLog index 2ff87e5335..8759b2c08d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,33 @@ +2009-07-31 H.J. Lu + + * sysdeps/i386/i686/Makefile (sysdep_routines): Add cacheinfo. + * sysdeps/i386/i686/cacheinfo.c: New file. + * sysdeps/i386/i686/multiarch/Makefile: New file. + * sysdeps/i386/i686/multiarch/ifunc-defines.sym: New file. + * sysdeps/i386/i686/multiarch/init-arch.c: New file. + * sysdeps/i386/i686/multiarch/init-arch.h: New file. + * sysdeps/i386/i686/multiarch/sched_cpucount.c: New file. 
+ * sysdeps/unix/sysv/linux/i386/i686/sysconf.c: New file. + * sysdeps/x86_64/cacheinfo.c: Include <cpuid.h>. + (__cpuid_count): New. Provide the default. + (__x86_64_prefetchw): Define only if DISABLE_PREFETCHW is not defined. + (__x86_64_preferred_memory_instruction): Define only if + DISABLE_PREFERRED_MEMORY_INSTRUCTION is not defined. + (intel_check_word): Use __cpuid. + (handle_intel): Likewise. + (handle_amd): Likewise. + (__cache_sysconf): Likewise. + (init_cacheinfo): Updated. Use __cpuid and __cpuid_count. + (__cache_sysconf): Likewise. + (init_cacheinfo): Updated. Use __cpuid and __cpuid_count. + * sysdeps/x86_64/multiarch/init-arch.c: Include <cpuid.h>. + (get_common_indeces): Use __cpuid. + (__init_cpu_features): Likewise. Disable SSSE3 on Atom only + if ENABLE_SSSE3_ON_ATOM is not defined. + * sysdeps/x86_64/multiarch/init-arch.h (HAS_SSE2): Define. + * sysdeps/x86_64/multiarch/sched_cpucount.c (POPCNT): Use + popcnt instead of popcntq. + 2009-07-31 Jakub Jelinek * malloc/Makefile (CFLAGS-obstack.c): Add $(uses-callbacks). 
diff --git a/sysdeps/i386/i686/Makefile b/sysdeps/i386/i686/Makefile index c7378ab21e..dbcf1c33d3 100644 --- a/sysdeps/i386/i686/Makefile +++ b/sysdeps/i386/i686/Makefile @@ -5,3 +5,7 @@ endif # So that we can test __m128's alignment stack-align-test-flags += -msse + +ifeq ($(subdir),string) +sysdep_routines += cacheinfo +endif diff --git a/sysdeps/i386/i686/cacheinfo.c b/sysdeps/i386/i686/cacheinfo.c new file mode 100644 index 0000000000..82e4cd223e --- /dev/null +++ b/sysdeps/i386/i686/cacheinfo.c @@ -0,0 +1,8 @@ +#define __x86_64_data_cache_size_half __x86_data_cache_size_half +#define __x86_64_shared_cache_size __x86_shared_cache_size +#define __x86_64_shared_cache_size_half __x86_shared_cache_size_half + +#define DISABLE_PREFETCHW +#define DISABLE_PREFERRED_MEMORY_INSTRUCTION + +#include <sysdeps/x86_64/cacheinfo.c> diff --git a/sysdeps/i386/i686/multiarch/Makefile b/sysdeps/i386/i686/multiarch/Makefile new file mode 100644 index 0000000000..33d98c36e6 --- /dev/null +++ b/sysdeps/i386/i686/multiarch/Makefile @@ -0,0 +1,4 @@ +ifeq ($(subdir),csu) +aux += init-arch +gen-as-const-headers += ifunc-defines.sym +endif diff --git a/sysdeps/i386/i686/multiarch/ifunc-defines.sym b/sysdeps/i386/i686/multiarch/ifunc-defines.sym new file mode 100644 index 0000000000..e2021cdf87 --- /dev/null +++ b/sysdeps/i386/i686/multiarch/ifunc-defines.sym @@ -0,0 +1,17 @@ +#include "init-arch.h" +#include <stddef.h> + +-- + +CPU_FEATURES_SIZE sizeof (struct cpu_features) +KIND_OFFSET offsetof (struct cpu_features, kind) +CPUID_OFFSET offsetof (struct cpu_features, cpuid) +CPUID_SIZE sizeof (struct cpuid_registers) +CPUID_EAX_OFFSET offsetof (struct cpuid_registers, eax) +CPUID_EBX_OFFSET offsetof (struct cpuid_registers, ebx) +CPUID_ECX_OFFSET offsetof (struct cpuid_registers, ecx) +CPUID_EDX_OFFSET offsetof (struct cpuid_registers, edx) +FAMILY_OFFSET offsetof (struct cpu_features, family) +MODEL_OFFSET offsetof (struct cpu_features, model) + +COMMON_CPUID_INDEX_1 diff --git a/sysdeps/i386/i686/multiarch/init-arch.c 
b/sysdeps/i386/i686/multiarch/init-arch.c new file mode 100644 index 0000000000..b371bae1dc --- /dev/null +++ b/sysdeps/i386/i686/multiarch/init-arch.c @@ -0,0 +1,3 @@ +#define ENABLE_SSSE3_ON_ATOM + +#include diff --git a/sysdeps/i386/i686/multiarch/init-arch.h b/sysdeps/i386/i686/multiarch/init-arch.h new file mode 100644 index 0000000000..cd2d0befee --- /dev/null +++ b/sysdeps/i386/i686/multiarch/init-arch.h @@ -0,0 +1 @@ +#include diff --git a/sysdeps/i386/i686/multiarch/sched_cpucount.c b/sysdeps/i386/i686/multiarch/sched_cpucount.c new file mode 100644 index 0000000000..7db31b02f8 --- /dev/null +++ b/sysdeps/i386/i686/multiarch/sched_cpucount.c @@ -0,0 +1 @@ +#include diff --git a/sysdeps/unix/sysv/linux/i386/i686/sysconf.c b/sysdeps/unix/sysv/linux/i386/i686/sysconf.c new file mode 100644 index 0000000000..cf79750de3 --- /dev/null +++ b/sysdeps/unix/sysv/linux/i386/i686/sysconf.c @@ -0,0 +1 @@ +#include diff --git a/sysdeps/x86_64/cacheinfo.c b/sysdeps/x86_64/cacheinfo.c index 75b81958dd..f252fc2c6c 100644 --- a/sysdeps/x86_64/cacheinfo.c +++ b/sysdeps/x86_64/cacheinfo.c @@ -22,6 +22,26 @@ #include #include #include +#include + +#ifndef __cpuid_count +/* FIXME: Provide __cpuid_count if it isn't defined. Copied from gcc + 4.4.0. Remove this if gcc 4.4 is the minimum requirement. */ +# if defined(__i386__) && defined(__PIC__) +/* %ebx may be the PIC register. 
*/ +# define __cpuid_count(level, count, a, b, c, d) \ + __asm__ ("xchg{l}\t{%%}ebx, %1\n\t" \ + "cpuid\n\t" \ + "xchg{l}\t{%%}ebx, %1\n\t" \ + : "=a" (a), "=r" (b), "=c" (c), "=d" (d) \ + : "0" (level), "2" (count)) +# else +# define __cpuid_count(level, count, a, b, c, d) \ + __asm__ ("cpuid\n\t" \ + : "=a" (a), "=b" (b), "=c" (c), "=d" (d) \ + : "0" (level), "2" (count)) +# endif +#endif #ifdef USE_MULTIARCH # include "multiarch/init-arch.h" @@ -176,9 +196,7 @@ intel_check_word (int name, unsigned int value, bool *has_level_2, unsigned int ebx; unsigned int ecx; unsigned int edx; - asm volatile ("cpuid" - : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) - : "0" (1)); + __cpuid (1, eax, ebx, ecx, edx); family = ((eax >> 20) & 0xff) + ((eax >> 8) & 0xf); model = (((eax >>16) & 0xf) << 4) + ((eax >> 4) & 0xf); @@ -250,9 +268,7 @@ handle_intel (int name, unsigned int maxidx) unsigned int ebx; unsigned int ecx; unsigned int edx; - asm volatile ("cpuid" - : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) - : "0" (2)); + __cpuid (2, eax, ebx, ecx, edx); /* The low byte of EAX in the first round contain the number of rounds we have to make. At least one, the one we are already @@ -296,9 +312,7 @@ handle_amd (int name) unsigned int ebx; unsigned int ecx; unsigned int edx; - asm volatile ("cpuid" - : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) - : "0" (0x80000000)); + __cpuid (0x80000000, eax, ebx, ecx, edx); /* No level 4 cache (yet). 
*/ if (name > _SC_LEVEL3_CACHE_LINESIZE) @@ -308,9 +322,7 @@ handle_amd (int name) if (eax < fn) return 0; - asm volatile ("cpuid" - : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) - : "0" (fn)); + __cpuid (fn, eax, ebx, ecx, edx); if (name < _SC_LEVEL1_DCACHE_SIZE) { @@ -424,9 +436,7 @@ __cache_sysconf (int name) unsigned int ebx; unsigned int ecx; unsigned int edx; - asm volatile ("cpuid" - : "=a" (max_cpuid), "=b" (ebx), "=c" (ecx), "=d" (edx) - : "0" (0)); + __cpuid (0, max_cpuid, ebx, ecx, edx); #endif if (is_intel) @@ -449,9 +459,13 @@ long int __x86_64_data_cache_size_half attribute_hidden = 32 * 1024 / 2; L2 or L3 size. */ long int __x86_64_shared_cache_size_half attribute_hidden = 1024 * 1024 / 2; long int __x86_64_shared_cache_size attribute_hidden = 1024 * 1024; + +#ifndef DISABLE_PREFETCHW /* PREFETCHW support flag for use in memory and string routines. */ int __x86_64_prefetchw attribute_hidden; +#endif +#ifndef DISABLE_PREFERRED_MEMORY_INSTRUCTION /* Instructions preferred for memory and string routines. 0: Regular instructions @@ -461,6 +475,7 @@ int __x86_64_prefetchw attribute_hidden; */ int __x86_64_preferred_memory_instruction attribute_hidden; +#endif static void @@ -483,9 +498,7 @@ init_cacheinfo (void) __init_cpu_features (); #else int max_cpuid; - asm volatile ("cpuid" - : "=a" (max_cpuid), "=b" (ebx), "=c" (ecx), "=d" (edx) - : "0" (0)); + __cpuid (0, max_cpuid, ebx, ecx, edx); #endif if (is_intel) @@ -509,17 +522,17 @@ init_cacheinfo (void) ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx; edx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].edx; #else - asm volatile ("cpuid" - : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) - : "0" (1)); + __cpuid (1, eax, ebx, ecx, edx); #endif +#ifndef DISABLE_PREFERRED_MEMORY_INSTRUCTION /* Intel prefers SSSE3 instructions for memory/string routines if they are avaiable. 
*/ if ((ecx & 0x200)) __x86_64_preferred_memory_instruction = 3; else __x86_64_preferred_memory_instruction = 2; +#endif /* Figure out the number of logical threads that share the highest cache level. */ @@ -530,9 +543,7 @@ init_cacheinfo (void) /* Query until desired cache level is enumerated. */ do { - asm volatile ("cpuid" - : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) - : "0" (4), "2" (i++)); + __cpuid_count (4, i++, eax, ebx, ecx, edx); /* There seems to be a bug in at least some Pentium Ds which sometimes fail to iterate all cache parameters. @@ -566,9 +577,7 @@ init_cacheinfo (void) shared = handle_amd (_SC_LEVEL3_CACHE_SIZE); /* Get maximum extended function. */ - asm volatile ("cpuid" - : "=a" (max_cpuid_ex), "=b" (ebx), "=c" (ecx), "=d" (edx) - : "0" (0x80000000)); + __cpuid (0x80000000, max_cpuid_ex, ebx, ecx, edx); if (shared <= 0) /* No shared L3 cache. All we have is the L2 cache. */ @@ -579,10 +588,7 @@ init_cacheinfo (void) if (max_cpuid_ex >= 0x80000008) { /* Get width of APIC ID. */ - asm volatile ("cpuid" - : "=a" (max_cpuid_ex), "=b" (ebx), "=c" (ecx), - "=d" (edx) - : "0" (0x80000008)); + __cpuid (0x80000008, max_cpuid_ex, ebx, ecx, edx); threads = 1 << ((ecx >> 12) & 0x0f); } @@ -590,10 +596,7 @@ init_cacheinfo (void) { /* If APIC ID width is not available, use logical processor count. */ - asm volatile ("cpuid" - : "=a" (max_cpuid_ex), "=b" (ebx), "=c" (ecx), - "=d" (edx) - : "0" (0x00000001)); + __cpuid (0x00000001, max_cpuid_ex, ebx, ecx, edx); if ((edx & (1 << 28)) != 0) threads = (ebx >> 16) & 0xff; @@ -608,15 +611,15 @@ init_cacheinfo (void) shared += core; } +#ifndef DISABLE_PREFETCHW if (max_cpuid_ex >= 0x80000001) { - asm volatile ("cpuid" - : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) - : "0" (0x80000001)); + __cpuid (0x80000001, eax, ebx, ecx, edx); /* PREFETCHW || 3DNow! 
*/ if ((ecx & 0x100) || (edx & 0x80000000)) __x86_64_prefetchw = -1; } +#endif } if (data > 0) diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86_64/multiarch/init-arch.c index 49b421eac8..c152ab29eb 100644 --- a/sysdeps/x86_64/multiarch/init-arch.c +++ b/sysdeps/x86_64/multiarch/init-arch.c @@ -18,6 +18,7 @@ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. */ +#include #include "init-arch.h" @@ -27,12 +28,10 @@ struct cpu_features __cpu_features attribute_hidden; static void get_common_indeces (void) { - asm volatile ("cpuid" - : "=a" (__cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax), - "=b" (__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ebx), - "=c" (__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx), - "=d" (__cpu_features.cpuid[COMMON_CPUID_INDEX_1].edx) - : "0" (1)); + __cpuid (1, __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax, + __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ebx, + __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx, + __cpu_features.cpuid[COMMON_CPUID_INDEX_1].edx); unsigned int eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax; __cpu_features.family = (eax >> 8) & 0x0f; @@ -47,10 +46,7 @@ __init_cpu_features (void) unsigned int ecx; unsigned int edx; - asm volatile ("cpuid" - : "=a" (__cpu_features.max_cpuid), "=b" (ebx), "=c" (ecx), - "=d" (edx) - : "0" (0)); + __cpuid (0, __cpu_features.max_cpuid, ebx, ecx, edx); /* This spells out "GenuineIntel". */ if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69) @@ -71,9 +67,11 @@ __init_cpu_features (void) { __cpu_features.model += extended_model; +#ifndef ENABLE_SSSE3_ON_ATOM if (__cpu_features.model == 0x1c) /* Avoid SSSE3 on Atom since it is slow. */ __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx &= ~(1 << 9); +#endif } } /* This spells out "AuthenticAMD". 
*/ diff --git a/sysdeps/x86_64/multiarch/init-arch.h b/sysdeps/x86_64/multiarch/init-arch.h index 0151e8b95b..8d9b1e8d8c 100644 --- a/sysdeps/x86_64/multiarch/init-arch.h +++ b/sysdeps/x86_64/multiarch/init-arch.h @@ -61,6 +61,9 @@ extern const struct cpu_features *__get_cpu_features (void) /* Following are the feature tests used throughout libc. */ #ifndef NOT_IN_libc +# define HAS_SSE2 \ + ((__cpu_features.cpuid[COMMON_CPUID_INDEX_1].edx & (1 << 26)) != 0) + # define HAS_POPCOUNT \ ((__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & (1 << 23)) != 0) @@ -70,6 +73,9 @@ extern const struct cpu_features *__get_cpu_features (void) # define HAS_FMA \ ((__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & (1 << 12)) != 0) #else +# define HAS_SSE2 \ + ((__get_cpu_features ()->cpuid[COMMON_CPUID_INDEX_1].edx & (1 << 26)) != 0) + # define HAS_POPCOUNT \ ((__get_cpu_features ()->cpuid[COMMON_CPUID_INDEX_1].ecx & (1 << 23)) != 0) diff --git a/sysdeps/x86_64/multiarch/sched_cpucount.c b/sysdeps/x86_64/multiarch/sched_cpucount.c index b6f425e948..fde6dcca60 100644 --- a/sysdeps/x86_64/multiarch/sched_cpucount.c +++ b/sysdeps/x86_64/multiarch/sched_cpucount.c @@ -27,7 +27,7 @@ #define POPCNT(l) \ ({ __cpu_mask r; \ - asm ("popcntq %1, %0" : "=r" (r) : "0" (l));\ + asm ("popcnt %1, %0" : "=r" (r) : "0" (l));\ r; }) #define __sched_cpucount static popcount_cpucount #include -- 2.11.4.GIT