From 7c95b251e0f9c1cfff29a74c0a38bf8918104eff Mon Sep 17 00:00:00 2001 From: Alexandre Julliard Date: Wed, 26 Jan 2011 20:48:44 +0100 Subject: [PATCH] msvcrt: Add a non-optimized implementation for the SSE2 math functions. --- dlls/msvcr100/msvcr100.spec | 42 ++++---- dlls/msvcr80/msvcr80.spec | 42 ++++---- dlls/msvcr90/msvcr90.spec | 42 ++++---- dlls/msvcrt/math.c | 231 ++++++++++++++++++++++++++++++++++++++++++++ dlls/msvcrt/msvcrt.spec | 42 ++++---- 5 files changed, 315 insertions(+), 84 deletions(-) diff --git a/dlls/msvcr100/msvcr100.spec b/dlls/msvcr100/msvcr100.spec index 9b4cecda13c..a601f2382f7 100644 --- a/dlls/msvcr100/msvcr100.spec +++ b/dlls/msvcr100/msvcr100.spec @@ -413,27 +413,27 @@ @ stub __iswcsymf @ stub __lconv @ cdecl __lconv_init() msvcrt.__lconv_init -@ stub __libm_sse2_acos -@ stub __libm_sse2_acosf -@ stub __libm_sse2_asin -@ stub __libm_sse2_asinf -@ stub __libm_sse2_atan -@ stub __libm_sse2_atan2 -@ stub __libm_sse2_atanf -@ stub __libm_sse2_cos -@ stub __libm_sse2_cosf -@ stub __libm_sse2_exp -@ stub __libm_sse2_expf -@ stub __libm_sse2_log -@ stub __libm_sse2_log10 -@ stub __libm_sse2_log10f -@ stub __libm_sse2_logf -@ stub __libm_sse2_pow -@ stub __libm_sse2_powf -@ stub __libm_sse2_sin -@ stub __libm_sse2_sinf -@ stub __libm_sse2_tan -@ stub __libm_sse2_tanf +@ cdecl -arch=i386 __libm_sse2_acos() msvcrt.__libm_sse2_acos +@ cdecl -arch=i386 __libm_sse2_acosf() msvcrt.__libm_sse2_acosf +@ cdecl -arch=i386 __libm_sse2_asin() msvcrt.__libm_sse2_asin +@ cdecl -arch=i386 __libm_sse2_asinf() msvcrt.__libm_sse2_asinf +@ cdecl -arch=i386 __libm_sse2_atan() msvcrt.__libm_sse2_atan +@ cdecl -arch=i386 __libm_sse2_atan2() msvcrt.__libm_sse2_atan2 +@ cdecl -arch=i386 __libm_sse2_atanf() msvcrt.__libm_sse2_atanf +@ cdecl -arch=i386 __libm_sse2_cos() msvcrt.__libm_sse2_cos +@ cdecl -arch=i386 __libm_sse2_cosf() msvcrt.__libm_sse2_cosf +@ cdecl -arch=i386 __libm_sse2_exp() msvcrt.__libm_sse2_exp +@ cdecl -arch=i386 __libm_sse2_expf() 
msvcrt.__libm_sse2_expf +@ cdecl -arch=i386 __libm_sse2_log() msvcrt.__libm_sse2_log +@ cdecl -arch=i386 __libm_sse2_log10() msvcrt.__libm_sse2_log10 +@ cdecl -arch=i386 __libm_sse2_log10f() msvcrt.__libm_sse2_log10f +@ cdecl -arch=i386 __libm_sse2_logf() msvcrt.__libm_sse2_logf +@ cdecl -arch=i386 __libm_sse2_pow() msvcrt.__libm_sse2_pow +@ cdecl -arch=i386 __libm_sse2_powf() msvcrt.__libm_sse2_powf +@ cdecl -arch=i386 __libm_sse2_sin() msvcrt.__libm_sse2_sin +@ cdecl -arch=i386 __libm_sse2_sinf() msvcrt.__libm_sse2_sinf +@ cdecl -arch=i386 __libm_sse2_tan() msvcrt.__libm_sse2_tan +@ cdecl -arch=i386 __libm_sse2_tanf() msvcrt.__libm_sse2_tanf @ extern __mb_cur_max msvcrt.__mb_cur_max @ cdecl -arch=i386 __p___argc() msvcrt.__p___argc @ cdecl -arch=i386 __p___argv() msvcrt.__p___argv diff --git a/dlls/msvcr80/msvcr80.spec b/dlls/msvcr80/msvcr80.spec index 4eaed927f74..b4787ad5e72 100644 --- a/dlls/msvcr80/msvcr80.spec +++ b/dlls/msvcr80/msvcr80.spec @@ -231,27 +231,27 @@ @ extern __lc_handle msvcrt.__lc_handle @ stub __lconv @ cdecl __lconv_init() msvcrt.__lconv_init -@ stub __libm_sse2_acos -@ stub __libm_sse2_acosf -@ stub __libm_sse2_asin -@ stub __libm_sse2_asinf -@ stub __libm_sse2_atan -@ stub __libm_sse2_atan2 -@ stub __libm_sse2_atanf -@ stub __libm_sse2_cos -@ stub __libm_sse2_cosf -@ stub __libm_sse2_exp -@ stub __libm_sse2_expf -@ stub __libm_sse2_log -@ stub __libm_sse2_log10 -@ stub __libm_sse2_log10f -@ stub __libm_sse2_logf -@ stub __libm_sse2_pow -@ stub __libm_sse2_powf -@ stub __libm_sse2_sin -@ stub __libm_sse2_sinf -@ stub __libm_sse2_tan -@ stub __libm_sse2_tanf +@ cdecl -arch=i386 __libm_sse2_acos() msvcrt.__libm_sse2_acos +@ cdecl -arch=i386 __libm_sse2_acosf() msvcrt.__libm_sse2_acosf +@ cdecl -arch=i386 __libm_sse2_asin() msvcrt.__libm_sse2_asin +@ cdecl -arch=i386 __libm_sse2_asinf() msvcrt.__libm_sse2_asinf +@ cdecl -arch=i386 __libm_sse2_atan() msvcrt.__libm_sse2_atan +@ cdecl -arch=i386 __libm_sse2_atan2() msvcrt.__libm_sse2_atan2 +@ 
cdecl -arch=i386 __libm_sse2_atanf() msvcrt.__libm_sse2_atanf +@ cdecl -arch=i386 __libm_sse2_cos() msvcrt.__libm_sse2_cos +@ cdecl -arch=i386 __libm_sse2_cosf() msvcrt.__libm_sse2_cosf +@ cdecl -arch=i386 __libm_sse2_exp() msvcrt.__libm_sse2_exp +@ cdecl -arch=i386 __libm_sse2_expf() msvcrt.__libm_sse2_expf +@ cdecl -arch=i386 __libm_sse2_log() msvcrt.__libm_sse2_log +@ cdecl -arch=i386 __libm_sse2_log10() msvcrt.__libm_sse2_log10 +@ cdecl -arch=i386 __libm_sse2_log10f() msvcrt.__libm_sse2_log10f +@ cdecl -arch=i386 __libm_sse2_logf() msvcrt.__libm_sse2_logf +@ cdecl -arch=i386 __libm_sse2_pow() msvcrt.__libm_sse2_pow +@ cdecl -arch=i386 __libm_sse2_powf() msvcrt.__libm_sse2_powf +@ cdecl -arch=i386 __libm_sse2_sin() msvcrt.__libm_sse2_sin +@ cdecl -arch=i386 __libm_sse2_sinf() msvcrt.__libm_sse2_sinf +@ cdecl -arch=i386 __libm_sse2_tan() msvcrt.__libm_sse2_tan +@ cdecl -arch=i386 __libm_sse2_tanf() msvcrt.__libm_sse2_tanf @ extern __mb_cur_max msvcrt.__mb_cur_max @ cdecl -arch=i386 __p___argc() msvcrt.__p___argc @ cdecl -arch=i386 __p___argv() msvcrt.__p___argv diff --git a/dlls/msvcr90/msvcr90.spec b/dlls/msvcr90/msvcr90.spec index a2b8359e839..809265b0554 100644 --- a/dlls/msvcr90/msvcr90.spec +++ b/dlls/msvcr90/msvcr90.spec @@ -228,27 +228,27 @@ @ extern __lc_handle msvcrt.__lc_handle @ stub __lconv @ cdecl __lconv_init() msvcrt.__lconv_init -@ stub __libm_sse2_acos -@ stub __libm_sse2_acosf -@ stub __libm_sse2_asin -@ stub __libm_sse2_asinf -@ stub __libm_sse2_atan -@ stub __libm_sse2_atan2 -@ stub __libm_sse2_atanf -@ stub __libm_sse2_cos -@ stub __libm_sse2_cosf -@ stub __libm_sse2_exp -@ stub __libm_sse2_expf -@ stub __libm_sse2_log -@ stub __libm_sse2_log10 -@ stub __libm_sse2_log10f -@ stub __libm_sse2_logf -@ stub __libm_sse2_pow -@ stub __libm_sse2_powf -@ stub __libm_sse2_sin -@ stub __libm_sse2_sinf -@ stub __libm_sse2_tan -@ stub __libm_sse2_tanf +@ cdecl -arch=i386 __libm_sse2_acos() msvcrt.__libm_sse2_acos +@ cdecl -arch=i386 __libm_sse2_acosf() 
msvcrt.__libm_sse2_acosf +@ cdecl -arch=i386 __libm_sse2_asin() msvcrt.__libm_sse2_asin +@ cdecl -arch=i386 __libm_sse2_asinf() msvcrt.__libm_sse2_asinf +@ cdecl -arch=i386 __libm_sse2_atan() msvcrt.__libm_sse2_atan +@ cdecl -arch=i386 __libm_sse2_atan2() msvcrt.__libm_sse2_atan2 +@ cdecl -arch=i386 __libm_sse2_atanf() msvcrt.__libm_sse2_atanf +@ cdecl -arch=i386 __libm_sse2_cos() msvcrt.__libm_sse2_cos +@ cdecl -arch=i386 __libm_sse2_cosf() msvcrt.__libm_sse2_cosf +@ cdecl -arch=i386 __libm_sse2_exp() msvcrt.__libm_sse2_exp +@ cdecl -arch=i386 __libm_sse2_expf() msvcrt.__libm_sse2_expf +@ cdecl -arch=i386 __libm_sse2_log() msvcrt.__libm_sse2_log +@ cdecl -arch=i386 __libm_sse2_log10() msvcrt.__libm_sse2_log10 +@ cdecl -arch=i386 __libm_sse2_log10f() msvcrt.__libm_sse2_log10f +@ cdecl -arch=i386 __libm_sse2_logf() msvcrt.__libm_sse2_logf +@ cdecl -arch=i386 __libm_sse2_pow() msvcrt.__libm_sse2_pow +@ cdecl -arch=i386 __libm_sse2_powf() msvcrt.__libm_sse2_powf +@ cdecl -arch=i386 __libm_sse2_sin() msvcrt.__libm_sse2_sin +@ cdecl -arch=i386 __libm_sse2_sinf() msvcrt.__libm_sse2_sinf +@ cdecl -arch=i386 __libm_sse2_tan() msvcrt.__libm_sse2_tan +@ cdecl -arch=i386 __libm_sse2_tanf() msvcrt.__libm_sse2_tanf @ extern __mb_cur_max msvcrt.__mb_cur_max @ cdecl -arch=i386 __p___argc() msvcrt.__p___argc @ cdecl -arch=i386 __p___argv() msvcrt.__p___argv diff --git a/dlls/msvcrt/math.c b/dlls/msvcrt/math.c index eac3f087685..c0fc11cbbbc 100644 --- a/dlls/msvcrt/math.c +++ b/dlls/msvcrt/math.c @@ -1764,4 +1764,235 @@ void _safe_fprem1(void) TRACE("(): stub\n"); } +/*********************************************************************** + * __libm_sse2_acos (MSVCRT.@) + * + * Non-optimized SSE2 entry point for acos(). Takes no C-level arguments: + * the double operand is stored from xmm0 into a local with movq, passed to + * acos(), and the result is loaded back into xmm0 with movq. + * + * NOTE(review): presumably relies on xmm0 still holding the caller's value + * when the first asm statement runs, and on nothing overwriting xmm0 after + * the last one -- confirm against the generated code. + */ +void __cdecl __libm_sse2_acos(void) +{ + double d; + __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) ); + d = acos( d ); + __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) ); +} + +/*********************************************************************** + * __libm_sse2_acosf (MSVCRT.@) + * + * Single-precision variant: moves the operand from xmm0 into a local float + * with movd, calls acosf() on it, and moves the result back into xmm0. + */ +void __cdecl 
__libm_sse2_acosf(void) +{ + float f; + __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) ); + f = acosf( f ); + __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) ); +} + +/*********************************************************************** + * __libm_sse2_asin (MSVCRT.@) + * + * Stores the double held in xmm0 into a local (movq), calls asin() on it, + * and loads the result back into xmm0. + */ +void __cdecl __libm_sse2_asin(void) +{ + double d; + __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) ); + d = asin( d ); + __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) ); +} + +/*********************************************************************** + * __libm_sse2_asinf (MSVCRT.@) + * + * Moves the float held in xmm0 into a local (movd), calls asinf() on it, + * and moves the result back into xmm0. + */ +void __cdecl __libm_sse2_asinf(void) +{ + float f; + __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) ); + f = asinf( f ); + __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) ); +} + +/*********************************************************************** + * __libm_sse2_atan (MSVCRT.@) + * + * Stores the double held in xmm0 into a local (movq), calls atan() on it, + * and loads the result back into xmm0. + */ +void __cdecl __libm_sse2_atan(void) +{ + double d; + __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) ); + d = atan( d ); + __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) ); +} + +/*********************************************************************** + * __libm_sse2_atan2 (MSVCRT.@) + * + * Two-operand wrapper: a single asm statement stores xmm0 into d1 and xmm1 + * into d2, then atan2( d1, d2 ) is computed and the result is loaded back + * into xmm0. xmm1 is only read, never written, by this function's asm. + */ +void __cdecl __libm_sse2_atan2(void) +{ + double d1, d2; + __asm__ __volatile__( "movq %%xmm0,%0; movq %%xmm1,%1 " : "=m" (d1), "=m" (d2) ); + d1 = atan2( d1, d2 ); + __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d1) ); +} + +/*********************************************************************** + * __libm_sse2_atanf (MSVCRT.@) + * + * Moves the float held in xmm0 into a local (movd), calls atanf() on it, + * and moves the result back into xmm0. + */ +void __cdecl __libm_sse2_atanf(void) +{ + float f; + __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) ); + f = atanf( f ); + __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) ); +} + +/*********************************************************************** + * __libm_sse2_cos (MSVCRT.@) + * + * Stores the double held in xmm0 into a local (movq), calls cos() on it, + * and loads the result back into xmm0. + */ +void __cdecl __libm_sse2_cos(void) +{ + double d; + __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) ); + d = cos( d ); + __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) ); +} + 
+/*********************************************************************** + * __libm_sse2_cosf (MSVCRT.@) + * + * Non-optimized SSE2 entry point for cosf(). Takes no C-level arguments: + * the float operand is moved from xmm0 into a local with movd, passed to + * cosf(), and the result is moved back into xmm0 with movd. + * + * NOTE(review): presumably relies on xmm0 still holding the caller's value + * when the first asm statement runs, and on nothing overwriting xmm0 after + * the last one -- confirm against the generated code. + */ +void __cdecl __libm_sse2_cosf(void) +{ + float f; + __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) ); + f = cosf( f ); + __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) ); +} + +/*********************************************************************** + * __libm_sse2_exp (MSVCRT.@) + * + * Stores the double held in xmm0 into a local (movq), calls exp() on it, + * and loads the result back into xmm0. + */ +void __cdecl __libm_sse2_exp(void) +{ + double d; + __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) ); + d = exp( d ); + __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) ); +} + +/*********************************************************************** + * __libm_sse2_expf (MSVCRT.@) + * + * Moves the float held in xmm0 into a local (movd), calls expf() on it, + * and moves the result back into xmm0. + */ +void __cdecl __libm_sse2_expf(void) +{ + float f; + __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) ); + f = expf( f ); + __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) ); +} + +/*********************************************************************** + * __libm_sse2_log (MSVCRT.@) + * + * Stores the double held in xmm0 into a local (movq), calls log() on it, + * and loads the result back into xmm0. + */ +void __cdecl __libm_sse2_log(void) +{ + double d; + __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) ); + d = log( d ); + __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) ); +} + +/*********************************************************************** + * __libm_sse2_log10 (MSVCRT.@) + * + * Stores the double held in xmm0 into a local (movq), calls log10() on it, + * and loads the result back into xmm0. + */ +void __cdecl __libm_sse2_log10(void) +{ + double d; + __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) ); + d = log10( d ); + __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) ); +} + +/*********************************************************************** + * __libm_sse2_log10f (MSVCRT.@) + * + * Moves the float held in xmm0 into a local (movd), calls log10f() on it, + * and moves the result back into xmm0. + */ +void __cdecl __libm_sse2_log10f(void) +{ + float f; + __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) ); + f = log10f( f ); + __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) ); +} + +/*********************************************************************** + * __libm_sse2_logf (MSVCRT.@) + * + * Moves the float held in xmm0 into a local (movd), calls logf() on it, + * and moves the result back into xmm0. + */ +void __cdecl __libm_sse2_logf(void) +{ + float f; + __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) ); + f = 
logf( f ); + __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) ); +} + +/*********************************************************************** + * __libm_sse2_pow (MSVCRT.@) + * + * Two-operand wrapper: a single asm statement stores xmm0 into d1 and xmm1 + * into d2, then pow( d1, d2 ) is computed and the result is loaded back + * into xmm0. xmm1 is only read, never written, by this function's asm. + */ +void __cdecl __libm_sse2_pow(void) +{ + double d1, d2; + __asm__ __volatile__( "movq %%xmm0,%0; movq %%xmm1,%1 " : "=m" (d1), "=m" (d2) ); + d1 = pow( d1, d2 ); + __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d1) ); +} + +/*********************************************************************** + * __libm_sse2_powf (MSVCRT.@) + * + * Single-precision two-operand wrapper: a single asm statement moves xmm0 + * into f1 and xmm1 into f2 with movd, then powf( f1, f2 ) is computed and + * the result is moved back into xmm0. + */ +void __cdecl __libm_sse2_powf(void) +{ + float f1, f2; + __asm__ __volatile__( "movd %%xmm0,%0; movd %%xmm1,%1" : "=g" (f1), "=g" (f2) ); + f1 = powf( f1, f2 ); + __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f1) ); +} + +/*********************************************************************** + * __libm_sse2_sin (MSVCRT.@) + * + * Stores the double held in xmm0 into a local (movq), calls sin() on it, + * and loads the result back into xmm0. + */ +void __cdecl __libm_sse2_sin(void) +{ + double d; + __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) ); + d = sin( d ); + __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) ); +} + +/*********************************************************************** + * __libm_sse2_sinf (MSVCRT.@) + * + * Moves the float held in xmm0 into a local (movd), calls sinf() on it, + * and moves the result back into xmm0. + */ +void __cdecl __libm_sse2_sinf(void) +{ + float f; + __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) ); + f = sinf( f ); + __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) ); +} + +/*********************************************************************** + * __libm_sse2_tan (MSVCRT.@) + * + * Stores the double held in xmm0 into a local (movq), calls tan() on it, + * and loads the result back into xmm0. + */ +void __cdecl __libm_sse2_tan(void) +{ + double d; + __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) ); + d = tan( d ); + __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) ); +} + +/*********************************************************************** + * __libm_sse2_tanf (MSVCRT.@) + * + * Moves the float held in xmm0 into a local (movd), calls tanf() on it, + * and moves the result back into xmm0. + */ +void __cdecl __libm_sse2_tanf(void) +{ + float f; + __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) ); + f = tanf( f ); + __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) ); +} + #endif /* __i386__ */ diff --git a/dlls/msvcrt/msvcrt.spec 
b/dlls/msvcrt/msvcrt.spec index 7fd3dfab8db..5582e6344cf 100644 --- a/dlls/msvcrt/msvcrt.spec +++ b/dlls/msvcrt/msvcrt.spec @@ -196,27 +196,27 @@ @ extern __lc_collate_cp MSVCRT___lc_collate_cp @ extern __lc_handle MSVCRT___lc_handle @ cdecl __lconv_init() -# stub __libm_sse2_acos -# stub __libm_sse2_acosf -# stub __libm_sse2_asin -# stub __libm_sse2_asinf -# stub __libm_sse2_atan -# stub __libm_sse2_atan2 -# stub __libm_sse2_atanf -# stub __libm_sse2_cos -# stub __libm_sse2_cosf -# stub __libm_sse2_exp -# stub __libm_sse2_expf -# stub __libm_sse2_log -# stub __libm_sse2_log10 -# stub __libm_sse2_log10f -# stub __libm_sse2_logf -# stub __libm_sse2_pow -# stub __libm_sse2_powf -# stub __libm_sse2_sin -# stub __libm_sse2_sinf -# stub __libm_sse2_tan -# stub __libm_sse2_tanf +@ cdecl -arch=i386 __libm_sse2_acos() +@ cdecl -arch=i386 __libm_sse2_acosf() +@ cdecl -arch=i386 __libm_sse2_asin() +@ cdecl -arch=i386 __libm_sse2_asinf() +@ cdecl -arch=i386 __libm_sse2_atan() +@ cdecl -arch=i386 __libm_sse2_atan2() +@ cdecl -arch=i386 __libm_sse2_atanf() +@ cdecl -arch=i386 __libm_sse2_cos() +@ cdecl -arch=i386 __libm_sse2_cosf() +@ cdecl -arch=i386 __libm_sse2_exp() +@ cdecl -arch=i386 __libm_sse2_expf() +@ cdecl -arch=i386 __libm_sse2_log() +@ cdecl -arch=i386 __libm_sse2_log10() +@ cdecl -arch=i386 __libm_sse2_log10f() +@ cdecl -arch=i386 __libm_sse2_logf() +@ cdecl -arch=i386 __libm_sse2_pow() +@ cdecl -arch=i386 __libm_sse2_powf() +@ cdecl -arch=i386 __libm_sse2_sin() +@ cdecl -arch=i386 __libm_sse2_sinf() +@ cdecl -arch=i386 __libm_sse2_tan() +@ cdecl -arch=i386 __libm_sse2_tanf() @ extern __mb_cur_max MSVCRT___mb_cur_max @ cdecl -arch=i386 __p___argc() @ cdecl -arch=i386 __p___argv() -- 2.11.4.GIT