From 402bf0695218bbe290418b9486b1dd5fe284d903 Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Fri, 19 May 2017 10:46:29 -0700 Subject: [PATCH] x86: Optimize SSE2 memchr overflow calculation SSE2 memchr computes "edx + ecx - 16" where ecx is less than 16. Use "edx - (16 - ecx)", instead of saturated math, to avoid possible addition overflow. This replaces add %ecx, %edx sbb %eax, %eax or %eax, %edx sub $16, %edx with neg %ecx add $16, %ecx sub %ecx, %edx It is the same for x86_64, except that it uses rcx/rdx instead of ecx/edx. * sysdeps/i386/i686/multiarch/memchr-sse2.S (MEMCHR): Use "edx - (16 - ecx)" to avoid possible addition overflow. * sysdeps/x86_64/memchr.S (memchr): Likewise. --- ChangeLog | 6 ++++++ sysdeps/i386/i686/multiarch/memchr-sse2.S | 14 ++++++-------- sysdeps/x86_64/memchr.S | 14 ++++++-------- 3 files changed, 18 insertions(+), 16 deletions(-) diff --git a/ChangeLog b/ChangeLog index 3c8d9f14d5..b9fbdb8351 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +2017-05-19 H.J. Lu + + * sysdeps/i386/i686/multiarch/memchr-sse2.S (MEMCHR): Use + "edx - (16 - ecx)" to avoid possible addition overflow. + * sysdeps/x86_64/memchr.S (memchr): Likewise. + 2017-05-19 Adhemerval Zanella * misc/Makefile (CFLAGS-vmsplice.c): Remove rule. diff --git a/sysdeps/i386/i686/multiarch/memchr-sse2.S b/sysdeps/i386/i686/multiarch/memchr-sse2.S index e41f324a77..172d70de13 100644 --- a/sysdeps/i386/i686/multiarch/memchr-sse2.S +++ b/sysdeps/i386/i686/multiarch/memchr-sse2.S @@ -117,14 +117,12 @@ L(crosscache): # ifndef USE_AS_RAWMEMCHR jnz L(match_case2_prolog1) - /* Calculate the last acceptable address and check for possible - addition overflow by using satured math: - edx = ecx + edx - edx |= -(edx < ecx) */ - add %ecx, %edx - sbb %eax, %eax - or %eax, %edx - sub $16, %edx + /* "ecx" is less than 16. Calculate "edx + ecx - 16" by using + "edx - (16 - ecx)" instead of "(edx + ecx) - 16" to avoid + possible addition overflow. 
*/ + neg %ecx + add $16, %ecx + sub %ecx, %edx jbe L(return_null) lea 16(%edi), %edi # else diff --git a/sysdeps/x86_64/memchr.S b/sysdeps/x86_64/memchr.S index a205a25998..f82e1c5bf7 100644 --- a/sysdeps/x86_64/memchr.S +++ b/sysdeps/x86_64/memchr.S @@ -76,14 +76,12 @@ L(crosscache): .p2align 4 L(unaligned_no_match): - /* Calculate the last acceptable address and check for possible - addition overflow by using satured math: - rdx = rcx + rdx - rdx |= -(rdx < rcx) */ - add %rcx, %rdx - sbb %rax, %rax - or %rax, %rdx - sub $16, %rdx + /* "rcx" is less than 16. Calculate "rdx + rcx - 16" by using + "rdx - (16 - rcx)" instead of "(rdx + rcx) - 16" to avoid + possible addition overflow. */ + neg %rcx + add $16, %rcx + sub %rcx, %rdx jbe L(return_null) add $16, %rdi sub $64, %rdx -- 2.11.4.GIT