From 462a5227b0d3220ab68f65272bd5b9d6d4f49b1f Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 9 Mar 2010 06:42:53 -0800 Subject: [PATCH] sparc: Optimize strlen using techniques from powerpc implementation. --- ChangeLog | 5 + sysdeps/sparc/sparc32/sparcv9/strlen.S | 3 - sysdeps/sparc/sparc32/strlen.S | 182 ++++++++++------------- sysdeps/sparc/sparc64/strlen.S | 260 +++++++++++---------------------- 4 files changed, 167 insertions(+), 283 deletions(-) rewrite sysdeps/sparc/sparc32/strlen.S (67%) rewrite sysdeps/sparc/sparc64/strlen.S (83%) diff --git a/ChangeLog b/ChangeLog index afb549b93f..7e5e7bb5b2 100644 --- a/ChangeLog +++ b/ChangeLog @@ -6,6 +6,11 @@ * sysdeps/sparc/sparc32/memcpy.S: Fix build. + * sysdeps/sparc/sparc32/strlen.S: Optimize. + * sysdeps/sparc/sparc64/strlen.S: Likewise. + * sysdeps/sparc/sparc32/sparcv9/strlen.S (ASI_PNF, ASI_BLK_P, + XCC): Delete definitions, not needed. + 2010-03-07 Ulrich Drepper * sysdeps/unix/sysv/linux/internal_statvfs.c (__statvfs_getflags): diff --git a/sysdeps/sparc/sparc32/sparcv9/strlen.S b/sysdeps/sparc/sparc32/sparcv9/strlen.S index b8f4dba4f4..28a216c076 100644 --- a/sysdeps/sparc/sparc32/sparcv9/strlen.S +++ b/sysdeps/sparc/sparc32/sparcv9/strlen.S @@ -1,4 +1 @@ -#define ASI_PNF 0x82 -#define ASI_BLK_P 0xf0 -#define XCC icc #include diff --git a/sysdeps/sparc/sparc32/strlen.S b/sysdeps/sparc/sparc32/strlen.S dissimilarity index 67% index ed92f20e28..2945bb5484 100644 --- a/sysdeps/sparc/sparc32/strlen.S +++ b/sysdeps/sparc/sparc32/strlen.S @@ -1,106 +1,76 @@ -/* Determine the length of a string. - For SPARC v7. - Copyright (C) 1996, 1999, 2003 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Jakub Jelinek . 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ - -#include - - /* Normally, this uses ((xword - 0x01010101) & 0x80808080) test - to find out if any byte in xword could be zero. This is fast, but - also gives false alarm for any byte in range 0x81-0xff. It does - not matter for correctness, as if this test tells us there could - be some zero byte, we check it byte by byte, but if bytes with - high bits set are common in the strings, then this will give poor - performance. You can #define EIGHTBIT_NOT_RARE and the algorithm - will use one tick slower, but more precise test - ((xword - 0x01010101) & (~xword) & 0x80808080), - which does not give any false alarms (but if some bits are set, - one cannot assume from it which bytes are zero and which are not). - It is yet to be measured, what is the correct default for glibc - in these days for an average user. 
- */ - - .text - .align 4 - -ENTRY(strlen) - mov %o0, %o1 - andcc %o0, 3, %g0 - be 20f - sethi %hi(0x80808080), %o4 - - ldub [%o0], %o5 - cmp %o5, 0 - be 21f - add %o0, 1, %o0 - andcc %o0, 3, %g0 - be 4f - or %o4, %lo(0x80808080), %o3 - ldub [%o0], %o5 - cmp %o5, 0 - be 22f - add %o0, 1, %o0 - andcc %o0, 3, %g0 - be 5f - sethi %hi(0x01010101), %o4 - ldub [%o0], %o5 - cmp %o5, 0 - be 23f - add %o0, 1, %o0 - b 11f - or %o4, %lo(0x01010101), %o2 -21: retl - mov 0, %o0 -22: retl - mov 1, %o0 -23: retl - mov 2, %o0 - -20: or %o4, %lo(0x80808080), %o3 -4: sethi %hi(0x01010101), %o4 -5: or %o4, %lo(0x01010101), %o2 -11: ld [%o0], %o5 -12: sub %o5, %o2, %o4 -#ifdef EIGHTBIT_NOT_RARE - andn %o4, %o5, %o4 -#endif - andcc %o4, %o3, %g0 - be 11b - add %o0, 4, %o0 - - srl %o5, 24, %g5 - andcc %g5, 0xff, %g0 - be 13f - add %o0, -4, %o4 - srl %o5, 16, %g5 - andcc %g5, 0xff, %g0 - be 13f - add %o4, 1, %o4 - srl %o5, 8, %g5 - andcc %g5, 0xff, %g0 - be 13f - add %o4, 1, %o4 - andcc %o5, 0xff, %g0 - bne,a 12b - ld [%o0], %o5 - add %o4, 1, %o4 -13: retl - sub %o4, %o1, %o0 -END(strlen) -libc_hidden_builtin_def (strlen) +/* Determine the length of a string. + For SPARC v7. + Copyright (C) 1996, 1999, 2003, 2010 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Jakub Jelinek and + David S. Miller . + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include	/* NOTE(review): the header name is missing from this text (presumably <sysdep.h>, which would supply ENTRY/END) -- confirm against the upstream file. */
+
+	.text
+	.align 4
+/* strlen: on entry %o0 = string pointer, on return %o0 = string length. Reads one aligned 32-bit word per iteration, starting with the aligned word that contains the first byte. */
+ENTRY(strlen)
+	mov	%o0, %o1		/* %o1 = original pointer, kept for the final subtraction */
+	andn	%o0, 0x3, %o0		/* %o0 = pointer rounded down to a 4-byte boundary */
+/* Load the first word and neutralise the bytes in it that lie before the string. */
+	ld	[%o0], %o5		/* %o5 = current word under test */
+	and	%o1, 0x3, %g1		/* %g1 = misalignment in bytes (0..3) */
+	mov	-1, %g5			/* all ones, source for the lead-in mask */
+
+	sethi	%hi(0x01010101), %o2
+	sll	%g1, 3, %g1		/* misalignment in bits (0, 8, 16 or 24) */
+
+	or	%o2, %lo(0x01010101), %o2	/* %o2 = 0x01010101 */
+	srl	%g5, %g1, %g2		/* %g2 = ones over the bytes that belong to the string */
+
+	orn	%o5, %g2, %o5		/* set the bytes before the string to 0xff so they cannot match as a terminator */
+	sll	%o2, 7, %o3		/* %o3 = 0x80808080 */
+10:	add	%o0, 4, %o0		/* advance first: from here %o0 is 4 past the address of the word in %o5 */
+/* Word w contains a zero byte exactly when (w - 0x01010101) & ~w & 0x80808080 is non-zero. */
+	andn	%o3, %o5, %g1		/* 0x80808080 & ~w */
+	sub	%o5, %o2, %g2		/* w - 0x01010101 */
+
+	andcc	%g1, %g2, %g0		/* result is zero => no terminator in this word */
+	be,a	10b
+	 ld	[%o0], %o5		/* annulled delay slot: the next word is loaded only when the branch is taken */
+/* A zero byte is somewhere in %o5; find it in address order (big-endian load, so bits 31:24 are the lowest address). */
+	srl	%o5, 24, %g1
+
+	andcc	%g1, 0xff, %g0		/* byte 0 zero? */
+	be	90f
+	 sub	%o0, 4, %o0		/* delay slot, executed either way: %o0 = address of byte 0 */
+
+	srl	%o5, 16, %g2
+
+	andcc	%g2, 0xff, %g0		/* byte 1 zero? */
+	be	90f
+	 add	%o0, 1, %o0		/* delay slot, executed either way: %o0 = address of byte 1 */
+
+	srl	%o5, 8, %g1
+
+	andcc	%g1, 0xff, %g0		/* byte 2 zero? */
+	be	90f
+	 add	%o0, 1, %o0		/* delay slot, executed either way: %o0 = address of byte 2 */
+
+	add	%o0, 1, %o0		/* bytes 0..2 are non-zero, so the terminator is byte 3 */
+
+90:	retl
+	 sub	%o0, %o1, %o0		/* delay slot: length = terminator address - original pointer */
+END(strlen)
+libc_hidden_builtin_def (strlen)
diff --git a/sysdeps/sparc/sparc64/strlen.S b/sysdeps/sparc/sparc64/strlen.S
dissimilarity index 83%
index cc15e4e3fb..64350fb05e 100644
--- a/sysdeps/sparc/sparc64/strlen.S
+++ b/sysdeps/sparc/sparc64/strlen.S
@@ -1,174 +1,86 @@
-/* Determine the length of a string. For SPARC v9.
- Copyright (C) 1998, 1999, 2003 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Contributed by Jan Vondrak and
- Jakub Jelinek .
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
- - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ - -#include -#include - - /* Normally, this uses - ((xword - 0x0101010101010101) & 0x8080808080808080) test - to find out if any byte in xword could be zero. This is fast, but - also gives false alarm for any byte in range 0x81-0xff. It does - not matter for correctness, as if this test tells us there could - be some zero byte, we check it byte by byte, but if bytes with - high bits set are common in the strings, then this will give poor - performance. You can #define EIGHTBIT_NOT_RARE and the algorithm - will use one tick slower, but more precise test - ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080), - which does not give any false alarms (but if some bits are set, - one cannot assume from it which bytes are zero and which are not). - It is yet to be measured, what is the correct default for glibc - in these days for an average user. 
- */ - - .text - .align 32 -ENTRY(strlen) - sethi %hi(0x01010101), %g1 /* IEU0 Group */ - ldub [%o0], %o3 /* Load */ - or %g1, %lo(0x01010101), %g1 /* IEU0 Group */ - mov %o0, %o1 /* IEU1 */ - - sllx %g1, 32, %g4 /* IEU0 Group */ - andcc %o0, 7, %g0 /* IEU1 */ - or %g1, %g4, %g1 /* IEU0 Group */ - brz,pn %o3, 13f /* CTI+IEU1 */ - - sllx %g1, 7, %g4 /* IEU0 Group */ - bne,a,pn %icc, 15f /* CTI */ - add %o0, 1, %o0 /* IEU1 */ - /* %g1 = 0x0101010101010101 * - * %g4 = 0x8080808080808080 * - * %o0 = string pointer * - * %o1 = start of string */ -1: ldx [%o0], %o3 /* Load Group */ - - add %o0, 8, %o0 /* IEU1 */ -2: sub %o3, %g1, %o2 /* IEU0 Group */ -#ifdef EIGHTBIT_NOT_RARE - andn %o2, %o3, %o5 /* IEU0 Group */ - ldxa [%o0] ASI_PNF, %o3 /* Load */ - andcc %o5, %g4, %g0 /* IEU1 Group */ -#else - ldxa [%o0] ASI_PNF, %o3 /* Load */ - andcc %o2, %g4, %g0 /* IEU1 Group */ -#endif - - be,pt %xcc, 2b /* CTI */ - add %o0, 8, %o0 /* IEU0 */ - addcc %o2, %g1, %g5 /* IEU1 Group */ -#ifdef EIGHTBIT_NOT_RARE - srlx %o5, 32, %o5 /* IEU0 */ - -3: andcc %o5, %g4, %g0 /* IEU1 Group */ -#else - srlx %o2, 32, %o2 /* IEU0 */ - -3: andcc %o2, %g4, %g0 /* IEU1 Group */ -#endif - be,pn %xcc, 4f /* CTI */ - srlx %g5, 56, %o2 /* IEU0 */ - andcc %o2, 0xff, %g0 /* IEU1 Group */ - - be,pn %icc, 12f /* CTI */ - srlx %g5, 48, %o2 /* IEU0 */ - andcc %o2, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 11f /* CTI */ - - srlx %g5, 40, %o2 /* IEU0 */ - andcc %o2, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 10f /* CTI */ - srlx %g5, 32, %o2 /* IEU0 */ - - andcc %o2, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 9f /* CTI */ -4: srlx %g5, 24, %o2 /* IEU0 */ - andcc %o2, 0xff, %g0 /* IEU1 Group */ - - be,pn %icc, 8f /* CTI */ - srlx %g5, 16, %o2 /* IEU0 */ - andcc %o2, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 7f /* CTI */ - - srlx %g5, 8, %o2 /* IEU0 */ - andcc %o2, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 6f /* CTI */ - sub %o3, %g1, %o2 /* IEU0 */ - - andcc %g5, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 5f /* CTI */ - 
ldxa [%o0] ASI_PNF, %o3 /* Load */ - andcc %o2, %g4, %g0 /* IEU1 Group */ - - be,pt %xcc, 2b /* CTI */ - add %o0, 8, %o0 /* IEU0 */ - addcc %o2, %g1, %g5 /* IEU1 Group */ - ba,pt %xcc, 3b /* CTI */ - - srlx %o2, 32, %o2 /* IEU0 */ -5: add %o0, -9, %o0 /* IEU0 Group */ - retl /* CTI+IEU1 Group */ - sub %o0, %o1, %o0 /* IEU0 */ - -6: add %o0, -10, %o0 /* IEU0 Group */ - retl /* CTI+IEU1 Group */ - sub %o0, %o1, %o0 /* IEU0 */ -7: add %o0, -11, %o0 /* IEU0 Group */ - - retl /* CTI+IEU1 Group */ - sub %o0, %o1, %o0 /* IEU0 */ -8: add %o0, -12, %o0 /* IEU0 Group */ - retl /* CTI+IEU1 Group */ - - sub %o0, %o1, %o0 /* IEU0 */ -9: add %o0, -13, %o0 /* IEU0 Group */ - retl /* CTI+IEU1 Group */ - sub %o0, %o1, %o0 /* IEU0 */ - -10: add %o0, -14, %o0 /* IEU0 Group */ - retl /* CTI+IEU1 Group */ - sub %o0, %o1, %o0 /* IEU0 */ -11: add %o0, -15, %o0 /* IEU0 Group */ - - retl /* CTI+IEU1 Group */ - sub %o0, %o1, %o0 /* IEU0 */ -12: add %o0, -16, %o0 /* IEU0 Group */ - retl /* CTI+IEU1 Group */ - - sub %o0, %o1, %o0 /* IEU0 */ -13: retl /* CTI+IEU1 Group */ - mov 0, %o0 /* IEU0 */ - nop - -15: ldub [%o0], %o3 /* Load Group */ -16: andcc %o0, 7, %g0 /* IEU1 */ - be,pn %icc, 1b /* CTI */ - nop /* IEU0 Group */ - - add %o0, 1, %o0 /* IEU1 */ - andcc %o3, 0xff, %g0 /* IEU1 Group */ - bne,a,pt %icc, 16b /* CTI */ - lduba [%o0] ASI_PNF, %o3 /* Load */ - - add %o0, -1, %o0 /* IEU0 Group */ - retl /* CTI+IEU1 Group */ - sub %o0, %o1, %o0 /* IEU0 */ -END(strlen) -libc_hidden_builtin_def (strlen) +/* Determine the length of a string. For SPARC v9. + Copyright (C) 1998, 1999, 2003, 2010 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Jan Vondrak , + Jakub Jelinek , and + David S. Miller . + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include	/* NOTE(review): the header name is missing from this text (presumably <sysdep.h>, which would supply ENTRY/END) -- confirm against the upstream file. */
+/* %g2 and %g3 are both written below; declare them as scratch registers to the assembler. */
+	.register	%g2, #scratch
+	.register	%g3, #scratch
+/* strlen: on entry %o0 = string pointer, on return %o0 = string length. Reads one aligned 64-bit word per iteration; the tail selects the result with conditional moves instead of branches. */
+	.text
+	.align 32
+ENTRY(strlen)
+	mov	%o0, %o1		/* %o1 = original pointer, kept for the final subtraction */
+	andn	%o0, 0x7, %o0		/* %o0 = pointer rounded down to an 8-byte boundary */
+/* Load the first word and neutralise the bytes in it that lie before the string. */
+	ldx	[%o0], %o5		/* %o5 = current word under test */
+	and	%o1, 0x7, %g1		/* %g1 = misalignment in bytes (0..7) */
+	mov	-1, %g5			/* all ones, source for the lead-in mask */
+
+	sethi	%hi(0x01010101), %o2
+	sll	%g1, 3, %g1		/* misalignment in bits (0..56) */
+
+	or	%o2, %lo(0x01010101), %o2	/* %o2 = 0x01010101 (low half of the constant) */
+	srlx	%g5, %g1, %o3		/* %o3 = ones over the bytes that belong to the string (temporary use of %o3) */
+
+	sllx	%o2, 32, %g1
+	sethi	%hi(0x0000ff00), %g5	/* start building 0xff00; the matching "or" is after the loop */
+
+	orn	%o5, %o3, %o5		/* set the bytes before the string to 0xff so they cannot match as a terminator */
+	or	%o2, %g1, %o2		/* %o2 = 0x0101010101010101 */
+
+	sllx	%o2, 7, %o3		/* %o3 = 0x8080808080808080 (the lead-in mask is no longer needed) */
+10:	add	%o0, 8, %o0		/* advance first: from here %o0 is 8 past the address of the word in %o5 */
+/* Word w contains a zero byte exactly when (w - 0x0101..01) & ~w & 0x8080..80 is non-zero. */
+	andn	%o3, %o5, %g1		/* 0x8080..80 & ~w */
+	sub	%o5, %o2, %g2		/* w - 0x0101..01 */
+
+	andcc	%g1, %g2, %g0		/* 64-bit result is zero => no terminator in this word */
+	be,a,pt	%xcc, 10b
+	 ldx	[%o0], %o5		/* annulled delay slot: the next word is loaded only when the branch is taken */
+	srlx	%o5, 32, %g1		/* %g1 = first four bytes (lower addresses) of the word */
+/* Repeat the zero-byte test on the first half only; %icc looks at the low 32 bits of the result. */
+	andn	%o3, %g1, %o4
+	sub	%g1, %o2, %g2
+
+	add	%o0, 4, %g3		/* %g3 = base to use when the terminator is in the second half */
+	andcc	%o4, %g2, %g0		/* non-zero in the low 32 bits => terminator is in the first half */
+	movne	%icc, %g1, %o5		/* first half has it: examine those four bytes, %o0 stays as is */
+
+	move	%icc, %g3, %o0		/* otherwise keep the low 32 bits of %o5 and shift the base by 4 */
+	or	%g5, %lo(0x0000ff00), %g5	/* %g5 = 0x0000ff00, mask for byte 2 of the chosen half */
+	mov	3 - 8, %g2		/* default offset: terminator is byte 3 (base is 8 past the half's start) */
+
+	andcc	%o5, %g5, %g0		/* byte 2 zero? */
+	srlx	%o5, 16, %g1
+	move	%icc, 2 - 8, %g2
+
+	andcc	%g1, 0xff, %g0		/* byte 1 zero? later moves override earlier ones, so the lowest address wins */
+	srl	%o5, 24, %o5		/* 32-bit shift: %o5 = byte 0 of the chosen half */
+	move	%icc, 1 - 8, %g2
+
+	movrz	%o5, 0 - 8, %g2		/* byte 0 zero? */
+	sub	%o0, %o1, %o0		/* base - original pointer */
+
+	retl
+	 add	%o0, %g2, %o0		/* delay slot: add the selected (negative) byte offset to get the length */
+END(strlen)
+libc_hidden_builtin_def (strlen)
--
2.11.4.GIT