Replace FSF snail mail address with URLs.
[glibc.git] / sysdeps / powerpc / powerpc32 / power7 / strncmp.S
blob0eba40333a0a2fafe4e1c58f394eefc3f91e20a4
1 /* Optimized strncmp implementation for POWER7/PowerPC32.
2    Copyright (C) 2010, 2011 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
5    The GNU C Library is free software; you can redistribute it and/or
6    modify it under the terms of the GNU Lesser General Public
7    License as published by the Free Software Foundation; either
8    version 2.1 of the License, or (at your option) any later version.
10    The GNU C Library is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13    Lesser General Public License for more details.
15    You should have received a copy of the GNU Lesser General Public
16    License along with the GNU C Library; if not, see
17    <http://www.gnu.org/licenses/>.  */
19 #include <sysdep.h>
20 #include <bp-sym.h>
21 #include <bp-asm.h>
23 /* See strlen.s for comments on how the end-of-string testing works.  */
25 /* int [r3] strncmp (const char *s1 [r3],
26                      const char *s2 [r4],
27                      size_t size [r5])  */
29 EALIGN (BP_SYM(strncmp),5,0)   /* Compare at most rN bytes of rSTR1 and rSTR2; the result is returned in rRTN.  */
31 #define rTMP    r0      /* scratch */
32 #define rRTN    r3      /* return value; same register as rSTR1 */
33 #define rSTR1   r3      /* first string arg */
34 #define rSTR2   r4      /* second string arg */
35 #define rN      r5      /* max string length */
36 /* Note:  The Bounded pointer support in this code is broken.  This code
37    was inherited from PPC32 and that support was never completed.
38    Current PPC gcc does not support -fbounds-check or -fbounded-pointers.  */
39 #define rWORD1  r6      /* current word (or byte, in the byte loop) in s1 */
40 #define rWORD2  r7      /* current word (or byte, in the byte loop) in s2 */
41 #define rWORD3  r10     /* alternate s1 byte in the byte loop; same register as rNEG */
42 #define rWORD4  r11     /* alternate s2 byte in the byte loop; same register as rBITDIF */
43 #define rFEFE   r8      /* constant 0xfefefeff (-0x01010101) */
44 #define r7F7F   r9      /* constant 0x7f7f7f7f */
45 #define rNEG    r10     /* ~(word in s1 | 0x7f7f7f7f) */
46 #define rBITDIF r11     /* bits that differ in s1 & s2 words */
48         dcbt    0,rSTR1         /* cache-touch hint for the start of s1 */
49         nop
50         or      rTMP,rSTR2,rSTR1        /* merge both addresses so one test covers both alignments */
51         lis     r7F7F,0x7f7f    /* upper half of 0x7f7f7f7f; completed on the aligned path */
52         dcbt    0,rSTR2         /* cache-touch hint for the start of s2 */
53         nop
54         clrlwi. rTMP,rTMP,30    /* keep the low 2 address bits; cr0 is "ne" if either pointer is not word aligned */
55         cmplwi  cr1,rN,0        /* cr1: n compared with 0, consumed at L(unaligned) */
56         lis     rFEFE,-0x101    /* upper half of 0xfefefeff; completed on the aligned path */
57         bne     L(unaligned)    /* at least one pointer is misaligned: compare byte by byte */
58 /* We are word aligned so set up for two loops.  First a word
59    loop, then fall into the byte loop if any residual.  */
60         srwi.   rTMP,rN,2       /* number of whole words (n / 4); cr0 is "eq" if there are none */
61         clrlwi  rN,rN,30        /* rN = residual byte count (n mod 4) */
62         addi    rFEFE,rFEFE,-0x101      /* rFEFE = 0xfefefeff */
63         addi    r7F7F,r7F7F,0x7f7f      /* r7F7F = 0x7f7f7f7f */
64         cmplwi  cr1,rN,0        /* cr1: residual count compared with 0 */
65         beq     L(unaligned)    /* cr0 from srwi.: no whole word, so n < 4 and rN still equals n */
67         mtctr   rTMP            /* CTR = whole-word count */
68         lwz     rWORD1,0(rSTR1) /* first word of s1 */
69         lwz     rWORD2,0(rSTR2) /* first word of s2 */
70         b       L(g1)           /* enter the loop below its loads */
72 L(g0):  /* Word loop: one word of each string per iteration.  */
73         lwzu    rWORD1,4(rSTR1) /* advance rSTR1 and load the next s1 word; note this happens before the mismatch branch */
74         bne     cr1,L(different)        /* the previous word pair (cr1 from the cmpw below) did not match */
75         lwzu    rWORD2,4(rSTR2) /* advance rSTR2 and load the next s2 word */
76 L(g1):  add     rTMP,rFEFE,rWORD1       /* rWORD1 - 0x01010101 */
77         nor     rNEG,r7F7F,rWORD1       /* ~(rWORD1 | 0x7f7f7f7f) */
78         bdz     L(tail)         /* decrement CTR; the last whole word is finished at L(tail) */
79         and.    rTMP,rTMP,rNEG  /* zero-byte test: cr0 is "ne" when rWORD1 contains a zero byte */
80         cmpw    cr1,rWORD1,rWORD2       /* cr1: do the two words match? */
81         beq     L(g0)           /* no zero byte in the s1 word: loop (a mismatch is acted on at the top of the loop) */
83 /* OK. We've hit the end of the string. We need to be careful that
84    we don't compare two strings as different because of gunk beyond
85    the end of the strings...  */
87 L(endstring):
88         and     rTMP,r7F7F,rWORD1       /* low 7 bits of every s1 byte */
89         beq     cr1,L(equal)    /* words are identical and s1 ends inside this word */
90         add     rTMP,rTMP,r7F7F /* bit 7 of each byte is now set when that byte's low 7 bits are nonzero */
91         xor.    rBITDIF,rWORD1,rWORD2   /* differing bits; cr0 is "lt" when the top bit differs */
93         andc    rNEG,rNEG,rTMP  /* rNEG & ~rTMP: a 0x80 marker remains only in bytes of rWORD1 that are zero */
94         blt     L(highbit)      /* top bit differs, so the most significant bytes differ */
95         cntlzw  rBITDIF,rBITDIF /* bit index (from the top) of the first difference */
96         cntlzw  rNEG,rNEG       /* bit index (from the top) of the first zero-byte marker */
97         addi    rNEG,rNEG,7     /* index of the last bit of that zero byte */
98         cmpw    cr1,rNEG,rBITDIF
99         sub     rRTN,rWORD1,rWORD2      /* top bits agree here (the blt above was not taken) */
100         blt     cr1,L(equal)    /* the zero byte ends before the first differing bit: strings are equal */
101         srawi   rRTN,rRTN,31    /* 0 or -1, from the sign of the difference */
102         ori     rRTN,rRTN,1     /* result is 1 or -1 */
103         blr
104 L(equal):
105         li      rRTN,0          /* strings compare equal */
106         blr
108 L(different):   /* Words differ and the s1 word involved has no zero byte.  */
109         lwzu    rWORD1,-4(rSTR1)        /* step rSTR1 back by 4 and reload the s1 word that mismatched */
110         xor.    rBITDIF,rWORD1,rWORD2   /* cr0 is "lt" when the top bit differs */
111         sub     rRTN,rWORD1,rWORD2
112         blt     L(highbit)
113         srawi   rRTN,rRTN,31    /* 0 or -1, from the sign of the difference */
114         ori     rRTN,rRTN,1     /* result is 1 or -1 */
115         blr
116 L(highbit):     /* Top bits differ: return the difference of the most significant bytes.  */
117         srwi    rWORD2,rWORD2,24        /* most significant byte of the s2 word, zero extended */
118         srwi    rWORD1,rWORD1,24        /* most significant byte of the s1 word, zero extended */
119         sub     rRTN,rWORD1,rWORD2
120         blr
123 /* Oh well. In this case, we just do a byte-by-byte comparison.  */
124         .align  4
125 L(tail):        /* Last whole word (CTR reached zero in the word loop).  */
126         and.    rTMP,rTMP,rNEG  /* zero-byte test on the last s1 word */
127         cmpw    cr1,rWORD1,rWORD2       /* cr1: do the last two words match? */
128         bne     L(endstring)    /* s1 ends inside this word */
129         addi    rSTR1,rSTR1,4   /* step past this word; L(different) steps back by 4 again */
130         bne     cr1,L(different)
131         addi    rSTR2,rSTR2,4
132         cmplwi  cr1,rN,0        /* cr1: any residual bytes left? */
133 L(unaligned):   /* Byte loop entry; cr1 holds rN compared with 0 on every path here.  */
134         mtctr   rN              /* CTR = number of bytes still allowed */
135         ble     cr1,L(ux)       /* nothing left to compare: return 0 */
136 L(uz):
137         lbz     rWORD1,0(rSTR1) /* first byte of each string, zero extended */
138         lbz     rWORD2,0(rSTR2)
139         .align  4
140 L(u1):  /* Byte loop, unrolled four times, alternating rWORD1/rWORD2 with rWORD3/rWORD4.  */
141         cmpwi   cr1,rWORD1,0    /* cr1: is the s1 byte zero? */
142         bdz     L(u4)           /* byte count exhausted: result is the difference of this last pair */
143         cmpw    rWORD1,rWORD2   /* cr0: do the two bytes match? */
144         beq     cr1,L(u4)       /* s1 ended */
145         bne     L(u4)           /* bytes differ */
146         lbzu    rWORD3,1(rSTR1) /* advance both pointers and load the next byte pair */
147         lbzu    rWORD4,1(rSTR2)
148         cmpwi   cr1,rWORD3,0    /* same three exit tests, on the rWORD3/rWORD4 pair */
149         bdz     L(u3)
150         cmpw    rWORD3,rWORD4
151         beq     cr1,L(u3)
152         bne     L(u3)
153         lbzu    rWORD1,1(rSTR1)
154         lbzu    rWORD2,1(rSTR2)
155         cmpwi   cr1,rWORD1,0
156         bdz     L(u4)
157         cmpw    rWORD1,rWORD2
158         beq     cr1,L(u4)
159         bne     L(u4)
160         lbzu    rWORD3,1(rSTR1)
161         lbzu    rWORD4,1(rSTR2)
162         cmpwi   cr1,rWORD3,0
163         bdz     L(u3)
164         cmpw    rWORD3,rWORD4
165         beq     cr1,L(u3)
166         bne     L(u3)
167         lbzu    rWORD1,1(rSTR1) /* load the pair for the next trip through the loop */
168         lbzu    rWORD2,1(rSTR2)
169         b       L(u1)
171 L(u3):  sub     rRTN,rWORD3,rWORD4      /* difference of the rWORD3/rWORD4 byte pair */
172         blr
173 L(u4):  sub     rRTN,rWORD1,rWORD2      /* difference of the rWORD1/rWORD2 byte pair */
174         blr
175 L(ux):
176         li      rRTN,0          /* no bytes were left to compare */
177         blr
178 END (BP_SYM (strncmp))
179 libc_hidden_builtin_def (strncmp)