1 /* Copyright (C) 2012-2017 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library. If not, see
17 <http://www.gnu.org/licenses/>. */
/* Replicated-byte constants: each one is a single byte value repeated in
   all eight bytes of a 64-bit word.  */
/* 0x01 in every byte; strcmp loads this into zeroones.  */
26 #define REP8_01 0x0101010101010101
/* 0x7f in every byte; ORed with the data word in the NUL-detection
   sequence.  */
27 #define REP8_7f 0x7f7f7f7f7f7f7f7f
/* 0x80 in every byte (the top bit of each byte).  Not referenced by any
   instruction visible in this listing.  */
28 #define REP8_80 0x8080808080808080
30 /* Parameters and result. */
35 /* Internal variables. */
/* NOTE(review): the operand names used by the code below (src1, src2,
   result, data1, data2, tmp1, ...) have no visible definition here;
   presumably the register-alias #defines belong under the two headings
   above -- confirm against the complete file.  */
/* strcmp: compare the byte strings addressed by src1 and src2 and leave
   the difference of the deciding bytes in result (zero when the strings
   are equal).

   NOTE(review): the symbolic operands used below (src1, src2, result,
   data1, data1w, data2, data2w, has_nul, diff, syndrome, tmp1, tmp2,
   tmp3, zeroones, pos) are not defined in the visible part of the file;
   presumably they are register aliases -- confirm.  Several labels,
   loads, compares and preprocessor conditionals also appear to be absent
   from this listing, so the comments added here describe only the
   instructions that are shown.

   Visible structure, in order:
     - an 8-bytes-at-a-time test that loops while both words are equal
       and contain no NUL byte;
     - computation of the result from the first differing or NUL byte;
     - set-up for sources that are mutually aligned but do not start on
       an 8-byte boundary;
     - a byte-at-a-time comparison.  */
49 /* Start of performance-critical section -- one 64B cache line. */
/* ENTRY_ALIGN is a macro defined outside this listing.  Presumably its
   second argument is a log2 alignment (2^6 = 64 bytes, matching the
   cache-line remark above) -- TODO confirm.  */
50 ENTRY_ALIGN(strcmp, 6)
/* zeroones = 0x01 in every byte; it is the "1" subtracted from each byte
   in the NUL test below.  */
55 mov zeroones, #REP8_01
60 /* NUL detection works on the principle that (X - 1) & (~X) & 0x80
61 (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
62 can be done in parallel across the entire word. */
/* data1 and data2 hold one 8-byte word from each string; the loads that
   fill them are not visible in this listing.
   tmp1 = data1 - zeroones, tmp2 = data1 | 0x7f..7f.  */
67 sub tmp1, data1, zeroones
68 orr tmp2, data1, #REP8_7f
69 eor diff, data1, data2 /* Non-zero if differences found. */
70 bic has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */
/* syndrome is non-zero when the words differ or data1 contains a NUL.  */
71 orr syndrome, diff, has_nul
/* Words identical and no NUL seen: branch back to L(loop_aligned).  */
72 cbz syndrome, L(loop_aligned)
73 /* End of performance-critical section -- one 64B cache line. */
/* Reverse the byte order of syndrome.  NOTE(review): presumably this is
   the little-endian path, where the earliest string byte sits in the
   least-significant byte; the endianness guard is not visible here.  */
76 rev syndrome, syndrome
78 /* The MS-non-zero bit of the syndrome marks either the first bit
79 that is different, or the top bit of the first zero byte.
80 Shifting left now will bring the critical information into the
   top bits.
86 /* But we need to zero-extend (char is unsigned) the value and then
87 perform a signed 32-bit subtraction. */
/* result = data1 - (data2 >> 56).  The instructions that bring the
   deciding byte to the top of each word and zero-extend data1 are not
   visible at this point in the listing.  */
89 sub result, data1, data2, lsr #56
92 /* For big-endian we cannot use the trick with the syndrome value
93 as carry-propagation can corrupt the upper bits if the trailing
94 bytes in the string contain 0x01. */
95 /* However, if there is no NUL byte in the dword, we can generate
96 the result directly. We can't just subtract the bytes as the
97 MSB might be significant. */
/* Negate result when the flags hold LO (unsigned lower).  The compare
   that sets the flags and the instruction that gives result its initial
   value are not visible in this listing.  */
101 cneg result, result, lo
104 /* Re-compute the NUL-byte detection, using a byte-reversed value. */
/* Same (X - 1) & ~(X | 0x7f) test as above, applied to tmp3; the
   instruction that produces tmp3 is not visible in this listing.  */
106 sub tmp1, tmp3, zeroones
107 orr tmp2, tmp3, #REP8_7f
108 bic has_nul, tmp1, tmp2
/* Merge the recomputed NUL marker with the difference bits.  */
110 orr syndrome, diff, has_nul
112 /* The MS-non-zero bit of the syndrome marks either the first bit
113 that is different, or the top bit of the first zero byte.
114 Shifting left now will bring the critical information into the
   top bits.
116 lsl data1, data1, pos
117 lsl data2, data2, pos
118 /* But we need to zero-extend (char is unsigned) the value and then
119 perform a signed 32-bit subtraction. */
/* Move the top byte of data1 down to bits 7:0, clearing everything
   above it.  */
120 lsr data1, data1, #56
/* result = (top byte of data1) - (top byte of data2).  */
121 sub result, data1, data2, lsr #56
126 /* Sources are mutually aligned, but are not currently at an
127 alignment boundary. Round down the addresses and then mask off
128 the bytes that precede the start point. */
/* tmp1 is assumed to hold the byte offset past the alignment boundary;
   the instruction that sets it is not visible here -- TODO confirm.  */
131 lsl tmp1, tmp1, #3 /* Bytes beyond alignment -> bits. */
/* Load 8 bytes from src1, then advance src1 by 8 (post-index).  */
132 ldr data1, [src1], #8
133 neg tmp1, tmp1 /* Bits to alignment -64. */
/* Load 8 bytes from src2, then advance src2 by 8 (post-index).  */
134 ldr data2, [src2], #8
/* NOTE(review): both endian variants of the mask shift appear below
   without the preprocessor conditional that presumably selects exactly
   one of them, and the initial value of tmp2 is not visible -- confirm
   against the complete file.  */
137 /* Big-endian. Early bytes are at MSB. */
138 lsl tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */
140 /* Little-endian. Early bytes are at LSB. */
141 lsr tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */
/* OR the same mask into both words: every bit set in tmp2 becomes set in
   data1 and in data2 alike, so those bits cannot contribute to diff.  */
143 orr data1, data1, tmp2
144 orr data2, data2, tmp2
148 /* We can do better than this. */
/* Byte-at-a-time path: load one byte from each string (zero-extended)
   and advance each pointer by 1 (post-index).  */
149 ldrb data1w, [src1], #1
150 ldrb data2w, [src2], #1
/* If the flags hold CS, compare data1w with data2w; otherwise force
   NZCV to 0b0000.  The compare that sets CS and the branch that consumes
   the outcome are not visible in this listing.  */
152 ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */
/* result = data1 - data2.  Presumably data1w/data2w are the 32-bit views
   of data1/data2, making this the difference of the two bytes just
   loaded -- TODO confirm against the alias definitions.  */
154 sub result, data1, data2
/* libc_hidden_builtin_def is a macro defined outside this listing;
   NOTE(review): presumably it provides the hidden internal alias for
   strcmp -- confirm.  */
157 libc_hidden_builtin_def (strcmp)