1 /* Copyright (C) 2012-2016 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library. If not, see
17 <http://www.gnu.org/licenses/>. */
/* Byte-replicated 64-bit masks: each constant repeats one byte value in
   all eight bytes of a 64-bit word, so one instruction can apply it to
   every byte of a word at once. */
/* 0x01 in every byte: loaded into zeroones and subtracted from the data
   word as the (X - 1) step of the NUL test. */
26 #define REP8_01 0x0101010101010101
/* 0x7f in every byte: ORed into the data word as the (X | 0x7f) step of
   the NUL test. */
27 #define REP8_7f 0x7f7f7f7f7f7f7f7f
/* 0x80 in every byte (the per-byte top-bit mask); not referenced by any
   instruction in the part of the file shown here. */
28 #define REP8_80 0x8080808080808080
30 /* Parameters and result. */
35 /* Internal variables. */
/* strcmp entry point and word-at-a-time comparison step.
   One 64-bit word from each string (data1, data2) is examined at a time
   and folded into a "syndrome" that is non-zero as soon as the two words
   differ or data1 contains a NUL byte. Only data1 is tested for NUL: a
   NUL that appears in data2 alone already shows up as a difference.
   NOTE(review): the loads that fill data1/data2, the L(loop_aligned)
   label and the alignment checks that select this path are not visible
   in this excerpt.
   NOTE(review): the second ENTRY_ALIGN argument (6) presumably requests
   2^6 = 64-byte alignment, matching the cache-line remark below --
   confirm against the macro definition. */
49 /* Start of performance-critical section -- one 64B cache line. */
50 ENTRY_ALIGN(strcmp, 6)
53 mov zeroones, #REP8_01 /* 0x01 in every byte, for the (X - 1) step. */
58 /* NUL detection works on the principle that (X - 1) & (~X) & 0x80
59 (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
60 can be done in parallel across the entire word. */
65 sub tmp1, data1, zeroones /* tmp1 = X - 0x01..01. */
66 orr tmp2, data1, #REP8_7f /* tmp2 = X | 0x7f..7f. */
67 eor diff, data1, data2 /* Non-zero if differences found. */
68 bic has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */
69 orr syndrome, diff, has_nul /* Non-zero if either was found. */
70 cbz syndrome, L(loop_aligned) /* Neither found: go to L(loop_aligned). */
71 /* End of performance-critical section -- one 64B cache line. */
/* Turn a non-zero syndrome into the return value.
   NOTE(review): presumably the little-endian variant; the conditional
   that selects it, the computation of the shift amount, and the shifts
   that reduce data1/data2 to the deciding byte are not visible in this
   excerpt. */
74 rev syndrome, syndrome /* Byte-reverse the syndrome. */
76 /* The MS-non-zero bit of the syndrome marks either the first bit
77 that is different, or the top bit of the first zero byte.
78 Shifting left now will bring the critical information into the
top bits. */
84 /* But we need to zero-extend (char is unsigned) the value and then
85 perform a signed 32-bit subtraction. */
87 sub result, data1, data2, lsr #56 /* result = data1 - (data2 >> 56). */
/* Result computation for big-endian, as described by the comments below.
   NOTE(review): several instructions of this sequence are not visible in
   this excerpt: whatever tests has_nul and sets the flags ahead of cneg,
   the computation of tmp3, and any step between the bic and the final
   orr. Treat the notes on individual lines as provisional until checked
   against the complete file. */
90 /* For big-endian we cannot use the trick with the syndrome value
91 as carry-propagation can corrupt the upper bits if the trailing
92 bytes in the string contain 0x01. */
93 /* However, if there is no NUL byte in the dword, we can generate
94 the result directly. We can't just subtract the bytes as the
95 MSB might be significant. */
99 cneg result, result, lo /* Negate result if the flags read "lo" (unsigned lower). */
102 /* Re-compute the NUL-byte detection, using a byte-reversed value. */
/* NOTE(review): tmp3 is presumably the byte-reversed copy of data1 that
   the comment above refers to; its computation is not visible here. */
104 sub tmp1, tmp3, zeroones /* tmp1 = tmp3 - 0x01..01. */
105 orr tmp2, tmp3, #REP8_7f /* tmp2 = tmp3 | 0x7f..7f. */
106 bic has_nul, tmp1, tmp2 /* has_nul = tmp1 & ~tmp2. */
108 orr syndrome, diff, has_nul /* Combine with the difference mask. */
110 /* The MS-non-zero bit of the syndrome marks either the first bit
111 that is different, or the top bit of the first zero byte.
112 Shifting left now will bring the critical information into the top bits. */
/* Shift the deciding bits of both words to the top, reduce each word to
   its top 8 bits and return their difference.
   NOTE(review): pos is presumably the shift count derived from the
   syndrome; its computation is not visible in this excerpt. */
114 lsl data1, data1, pos /* Bring the first differing/NUL bit of data1 to the top. */
115 lsl data2, data2, pos /* Same shift for data2. */
116 /* But we need to zero-extend (char is unsigned) the value and then
117 perform a signed 32-bit subtraction. */
118 lsr data1, data1, #56 /* Keep the top 8 bits, zero-extended. */
119 sub result, data1, data2, lsr #56 /* result = data1 - (data2 >> 56). */
/* Start-up for strings that share the same offset from an alignment
   boundary: load the first word of each and neutralise the bytes that lie
   before the real start of the strings.
   NOTE(review): the instructions that round src1/src2 down, compute the
   initial tmp1 and initialise tmp2, as well as the branch that rejoins
   the main comparison, are not visible in this excerpt. */
124 /* Sources are mutually aligned, but are not currently at an
125 alignment boundary. Round down the addresses and then mask off
126 the bytes that precede the start point. */
129 lsl tmp1, tmp1, #3 /* Bytes beyond alignment -> bits. */
130 ldr data1, [src1], #8 /* Load a word; post-increment src1 by 8. */
131 neg tmp1, tmp1 /* Bits to alignment -64. */
132 ldr data2, [src2], #8 /* Load a word; post-increment src2 by 8. */
/* The two shifts below are endian-specific alternatives.
   NOTE(review): the conditional that selects one of them is not visible
   in this excerpt -- confirm only one is assembled. */
135 /* Big-endian. Early bytes are at MSB. */
136 lsl tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */
138 /* Little-endian. Early bytes are at LSB. */
139 lsr tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */
/* OR the same mask into both words so the bytes before the start point
   are identical in data1 and data2.
   NOTE(review): assumes tmp2 started as all-ones, which would also keep
   those bytes from looking like a NUL; its initialisation is not visible
   here. */
141 orr data1, data1, tmp2
142 orr data2, data2, tmp2
/* Byte-at-a-time comparison: loads one byte from each string and returns
   their difference.
   NOTE(review): the label for this path, the flag-setting instruction
   ahead of ccmp, the branch that consumes the flags and the return are
   not visible in this excerpt. */
146 /* We can do better than this. */
147 ldrb data1w, [src1], #1 /* Load one byte; post-increment src1 by 1. */
148 ldrb data2w, [src2], #1 /* Load one byte; post-increment src2 by 1. */
/* If the earlier flag-setting instruction (not visible here) left carry
   set, compare the two bytes; otherwise force NZCV to the immediate 0. */
150 ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */
/* NOTE(review): data1/data2 are presumably the 64-bit views of
   data1w/data2w; the register definitions are not visible here. */
152 sub result, data1, data2 /* result = data1 - data2. */
/* NOTE(review): libc_hidden_builtin_def is defined outside this file;
   presumably it sets up the library-internal hidden alias for strcmp --
   confirm against its definition. */
155 libc_hidden_builtin_def (strcmp)