1 /* Copyright (C) 1996-2021 Free Software Foundation, Inc.
2 Contributed by Richard Henderson (rth@tamu.edu)
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library. If not, see
17 <https://www.gnu.org/licenses/>. */
19 /* Bytewise compare two null-terminated strings of length no longer than N. */
26 /* EV6 only predicts one branch per octaword. We'll use ev6_unop to push
27 subsequent branches back to the next bundle. This will generally add
28 a fetch+decode cycle to older machines, so skip the padding in that case. */
30 # define ev6_unop unop
47 xor a0, a1, t2 # t2 = s1 ^ s2: low 3 bits are zero iff s1 and s2 are co-aligned
49 ldq_u t0, 0(a0) # t0 = first word of s1, load asap to give cache time to catch up
54 and a0, 7, t4 # t4 = s1 & 7: find s1 misalignment
55 and a1, 7, t5 # t5 = s2 & 7: find s2 misalignment
56 cmovlt a2, t6, a2 # if count < 0 as a signed value, replace it with t6: bound neg count to LONG_MAX
57 addq a1, a2, a3 # a3 = s2+count
58 addq a2, t4, a2 # bias count by s1 misalignment
59 and a2, 7, t10 # t10 = ofs of last byte in s1 last word
60 srl a2, 3, a2 # a2 = remaining full words in s1 count
63 /* On entry to this basic block:
64 t0 == the first word of s1.
65 t1 == the first word of s2.
68 mskqh t3, a1, t8 # t8 = t3 with the bytes below offset (a1 & 7) cleared: mask off leading garbage
71 cmpbge zero, t1, t7 # t7 bit i set iff byte i of t1 is zero: bits set iff null found
72 beq a2, $eoc # a2 == 0 means no full words remain: check end of count
76 /* Aligned compare main loop.
77 On entry to this basic block:
79 t1 == an s2 word not containing a null. */
84 bne t2, $wordcmp # .. e1 (zdb) : t2 != 0, go compare bytewise (t2 is presumably t0 ^ t1 from a line not shown here -- confirm)
85 ldq_u t1, 8(a1) # e0 : load next s2 word
86 ldq_u t0, 8(a0) # .. e1 : load next s1 word
89 addq a1, 8, a1 # .. e1 : advance s2 pointer one word
91 beq a2, $eoc # .. e1 : a2 == 0, end of count reached
93 cmpbge zero, t1, t7 # e0 : t7 = mask of zero bytes in the new s2 word
94 beq t7, $a_loop # .. e1 : no null in the s2 word, keep looping
98 /* Alternate aligned compare loop, for when there are no trailing
99 bytes on the count. We have to avoid reading too much data. */
102 xor t0, t1, t2 # e0 : t2 = s1 word ^ s2 word
105 bne t2, $wordcmp # .. e1 (zdb) : words differ, go compare bytewise
107 subq a2, 1, a2 # e0 : one fewer word left in the count
108 beq a2, $zerolength # .. e1 : count exhausted, leave via $zerolength
109 ldq_u t1, 8(a1) # e0 : load next s2 word
110 ldq_u t0, 8(a0) # .. e1 : load next s1 word
112 addq a1, 8, a1 # e0 : advance s2 pointer one word
113 addq a0, 8, a0 # .. e1 : advance s1 pointer one word
114 cmpbge zero, t1, t7 # e0 : t7 = mask of zero bytes in the new s2 word
115 beq t7, $ant_loop # .. e1 : no null in the s2 word, keep looping
119 /* The two strings are not co-aligned. Align s1 and cope. */
120 /* On entry to this basic block:
121 t0 == the first word of s1.
122 t1 == the first word of s2.
124 t4 == misalignment of s1.
125 t5 == misalignment of s2.
126 t10 == misalignment of s1 end. */
129 /* If s1 misalignment is larger than s2 misalignment, we need
130 extra startup checks to avoid SEGV. */
131 subq a1, t4, a1 # a1 -= t4: adjust s2 for s1 misalignment
133 subq a3, 1, a3 # step a3 back one byte: last byte of s2
135 mskqh t3, t5, t7 # t7 = t3 with the bytes below the s2 misalignment cleared: mask garbage in s2
138 srl a3, 3, a3 # a3 >>= 3: remaining full words in s2 count
141 /* Failing that, we need to look for both eos and eoc within the
142 first word of s2. If we find either, we can continue by
143 pretending that the next word of s2 is all zeros. */
144 lda t2, 0 # t2 = 0: next = zero
145 cmpeq a3, 0, t8 # t8 = 1 iff a3 == 0: eoc in the first word of s2?
146 cmpbge zero, t7, t7 # t7 = mask of zero bytes in t7: eos in the first word of s2?
150 /* We know just enough now to be able to assemble the first
151 full word of s2. We can still find a zero at the end of it.
153 On entry to this basic block:
154 t0 == first word of s1
155 t1 == first partial word of s2.
157 t10 == ofs of last byte in s1 last word.
158 t11 == ofs of last byte in s2 last word. */
160 ldq_u t2, 8(a1) # t2 = second partial s2 word
163 extql t1, a1, t1 # shift t1 right by (a1 & 7) bytes: create first s2 word
166 ornot t0, t8, t0 # t0 |= ~t8: kill s1 garbage (t8 is presumably a valid-byte mask set on a line not shown -- confirm)
167 or t1, t4, t1 # merge t4 into t1: s2 word now complete (t4 presumably holds the high part, set on a line not shown -- confirm)
168 cmpbge zero, t0, t7 # t7 = mask of zero bytes in t0: find eos in first s1 word
169 ornot t1, t8, t1 # t1 |= ~t8: kill s2 garbage with the same mask
173 mskql t3, a1, t8 # t8 = t3 with the bytes at and above offset (a1 & 7) cleared: mask out s2[1] bits we have seen
174 xor t0, t1, t4 # t4 = t0 ^ t1: compare aligned words
177 cmpbge zero, t8, t7 # t7 = mask of zero bytes in t8: eos in high bits of s2[1]?
178 cmpeq a3, 0, t8 # t8 = 1 iff a3 == 0: eoc in s2[1]?
182 /* Unaligned compare main loop. In order to avoid reading too much,
183 the loop is structured to detect zeros in aligned words from s2.
184 This has, unfortunately, effectively pulled half of a loop
185 iteration out into the head and half into the tail, but it does
186 prevent nastiness from accumulating in the very thing we want
187 to run as fast as possible.
189 On entry to this basic block:
190 t2 == the unshifted low-bits from the next s2 word.
191 t10 == ofs of last byte in s1 last word.
192 t11 == ofs of last byte in s2 last word. */
195 extql t2, a1, t3 # e0 : t3 = t2 shifted right by (a1 & 7) bytes
196 ldq_u t2, 16(a1) # .. e1 : load next s2 high bits
197 ldq_u t0, 8(a0) # e0 : load next s1 word
198 addq a1, 8, a1 # .. e1 : advance s2 pointer one word
200 addq a0, 8, a0 # e0 : advance s1 pointer one word
201 subq a3, 1, a3 # .. e1 : one fewer s2 word left in the count
202 extqh t2, a1, t1 # e0 : t1 = high-part bytes extracted from the newly loaded t2
203 cmpbge zero, t0, t7 # .. e1 : eos in current s1 word
206 beq a2, $eoc # .. e1 : eoc in current s1 word
207 subq a2, 1, a2 # e0 : one fewer s1 word left in the count
208 cmpbge zero, t2, t4 # .. e1 : eos in s2[1]
210 xor t0, t1, t3 # e0 : compare the words
213 bne t7, $eos # .. e1 : null seen in the s1 word
215 cmpeq a3, 0, t5 # e0 : eoc in s2[1]
218 bne t3, $wordcmp # .. e1 : words differ, go compare bytewise
220 or t4, t5, t4 # e0 : eos or eoc in s2[1].
221 beq t4, $u_loop # .. e1 (zdb) : neither eos nor eoc, next iteration
223 /* We've found a zero in the low bits of the last s2 word. Get
224 the next s1 word and align them. */
232 /* We've hit end of count. Zero everything after the count
233 and compare what's left. */
240 /* We've found a zero somewhere in a word we just read.
241 On entry to this basic block:
244 t7 == cmpbge mask containing the zero. */
247 negq t7, t6 # t6 = -t7: first step in creating the bytemask of valid data
251 zapnot t0, t7, t0 # keep only the bytes of t0 selected by t7: kill the garbage
253 xor t0, t1, v0 # v0 = t0 ^ t1 ... and compare
256 /* Here we have two differing co-aligned words in t0 & t1.
257 Bytewise compare them and return (t0 > t1 ? 1 : -1). */
260 cmpbge t0, t1, t2 # t2 bit i set iff byte i of t0 >= byte i of t1 (unsigned): bit mask of ge
262 xor t2, t3, t0 # t0 = t2 ^ t3: bits set iff t0/t1 bytes differ
263 negq t0, t1 # t1 = -t0, used to clear all but least bit
266 and t0, t2, t1 # t1 = t0 & t2: was bit set in t0 > t1?
277 libc_hidden_builtin_def (strncmp)