1 /* Optimized version of the standard memchr() function.
2 This file is part of the GNU C Library.
3 Copyright (C) 2000-2023 Free Software Foundation, Inc.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
19 /* Return: the address of the first occurrence of chr in str or NULL
26 This implementation assumes little endian mode. For big endian mode,
27 the instruction czx1.r should be replaced by czx1.l.
29 The algorithm is fairly straightforward: search byte by byte until we
30 get to a word aligned address, then search word by word as much as
31 possible; the remaining few bytes are searched one at a time.
33 The word by word search is performed by xor-ing the word with a word
34 containing chr in every byte. If there is a hit, the result will
35 contain a zero byte in the corresponding position. The presence and
36 position of that zero byte is detected with a czx instruction.
38 All the loops in this function could have had the internal branch removed
39 if br.ctop and br.cloop could be predicated :-(. */
// Body of the memchr implementation.  NOTE(review): the entry macro, the
// labels (.srchfew, .str_aligned, .foundit, .notfound, .recovery), the
// symbolic register names (str, chr, len, last, val, tmp, loopcnt, chrx8,
// saved_lc, saved_pr, p[]) and some instructions are defined on lines that
// are not part of this excerpt; comments below describe only what the
// visible instructions do.

// Register frame: alloc operands are (inputs, locals, outputs, rotating),
// i.e. 3 inputs, 0 locals, 29 outputs, 32 rotating registers.  The previous
// ar.pfs value is placed in r2.
58 alloc r2 = ar.pfs, 3, 0, 29, 32
// Name the rotating general registers used by the pipelined word loop.
60 .rotr value[MEMLAT+1], addr[MEMLAT+3], aux[2], poschr[2]
63 mov saved_lc = ar.lc // save the loop counter
65 mov saved_pr = pr // save the predicates
// last = str + in2.  The recovery path below compares a candidate address
// against it with cmp.geu, so it acts as an exclusive upper bound there.
68 add last = str, in2 // last byte
// p6 = (last < str) unsigned, i.e. the addition above wrapped around.
// NOTE(review): the instruction consuming this p6 is not visible here.
70 cmp.ltu p6, p0 = last, str
73 and tmp = 7, str // tmp = str % 8
74 cmp.ne p7, p0 = r0, r0 // clear p7
75 extr.u chr = in1, 0, 8 // chr = (unsigned char) in1
// p6 = (in2 < 16), unsigned compare.
77 cmp.gtu p6, p0 = 16, in2 // use a simple loop for short
78 (p6) br.cond.spnt .srchfew ;; // searches
// Number of bytes up to the next 8-byte boundary; skipped entirely when
// str is already aligned (tmp == 0).
79 sub loopcnt = 8, tmp // loopcnt = 8 - tmp
80 cmp.eq p6, p0 = tmp, r0
81 (p6) br.cond.sptk .str_aligned;;
// Account for the bytes the alignment loop will consume, then bias the
// count by -1 (presumably for ar.lc; the move is not visible -- confirm).
82 sub len = len, loopcnt
83 adds loopcnt = -1, loopcnt;;
// Byte-wise compare; the load that produces val is not visible here.
88 cmp.eq p6, p0 = val, chr
89 (p6) br.cond.spnt .foundit
// Set-up for the word-at-a-time search.
92 cmp.ne p6, p0 = r0, r0 // clear p6
93 shr.u loopcnt = len, 3 // loopcnt = len / 8
94 and len = 7, len ;; // remaining len = len & 7
95 adds loopcnt = -1, loopcnt
// Epilogue count for the software pipeline: MEMLAT + 3 stages are used
// by the loop body below (stages 0, MEMLAT, MEMLAT+1, MEMLAT+2).
96 mov ar.ec = MEMLAT + 3
97 mux1 chrx8 = chr, @brcst ;; // get a word full of chr
// Set rotating predicate p16 and clear the other rotating predicates.
99 mov pr.rot = 1 << 16 ;;
// Software-pipelined loop body; each instruction is guarded by the
// predicate of its pipeline stage.
//   stage 0:        remember the address and issue a speculative load
//   stage MEMLAT:   check the speculative load, xor with the chr pattern
//   stage MEMLAT+1: locate a zero byte in the xor result
//   stage MEMLAT+2: poschr != 8 means a matching byte was located
// NOTE(review): the loop-closing branch is not visible in this excerpt.
101 (p[0]) mov addr[0] = ret0
102 (p[0]) ld8.s value[0] = [ret0], 8 // speculative load
103 (p[MEMLAT]) chk.s value[MEMLAT], .recovery // check and recovery
104 (p[MEMLAT]) xor aux[0] = value[MEMLAT], chrx8
105 (p[MEMLAT+1]) czx1.r poschr[0] = aux[1]
106 (p[MEMLAT+2]) cmp.ne p7, p0 = 8, poschr[1]
107 (p7) br.cond.dpnt .foundit
// Tail: the remaining len (len & 7) bytes are handled one at a time;
// nothing left means the character was not found.
110 adds loopcnt = -1, len
111 cmp.eq p6, p0 = len, r0
112 (p6) br.cond.spnt .notfound ;;
// Byte-wise compare; the load that produces val is not visible here.
117 cmp.eq p6, p0 = val, chr
118 (p6) br.cond.dpnt .foundit
// Not-found result: with p6 and p7 both clear, neither adjustment below
// is applied and ret0 stays 0.
121 cmp.ne p6, p0 = r0, r0 // clear p6 (p7 was already 0 when we got here)
122 mov ret0 = r0 ;; // return NULL
// Result fix-up.  p6 and p7 are declared mutually exclusive to the
// assembler.  p6: step ret0 back by one byte (presumably because the
// byte loops post-increment the pointer -- the loads are not visible;
// confirm).  p7: word base address plus the byte index from czx1.r.
124 .pred.rel "mutex" p6, p7
125 (p6) adds ret0 = -1, ret0 // if we got here from l1 or l3
126 (p7) add ret0 = addr[MEMLAT+2], poschr[1] // if we got here from l2
127 mov pr = saved_pr, -1
// Build-time guard; the enclosing preprocessor conditional is not visible
// here.  The unwinding sequence below is written out for MEMLAT == 6.
133 # error "MEMLAT must be 6!"
// Speculation recovery: for every pipeline stage whose predicate is set,
// undo one 8-byte post-increment of ret0.
135 (p[MEMLAT-6]) add ret0 = -8, ret0;;
136 (p[MEMLAT-5]) add ret0 = -8, ret0;;
137 (p[MEMLAT-4]) add ret0 = -8, ret0;;
138 (p[MEMLAT-3]) add ret0 = -8, ret0;;
139 (p[MEMLAT-2]) add ret0 = -8, ret0;;
140 (p[MEMLAT-1]) add ret0 = -8, ret0;;
141 (p[MEMLAT]) add ret0 = -8, ret0;;
142 (p[MEMLAT+1]) add ret0 = -8, ret0;;
143 (p[MEMLAT+2]) add ret0 = -8, ret0;;
// Non-speculative re-scan of one word: same xor / czx1.r test as the
// pipelined loop, using a plain ld8 instead of ld8.s.
145 mov addr[MEMLAT+2] = ret0
146 ld8 tmp = [ret0];; // load the first unchecked 8byte
147 xor aux[1] = tmp, chrx8;;
148 czx1.r poschr[1] = aux[1];;
149 cmp.ne p7, p0 = 8, poschr[1];;
150 (p7) add ret0 = addr[MEMLAT+2], poschr[1];;
// A hit at or beyond last does not count: p6 is set and p7 cleared.
151 (p7) cmp.geu p6, p7 = ret0, last // don't go over the last byte
152 (p6) br.cond.spnt .notfound;;
153 (p7) br.cond.spnt .foundit;;
154 adds ret0 = 8, ret0 // advance to the next unchecked 8byte
/* Export the implementation under its standard name.  NOTE(review):
   weak_alias and libc_hidden_builtin_def are macros defined outside this
   excerpt; presumably the first makes memchr a weak alias of __memchr and
   the second sets up the library-internal hidden reference -- confirm
   against their definitions.  */
159 weak_alias (__memchr, memchr)
160 libc_hidden_builtin_def (memchr)