1 /* memchr - find a character in a memory zone
3 Copyright (C) 2015-2017 Free Software Foundation, Inc.
5 This file is part of the GNU C Library.
7 The GNU C Library is free software; you can redistribute it and/or
8 modify it under the terms of the GNU Lesser General Public
9 License as published by the Free Software Foundation; either
10 version 2.1 of the License, or (at your option) any later version.
12 The GNU C Library is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
17 You should have received a copy of the GNU Lesser General Public
18 License along with the GNU C Library. If not, see
19 <http://www.gnu.org/licenses/>. */
29 /* Arguments and results. */
54 * For each 32-byte chunk we calculate a 64-bit syndrome value, with two bits
55 * per byte. For each tuple, bit 0 is set if the relevant byte matched the
56 * requested character and bit 1 is not used (faster than using a 32bit
57 * syndrome). Since the bits in the syndrome reflect exactly the order in which
58 * things occur in the original string, counting trailing zeros allows to
59 * identify exactly which byte has matched.
63 /* Do not dereference srcin if no bytes to compare. */
64 cbz cntin, L(zero_length)
66 * Magic constant 0x40100401 allows us to identify which lane matches
70 movk wtmp2, #0x4010, lsl #16
71 dup vrepchr.16b, chrin
72 /* Work with aligned 32-byte chunks */
74 dup vrepmask.4s, wtmp2
76 and cntrem, cntin, #31
80 * Input string is not 32-byte aligned. We calculate the syndrome
81 * value for the aligned 32 bytes block containing the first bytes
82 * and mask the irrelevant part.
85 ld1 {vdata1.16b, vdata2.16b}, [src], #32
87 adds cntin, cntin, tmp
88 cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b
89 cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b
90 and vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
91 and vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
92 addp vend.16b, vhas_chr1.16b, vhas_chr2.16b /* 256->128 */
93 addp vend.16b, vend.16b, vend.16b /* 128->64 */
95 /* Clear the soff*2 lower bits */
99 /* The first block can also be the last */
101 /* Have we found something already? */
105 ld1 {vdata1.16b, vdata2.16b}, [src], #32
106 subs cntin, cntin, #32
107 cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b
108 cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b
109 /* If we're out of data we finish regardless of the result */
111 /* Use a fast check for the termination condition */
112 orr vend.16b, vhas_chr1.16b, vhas_chr2.16b
113 addp vend.2d, vend.2d, vend.2d
115 /* We're not out of data, loop if we haven't found the character */
119 /* Termination condition found, let's calculate the syndrome value */
120 and vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
121 and vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
122 addp vend.16b, vhas_chr1.16b, vhas_chr2.16b /* 256->128 */
123 addp vend.16b, vend.16b, vend.16b /* 128->64 */
125 /* Only do the clear for the last possible block */
129 /* Clear the (32 - ((cntrem + soff) % 32)) * 2 upper bits */
130 add tmp, cntrem, soff
138 /* Count the trailing zeros using bit reversing */
140 /* Compensate the last post-increment */
142 /* Check that we have found a character */
144 /* And count the leading zeros */
146 /* Compute the potential result */
147 add result, src, synd, lsr #1
148 /* Select result or NULL */
149 csel result, xzr, result, eq
156 weak_alias (__memchr, memchr)
157 libc_hidden_builtin_def (memchr)