1 /* Copyright (C) 2010-2017 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
4 The GNU C Library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Lesser General Public
6 License as published by the Free Software Foundation; either
7 version 2.1 of the License, or (at your option) any later version.
9 The GNU C Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
14 You should have received a copy of the GNU Lesser General Public
15 License along with the GNU C Library. If not, see
16 <http://www.gnu.org/licenses/>. */
20 typedef unsigned long word
;
25 return *(const word
*)((word
)s
& -8);
28 #define unlikely(X) __builtin_expect ((X), 0)
29 #define prefetch(X) __builtin_prefetch ((void *)(X), 0)
31 #define cmpbeq0(X) __builtin_alpha_cmpbge(0, (X))
32 #define find(X, Y) cmpbeq0 ((X) ^ (Y))
34 /* Search no more than N bytes of S for C. */
37 __memchr (const void *s
, int xc
, size_t n
)
40 word t
, current
, found
, mask
, offset
;
42 if (unlikely (n
== 0))
47 /* Replicate low byte of XC into all bytes of C. */
48 t
= xc
& 0xff; /* 0000000c */
49 t
= (t
<< 8) | t
; /* 000000cc */
50 t
= (t
<< 16) | t
; /* 0000cccc */
51 const word c
= (t
<< 32) | t
; /* cccccccc */
53 /* Align the source, and decrement the count by the number
54 of bytes searched in the first word. */
55 s_align
= (const word
*)((word
)s
& -8);
57 size_t inc
= n
+ ((word
)s
& 7);
61 /* Deal with misalignment in the first word for the comparison. */
62 mask
= (1ul << ((word
)s
& 7)) - 1;
64 /* If the entire string fits within one word, we may need masking
65 at both the front and the back of the string. */
66 if (unlikely (n
<= 8))
72 found
= find (current
, c
) & ~mask
;
79 /* If the block is sufficiently large, align to cacheline and prefetch. */
80 if (unlikely (n
>= 256))
82 /* Prefetch 3 cache lines beyond the one we're working on. */
83 prefetch (s_align
+ 8);
84 prefetch (s_align
+ 16);
85 prefetch (s_align
+ 24);
87 while ((word
)s_align
& 63)
90 found
= find (current
, c
);
97 /* Within each cacheline, advance the load for the next word
98 before the test for the previous word is complete. This
99 allows us to hide the 3 cycle L1 cache load latency. We
100 only perform this advance load within a cacheline to prevent
101 reading across page boundary. */
102 #define CACHELINE_LOOP \
104 word i, next = s_align[0]; \
105 for (i = 0; i < 7; ++i) \
109 found = find (current, c); \
110 if (unlikely (found)) \
115 found = find (current, c); \
116 if (unlikely (found)) \
122 /* While there's still lots more data to potentially be read,
123 continue issuing prefetches for the 4th cacheline out. */
126 prefetch (s_align
+ 24);
130 /* Up to 3 cache lines remaining. Continue issuing advanced
131 loads, but stop prefetching. */
135 /* We may have exhausted the buffer. */
140 /* Quadword aligned loop. */
144 found
= find (current
, c
);
145 if (unlikely (found
))
147 current
= *++s_align
;
151 /* The last word may need masking at the tail of the compare. */
154 found
= find (current
, c
) & ~mask
;
160 offset
= __builtin_alpha_cttz (found
);
165 /* Binary search for the LSB. */
166 offset
= (found
& 0x0f ? 0 : 4);
167 offset
+= (found
& 0x33 ? 0 : 2);
168 offset
+= (found
& 0x55 ? 0 : 1);
171 return (void *)((word
)s_align
+ offset
);
175 weak_alias (__memchr
, memchr
)
177 libc_hidden_builtin_def (memchr
)