alpha: Fix compile errors in memchr
[glibc.git] / sysdeps / alpha / memchr.c
blob 7e16f8a17cbd18c96f60c95f5a67b7cde98602c9
1 /* Copyright (C) 2010 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
4 The GNU C Library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Lesser General Public
6 License as published by the Free Software Foundation; either
7 version 2.1 of the License, or (at your option) any later version.
9 The GNU C Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
14 You should have received a copy of the GNU Lesser General Public
15 License along with the GNU C Library; if not, write to the Free
16 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
17 02111-1307 USA. */
19 #include <string.h>
20 #include <bp-sym.h>
/* Machine word used for the word-at-a-time scan.  */
typedef unsigned long word;

/* Load the aligned word that contains the byte S points to: the low
   three address bits are cleared before the load, so the access never
   straddles an 8-byte boundary.  */
static inline word
ldq_u (const void *s)
{
  const word *aligned = (const word *) ((word) s & ~(word) 7);

  return *aligned;
}
/* Branch hint: COND is expected to evaluate to zero.  */
#define unlikely(cond) __builtin_expect ((cond), 0)
/* Issue a read prefetch (second argument 0) for the address ADDR.  */
#define prefetch(addr) __builtin_prefetch ((void *)(addr), 0)

/* Byte-wise compare of W against zero via the Alpha CMPBGE builtin; the
   result has one bit per byte of W, set where that byte is zero.  */
#define cmpbeq0(w) __builtin_alpha_cmpbge(0, (w))
/* One bit per byte position at which words A and B hold equal bytes
   (equal bytes XOR to zero).  */
#define find(a, b) cmpbeq0 ((a) ^ (b))
36 /* Search no more than N bytes of S for C. */
38 void *
39 __memchr (const void *s, int xc, size_t n)
41 const word *s_align;
42 word t, current, found, mask, offset;
44 if (unlikely (n == 0))
45 return 0;
47 current = ldq_u (s);
49 /* Replicate low byte of XC into all bytes of C. */
50 t = xc & 0xff; /* 0000000c */
51 t = (t << 8) | t; /* 000000cc */
52 t = (t << 16) | t; /* 0000cccc */
53 const word c = (t << 32) | t; /* cccccccc */
55 /* Align the source, and decrement the count by the number
56 of bytes searched in the first word. */
57 s_align = (const word *)((word)s & -8);
58 n += ((word)s & 7);
60 /* Deal with misalignment in the first word for the comparison. */
61 mask = (1ul << ((word)s & 7)) - 1;
63 /* If the entire string fits within one word, we may need masking
64 at both the front and the back of the string. */
65 if (unlikely (n <= 8))
67 mask |= -1ul << n;
68 goto last_quad;
71 found = find (current, c) & ~mask;
72 if (unlikely (found))
73 goto found_it;
75 s_align++;
76 n -= 8;
78 /* If the block is sufficiently large, align to cacheline and prefetch. */
79 if (unlikely (n >= 256))
81 /* Prefetch 3 cache lines beyond the one we're working on. */
82 prefetch (s_align + 8);
83 prefetch (s_align + 16);
84 prefetch (s_align + 24);
86 while ((word)s_align & 63)
88 current = *s_align;
89 found = find (current, c);
90 if (found)
91 goto found_it;
92 s_align++;
93 n -= 8;
96 /* Within each cacheline, advance the load for the next word
97 before the test for the previous word is complete. This
98 allows us to hide the 3 cycle L1 cache load latency. We
99 only perform this advance load within a cacheline to prevent
100 reading across page boundary. */
101 #define CACHELINE_LOOP \
102 do { \
103 word i, next = s_align[0]; \
104 for (i = 0; i < 7; ++i) \
106 current = next; \
107 next = s_align[1]; \
108 found = find (current, c); \
109 if (unlikely (found)) \
110 goto found_it; \
111 s_align++; \
113 current = next; \
114 found = find (current, c); \
115 if (unlikely (found)) \
116 goto found_it; \
117 s_align++; \
118 n -= 64; \
119 } while (0)
121 /* While there's still lots more data to potentially be read,
122 continue issuing prefetches for the 4th cacheline out. */
123 while (n >= 256)
125 prefetch (s_align + 24);
126 CACHELINE_LOOP;
129 /* Up to 3 cache lines remaining. Continue issuing advanced
130 loads, but stop prefetching. */
131 while (n >= 64)
132 CACHELINE_LOOP;
134 /* We may have exhausted the buffer. */
135 if (n == 0)
136 return NULL;
139 /* Quadword aligned loop. */
140 current = *s_align;
141 while (n > 8)
143 found = find (current, c);
144 if (unlikely (found))
145 goto found_it;
146 current = *++s_align;
147 n -= 8;
150 /* The last word may need masking at the tail of the compare. */
151 mask = -1ul << n;
152 last_quad:
153 found = find (current, c) & ~mask;
154 if (found == 0)
155 return NULL;
157 found_it:
158 #ifdef __alpha_cix__
159 offset = __builtin_alpha_cttz (found);
160 #else
161 /* Extract LSB. */
162 found &= -found;
164 /* Binary search for the LSB. */
165 offset = (found & 0x0f ? 0 : 4);
166 offset += (found & 0x33 ? 0 : 2);
167 offset += (found & 0x55 ? 0 : 1);
168 #endif
170 return (void *)((word)s_align + offset);
173 #ifdef weak_alias
174 weak_alias (__memchr, BP_SYM (memchr))
175 #endif
176 libc_hidden_builtin_def (memchr)