#include <string.h>
#include <stdint.h>

void *memset(void *dest, int c, size_t n)
{
	unsigned char *s = dest;
	size_t k;

	/* Fill head and tail with minimal branching. Each
	 * conditional ensures that all the subsequently used
	 * offsets are well-defined and in the dest region. */
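	/* For example, with n == 5 the head stores write s[0], s[1], s[2]
	 * and the tail stores write s[4], s[3], s[2]; together they cover
	 * the whole buffer after only three length checks. */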
	if (!n) return dest;
	s[0] = c;
	s[n-1] = c;
	if (n <= 2) return dest;
	s[1] = s[2] = c;
	s[n-2] = s[n-3] = c;
	if (n <= 6) return dest;
	s[3] = c;
	s[n-4] = c;
	if (n <= 8) return dest;

	/* Advance pointer to align it at a 4-byte boundary,
	 * and truncate n to a multiple of 4. The previous code
	 * already took care of any head/tail that get cut off
	 * by the alignment. */
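	/* -(uintptr_t)s & 3 is the distance in bytes from s to the next
	 * 4-byte boundary (0 if s is already aligned), and n &= -4 below
	 * rounds n down to a whole number of 32-bit words. */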
	k = -(uintptr_t)s & 3;
	s += k;
	n -= k;
	n &= -4;

#ifdef __GNUC__
	typedef uint32_t __attribute__((__may_alias__)) u32;
	typedef uint64_t __attribute__((__may_alias__)) u64;
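	/* The __may_alias__ attribute lets these integer types access the
	 * buffer without violating strict aliasing, which is why this
	 * path is restricted to GNU-compatible compilers. */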
	u32 c32 = ((u32)-1)/255 * (unsigned char)c;
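	/* ((u32)-1)/255 is 0x01010101, so the multiply above replicates
	 * the low fill byte into all four bytes of c32. */
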
	/* In preparation to copy 32 bytes at a time, aligned on
	 * an 8-byte boundary, fill head/tail up to 28 bytes each.
	 * As in the initial byte-based head/tail fill, each
	 * conditional below ensures that the subsequent offsets
	 * are valid (e.g. !(n<=24) implies n>=28). */
	*(u32 *)(s+0) = c32;
	*(u32 *)(s+n-4) = c32;
	if (n <= 8) return dest;
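	/* Past this point n is a multiple of 4 and greater than 8, so
	 * n >= 12: the next four stores stay in bounds and, together with
	 * the two above, cover the first and last 12 bytes. */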
	*(u32 *)(s+4) = c32;
	*(u32 *)(s+8) = c32;
	*(u32 *)(s+n-12) = c32;
	*(u32 *)(s+n-8) = c32;
	if (n <= 24) return dest;
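	/* Here n > 24 and n is a multiple of 4, so n >= 28: the head
	 * stores through s+24 and the tail stores back to s+n-28 are all
	 * in bounds, covering 28 bytes at each end. */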
	*(u32 *)(s+12) = c32;
	*(u32 *)(s+16) = c32;
	*(u32 *)(s+20) = c32;
	*(u32 *)(s+24) = c32;
	*(u32 *)(s+n-28) = c32;
	*(u32 *)(s+n-24) = c32;
	*(u32 *)(s+n-20) = c32;
	*(u32 *)(s+n-16) = c32;

	/* Align to a multiple of 8 so we can fill 64 bits at a time,
	 * and avoid writing the same bytes twice as much as is
	 * practical without introducing additional branching. */
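	/* s is 4-byte aligned at this point, so (uintptr_t)s & 4 is either
	 * 0 or 4; adding 24 or 28 lands s on an 8-byte boundary and skips
	 * head bytes the 32-bit stores above have already filled. */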
	k = 24 + ((uintptr_t)s & 4);
	s += k;
	n -= k;

	/* If this loop is reached, 28 tail bytes have already been
	 * filled, so any remainder when n drops below 32 can be
	 * safely ignored. */
	u64 c64 = c32 | ((u64)c32 << 32);
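	/* c64 carries the fill byte in all eight byte lanes; each loop
	 * iteration below stores it four times to fill 32 bytes. */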
	for (; n >= 32; n-=32, s+=32) {
		*(u64 *)(s+0) = c64;
		*(u64 *)(s+8) = c64;
		*(u64 *)(s+16) = c64;
		*(u64 *)(s+24) = c64;
	}
#else
	/* Pure C fallback with no aliasing violations. */
	for (; n; n--, s++) *s = c;
#endif

	return dest;
}