libc/string/i386/memset.c

   1 /*
   2  * This string-include defines all string functions as inline
   3  * functions. Use gcc. It also assumes ds=es=data space, this should be
   4  * normal. Most of the string-functions are rather heavily hand-optimized,
   5  * see especially strtok,strstr,str[c]spn. They should work, but are not
   6  * very easy to understand. Everything is done entirely within the register
   7  * set, making the functions fast and clean. String instructions have been
    8  * used throughout, making for "slightly" unclear code :-)
   9  *
  10  *              NO Copyright (C) 1991, 1992 Linus Torvalds,
  11  *              consider these trivial functions to be PD.
  12  */
  13
  14 /*
  15  * Copyright (C) 2000-2005 Erik Andersen <andersen@uclibc.org>
  16  *
  17  * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
  18  */
  19
  20 /*
  21  * Modified for uClibc by Erik Andersen <andersen@codepoet.org>
  22  * These make no attempt to use nifty things like mmx/3dnow/etc.
  23  * These are not inline, and will therefore not be as fast as
  24  * modifying the headers to use inlines (and cannot therefore
  25  * do tricky things when dealing with const memory).  But they
  26  * should (I hope!) be faster than their generic equivalents....
  27  *
  28  * More importantly, these should provide a good example for
  29  * others to follow when adding arch specific optimizations.
  30  *  -Erik
  31  *
  32  * 2009-04: modified by Denys Vlasenko <vda.linux@googlemail.com>
   33  * Filling byte-by-byte is a bit too slow. I prefer a 46-byte function
   34  * that fills 4x faster over a 21-byte one.
  35  */
  36
  37 #include <string.h>
  38
  39 #undef memset
  40 void *memset(void *s, int c, size_t count)
  41 {
  42         int reg, edi;
  43         __asm__ __volatile__(
  44
  45                 /* Most of the time, count is divisible by 4 and nonzero */
  46                 /* It's better to make this case faster */
  47         /*      "       jecxz   9f\n" - (optional) count == 0: goto ret */
  48                 "       mov     %%ecx, %1\n"
  49                 "       shr     $2, %%ecx\n"
  50                 "       jz      1f\n" /* zero words: goto fill_bytes */
  51                 /* extend 8-bit fill to 32 bits */
  52                 "       movzx   %%al, %%eax\n" /* 3 bytes */
  53         /* or:  "       and     $0xff, %%eax\n" - 5 bytes */
  54                 "       imul    $0x01010101, %%eax\n" /* 6 bytes */
  55                 /* fill full words */
  56                 "       rep; stosl\n"
  57                 /* fill 0-3 bytes */
  58                 "1:     and     $3, %1\n"
  59                 "       jz      9f\n" /* (count & 3) == 0: goto end */
  60                 "2:     stosb\n"
  61                 "       dec     %1\n"
  62                 "       jnz     2b\n"
  63                 /* end */
  64                 "9:\n"
  65
  66                 : "=&D" (edi), "=&r" (reg)
  67                 : "0" (s), "a" (c), "c" (count)
  68                 : "memory"
  69         );
  70         return s;
  71 }
  72 libc_hidden_def(memset)
  73
  74 /*
  75 gcc 4.3.1
  76 =========
  77 57                     push   %edi
  78 8b 7c 24 08            mov    0x8(%esp),%edi
  79 8b 4c 24 10            mov    0x10(%esp),%ecx
  80 8b 44 24 0c            mov    0xc(%esp),%eax
  81 89 ca                  mov    %ecx,%edx
  82 c1 e9 02               shr    $0x2,%ecx
  83 74 0b                  je     1f <__GI_memset+0x1f>
  84 0f b6 c0               movzbl %al,%eax
  85 69 c0 01 01 01 01      imul   $0x1010101,%eax,%eax
  86 f3 ab                  rep stos %eax,%es:(%edi)
  87 83 e2 03               and    $0x3,%edx
  88 74 04                  je     28 <__GI_memset+0x28>
  89 aa                     stos   %al,%es:(%edi)
  90 4a                     dec    %edx
  91 75 fc                  jne    24 <__GI_memset+0x24>
  92 8b 44 24 08            mov    0x8(%esp),%eax
  93 5f                     pop    %edi
  94 c3                     ret
  95 */