Benchtests: Remove broken walk benchmarks
[glibc.git] / sysdeps / sparc / sparc64 / strcpy.S
blob3b40c1e238b7902470862afa2bd000576c9ec507
1 /* Copy SRC to DEST returning DEST.
2    For SPARC v9.
3    Copyright (C) 1998-2024 Free Software Foundation, Inc.
4    This file is part of the GNU C Library.
6    The GNU C Library is free software; you can redistribute it and/or
7    modify it under the terms of the GNU Lesser General Public
8    License as published by the Free Software Foundation; either
9    version 2.1 of the License, or (at your option) any later version.
11    The GNU C Library is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14    Lesser General Public License for more details.
16    You should have received a copy of the GNU Lesser General Public
17    License along with the GNU C Library; if not, see
18    <https://www.gnu.org/licenses/>.  */
20 #include <sysdep.h>
21 #include <asm/asi.h>
/* Unless XCC is defined, declare %g2, %g3 and %g6 as scratch registers;
   the code below uses all three as temporaries.  */
22 #ifndef XCC
23         .register       %g2, #scratch
24         .register       %g3, #scratch
25         .register       %g6, #scratch
26 #endif
28         /* Normally, this uses the
29            ((xword - 0x0101010101010101) & 0x8080808080808080) test
30            to find out if any byte in xword could be zero. This is fast, but
31            it also gives a false alarm for any byte in the range 0x81-0xff. This
32            does not matter for correctness, because whenever the test says there
33            could be a zero byte, we check the word byte by byte; but if bytes with
34            the high bit set are common in the strings, performance will be
35            poor. You can #define EIGHTBIT_NOT_RARE and the algorithm
36            will use a one-tick-slower but more precise test,
37            ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080),
38            which does not give any false alarms (but if some bits are set,
39            one cannot assume from it which bytes are zero and which are not).
40            It has yet to be measured which default is the right one for glibc
41            for an average user these days.
42          */
44         .text
45         .align          32
/* char *strcpy (char *dest, const char *src)

   Entry: %o0 = dest, %o1 = src.
   Exit:  %o0 = the original dest, which is kept in %g6 for the whole
          routine and copied back in the delay slot of every retl.

   Register use established by the code below:
     %g1 = 0x0101010101010101 and %g2 = 0x8080808080808080, the two
           constants of the zero-byte test
     %g3 = src & 7 on entry; in the aligned loop it is reused to hold
           the xword currently being tested and stored
     %o3 = xword (or byte) most recently loaded or assembled
     %g4, %g5, %o2, %o4, %o5 = temporaries

   Three paths:
     1: 2: 3:   dest and src both 8-byte aligned; copy xword by xword.
     12: 13:    dest misaligned; copy single bytes until it is aligned.
     14: 15:    dest aligned, src not; build each output xword from two
                aligned source xwords with a shift pair.

   An instruction indented by one extra space sits in the delay slot of
   the branch or retl directly above it.  For the conditional branches
   written with ",a" (annulled) the delay slot takes effect only when
   the branch is taken.

   The trailing IEU0/IEU1/CTI/Load/Store/Group remarks look like
   per-instruction scheduling notes -- TODO confirm which CPU they
   were tuned for.  */
46 ENTRY(strcpy)
        /* Start building %g1, save dest in %g6, test dest alignment.  */
47         sethi           %hi(0x01010101), %g1            /* IEU0         Group           */
48         mov             %o0, %g6                        /* IEU1                         */
49         or              %g1, %lo(0x01010101), %g1       /* IEU0         Group           */
50         andcc           %o0, 7, %g0                     /* IEU1                         */
        /* %g2 = %g1 << 32.  If dest is not 8-byte aligned go to 12, which
           finishes both constants itself.  The delay slot puts src & 7 in
           %g3 and sets the condition codes for the second branch.  */
52         sllx            %g1, 32, %g2                    /* IEU0         Group           */
53         bne,pn          %icc, 12f                       /* CTI                          */
54          andcc          %o1, 7, %g3                     /* IEU1                         */
55         or              %g1, %g2, %g1                   /* IEU0         Group           */
        /* Dest aligned but src not: go to 14.  The delay slot completes
           %g2 = %g1 << 7 whether or not the branch is taken.  */
57         bne,pn          %icc, 14f                       /* CTI                          */
58          sllx           %g1, 7, %g2                     /* IEU0         Group           */
        /* Both pointers aligned: fetch the first xword.  */
59 1:      ldx             [%o1], %o3                      /* Load                         */
60         add             %o1, 8, %o1                     /* IEU1                         */
        /* Aligned loop.  %g3 = xword under test (equal to %o3 at both
           entry points 2: and 3:), %o2 = that xword - %g1, and %o3 is then
           overwritten with the following xword.  The read-ahead goes
           through ASI_PNF -- presumably so that reading beyond the
           terminator cannot fault; confirm against <asm/asi.h>.  */
62 2:      mov             %o3, %g3                        /* IEU0         Group           */
63 3:      sub             %o3, %g1, %o2                   /* IEU1                         */
64         ldxa            [%o1] ASI_PNF, %o3              /* Load                         */
65 #ifdef EIGHTBIT_NOT_RARE
66         andn            %o2, %g3, %o2                   /* IEU0         Group           */
67 #endif
        /* Advance both pointers.  If (%o2 & %g2) == 0 no byte can be zero:
           the annulled delay slot stores the xword and the loop continues
           at 2.  */
68         add             %o0, 8, %o0                     /* IEU0         Group           */
70         andcc           %o2, %g2, %g0                   /* IEU1                         */
71         add             %o1, 8, %o1                     /* IEU0         Group           */
72         be,a,pt         %xcc, 2b                        /* CTI                          */
73          stx            %g3, [%o0 - 8]                  /* Store                        */
        /* A zero byte is possible.  Test the bytes of %g3 from the most
           significant one (shift 56, stored at [%o0 - 8]) downwards.  Each
           delay slot already computes the next shift, alternating between
           %g4 and %g5 so that the value just tested is still intact at the
           branch target.  */
75         srlx            %g3, 56, %g5                    /* IEU0         Group           */
76         andcc           %g5, 0xff, %g0                  /* IEU1         Group           */
77         be,pn           %icc, 11f                       /* CTI                          */
78          srlx           %g3, 48, %g4                    /* IEU0                         */
80         andcc           %g4, 0xff, %g0                  /* IEU1         Group           */
81         be,pn           %icc, 10f                       /* CTI                          */
82          srlx           %g3, 40, %g5                    /* IEU0                         */
83         andcc           %g5, 0xff, %g0                  /* IEU1         Group           */
85         be,pn           %icc, 9f                        /* CTI                          */
86          srlx           %g3, 32, %g4                    /* IEU0                         */
87         andcc           %g4, 0xff, %g0                  /* IEU1         Group           */
88         be,pn           %icc, 8f                        /* CTI                          */
90          srlx           %g3, 24, %g5                    /* IEU0                         */
91         andcc           %g5, 0xff, %g0                  /* IEU1         Group           */
92         be,pn           %icc, 7f                        /* CTI                          */
93          srlx           %g3, 16, %g4                    /* IEU0                         */
95         andcc           %g4, 0xff, %g0                  /* IEU1         Group           */
96         be,pn           %icc, 6f                        /* CTI                          */
97          srlx           %g3, 8, %g5                     /* IEU0                         */
98         andcc           %g5, 0xff, %g0                  /* IEU1         Group           */
100         be,pn           %icc, 5f                        /* CTI                          */
101          sub            %o3, %g1, %o2                   /* IEU0                         */
        /* The upper seven bytes are all non-zero: store the whole xword,
           then test the last byte.  If it is non-zero the alarm was false
           and the loop resumes at 3 with %g3 = next xword (3: recomputes
           the same %o2 as the sub above).  Otherwise the terminator has
           just been stored and control falls into the return at 4.  */
102         stx             %g3, [%o0 - 8]                  /* Store        Group           */
103         andcc           %g3, 0xff, %g0                  /* IEU1                         */
105         bne,pt          %icc, 3b                        /* CTI                          */
106          mov            %o3, %g3                        /* IEU0         Group           */
        /* Common return: hand back the original dest.  */
107 4:      retl                                            /* CTI+IEU1     Group           */
108          mov            %g6, %o0                        /* IEU0                         */
110         .align          16
        /* Tail stores for the aligned loop.  Label N (5..11) is entered
           when the terminator is the byte at offset 11 - N from
           [%o0 - 8].  Each entry writes the bytes up to and including the
           terminator with stb/sth/stw of right-shifted copies of %g3,
           falling through into the wider stores that follow it, and then
           returns the original dest.  */
111 5:      stb             %g5, [%o0 - 2]                  /* Store        Group           */
112         srlx            %g3, 16, %g4                    /* IEU0                         */
113 6:      sth             %g4, [%o0 - 4]                  /* Store        Group           */
114         srlx            %g3, 32, %g4                    /* IEU0                         */
116         stw             %g4, [%o0 - 8]                  /* Store        Group           */
117         retl                                            /* CTI+IEU1     Group           */
118          mov            %g6, %o0                        /* IEU0                         */
119 7:      stb             %g5, [%o0 - 4]                  /* Store        Group           */
121         srlx            %g3, 32, %g4                    /* IEU0                         */
122 8:      stw             %g4, [%o0 - 8]                  /* Store        Group           */
123         retl                                            /* CTI+IEU1     Group           */
124          mov            %g6, %o0                        /* IEU0                         */
126 9:      stb             %g5, [%o0 - 6]                  /* Store        Group           */
127         srlx            %g3, 48, %g4                    /* IEU0                         */
128 10:     sth             %g4, [%o0 - 8]                  /* Store        Group           */
129         retl                                            /* CTI+IEU1     Group           */
131          mov            %g6, %o0                        /* IEU0                         */
132 11:     stb             %g5, [%o0 - 8]                  /* Store        Group           */
133         retl                                            /* CTI+IEU1     Group           */
134          mov            %g6, %o0                        /* IEU0                         */
        /* Dest is not 8-byte aligned.  Finish %g1 and %g2 (the entry code
           only got as far as %g2 = %g1 << 32), then copy the first byte.  */
136 12:     or              %g1, %g2, %g1                   /* IEU0         Group           */
137         ldub            [%o1], %o3                      /* Load                         */
138         sllx            %g1, 7, %g2                     /* IEU0         Group           */
139         stb             %o3, [%o0]                      /* Store        Group           */
        /* Byte loop.  %o3 holds the byte that was just stored.  Advance
           both pointers; if that byte was zero, return through 4 (the
           non-annulled delay slot still reads the next source byte, via
           ASI_PNF).  Otherwise, while dest is still misaligned, the
           annulled delay slot stores the new byte and the loop repeats.  */
141 13:     add             %o0, 1, %o0                     /* IEU0                         */
142         add             %o1, 1, %o1                     /* IEU1                         */
143         andcc           %o3, 0xff, %g0                  /* IEU1         Group           */
144         be,pn           %icc, 4b                        /* CTI                          */
146          lduba          [%o1] ASI_PNF, %o3              /* Load                         */
147         andcc           %o0, 7, %g0                     /* IEU1         Group           */
148         bne,a,pt        %icc, 13b                       /* CTI                          */
149          stb            %o3, [%o0]                      /* Store                        */
        /* Dest is now aligned and %o1 points at a byte that has been read
           but not yet stored.  If src is aligned as well, join the xword
           loop at 1 (the annulled delay slot issues the same ldx that 1:
           then repeats); otherwise fall through to 14 with %g3 = src & 7.  */
151         andcc           %o1, 7, %g3                     /* IEU1         Group           */
152         be,a,pt         %icc, 1b                        /* CTI                          */
153          ldx            [%o1], %o3                      /* Load                         */
        /* Dest aligned, src not.  %g5 = 8 * %g3, %g4 = 64 - %g5, and %o1 is
           rounded down to the enclosing aligned xword.  The condition
           codes written by orcc and addcc are not read by any branch
           below.  */
154 14:     orcc            %g0, 64, %g4                    /* IEU1         Group           */
156         sllx            %g3, 3, %g5                     /* IEU0                         */
157         sub             %o1, %g3, %o1                   /* IEU0         Group           */
158         sub             %g4, %g5, %g4                   /* IEU1                         */
159                                                         /* %g1 = 0101010101010101       *
160                                                          * %g2 = 8080808080808080       *
161                                                          * %g3 = source alignment       *
162                                                          * %g5 = number of bits to shift left  *
163                                                          * %g4 = number of bits to shift right */
164         ldxa            [%o1] ASI_PNF, %o5              /* Load         Group           */
166         addcc           %o1, 8, %o1                     /* IEU1                         */
        /* Shifted loop.  %o3 = (previous aligned xword << %g5)
           | (next aligned xword >> %g4); %o5 always holds the xword read
           most recently.  %o4 = %o3 - %g1 feeds the same zero-byte test as
           in the aligned loop; when it finds nothing, the annulled delay
           slot stores %o3 and the loop continues.  */
167 15:     sllx            %o5, %g5, %o3                   /* IEU0         Group           */
168         ldxa            [%o1] ASI_PNF, %o5              /* Load                         */
169         srlx            %o5, %g4, %o4                   /* IEU0         Group           */
171         add             %o0, 8, %o0                     /* IEU1                         */
172         or              %o3, %o4, %o3                   /* IEU0         Group           */
173         add             %o1, 8, %o1                     /* IEU1                         */
174         sub             %o3, %g1, %o4                   /* IEU0         Group           */
176 #ifdef EIGHTBIT_NOT_RARE
177         andn            %o4, %o3, %o4                   /* IEU0         Group           */
178 #endif
179         andcc           %o4, %g2, %g0                   /* IEU1         Group           */
180         be,a,pt         %xcc, 15b                       /* CTI                          */
181          stx            %o3, [%o0 - 8]                  /* Store                        */
        /* A zero byte is possible.  Test the bytes of %o3 from shift 56
           downwards.  Only %o4 is used here, so every delay slot
           overwrites the value just tested and each target label
           recomputes the shift it needs.  */
182         srlx            %o3, 56, %o4                    /* IEU0         Group           */
184         andcc           %o4, 0xff, %g0                  /* IEU1         Group           */
185         be,pn           %icc, 22f                       /* CTI                          */
186          srlx           %o3, 48, %o4                    /* IEU0                         */
187         andcc           %o4, 0xff, %g0                  /* IEU1         Group           */
189         be,pn           %icc, 21f                       /* CTI                          */
190          srlx           %o3, 40, %o4                    /* IEU0                         */
191         andcc           %o4, 0xff, %g0                  /* IEU1         Group           */
192         be,pn           %icc, 20f                       /* CTI                          */
194          srlx           %o3, 32, %o4                    /* IEU0                         */
195         andcc           %o4, 0xff, %g0                  /* IEU1         Group           */
196         be,pn           %icc, 19f                       /* CTI                          */
197          srlx           %o3, 24, %o4                    /* IEU0                         */
199         andcc           %o4, 0xff, %g0                  /* IEU1         Group           */
200         be,pn           %icc, 18f                       /* CTI                          */
201          srlx           %o3, 16, %o4                    /* IEU0                         */
202         andcc           %o4, 0xff, %g0                  /* IEU1         Group           */
204         be,pn           %icc, 17f                       /* CTI                          */
205          srlx           %o3, 8, %o4                     /* IEU0                         */
206         andcc           %o4, 0xff, %g0                  /* IEU1         Group           */
        /* The delay slot of the branch to 16 tests the last byte and so
           sets the condition codes for the bne that follows.  The stx in
           the delay slot of that bne is not annulled, so the full xword is
           stored whether the loop continues (last byte non-zero) or the
           routine returns (last byte is the terminator).  */
207         be,pn           %icc, 16f                       /* CTI                          */
209          andcc          %o3, 0xff, %g0                  /* IEU1         Group           */
210         bne,pn          %icc, 15b                       /* CTI                          */
211          stx            %o3, [%o0 - 8]                  /* Store                        */
212         retl                                            /* CTI+IEU1     Group           */
214          mov            %g6, %o0                        /* IEU0                         */
216         .align          16
        /* Tail stores for the shifted loop.  Label N (16..22) is entered
           when the terminator is the byte at offset 22 - N from
           [%o0 - 8].  16..19 fall through one another: single bytes for
           offsets 6, 5 and 4, then one stw for offsets 0-3.  */
217 16:     srlx            %o3, 8, %o4                     /* IEU0         Group           */
218         stb             %o4, [%o0 - 2]                  /* Store                        */
219 17:     srlx            %o3, 16, %o4                    /* IEU0         Group           */
220         stb             %o4, [%o0 - 3]                  /* Store                        */
222 18:     srlx            %o3, 24, %o4                    /* IEU0         Group           */
223         stb             %o4, [%o0 - 4]                  /* Store                        */
224 19:     srlx            %o3, 32, %o4                    /* IEU0         Group           */
225         stw             %o4, [%o0 - 8]                  /* Store                        */
227         retl                                            /* CTI+IEU1     Group           */
228          mov            %g6, %o0                        /* IEU0                         */
        /* The two nops are never reached after the retl above; presumably
           they are padding that positions label 20 -- TODO confirm.  */
229         nop
230         nop
        /* 20..22 fall through one another, storing the bytes at offsets
           2, 1 and 0 one at a time.  */
232 20:     srlx            %o3, 40, %o4                    /* IEU0         Group           */
233         stb             %o4, [%o0 - 6]                  /* Store                        */
234 21:     srlx            %o3, 48, %o4                    /* IEU0         Group           */
235         stb             %o4, [%o0 - 7]                  /* Store                        */
237 22:     srlx            %o3, 56, %o4                    /* IEU0         Group           */
238         stb             %o4, [%o0 - 8]                  /* Store                        */
239         retl                                            /* CTI+IEU1     Group           */
240          mov            %g6, %o0                        /* IEU0                         */
241 END(strcpy)
242 libc_hidden_builtin_def (strcpy)