Update copyright notices with scripts/update-copyrights
[glibc.git] / sysdeps / sparc / sparc64 / memcpy.S
blob97e4b024c69290200f90124e2de11e41100daa18
1 /* Copy SIZE bytes from SRC to DEST.
2    For UltraSPARC.
3    Copyright (C) 1996-2014 Free Software Foundation, Inc.
4    This file is part of the GNU C Library.
5    Contributed by David S. Miller (davem@caip.rutgers.edu) and
6                   Jakub Jelinek (jakub@redhat.com).
8    The GNU C Library is free software; you can redistribute it and/or
9    modify it under the terms of the GNU Lesser General Public
10    License as published by the Free Software Foundation; either
11    version 2.1 of the License, or (at your option) any later version.
13    The GNU C Library is distributed in the hope that it will be useful,
14    but WITHOUT ANY WARRANTY; without even the implied warranty of
15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16    Lesser General Public License for more details.
18    You should have received a copy of the GNU Lesser General Public
19    License along with the GNU C Library; if not, see
20    <http://www.gnu.org/licenses/>.  */
22 #include <sysdep.h>
23 #include <asm/asi.h>
/* Defaults used when the includer has not already defined XCC: define
   USE_BPR, tell the assembler that %g2, %g3 and %g6 are used as scratch
   registers, and make XCC expand to xcc.  When USE_BPR is not defined,
   memcpy zero-extends the length from 32 bits before using it.
   NOTE(review): presumably a wrapper for another ABI predefines XCC before
   including this file -- confirm against the other sysdeps directories.  */
24 #ifndef XCC
25 #define USE_BPR
26         .register       %g2, #scratch
27         .register       %g3, #scratch
28         .register       %g6, #scratch
29 #define XCC     xcc
30 #endif
/* Value written to %fprs just before the VIS copy path returns.
   NOTE(review): going by the name this is the FEF (FPU enable) bit alone --
   confirm against the SPARC V9 %fprs layout.  */
31 #define FPRS_FEF        4
/* FREG_FROB(f1, ..., f9): emit eight faligndata instructions, one for each
   adjacent pair (f1,f2), (f2,f3), ... (f8,f9) of the nine named FP
   registers, writing the results to %f48, %f50, ... %f62.  Arguments are
   register names without the leading '%'.  Expands to 8 instructions.  */
33 #define FREG_FROB(f1, f2, f3, f4, f5, f6, f7, f8, f9)           \
34         faligndata      %f1, %f2, %f48;                         \
35         faligndata      %f2, %f3, %f50;                         \
36         faligndata      %f3, %f4, %f52;                         \
37         faligndata      %f4, %f5, %f54;                         \
38         faligndata      %f5, %f6, %f56;                         \
39         faligndata      %f6, %f7, %f58;                         \
40         faligndata      %f7, %f8, %f60;                         \
41         faligndata      %f8, %f9, %f62;
/* MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, len, jmptgt): one step of the VIS
   copy loop.  Loads through %asi from [src] into fdest, advances src and
   dest by 0x40, subtracts 0x40 from len and branches to jmptgt when len
   reaches zero.  The stda sits in the delay slot of that branch, so fsrc is
   stored through %asi to the pre-increment dest (dest - 0x40 after the add)
   whether or not the branch is taken.  The user of this macro in this file
   sets %asi to ASI_BLK_P beforehand.  Expands to 6 instructions.  */
43 #define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, len, jmptgt)    \
44         ldda            [%src] %asi, %fdest;                    \
45         add             %src, 0x40, %src;                       \
46         add             %dest, 0x40, %dest;                     \
47         subcc           %len, 0x40, %len;                       \
48         be,pn           %xcc, jmptgt;                           \
49          stda           %fsrc, [%dest - 0x40] %asi;
/* LOOP_CHUNK1/2/3(src, dest, len, branch_dest): MAIN_LOOP_CHUNK with the
   load destination fixed to f0, f16 and f32 respectively; all three store
   from f48.  */
51 #define LOOP_CHUNK1(src, dest, len, branch_dest)                \
52         MAIN_LOOP_CHUNK(src, dest, f0,  f48, len, branch_dest)
53 #define LOOP_CHUNK2(src, dest, len, branch_dest)                \
54         MAIN_LOOP_CHUNK(src, dest, f16, f48, len, branch_dest)
55 #define LOOP_CHUNK3(src, dest, len, branch_dest)                \
56         MAIN_LOOP_CHUNK(src, dest, f32, f48, len, branch_dest)
/* STORE_SYNC(dest, fsrc): store fsrc through %asi to [dest], then advance
   dest by 0x40.  The macro itself emits no barrier; every use in this file
   is followed by an explicit "membar #Sync".  Expands to 2 instructions.  */
58 #define STORE_SYNC(dest, fsrc)                                  \
59         stda            %fsrc, [%dest] %asi;                    \
60         add             %dest, 0x40, %dest;
/* STORE_JUMP(dest, fsrc, target): store fsrc through %asi to [dest], advance
   dest by 0x40 and branch unconditionally to target.  The branch is the last
   instruction emitted, so whatever follows the macro invocation becomes its
   delay slot; every use in this file puts "membar #Sync" there.  Expands to
   3 instructions.  */
62 #define STORE_JUMP(dest, fsrc, target)                          \
63         stda            %fsrc, [%dest] %asi;                    \
64         add             %dest, 0x40, %dest;                     \
65         ba,pt           %xcc, target;
/* VISLOOP_PAD: 15 nops.  Each VIS loop variant below is 113 instructions
   long; the padding brings it to 128 instructions (512 bytes), which is the
   stride the computed jump at label 203 uses to select a variant.  The nop
   count must be adjusted if the size of a variant ever changes.  */
67 #define VISLOOP_PAD nop; nop; nop; nop;                         \
68                     nop; nop; nop; nop;                         \
69                     nop; nop; nop; nop;                         \
70                     nop; nop; nop;
/* FINISH_VISCHUNK(dest, f0, f1, left): store one more aligned doubleword
   from data already held in FP registers.  Subtracts 8 from left and leaves
   for local label 205 if the result is negative; otherwise stores
   faligndata(f0, f1) -- computed into %f48 in the delay slot, so it runs on
   both paths -- to [dest] and advances dest by 8.  Execution then falls
   through to whatever follows the macro.  Expands to 5 instructions.  */
72 #define FINISH_VISCHUNK(dest, f0, f1, left)                     \
73         subcc           %left, 8, %left;                        \
74         bl,pn           %xcc, 205f;                             \
75          faligndata     %f0, %f1, %f48;                         \
76         std             %f48, [%dest];                          \
77         add             %dest, 8, %dest;
/* UNEVEN_VISCHUNK(dest, f0, f1, left): used where only a single source
   doubleword (f0) is left in registers.  Subtracts 8 from left and leaves
   for local label 205 if the result is negative; otherwise continues in the
   load loop at local label 204.  The fsrc2 in the delay slot copies f0 into
   f1 on both paths; every use in this file passes f0 as f1, which is the
   register the loop at 204 reads as the previous doubleword.  The final
   branch is annulled (",a"), so it has no delay-slot instruction.  The dest
   argument is not used.  Expands to 4 instructions.  */
79 #define UNEVEN_VISCHUNK(dest, f0, f1, left)                     \
80         subcc           %left, 8, %left;                        \
81         bl,pn           %xcc, 205f;                             \
82          fsrc2          %f0, %f1;                               \
83         ba,a,pt         %xcc, 204f;
85         /* Macros for non-VIS memcpy code. */
/* MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3): copy 32 bytes from
   src + offset to dst + offset.  The source is read with four 64-bit loads;
   each value is written with two 32-bit stores (low half at +4, then, after
   a 32-bit right shift, high half at +0), so dst is only ever accessed one
   word at a time.  t0-t3 are clobbered.  Expands to 16 instructions.  */
86 #define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3)         \
87         ldx             [%src + offset + 0x00], %t0;            \
88         ldx             [%src + offset + 0x08], %t1;            \
89         ldx             [%src + offset + 0x10], %t2;            \
90         ldx             [%src + offset + 0x18], %t3;            \
91         stw             %t0, [%dst + offset + 0x04];            \
92         srlx            %t0, 32, %t0;                           \
93         stw             %t0, [%dst + offset + 0x00];            \
94         stw             %t1, [%dst + offset + 0x0c];            \
95         srlx            %t1, 32, %t1;                           \
96         stw             %t1, [%dst + offset + 0x08];            \
97         stw             %t2, [%dst + offset + 0x14];            \
98         srlx            %t2, 32, %t2;                           \
99         stw             %t2, [%dst + offset + 0x10];            \
100         stw             %t3, [%dst + offset + 0x1c];            \
101         srlx            %t3, 32, %t3;                           \
102         stw             %t3, [%dst + offset + 0x18];
/* MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3): copy 64 bytes from
   src + offset to dst + offset using 64-bit loads and 64-bit stores, as two
   groups of four doublewords (0x00-0x18, then 0x20-0x38).  t0-t3 are
   clobbered.  Expands to 16 instructions.  */
104 #define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3)    \
105         ldx             [%src + offset + 0x00], %t0;            \
106         ldx             [%src + offset + 0x08], %t1;            \
107         ldx             [%src + offset + 0x10], %t2;            \
108         ldx             [%src + offset + 0x18], %t3;            \
109         stx             %t0, [%dst + offset + 0x00];            \
110         stx             %t1, [%dst + offset + 0x08];            \
111         stx             %t2, [%dst + offset + 0x10];            \
112         stx             %t3, [%dst + offset + 0x18];            \
113         ldx             [%src + offset + 0x20], %t0;            \
114         ldx             [%src + offset + 0x28], %t1;            \
115         ldx             [%src + offset + 0x30], %t2;            \
116         ldx             [%src + offset + 0x38], %t3;            \
117         stx             %t0, [%dst + offset + 0x20];            \
118         stx             %t1, [%dst + offset + 0x28];            \
119         stx             %t2, [%dst + offset + 0x30];            \
120         stx             %t3, [%dst + offset + 0x38];
/* MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3): copy the 16 bytes that
   end at src - offset to the 16 bytes that end at dst - offset (src and dst
   point past the data).  Reads with two 64-bit loads and writes with four
   32-bit stores; t2 and t3 receive the high halves.  Expands to exactly 8
   instructions, i.e. 32 bytes of code per 16 bytes copied: the computed
   jump at local label 79 in memcpy relies on that 2:1 ratio to enter the
   middle of a run of these macros.  */
122 #define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3)        \
123         ldx             [%src - offset - 0x10], %t0;            \
124         ldx             [%src - offset - 0x08], %t1;            \
125         stw             %t0, [%dst - offset - 0x0c];            \
126         srlx            %t0, 32, %t2;                           \
127         stw             %t2, [%dst - offset - 0x10];            \
128         stw             %t1, [%dst - offset - 0x04];            \
129         srlx            %t1, 32, %t3;                           \
130         stw             %t3, [%dst - offset - 0x08];
/* MOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1): copy the 16 bytes that end
   at src - offset to the 16 bytes that end at dst - offset using two 64-bit
   loads and two 64-bit stores.  t0 and t1 are clobbered.  Expands to 4
   instructions.  */
132 #define MOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1)           \
133         ldx             [%src - offset - 0x10], %t0;            \
134         ldx             [%src - offset - 0x08], %t1;            \
135         stx             %t0, [%dst - offset - 0x10];            \
136         stx             %t1, [%dst - offset - 0x08];
138         .text
139         .align          32
/* __memcpy_large: VIS block-copy path for large copies, plus the shared
   tiny-copy and return stubs (local labels 208 and 209).

   Entered at local label 200 from memcpy when the length is at least 64 * 6
   bytes.  State on entry, as set up by memcpy/__mempcpy:
     %o0 = dst, %o1 = src, %o2 = length, %g4 = value to return,
     %g2 = dst & 7, with the condition codes reflecting that same andcc
     (it sits in the delay slot of the branch that gets here).

   Phases:
     200-3:   copy single bytes until dst is 8-byte aligned.
     201-1:   copy 8 bytes at a time (alignaddr/faligndata) until dst is
              64-byte aligned.
     202-203: split the remaining length, preload three 64-byte source
              blocks and jump to one of eight 512-byte loop variants chosen
              by which 8-byte word of its 64-byte block src points into.
     300-373: software-pipelined loop, 64 bytes per load/store through %asi.
     400-446: store up to seven more doublewords from data still held in
              FP registers.
     204:     further doublewords, loaded with plain ldd.
     205-207: trailing bytes, write %fprs, return %g4.

   Instruction order and size are part of the logic: a line indented by one
   extra space is the delay slot of the branch above it, and the jump at
   203 depends on each loop variant being exactly 512 bytes long.  */
140 ENTRY(__memcpy_large)
/* dst already 8-byte aligned?  The delay slot prepares %g5 = dst & 0x38 and
   the condition codes that the branch at 201 tests.  */
141 200:    be,pt           %xcc, 201f                      /* CTI                          */
142          andcc          %o0, 0x38, %g5                  /* IEU1         Group           */
/* %g2 = 8 - (dst & 7): head bytes needed to reach 8-byte alignment.  They
   are taken off the length up front (delay slot of the branch below).  An
   even dst skips the single-byte step.  */
143         mov             8, %g1                          /* IEU0                         */
144         sub             %g1, %g2, %g2                   /* IEU0         Group           */
145         andcc           %o0, 1, %g0                     /* IEU1                         */
146         be,pt           %icc, 2f                        /* CTI                          */
147          sub            %o2, %g2, %o2                   /* IEU0         Group           */
/* dst is odd: copy one byte; the head is finished if that was the last.  */
148 1:      ldub            [%o1], %o5                      /* Load         Group           */
149         add             %o1, 1, %o1                     /* IEU0                         */
150         add             %o0, 1, %o0                     /* IEU1                         */
151         subcc           %g2, 1, %g2                     /* IEU1         Group           */
152         be,pn           %xcc, 3f                        /* CTI                          */
153          stb            %o5, [%o0 - 1]                  /* Store                        */
/* Copy the rest of the head two bytes per iteration.  */
154 2:      ldub            [%o1], %o5                      /* Load         Group           */
155         add             %o0, 2, %o0                     /* IEU0                         */
156         ldub            [%o1 + 1], %g3                  /* Load         Group           */
157         subcc           %g2, 2, %g2                     /* IEU1         Group           */
158         stb             %o5, [%o0 - 2]                  /* Store                        */
159         add             %o1, 2, %o1                     /* IEU0                         */
160         bne,pt          %xcc, 2b                        /* CTI          Group           */
161          stb            %g3, [%o0 - 1]                  /* Store                        */
/* Recompute dst & 0x38 for the now 8-byte-aligned dst.  */
162 3:      andcc           %o0, 0x38, %g5                  /* IEU1         Group           */
/* Zero means dst is already 64-byte aligned: go straight to 202.  */
163 201:    be,pt           %icc, 202f                      /* CTI                          */
164          mov            64, %g1                         /* IEU0                         */
/* NOTE(review): the reason for this FP register copy is not evident from
   the code shown here -- presumably it touches the FPU before the VIS
   instructions below; confirm.  */
165         fsrc2           %f0, %f2                        /* FPU                          */
/* %g5 = 64 - (dst & 0x38): bytes to copy to reach 64-byte alignment (a
   multiple of 8), removed from the length.  alignaddr leaves src rounded
   down to 8 bytes in %g1 and records the src misalignment for faligndata;
   the first source doubleword is preloaded into %f4.  */
166         sub             %g1, %g5, %g5                   /* IEU0         Group           */
167         alignaddr       %o1, %g0, %g1                   /* GRU          Group           */
168         ldd             [%g1], %f4                      /* Load         Group           */
169         sub             %o2, %g5, %o2                   /* IEU0                         */
/* Two-way unrolled loop: %f4/%f6 alternate as previous/next source
   doubleword and faligndata extracts the 8 bytes to store from the pair.
   Both halves leave for 202 when %g5 reaches zero.  */
170 1:      ldd             [%g1 + 0x8], %f6                /* Load         Group           */
171         add             %g1, 0x8, %g1                   /* IEU0         Group           */
172         subcc           %g5, 8, %g5                     /* IEU1                         */
173         faligndata      %f4, %f6, %f0                   /* GRU          Group           */
174         std             %f0, [%o0]                      /* Store                        */
175         add             %o1, 8, %o1                     /* IEU0         Group           */
176         be,pn           %xcc, 202f                      /* CTI                          */
177          add            %o0, 8, %o0                     /* IEU1                         */
178         ldd             [%g1 + 0x8], %f4                /* Load         Group           */
179         add             %g1, 8, %g1                     /* IEU0                         */
180         subcc           %g5, 8, %g5                     /* IEU1                         */
181         faligndata      %f6, %f4, %f0                   /* GRU          Group           */
182         std             %f0, [%o0]                      /* Store                        */
183         add             %o1, 8, %o1                     /* IEU0                         */
184         bne,pt          %xcc, 1b                        /* CTI          Group           */
185          add            %o0, 8, %o0                     /* IEU0                         */
/* Block phase set-up.  %asi is switched to ASI_BLK_P for the ldda/stda
   accesses.  With len = remaining length in %o2 on arrival here:
     %g6 = (len - 0x40) & ~0x3f         bytes moved by 64-byte stores
     %g2 = (src >> 3) & 7               loop variant index
     %g3 = ((len - %g6) & ~7) - 0x10    bytes stored 8 at a time afterwards
     %o2 = len - %g6 - %g3              bytes left for the final byte loop
     %g1 = src + %g6 + %g3              source address of those final bytes
     %o1 = src & ~0x3f                  block-aligned load pointer
   The second alignaddr only records the src misalignment (result goes to
   %g0).  Three source blocks are preloaded into %f0.., %f16.. and %f32..;
   %g6 is then reduced by 0x80 because the last two 64-byte stores are
   issued by the loop-exit stubs rather than by the counted loop.  */
186 202:    membar    #LoadStore | #StoreStore | #StoreLoad /* LSU          Group           */
187         wr              %g0, ASI_BLK_P, %asi            /* LSU          Group           */
188         subcc           %o2, 0x40, %g6                  /* IEU1         Group           */
189         mov             %o1, %g1                        /* IEU0                         */
190         andncc          %g6, (0x40 - 1), %g6            /* IEU1         Group           */
191         srl             %g1, 3, %g2                     /* IEU0                         */
192         sub             %o2, %g6, %g3                   /* IEU0         Group           */
193         andn            %o1, (0x40 - 1), %o1            /* IEU1                         */
194         and             %g2, 7, %g2                     /* IEU0         Group           */
195         andncc          %g3, 0x7, %g3                   /* IEU1                         */
196         fsrc2           %f0, %f2                        /* FPU                          */
197         sub             %g3, 0x10, %g3                  /* IEU0         Group           */
198         sub             %o2, %g6, %o2                   /* IEU1                         */
199         alignaddr       %g1, %g0, %g0                   /* GRU          Group           */
200         add             %g1, %g6, %g1                   /* IEU0         Group           */
201         subcc           %o2, %g3, %o2                   /* IEU1                         */
202         ldda            [%o1 + 0x00] %asi, %f0          /* LSU          Group           */
203         add             %g1, %g3, %g1                   /* IEU0                         */
204         ldda            [%o1 + 0x40] %asi, %f16         /* LSU          Group           */
205         sub             %g6, 0x80, %g6                  /* IEU0                         */
206         ldda            [%o1 + 0x80] %asi, %f32         /* LSU          Group           */
207                                                         /* Clk1         Group 8-(       */
208                                                         /* Clk2         Group 8-(       */
209                                                         /* Clk3         Group 8-(       */
210                                                         /* Clk4         Group 8-(       */
/* Computed jump: target = address of 300 + variant index * 512.  The delay
   slot moves the load pointer past the three blocks already loaded.  */
211 203:    rd              %pc, %g5                        /* PDU          Group 8-(       */
212         addcc           %g5, %lo(300f - 203b), %g5      /* IEU1         Group           */
213         sll             %g2, 9, %g2                     /* IEU0                         */
214         jmpl            %g5 + %g2, %g0                  /* CTI          Group brk forced*/
215          addcc          %o1, 0xc0, %o1                  /* IEU1         Group           */
/* Eight loop variants (300, 310, ... 370), one per source word offset.  Each
   is 113 instructions plus the 15 nops of VISLOOP_PAD, i.e. exactly 512
   bytes.  Within a variant:
     - three FREG_FROB + LOOP_CHUNKn steps rotate through the three register
       groups; each step builds 64 aligned bytes in %f48-%f62, issues the
       next load into the group whose data has been consumed and stores the
       result;
     - the closing "b,pt 3x0b+4" re-enters the loop at its second
       instruction, the first faligndata having run in the delay slot;
     - 3x1/3x2/3x3 are the exits taken when %g6 reaches zero in step 1/2/3:
       they store two more blocks from data already loaded and branch into
       the 400-series table at the first register pair not yet consumed;
       the "membar #Sync" after STORE_JUMP fills that branch's delay slot.  */
217         .align          512             /* OK, here comes the fun part... */
218 300:    FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)  LOOP_CHUNK1(o1, o0, g6, 301f)
219         FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)  LOOP_CHUNK2(o1, o0, g6, 302f)
220         FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)   LOOP_CHUNK3(o1, o0, g6, 303f)
221         b,pt            %xcc, 300b+4; faligndata %f0, %f2, %f48
222 301:    FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)  STORE_SYNC(o0, f48) membar #Sync
223         FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)   STORE_JUMP(o0, f48, 400f) membar #Sync
224 302:    FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)   STORE_SYNC(o0, f48) membar #Sync
225         FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)  STORE_JUMP(o0, f48, 416f) membar #Sync
226 303:    FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)  STORE_SYNC(o0, f48) membar #Sync
227         FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)  STORE_JUMP(o0, f48, 432f) membar #Sync
228         VISLOOP_PAD
229 310:    FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)  LOOP_CHUNK1(o1, o0, g6, 311f)
230         FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)  LOOP_CHUNK2(o1, o0, g6, 312f)
231         FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)   LOOP_CHUNK3(o1, o0, g6, 313f)
232         b,pt            %xcc, 310b+4; faligndata %f2, %f4, %f48
233 311:    FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)  STORE_SYNC(o0, f48) membar #Sync
234         FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)   STORE_JUMP(o0, f48, 402f) membar #Sync
235 312:    FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)   STORE_SYNC(o0, f48) membar #Sync
236         FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)  STORE_JUMP(o0, f48, 418f) membar #Sync
237 313:    FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)  STORE_SYNC(o0, f48) membar #Sync
238         FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)  STORE_JUMP(o0, f48, 434f) membar #Sync
239         VISLOOP_PAD
240 320:    FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)  LOOP_CHUNK1(o1, o0, g6, 321f)
241         FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)  LOOP_CHUNK2(o1, o0, g6, 322f)
242         FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)   LOOP_CHUNK3(o1, o0, g6, 323f)
243         b,pt            %xcc, 320b+4; faligndata %f4, %f6, %f48
244 321:    FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)  STORE_SYNC(o0, f48) membar #Sync
245         FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)   STORE_JUMP(o0, f48, 404f) membar #Sync
246 322:    FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)   STORE_SYNC(o0, f48) membar #Sync
247         FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)  STORE_JUMP(o0, f48, 420f) membar #Sync
248 323:    FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)  STORE_SYNC(o0, f48) membar #Sync
249         FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)  STORE_JUMP(o0, f48, 436f) membar #Sync
250         VISLOOP_PAD
251 330:    FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)  LOOP_CHUNK1(o1, o0, g6, 331f)
252         FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)  LOOP_CHUNK2(o1, o0, g6, 332f)
253         FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)   LOOP_CHUNK3(o1, o0, g6, 333f)
254         b,pt            %xcc, 330b+4; faligndata %f6, %f8, %f48
255 331:    FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)  STORE_SYNC(o0, f48) membar #Sync
256         FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)   STORE_JUMP(o0, f48, 406f) membar #Sync
257 332:    FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)   STORE_SYNC(o0, f48) membar #Sync
258         FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)  STORE_JUMP(o0, f48, 422f) membar #Sync
259 333:    FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)  STORE_SYNC(o0, f48) membar #Sync
260         FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)  STORE_JUMP(o0, f48, 438f) membar #Sync
261         VISLOOP_PAD
262 340:    FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)  LOOP_CHUNK1(o1, o0, g6, 341f)
263         FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)  LOOP_CHUNK2(o1, o0, g6, 342f)
264         FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)   LOOP_CHUNK3(o1, o0, g6, 343f)
265         b,pt            %xcc, 340b+4; faligndata %f8, %f10, %f48
266 341:    FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)  STORE_SYNC(o0, f48) membar #Sync
267         FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)   STORE_JUMP(o0, f48, 408f) membar #Sync
268 342:    FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)   STORE_SYNC(o0, f48) membar #Sync
269         FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)  STORE_JUMP(o0, f48, 424f) membar #Sync
270 343:    FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)  STORE_SYNC(o0, f48) membar #Sync
271         FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)  STORE_JUMP(o0, f48, 440f) membar #Sync
272         VISLOOP_PAD
273 350:    FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)  LOOP_CHUNK1(o1, o0, g6, 351f)
274         FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)  LOOP_CHUNK2(o1, o0, g6, 352f)
275         FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)  LOOP_CHUNK3(o1, o0, g6, 353f)
276         b,pt            %xcc, 350b+4; faligndata %f10, %f12, %f48
277 351:    FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)  STORE_SYNC(o0, f48) membar #Sync
278         FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)  STORE_JUMP(o0, f48, 410f) membar #Sync
279 352:    FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)  STORE_SYNC(o0, f48) membar #Sync
280         FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)  STORE_JUMP(o0, f48, 426f) membar #Sync
281 353:    FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)  STORE_SYNC(o0, f48) membar #Sync
282         FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)  STORE_JUMP(o0, f48, 442f) membar #Sync
283         VISLOOP_PAD
284 360:    FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)  LOOP_CHUNK1(o1, o0, g6, 361f)
285         FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)  LOOP_CHUNK2(o1, o0, g6, 362f)
286         FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)  LOOP_CHUNK3(o1, o0, g6, 363f)
287         b,pt            %xcc, 360b+4; faligndata %f12, %f14, %f48
288 361:    FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)  STORE_SYNC(o0, f48) membar #Sync
289         FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)  STORE_JUMP(o0, f48, 412f) membar #Sync
290 362:    FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)  STORE_SYNC(o0, f48) membar #Sync
291         FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)  STORE_JUMP(o0, f48, 428f) membar #Sync
292 363:    FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)  STORE_SYNC(o0, f48) membar #Sync
293         FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)  STORE_JUMP(o0, f48, 444f) membar #Sync
294         VISLOOP_PAD
295 370:    FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)  LOOP_CHUNK1(o1, o0, g6, 371f)
296         FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)  LOOP_CHUNK2(o1, o0, g6, 372f)
297         FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)  LOOP_CHUNK3(o1, o0, g6, 373f)
298         b,pt            %xcc, 370b+4; faligndata %f14, %f16, %f48
299 371:    FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)  STORE_SYNC(o0, f48) membar #Sync
300         FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)  STORE_JUMP(o0, f48, 414f) membar #Sync
301 372:    FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)  STORE_SYNC(o0, f48) membar #Sync
302         FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)  STORE_JUMP(o0, f48, 430f) membar #Sync
303 373:    FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)  STORE_SYNC(o0, f48) membar #Sync
304         FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)  STORE_JUMP(o0, f48, 446f) membar #Sync
305         VISLOOP_PAD
/* Drain table.  Label = 400 + number of the first FP register still holding
   unstored source data.  Each FINISH_VISCHUNK stores one more doubleword
   and falls through to the next entry; the UNEVEN_VISCHUNK that ends each
   group of eight has only one source doubleword left, so it moves it to
   %f0 and continues in the load loop at 204.  Every entry leaves for 205
   once %g3 is used up.  */
306 400:    FINISH_VISCHUNK(o0, f0,  f2,  g3)
307 402:    FINISH_VISCHUNK(o0, f2,  f4,  g3)
308 404:    FINISH_VISCHUNK(o0, f4,  f6,  g3)
309 406:    FINISH_VISCHUNK(o0, f6,  f8,  g3)
310 408:    FINISH_VISCHUNK(o0, f8,  f10, g3)
311 410:    FINISH_VISCHUNK(o0, f10, f12, g3)
312 412:    FINISH_VISCHUNK(o0, f12, f14, g3)
313 414:    UNEVEN_VISCHUNK(o0, f14, f0,  g3)
314 416:    FINISH_VISCHUNK(o0, f16, f18, g3)
315 418:    FINISH_VISCHUNK(o0, f18, f20, g3)
316 420:    FINISH_VISCHUNK(o0, f20, f22, g3)
317 422:    FINISH_VISCHUNK(o0, f22, f24, g3)
318 424:    FINISH_VISCHUNK(o0, f24, f26, g3)
319 426:    FINISH_VISCHUNK(o0, f26, f28, g3)
320 428:    FINISH_VISCHUNK(o0, f28, f30, g3)
321 430:    UNEVEN_VISCHUNK(o0, f30, f0,  g3)
322 432:    FINISH_VISCHUNK(o0, f32, f34, g3)
323 434:    FINISH_VISCHUNK(o0, f34, f36, g3)
324 436:    FINISH_VISCHUNK(o0, f36, f38, g3)
325 438:    FINISH_VISCHUNK(o0, f38, f40, g3)
326 440:    FINISH_VISCHUNK(o0, f40, f42, g3)
327 442:    FINISH_VISCHUNK(o0, f42, f44, g3)
328 444:    FINISH_VISCHUNK(o0, f44, f46, g3)
329 446:    UNEVEN_VISCHUNK(o0, f46, f0,  g3)
/* Doubleword loop for what the registers could not supply: load the next
   source doubleword from [%o1], merge it with the previous one and store 8
   bytes.  Unrolled twice with %f0/%f2 swapping roles.  The doubleword being
   stored was already accounted for in %g3 before arriving here; the subcc
   in each half tests whether yet another one is due after it.  */
330 204:    ldd             [%o1], %f2                      /* Load         Group           */
331         add             %o1, 8, %o1                     /* IEU0                         */
332         subcc           %g3, 8, %g3                     /* IEU1                         */
333         faligndata      %f0, %f2, %f8                   /* GRU          Group           */
334         std             %f8, [%o0]                      /* Store                        */
335         bl,pn           %xcc, 205f                      /* CTI                          */
336          add            %o0, 8, %o0                     /* IEU0         Group           */
337         ldd             [%o1], %f0                      /* Load         Group           */
338         add             %o1, 8, %o1                     /* IEU0                         */
339         subcc           %g3, 8, %g3                     /* IEU1                         */
340         faligndata      %f2, %f0, %f8                   /* GRU          Group           */
341         std             %f8, [%o0]                      /* Store                        */
342         bge,pt          %xcc, 204b                      /* CTI                          */
343          add            %o0, 8, %o0                     /* IEU0         Group           */
/* Tail: switch %o1 to the unaligned source address computed at 202 (delay
   slot) and copy the remaining %o2 bytes one at a time.  */
344 205:    brz,pt          %o2, 207f                       /* CTI          Group           */
345          mov            %g1, %o1                        /* IEU0                         */
346 206:    ldub            [%o1], %g5                      /* LOAD                         */
347         add             %o1, 1, %o1                     /* IEU0                         */
348         add             %o0, 1, %o0                     /* IEU1                         */
349         subcc           %o2, 1, %o2                     /* IEU1                         */
350         bne,pt          %xcc, 206b                      /* CTI                          */
351          stb            %g5, [%o0 - 1]                  /* Store        Group           */
/* Order the stores, write FPRS_FEF to %fprs and return the value saved in
   %g4 (moved to %o0 in the delay slot of retl).  */
352 207:    membar          #StoreLoad | #StoreStore        /* LSU          Group           */
353         wr              %g0, FPRS_FEF, %fprs
354         retl
355          mov            %g4, %o0
/* Tiny-copy path, entered from memcpy for non-zero lengths of at most 15
   bytes.  For an even length the branch goes to the second instruction of
   loop 2; the ldub in its delay slot (label 1) stands in for the skipped
   first instruction.  For an odd length execution continues at 1, which
   copies one byte and returns through 209 if that was all.  Loop 2 then
   copies two bytes per iteration.  */
357 208:    andcc           %o2, 1, %g0                     /* IEU1         Group           */
358         be,pt           %icc, 2f+4                      /* CTI                          */
359 1:       ldub           [%o1], %g5                      /* LOAD         Group           */
360         add             %o1, 1, %o1                     /* IEU0                         */
361         add             %o0, 1, %o0                     /* IEU1                         */
362         subcc           %o2, 1, %o2                     /* IEU1         Group           */
363         be,pn           %xcc, 209f                      /* CTI                          */
364          stb            %g5, [%o0 - 1]                  /* Store                        */
365 2:      ldub            [%o1], %g5                      /* LOAD         Group           */
366         add             %o0, 2, %o0                     /* IEU0                         */
367         ldub            [%o1 + 1], %o5                  /* LOAD         Group           */
368         add             %o1, 2, %o1                     /* IEU0                         */
369         subcc           %o2, 2, %o2                     /* IEU1         Group           */
370         stb             %g5, [%o0 - 2]                  /* Store                        */
371         bne,pt          %xcc, 2b                        /* CTI                          */
372          stb            %o5, [%o0 - 1]                  /* Store                        */
/* Common return: %o0 = %g4.  Also the target of memcpy's zero-length
   check.  */
373 209:    retl
374          mov            %g4, %o0
375 END(__memcpy_large)
/* __mempcpy: performs the same copy as memcpy, but the value returned is
   dst + length.  The delay slot puts that value (%o0 + %o2) in %g4, which
   the copy paths shown in this file move to %o0 before returning, and the
   branch joins memcpy at its local label 210, just after memcpy's own
   "%g4 = dst" set-up.  */
377 ENTRY(__mempcpy)
378         ba,pt           %xcc, 210f
379          add            %o0, %o2, %g4
380 END(__mempcpy)
382         .align          32
383 ENTRY(memcpy)
384          mov            %o0, %g4                        /* IEU0         Group           */
385 210:
386 #ifndef USE_BPR
387         srl             %o2, 0, %o2                     /* IEU1                         */
388 #endif
389         brz,pn          %o2, 209b                       /* CTI          Group           */
390 218:     cmp            %o2, 15                         /* IEU1                         */
391         bleu,pn         %xcc, 208b                      /* CTI          Group           */
392          cmp            %o2, (64 * 6)                   /* IEU1                         */
393         bgeu,pn         %xcc, 200b                      /* CTI          Group           */
394          andcc          %o0, 7, %g2                     /* IEU1                         */
395         sub             %o0, %o1, %g5                   /* IEU0                         */
396         andcc           %g5, 3, %o5                     /* IEU1         Group           */
397         bne,pn          %xcc, 212f                      /* CTI                          */
398          andcc          %o1, 3, %g0                     /* IEU1         Group           */
399         be,a,pt         %xcc, 216f                      /* CTI                          */
400          andcc          %o1, 4, %g0                     /* IEU1         Group           */
401         andcc           %o1, 1, %g0                     /* IEU1         Group           */
402         be,pn           %xcc, 4f                        /* CTI                          */
403          andcc          %o1, 2, %g0                     /* IEU1         Group           */
404         ldub            [%o1], %g2                      /* Load         Group           */
405         add             %o1, 1, %o1                     /* IEU0                         */
406         add             %o0, 1, %o0                     /* IEU1                         */
407         sub             %o2, 1, %o2                     /* IEU0         Group           */
408         bne,pn          %xcc, 5f                        /* CTI          Group           */
409          stb            %g2, [%o0 - 1]                  /* Store                        */
410 4:      lduh            [%o1], %g2                      /* Load         Group           */
411         add             %o1, 2, %o1                     /* IEU0                         */
412         add             %o0, 2, %o0                     /* IEU1                         */
413         sub             %o2, 2, %o2                     /* IEU0                         */
414         sth             %g2, [%o0 - 2]                  /* Store        Group + bubble  */
415 5:      andcc           %o1, 4, %g0                     /* IEU1                         */
416 216:    be,a,pn         %xcc, 2f                        /* CTI                          */
417          andcc          %o2, -128, %g6                  /* IEU1         Group           */
418         lduw            [%o1], %g5                      /* Load         Group           */
419         add             %o1, 4, %o1                     /* IEU0                         */
420         add             %o0, 4, %o0                     /* IEU1                         */
421         sub             %o2, 4, %o2                     /* IEU0         Group           */
422         stw             %g5, [%o0 - 4]                  /* Store                        */
423         andcc           %o2, -128, %g6                  /* IEU1         Group           */
424 2:      be,pn           %xcc, 215f                      /* CTI                          */
425          andcc          %o0, 4, %g0                     /* IEU1         Group           */
426         be,pn           %xcc, 82f + 4                   /* CTI          Group           */
427 5:      MOVE_BIGCHUNK(o1, o0, 0x00, g1, g3, g5, o5)
428         MOVE_BIGCHUNK(o1, o0, 0x20, g1, g3, g5, o5)
429         MOVE_BIGCHUNK(o1, o0, 0x40, g1, g3, g5, o5)
430         MOVE_BIGCHUNK(o1, o0, 0x60, g1, g3, g5, o5)
431 35:     subcc           %g6, 128, %g6                   /* IEU1         Group           */
432         add             %o1, 128, %o1                   /* IEU0                         */
433         bne,pt          %xcc, 5b                        /* CTI                          */
434          add            %o0, 128, %o0                   /* IEU0         Group           */
435 215:    andcc           %o2, 0x70, %g6                  /* IEU1         Group           */
436 41:     be,pn           %xcc, 80f                       /* CTI                          */
437          andcc          %o2, 8, %g0                     /* IEU1         Group           */
438                                                         /* Clk1 8-(                     */
439                                                         /* Clk2 8-(                     */
440                                                         /* Clk3 8-(                     */
441                                                         /* Clk4 8-(                     */
442 79:     rd              %pc, %o5                        /* PDU          Group           */
443         sll             %g6, 1, %g5                     /* IEU0         Group           */
444         add             %o1, %g6, %o1                   /* IEU1                         */
445         sub             %o5, %g5, %o5                   /* IEU0         Group           */
446         jmpl            %o5 + %lo(80f - 79b), %g0       /* CTI          Group brk forced*/
447          add            %o0, %g6, %o0                   /* IEU0         Group           */
448 36:     MOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g5, o5)
449         MOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g5, o5)
450         MOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g5, o5)
451         MOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g5, o5)
452         MOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g5, o5)
453         MOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g5, o5)
454         MOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g5, o5)
455 80:     be,pt           %xcc, 81f                       /* CTI                          */
456          andcc          %o2, 4, %g0                     /* IEU1                         */
457         ldx             [%o1], %g2                      /* Load         Group           */
458         add             %o0, 8, %o0                     /* IEU0                         */
459         stw             %g2, [%o0 - 0x4]                /* Store        Group           */
460         add             %o1, 8, %o1                     /* IEU1                         */
461         srlx            %g2, 32, %g2                    /* IEU0         Group           */
462         stw             %g2, [%o0 - 0x8]                /* Store                        */
463 81:     be,pt           %xcc, 1f                        /* CTI                          */
464          andcc          %o2, 2, %g0                     /* IEU1         Group           */
465         lduw            [%o1], %g2                      /* Load         Group           */
466         add             %o1, 4, %o1                     /* IEU0                         */
467         stw             %g2, [%o0]                      /* Store        Group           */
468         add             %o0, 4, %o0                     /* IEU0                         */
469 1:      be,pt           %xcc, 1f                        /* CTI                          */
470          andcc          %o2, 1, %g0                     /* IEU1         Group           */
471         lduh            [%o1], %g2                      /* Load         Group           */
472         add             %o1, 2, %o1                     /* IEU0                         */
473         sth             %g2, [%o0]                      /* Store        Group           */
474         add             %o0, 2, %o0                     /* IEU0                         */
475 1:      be,pt           %xcc, 211f                      /* CTI                          */
476          nop                                            /* IEU1                         */
477         ldub            [%o1], %g2                      /* Load         Group           */
478         stb             %g2, [%o0]                      /* Store        Group + bubble  */
479 211:    retl
480          mov            %g4, %o0
482 82:     MOVE_BIGALIGNCHUNK(o1, o0, 0x00, g1, g3, g5, o5)
483         MOVE_BIGALIGNCHUNK(o1, o0, 0x40, g1, g3, g5, o5)
484 37:     subcc           %g6, 128, %g6                   /* IEU1         Group           */
485         add             %o1, 128, %o1                   /* IEU0                         */
486         bne,pt          %xcc, 82b                       /* CTI                          */
487          add            %o0, 128, %o0                   /* IEU0         Group           */
488         andcc           %o2, 0x70, %g6                  /* IEU1                         */
489         be,pn           %xcc, 84f                       /* CTI                          */
490          andcc          %o2, 8, %g0                     /* IEU1         Group           */
491                                                         /* Clk1 8-(                     */
492                                                         /* Clk2 8-(                     */
493                                                         /* Clk3 8-(                     */
494                                                         /* Clk4 8-(                     */
495 83:     rd              %pc, %o5                        /* PDU          Group           */
496         add             %o1, %g6, %o1                   /* IEU0         Group           */
497         sub             %o5, %g6, %o5                   /* IEU1                         */
498         jmpl            %o5 + %lo(84f - 83b), %g0       /* CTI          Group brk forced*/
499          add            %o0, %g6, %o0                   /* IEU0         Group           */
500 38:     MOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2, g3)
501         MOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2, g3)
502         MOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2, g3)
503         MOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2, g3)
504         MOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2, g3)
505         MOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2, g3)
506         MOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2, g3)
507 84:     be,pt           %xcc, 85f                       /* CTI          Group           */
508          andcc          %o2, 4, %g0                     /* IEU1                         */
509         ldx             [%o1], %g2                      /* Load         Group           */
510         add             %o0, 8, %o0                     /* IEU0                         */
511         add             %o1, 8, %o1                     /* IEU0         Group           */
512         stx             %g2, [%o0 - 0x8]                /* Store                        */
513 85:     be,pt           %xcc, 1f                        /* CTI                          */
514          andcc          %o2, 2, %g0                     /* IEU1         Group           */
515         lduw            [%o1], %g2                      /* Load         Group           */
516         add             %o0, 4, %o0                     /* IEU0                         */
517         add             %o1, 4, %o1                     /* IEU0         Group           */
518         stw             %g2, [%o0 - 0x4]                /* Store                        */
519 1:      be,pt           %xcc, 1f                        /* CTI                          */
520          andcc          %o2, 1, %g0                     /* IEU1         Group           */
521         lduh            [%o1], %g2                      /* Load         Group           */
522         add             %o0, 2, %o0                     /* IEU0                         */
523         add             %o1, 2, %o1                     /* IEU0         Group           */
524         sth             %g2, [%o0 - 0x2]                /* Store                        */
525 1:      be,pt           %xcc, 1f                        /* CTI                          */
526          nop                                            /* IEU0         Group           */
527         ldub            [%o1], %g2                      /* Load         Group           */
528         stb             %g2, [%o0]                      /* Store        Group + bubble  */
529 1:      retl
530          mov            %g4, %o0
532 212:    brz,pt          %g2, 2f                         /* CTI          Group           */
533          mov            8, %g1                          /* IEU0                         */
534         sub             %g1, %g2, %g2                   /* IEU0         Group           */
535         sub             %o2, %g2, %o2                   /* IEU0         Group           */
536 1:      ldub            [%o1], %g5                      /* Load         Group           */
537         add             %o1, 1, %o1                     /* IEU0                         */
538         add             %o0, 1, %o0                     /* IEU1                         */
539         subcc           %g2, 1, %g2                     /* IEU1         Group           */
540         bne,pt          %xcc, 1b                        /* CTI                          */
541          stb            %g5, [%o0 - 1]                  /* Store                        */
542 2:      andn            %o2, 7, %g5                     /* IEU0         Group           */
543         and             %o2, 7, %o2                     /* IEU1                         */
544         fsrc2           %f0, %f2                        /* FPU                          */
545         alignaddr       %o1, %g0, %g1                   /* GRU          Group           */
546         ldd             [%g1], %f4                      /* Load         Group           */
547 1:      ldd             [%g1 + 0x8], %f6                /* Load         Group           */
548         add             %g1, 0x8, %g1                   /* IEU0         Group           */
549         subcc           %g5, 8, %g5                     /* IEU1                         */
550         faligndata      %f4, %f6, %f0                   /* GRU          Group           */
551         std             %f0, [%o0]                      /* Store                        */
552         add             %o1, 8, %o1                     /* IEU0         Group           */
553         be,pn           %xcc, 213f                      /* CTI                          */
554          add            %o0, 8, %o0                     /* IEU1                         */
555         ldd             [%g1 + 0x8], %f4                /* Load         Group           */
556         add             %g1, 8, %g1                     /* IEU0                         */
557         subcc           %g5, 8, %g5                     /* IEU1                         */
558         faligndata      %f6, %f4, %f0                   /* GRU          Group           */
559         std             %f0, [%o0]                      /* Store                        */
560         add             %o1, 8, %o1                     /* IEU0                         */
561         bne,pn          %xcc, 1b                        /* CTI          Group           */
562          add            %o0, 8, %o0                     /* IEU0                         */
563 213:    brz,pn          %o2, 214f                       /* CTI          Group           */
564          nop                                            /* IEU0                         */
565         ldub            [%o1], %g5                      /* LOAD                         */
566         add             %o1, 1, %o1                     /* IEU0                         */
567         add             %o0, 1, %o0                     /* IEU1                         */
568         subcc           %o2, 1, %o2                     /* IEU1                         */
569         bne,pt          %xcc, 206b                      /* CTI                          */
570          stb            %g5, [%o0 - 1]                  /* Store        Group           */
571 214:    wr              %g0, FPRS_FEF, %fprs
572         retl
573          mov            %g4, %o0
574 END(memcpy)
/* Symbol bookkeeping that follows END(memcpy).  The macros invoked below are
   not defined in this file.  NOTE(review): presumably they are provided by
   the library's internal headers reached through <sysdep.h> -- confirm.  */
576 libc_hidden_builtin_def (memcpy)
/* The three invocations below name __mempcpy and mempcpy; the entry point
   for __mempcpy is not visible in this part of the file.
   NOTE(review): weak_alias (__mempcpy, mempcpy) presumably makes mempcpy a
   weak alias of __mempcpy, and the *_hidden_* macros presumably set up
   library-internal (hidden) names for these symbols -- verify against the
   macro definitions before relying on this.  */
578 libc_hidden_def (__mempcpy)
579 weak_alias (__mempcpy, mempcpy)
580 libc_hidden_builtin_def (mempcpy)