powerpc64: Fix by using the configure value $libc_cv_cc_submachine [BZ #31629]
[glibc.git] / sysdeps / sparc / sparc64 / memcpy.S
blobe7bb3909c265a01c7e56e9d47f0c17fbafd70517
1 /* Copy SIZE bytes from SRC to DEST.
2    For UltraSPARC.
3    Copyright (C) 1996-2024 Free Software Foundation, Inc.
4    This file is part of the GNU C Library.
6    The GNU C Library is free software; you can redistribute it and/or
7    modify it under the terms of the GNU Lesser General Public
8    License as published by the Free Software Foundation; either
9    version 2.1 of the License, or (at your option) any later version.
11    The GNU C Library is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14    Lesser General Public License for more details.
16    You should have received a copy of the GNU Lesser General Public
17    License along with the GNU C Library; if not, see
18    <https://www.gnu.org/licenses/>.  */
20 #include <sysdep.h>
21 #include <asm/asi.h>
/* Default configuration, applied only when XCC has not been defined
   before this point: enable USE_BPR, tell the assembler that %g2, %g3
   and %g6 are used as scratch registers, and define XCC as xcc.
   When USE_BPR is left undefined, the memcpy entry point zero-extends
   the length with "srl %o2, 0, %o2" before testing it.  */
22 #ifndef XCC
23 #define USE_BPR
24         .register       %g2, #scratch
25         .register       %g3, #scratch
26         .register       %g6, #scratch
27 #define XCC     xcc
28 #endif
/* Value written to %fprs on the exit path of __memcpy_large.
   NOTE(review): 4 is presumably the FEF (FPU enable) bit of FPRS --
   confirm against the SPARC V9 manual.  */
29 #define FPRS_FEF        4
/* FREG_FROB(f1, ..., f9): run faligndata over the eight adjacent
   register pairs (f1,f2), (f2,f3), ..., (f8,f9) and place the results in
   %f48, %f50, ..., %f62.  Arguments are FP register names without the
   leading '%'.  The output registers %f48-%f62 are the ones the loop and
   store macros below pass to stda as the store source.
   faligndata uses the byte offset previously latched by alignaddr.  */
31 #define FREG_FROB(f1, f2, f3, f4, f5, f6, f7, f8, f9)           \
32         faligndata      %f1, %f2, %f48;                         \
33         faligndata      %f2, %f3, %f50;                         \
34         faligndata      %f3, %f4, %f52;                         \
35         faligndata      %f4, %f5, %f54;                         \
36         faligndata      %f5, %f6, %f56;                         \
37         faligndata      %f6, %f7, %f58;                         \
38         faligndata      %f7, %f8, %f60;                         \
39         faligndata      %f8, %f9, %f62;
/* MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, len, jmptgt): one step of the
   block-copy loop.
     - ldda loads from [%src] through %asi into the bank starting at %fdest;
     - src and dest are both advanced by 0x40;
     - len is reduced by 0x40 and, if it reached zero, control goes to
       jmptgt;
     - the stda in the branch delay slot (hence the extra leading space)
       runs whether or not the branch is taken and stores the bank
       starting at %fsrc to the pre-increment dest ([%dest - 0x40]).
   Expands to exactly 6 instructions.
   NOTE(review): with %asi = ASI_BLK_P (set at label 202) these are
   presumably 64-byte block transfers, matching the 0x40 stride.  */
41 #define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, len, jmptgt)    \
42         ldda            [%src] %asi, %fdest;                    \
43         add             %src, 0x40, %src;                       \
44         add             %dest, 0x40, %dest;                     \
45         subcc           %len, 0x40, %len;                       \
46         be,pn           %xcc, jmptgt;                           \
47          stda           %fsrc, [%dest - 0x40] %asi;
/* LOOP_CHUNK1/2/3: MAIN_LOOP_CHUNK with the load destination fixed to
   the bank starting at %f0, %f16 and %f32 respectively; the store source
   is always the bank starting at %f48 (the FREG_FROB output).  */
49 #define LOOP_CHUNK1(src, dest, len, branch_dest)                \
50         MAIN_LOOP_CHUNK(src, dest, f0,  f48, len, branch_dest)
51 #define LOOP_CHUNK2(src, dest, len, branch_dest)                \
52         MAIN_LOOP_CHUNK(src, dest, f16, f48, len, branch_dest)
53 #define LOOP_CHUNK3(src, dest, len, branch_dest)                \
54         MAIN_LOOP_CHUNK(src, dest, f32, f48, len, branch_dest)
/* STORE_SYNC(dest, fsrc): stda the bank starting at %fsrc to [%dest]
   through %asi, then advance dest by 0x40.  The macro itself contains no
   barrier; every use in this file is followed by "membar #Sync".
   Expands to 2 instructions.  */
56 #define STORE_SYNC(dest, fsrc)                                  \
57         stda            %fsrc, [%dest] %asi;                    \
58         add             %dest, 0x40, %dest;
/* STORE_JUMP(dest, fsrc, target): same store and advance as STORE_SYNC,
   followed by an unconditional branch to target.  The macro does not
   supply the delay-slot instruction of that branch: whatever follows the
   macro invocation fills it (in this file, always "membar #Sync").
   Expands to 3 instructions.  */
60 #define STORE_JUMP(dest, fsrc, target)                          \
61         stda            %fsrc, [%dest] %asi;                    \
62         add             %dest, 0x40, %dest;                     \
63         ba,pt           %xcc, target;
/* VISLOOP_PAD: 15 nops appended to each block-loop variant (300, 310,
   ..., 370).  A variant is 113 instructions without padding
   (3 * 14 for the loop steps, 2 for the loop-back branch and its delay
   slot, 3 * 23 for the three exit sequences), so 15 nops bring it to
   128 instructions = 512 bytes.  The dispatch at label 203 depends on
   that size: it adds (index << 9) to the address of label 300.
   Do not change the nop count without re-deriving this.  */
65 #define VISLOOP_PAD nop; nop; nop; nop;                         \
66                     nop; nop; nop; nop;                         \
67                     nop; nop; nop; nop;                         \
68                     nop; nop; nop;
/* FINISH_VISCHUNK(dest, f0, f1, left): one entry of the 400-446 tail
   table.  Subtracts 8 from left; if the result is negative, branches to
   205 (byte tail).  The faligndata in the delay slot runs either way and
   puts the aligned doubleword built from %f0/%f1 into %f48.  When the
   branch is not taken, %f48 is stored to [%dest] and dest advances by 8;
   execution then falls through into the next table entry.
   Note: the parameters f0/f1 are macro arguments and shadow the register
   names only inside this macro.  */
70 #define FINISH_VISCHUNK(dest, f0, f1, left)                     \
71         subcc           %left, 8, %left;                        \
72         bl,pn           %xcc, 205f;                             \
73          faligndata     %f0, %f1, %f48;                         \
74         std             %f48, [%dest];                          \
75         add             %dest, 8, %dest;
/* UNEVEN_VISCHUNK(dest, f0, f1, left): tail-table entry used for the
   last register of each bank (%f14, %f30, %f46), where no following
   loaded register exists to pair with.  Subtracts 8 from left and
   branches to 205 if the result is negative; the fsrc2 in the delay slot
   copies %f0 into %f1 either way (every use passes the real %f0 as f1,
   which is the "previous doubleword" register loop 204 starts from).
   Otherwise jumps to loop 204 with the annul bit set, so no delay-slot
   instruction is executed for the ba.  The dest argument is unused.  */
77 #define UNEVEN_VISCHUNK(dest, f0, f1, left)                     \
78         subcc           %left, 8, %left;                        \
79         bl,pn           %xcc, 205f;                             \
80          fsrc2          %f0, %f1;                               \
81         ba,a,pt         %xcc, 204f;
83         /* Macros for non-VIS memcpy code. */
/* MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3): copy 32 bytes from
   [%src + offset] to [%dst + offset].  The source is read with four
   64-bit ldx loads; each value is written with two 32-bit stw stores
   (low word at +4, then the high word, obtained by srlx 32, at +0), so
   the destination is only accessed in 4-byte units.
   Clobbers t0-t3.  Expands to 16 instructions.  */
84 #define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3)         \
85         ldx             [%src + offset + 0x00], %t0;            \
86         ldx             [%src + offset + 0x08], %t1;            \
87         ldx             [%src + offset + 0x10], %t2;            \
88         ldx             [%src + offset + 0x18], %t3;            \
89         stw             %t0, [%dst + offset + 0x04];            \
90         srlx            %t0, 32, %t0;                           \
91         stw             %t0, [%dst + offset + 0x00];            \
92         stw             %t1, [%dst + offset + 0x0c];            \
93         srlx            %t1, 32, %t1;                           \
94         stw             %t1, [%dst + offset + 0x08];            \
95         stw             %t2, [%dst + offset + 0x14];            \
96         srlx            %t2, 32, %t2;                           \
97         stw             %t2, [%dst + offset + 0x10];            \
98         stw             %t3, [%dst + offset + 0x1c];            \
99         srlx            %t3, 32, %t3;                           \
100         stw             %t3, [%dst + offset + 0x18];
/* MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3): copy 64 bytes
   from [%src + offset] to [%dst + offset] using 64-bit ldx/stx on both
   sides, as two groups of four loads followed by four stores.
   Note the size differs from MOVE_BIGCHUNK (64 bytes here, 32 there).
   Clobbers t0-t3.  Expands to 16 instructions.  */
102 #define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3)    \
103         ldx             [%src + offset + 0x00], %t0;            \
104         ldx             [%src + offset + 0x08], %t1;            \
105         ldx             [%src + offset + 0x10], %t2;            \
106         ldx             [%src + offset + 0x18], %t3;            \
107         stx             %t0, [%dst + offset + 0x00];            \
108         stx             %t1, [%dst + offset + 0x08];            \
109         stx             %t2, [%dst + offset + 0x10];            \
110         stx             %t3, [%dst + offset + 0x18];            \
111         ldx             [%src + offset + 0x20], %t0;            \
112         ldx             [%src + offset + 0x28], %t1;            \
113         ldx             [%src + offset + 0x30], %t2;            \
114         ldx             [%src + offset + 0x38], %t3;            \
115         stx             %t0, [%dst + offset + 0x20];            \
116         stx             %t1, [%dst + offset + 0x28];            \
117         stx             %t2, [%dst + offset + 0x30];            \
118         stx             %t3, [%dst + offset + 0x38];
/* MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3): copy the 16 bytes
   that end at (%src - offset) to the 16 bytes that end at
   (%dst - offset).  Loads are 64-bit ldx, stores are 32-bit stw; the
   high words are shifted into t2/t3, leaving t0/t1 unmodified.
   Expands to exactly 8 instructions (32 bytes of code per 16 bytes of
   data).  The computed jump at label 79 relies on that ratio: it backs
   up from label 80 by twice the remaining byte count.  Do not change the
   instruction count.  */
120 #define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3)        \
121         ldx             [%src - offset - 0x10], %t0;            \
122         ldx             [%src - offset - 0x08], %t1;            \
123         stw             %t0, [%dst - offset - 0x0c];            \
124         srlx            %t0, 32, %t2;                           \
125         stw             %t2, [%dst - offset - 0x10];            \
126         stw             %t1, [%dst - offset - 0x04];            \
127         srlx            %t1, 32, %t3;                           \
128         stw             %t3, [%dst - offset - 0x08];
/* MOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1): copy the 16 bytes that
   end at (%src - offset) to the 16 bytes that end at (%dst - offset)
   using 64-bit ldx/stx on both sides.  Clobbers t0 and t1.
   Expands to 4 instructions.  */
130 #define MOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1)           \
131         ldx             [%src - offset - 0x10], %t0;            \
132         ldx             [%src - offset - 0x08], %t1;            \
133         stx             %t0, [%dst - offset - 0x10];            \
134         stx             %t1, [%dst - offset - 0x08];
/* __memcpy_large: block-copy path using the FP registers.
   Reached from memcpy through "bgeu,pn %xcc, 200b" when the length is at
   least 64 * 6 bytes.  The delay slot of that branch leaves
   %g2 = %o0 & 7 together with the matching condition codes, which the
   first instruction below consumes.
   Register use on entry:
     %o0 = dest, %o1 = src, %o2 = length,
     %g4 = value to return (set by the memcpy / __mempcpy entry points).  */
136         .text
137         .align          32
138 ENTRY(__memcpy_large)
/* Step 1: bring dest up to 8-byte alignment.  If it already is aligned,
   go straight to 201; the delay slot computes %g5 = %o0 & 0x38 (and its
   condition codes) in either case.  Otherwise %g2 becomes 8 - (%o0 & 7),
   the number of bytes to copy first, and is deducted from the length.  */
139 200:    be,pt           %xcc, 201f                      /* CTI                          */
140          andcc          %o0, 0x38, %g5                  /* IEU1         Group           */
141         mov             8, %g1                          /* IEU0                         */
142         sub             %g1, %g2, %g2                   /* IEU0         Group           */
143         andcc           %o0, 1, %g0                     /* IEU1                         */
144         be,pt           %icc, 2f                        /* CTI                          */
145          sub            %o2, %g2, %o2                   /* IEU0         Group           */
/* dest is odd: copy a single byte; finished with step 1 if %g2 is now 0.  */
146 1:      ldub            [%o1], %o5                      /* Load         Group           */
147         add             %o1, 1, %o1                     /* IEU0                         */
148         add             %o0, 1, %o0                     /* IEU1                         */
149         subcc           %g2, 1, %g2                     /* IEU1         Group           */
150         be,pn           %xcc, 3f                        /* CTI                          */
151          stb            %o5, [%o0 - 1]                  /* Store                        */
/* Copy two bytes per iteration until %g2 reaches zero.  */
152 2:      ldub            [%o1], %o5                      /* Load         Group           */
153         add             %o0, 2, %o0                     /* IEU0                         */
154         ldub            [%o1 + 1], %g3                  /* Load         Group           */
155         subcc           %g2, 2, %g2                     /* IEU1         Group           */
156         stb             %o5, [%o0 - 2]                  /* Store                        */
157         add             %o1, 2, %o1                     /* IEU0                         */
158         bne,pt          %xcc, 2b                        /* CTI          Group           */
159          stb            %g3, [%o0 - 1]                  /* Store                        */
/* Step 2: dest is now 8-byte aligned.  %g5 = %o0 & 0x38 is its offset
   inside a 64-byte block; if that is zero, go to 202.  Otherwise copy
   64 - %g5 bytes, 8 at a time, so that dest becomes 64-byte aligned.
   alignaddr yields the 8-byte-aligned source address in %g1 and latches
   the source misalignment for faligndata, so src may have any alignment.
   %o1 is still advanced by 8 per store so it keeps tracking the true
   (unaligned) source position.  */
160 3:      andcc           %o0, 0x38, %g5                  /* IEU1         Group           */
161 201:    be,pt           %icc, 202f                      /* CTI                          */
162          mov            64, %g1                         /* IEU0                         */
163         fsrc2           %f0, %f2                        /* FPU                          */
164         sub             %g1, %g5, %g5                   /* IEU0         Group           */
165         alignaddr       %o1, %g0, %g1                   /* GRU          Group           */
166         ldd             [%g1], %f4                      /* Load         Group           */
167         sub             %o2, %g5, %o2                   /* IEU0                         */
/* Loop unrolled twice: %f4 and %f6 alternate between holding the
   previous and the newly loaded source doubleword.  */
168 1:      ldd             [%g1 + 0x8], %f6                /* Load         Group           */
169         add             %g1, 0x8, %g1                   /* IEU0         Group           */
170         subcc           %g5, 8, %g5                     /* IEU1                         */
171         faligndata      %f4, %f6, %f0                   /* GRU          Group           */
172         std             %f0, [%o0]                      /* Store                        */
173         add             %o1, 8, %o1                     /* IEU0         Group           */
174         be,pn           %xcc, 202f                      /* CTI                          */
175          add            %o0, 8, %o0                     /* IEU1                         */
176         ldd             [%g1 + 0x8], %f4                /* Load         Group           */
177         add             %g1, 8, %g1                     /* IEU0                         */
178         subcc           %g5, 8, %g5                     /* IEU1                         */
179         faligndata      %f6, %f4, %f0                   /* GRU          Group           */
180         std             %f0, [%o0]                      /* Store                        */
181         add             %o1, 8, %o1                     /* IEU0                         */
182         bne,pt          %xcc, 1b                        /* CTI          Group           */
183          add            %o0, 8, %o0                     /* IEU0                         */
/* Step 3: set up the block loop.  dest (%o0) is 64-byte aligned here.
   After this sequence:
     %asi = ASI_BLK_P (used by every ldda/stda below);
     %g6  = ((len - 0x40) & ~0x3f) - 0x80, the loop byte counter that the
            LOOP_CHUNK macros decrement by 0x40 and test for zero;
     %g3  = ((len - loop bytes) & ~7) - 0x10, the counter for the 8-byte
            tail (table 400-446 and loop 204);
     %o2  = bytes left over for the byte loop at 206;
     %g1  = src + loop bytes + %g3, the source position loaded into %o1
            at label 205 for the byte loop;
     %g2  = (src >> 3) & 7, selecting one of the eight loop variants;
     %o1  = src rounded down to 64 bytes; the three ldda instructions
            pre-load the banks at %f0, %f16 and %f32 from %o1 + 0x00,
            + 0x40 and + 0x80.
   "alignaddr %g1, %g0, %g0" is executed only to latch the source
   misalignment for faligndata; its address result is discarded.  */
184 202:    membar    #LoadStore | #StoreStore | #StoreLoad /* LSU          Group           */
185         wr              %g0, ASI_BLK_P, %asi            /* LSU          Group           */
186         subcc           %o2, 0x40, %g6                  /* IEU1         Group           */
187         mov             %o1, %g1                        /* IEU0                         */
188         andncc          %g6, (0x40 - 1), %g6            /* IEU1         Group           */
189         srl             %g1, 3, %g2                     /* IEU0                         */
190         sub             %o2, %g6, %g3                   /* IEU0         Group           */
191         andn            %o1, (0x40 - 1), %o1            /* IEU1                         */
192         and             %g2, 7, %g2                     /* IEU0         Group           */
193         andncc          %g3, 0x7, %g3                   /* IEU1                         */
194         fsrc2           %f0, %f2                        /* FPU                          */
195         sub             %g3, 0x10, %g3                  /* IEU0         Group           */
196         sub             %o2, %g6, %o2                   /* IEU1                         */
197         alignaddr       %g1, %g0, %g0                   /* GRU          Group           */
198         add             %g1, %g6, %g1                   /* IEU0         Group           */
199         subcc           %o2, %g3, %o2                   /* IEU1                         */
200         ldda            [%o1 + 0x00] %asi, %f0          /* LSU          Group           */
201         add             %g1, %g3, %g1                   /* IEU0                         */
202         ldda            [%o1 + 0x40] %asi, %f16         /* LSU          Group           */
203         sub             %g6, 0x80, %g6                  /* IEU0                         */
204         ldda            [%o1 + 0x80] %asi, %f32         /* LSU          Group           */
205                                                         /* Clk1         Group 8-(       */
206                                                         /* Clk2         Group 8-(       */
207                                                         /* Clk3         Group 8-(       */
208                                                         /* Clk4         Group 8-(       */
/* Position-independent dispatch: target = address of label 300 +
   (%g2 << 9), i.e. loop variant %g2, each variant being 512 bytes long.
   The delay slot advances %o1 past the three pre-loaded blocks.  */
209 203:    rd              %pc, %g5                        /* PDU          Group 8-(       */
210         addcc           %g5, %lo(300f - 203b), %g5      /* IEU1         Group           */
211         sll             %g2, 9, %g2                     /* IEU0                         */
212         jmpl            %g5 + %g2, %g0                  /* CTI          Group brk forced*/
213          addcc          %o1, 0xc0, %o1                  /* IEU1         Group           */
/* Eight variants of the block-copy loop, one for each value of
   (src >> 3) & 7.  Each variant, including its VISLOOP_PAD, is exactly
   128 instructions = 512 bytes, so variant k starts at label 300 plus
   k * 512; the jmpl at label 203 depends on this layout.

   Within a variant:
     - The loop body (label 3k0) has three steps.  Each step uses
       FREG_FROB to align eight doublewords out of the loaded banks into
       %f48-%f62, then LOOP_CHUNKn to reload the bank that was just
       consumed (%f0, %f16 or %f32) and store %f48-%f62.  The variant
       number only changes which register the alignment starts from.
     - The closing "b,pt 3k0b+4" re-enters the loop at its second
       instruction; the first faligndata of the loop is executed in that
       branch's delay slot instead.
     - Labels 3k1 / 3k2 / 3k3 are the exits taken when %g6 reaches zero
       in step 1 / 2 / 3.  Each aligns and stores the two blocks still
       held in registers (a membar #Sync follows each store; the second
       one sits in the delay slot of STORE_JUMP's branch) and then enters
       the 400-446 table at the entry for the next register pair.  */
215         .align          512             /* OK, here comes the fun part... */
/* Variant 0: (src >> 3) & 7 == 0.  */
216 300:    FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)  LOOP_CHUNK1(o1, o0, g6, 301f)
217         FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)  LOOP_CHUNK2(o1, o0, g6, 302f)
218         FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)   LOOP_CHUNK3(o1, o0, g6, 303f)
219         b,pt            %xcc, 300b+4; faligndata %f0, %f2, %f48
220 301:    FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)  STORE_SYNC(o0, f48) membar #Sync
221         FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)   STORE_JUMP(o0, f48, 400f) membar #Sync
222 302:    FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)   STORE_SYNC(o0, f48) membar #Sync
223         FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)  STORE_JUMP(o0, f48, 416f) membar #Sync
224 303:    FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)  STORE_SYNC(o0, f48) membar #Sync
225         FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)  STORE_JUMP(o0, f48, 432f) membar #Sync
226         VISLOOP_PAD
/* Variant 1: (src >> 3) & 7 == 1.  */
227 310:    FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)  LOOP_CHUNK1(o1, o0, g6, 311f)
228         FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)  LOOP_CHUNK2(o1, o0, g6, 312f)
229         FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)   LOOP_CHUNK3(o1, o0, g6, 313f)
230         b,pt            %xcc, 310b+4; faligndata %f2, %f4, %f48
231 311:    FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)  STORE_SYNC(o0, f48) membar #Sync
232         FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)   STORE_JUMP(o0, f48, 402f) membar #Sync
233 312:    FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)   STORE_SYNC(o0, f48) membar #Sync
234         FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)  STORE_JUMP(o0, f48, 418f) membar #Sync
235 313:    FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)  STORE_SYNC(o0, f48) membar #Sync
236         FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)  STORE_JUMP(o0, f48, 434f) membar #Sync
237         VISLOOP_PAD
/* Variant 2: (src >> 3) & 7 == 2.  */
238 320:    FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)  LOOP_CHUNK1(o1, o0, g6, 321f)
239         FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)  LOOP_CHUNK2(o1, o0, g6, 322f)
240         FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)   LOOP_CHUNK3(o1, o0, g6, 323f)
241         b,pt            %xcc, 320b+4; faligndata %f4, %f6, %f48
242 321:    FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)  STORE_SYNC(o0, f48) membar #Sync
243         FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)   STORE_JUMP(o0, f48, 404f) membar #Sync
244 322:    FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)   STORE_SYNC(o0, f48) membar #Sync
245         FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)  STORE_JUMP(o0, f48, 420f) membar #Sync
246 323:    FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)  STORE_SYNC(o0, f48) membar #Sync
247         FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)  STORE_JUMP(o0, f48, 436f) membar #Sync
248         VISLOOP_PAD
/* Variant 3: (src >> 3) & 7 == 3.  */
249 330:    FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)  LOOP_CHUNK1(o1, o0, g6, 331f)
250         FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)  LOOP_CHUNK2(o1, o0, g6, 332f)
251         FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)   LOOP_CHUNK3(o1, o0, g6, 333f)
252         b,pt            %xcc, 330b+4; faligndata %f6, %f8, %f48
253 331:    FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)  STORE_SYNC(o0, f48) membar #Sync
254         FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)   STORE_JUMP(o0, f48, 406f) membar #Sync
255 332:    FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)   STORE_SYNC(o0, f48) membar #Sync
256         FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)  STORE_JUMP(o0, f48, 422f) membar #Sync
257 333:    FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)  STORE_SYNC(o0, f48) membar #Sync
258         FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)  STORE_JUMP(o0, f48, 438f) membar #Sync
259         VISLOOP_PAD
/* Variant 4: (src >> 3) & 7 == 4.  */
260 340:    FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)  LOOP_CHUNK1(o1, o0, g6, 341f)
261         FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)  LOOP_CHUNK2(o1, o0, g6, 342f)
262         FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)   LOOP_CHUNK3(o1, o0, g6, 343f)
263         b,pt            %xcc, 340b+4; faligndata %f8, %f10, %f48
264 341:    FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)  STORE_SYNC(o0, f48) membar #Sync
265         FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)   STORE_JUMP(o0, f48, 408f) membar #Sync
266 342:    FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)   STORE_SYNC(o0, f48) membar #Sync
267         FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)  STORE_JUMP(o0, f48, 424f) membar #Sync
268 343:    FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)  STORE_SYNC(o0, f48) membar #Sync
269         FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)  STORE_JUMP(o0, f48, 440f) membar #Sync
270         VISLOOP_PAD
/* Variant 5: (src >> 3) & 7 == 5.  */
271 350:    FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)  LOOP_CHUNK1(o1, o0, g6, 351f)
272         FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)  LOOP_CHUNK2(o1, o0, g6, 352f)
273         FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)  LOOP_CHUNK3(o1, o0, g6, 353f)
274         b,pt            %xcc, 350b+4; faligndata %f10, %f12, %f48
275 351:    FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)  STORE_SYNC(o0, f48) membar #Sync
276         FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)  STORE_JUMP(o0, f48, 410f) membar #Sync
277 352:    FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)  STORE_SYNC(o0, f48) membar #Sync
278         FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)  STORE_JUMP(o0, f48, 426f) membar #Sync
279 353:    FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)  STORE_SYNC(o0, f48) membar #Sync
280         FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)  STORE_JUMP(o0, f48, 442f) membar #Sync
281         VISLOOP_PAD
/* Variant 6: (src >> 3) & 7 == 6.  */
282 360:    FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)  LOOP_CHUNK1(o1, o0, g6, 361f)
283         FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)  LOOP_CHUNK2(o1, o0, g6, 362f)
284         FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)  LOOP_CHUNK3(o1, o0, g6, 363f)
285         b,pt            %xcc, 360b+4; faligndata %f12, %f14, %f48
286 361:    FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)  STORE_SYNC(o0, f48) membar #Sync
287         FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)  STORE_JUMP(o0, f48, 412f) membar #Sync
288 362:    FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)  STORE_SYNC(o0, f48) membar #Sync
289         FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)  STORE_JUMP(o0, f48, 428f) membar #Sync
290 363:    FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)  STORE_SYNC(o0, f48) membar #Sync
291         FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)  STORE_JUMP(o0, f48, 444f) membar #Sync
292         VISLOOP_PAD
/* Variant 7: (src >> 3) & 7 == 7.  */
293 370:    FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)  LOOP_CHUNK1(o1, o0, g6, 371f)
294         FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)  LOOP_CHUNK2(o1, o0, g6, 372f)
295         FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)  LOOP_CHUNK3(o1, o0, g6, 373f)
296         b,pt            %xcc, 370b+4; faligndata %f14, %f16, %f48
297 371:    FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)  STORE_SYNC(o0, f48) membar #Sync
298         FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)  STORE_JUMP(o0, f48, 414f) membar #Sync
299 372:    FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)  STORE_SYNC(o0, f48) membar #Sync
300         FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)  STORE_JUMP(o0, f48, 430f) membar #Sync
301 373:    FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)  STORE_SYNC(o0, f48) membar #Sync
302         FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)  STORE_JUMP(o0, f48, 446f) membar #Sync
303         VISLOOP_PAD
/* Doubleword tail table.  The loop exits above jump to the entry whose
   first operand is the next register still holding source data.  Each
   FINISH_VISCHUNK entry emits one aligned 8-byte store while %g3 stays
   non-negative and then falls through to the next entry.  The entries
   for the last register of a bank (414, 430, 446) are UNEVEN_VISCHUNK:
   they move that register into %f0 and continue in loop 204, which loads
   further doublewords from memory.  Any entry leaves for label 205 as
   soon as %g3 goes negative.  */
304 400:    FINISH_VISCHUNK(o0, f0,  f2,  g3)
305 402:    FINISH_VISCHUNK(o0, f2,  f4,  g3)
306 404:    FINISH_VISCHUNK(o0, f4,  f6,  g3)
307 406:    FINISH_VISCHUNK(o0, f6,  f8,  g3)
308 408:    FINISH_VISCHUNK(o0, f8,  f10, g3)
309 410:    FINISH_VISCHUNK(o0, f10, f12, g3)
310 412:    FINISH_VISCHUNK(o0, f12, f14, g3)
311 414:    UNEVEN_VISCHUNK(o0, f14, f0,  g3)
312 416:    FINISH_VISCHUNK(o0, f16, f18, g3)
313 418:    FINISH_VISCHUNK(o0, f18, f20, g3)
314 420:    FINISH_VISCHUNK(o0, f20, f22, g3)
315 422:    FINISH_VISCHUNK(o0, f22, f24, g3)
316 424:    FINISH_VISCHUNK(o0, f24, f26, g3)
317 426:    FINISH_VISCHUNK(o0, f26, f28, g3)
318 428:    FINISH_VISCHUNK(o0, f28, f30, g3)
319 430:    UNEVEN_VISCHUNK(o0, f30, f0,  g3)
320 432:    FINISH_VISCHUNK(o0, f32, f34, g3)
321 434:    FINISH_VISCHUNK(o0, f34, f36, g3)
322 436:    FINISH_VISCHUNK(o0, f36, f38, g3)
323 438:    FINISH_VISCHUNK(o0, f38, f40, g3)
324 440:    FINISH_VISCHUNK(o0, f40, f42, g3)
325 442:    FINISH_VISCHUNK(o0, f42, f44, g3)
326 444:    FINISH_VISCHUNK(o0, f44, f46, g3)
327 446:    UNEVEN_VISCHUNK(o0, f46, f0,  g3)
/* Doubleword tail loop, entered from UNEVEN_VISCHUNK with the previous
   source doubleword in %f0.  Each half loads the next doubleword from
   [%o1], combines it with the previous one via faligndata and stores the
   8-byte result; %f0 and %f2 swap roles between the two halves.
   %g3 is decremented by 8 per store and the loop is left for label 205
   once it goes negative (note the store is issued before the test).  */
328 204:    ldd             [%o1], %f2                      /* Load         Group           */
329         add             %o1, 8, %o1                     /* IEU0                         */
330         subcc           %g3, 8, %g3                     /* IEU1                         */
331         faligndata      %f0, %f2, %f8                   /* GRU          Group           */
332         std             %f8, [%o0]                      /* Store                        */
333         bl,pn           %xcc, 205f                      /* CTI                          */
334          add            %o0, 8, %o0                     /* IEU0         Group           */
335         ldd             [%o1], %f0                      /* Load         Group           */
336         add             %o1, 8, %o1                     /* IEU0                         */
337         subcc           %g3, 8, %g3                     /* IEU1                         */
338         faligndata      %f2, %f0, %f8                   /* GRU          Group           */
339         std             %f8, [%o0]                      /* Store                        */
340         bge,pt          %xcc, 204b                      /* CTI                          */
341          add            %o0, 8, %o0                     /* IEU0         Group           */
/* Byte tail.  If no bytes remain (%o2 == 0) go straight to the exit at
   207.  The delay slot reloads %o1 from %g1, the byte-exact source
   position computed at label 202, in either case.  */
342 205:    brz,pt          %o2, 207f                       /* CTI          Group           */
343          mov            %g1, %o1                        /* IEU0                         */
/* Copy the remaining %o2 bytes one at a time.  */
344 206:    ldub            [%o1], %g5                      /* LOAD                         */
345         add             %o1, 1, %o1                     /* IEU0                         */
346         add             %o0, 1, %o0                     /* IEU1                         */
347         subcc           %o2, 1, %o2                     /* IEU1                         */
348         bne,pt          %xcc, 206b                      /* CTI                          */
349          stb            %g5, [%o0 - 1]                  /* Store        Group           */
/* Exit of the block-copy path: order the stores, write FPRS_FEF to
   %fprs, and return with the value saved in %g4 by the entry point
   moved into %o0 in the retl delay slot.  */
350 207:    membar          #StoreLoad | #StoreStore        /* LSU          Group           */
351         wr              %g0, FPRS_FEF, %fprs
352         retl
353          mov            %g4, %o0
/* Short-copy path, reached from memcpy through "bleu,pn %xcc, 208b"
   when the (non-zero) length is at most 15.  Pure byte copy.
   For an even length the branch goes to "2f+4", i.e. into the two-byte
   loop at its second instruction: the first load of that pair is the
   ldub at label 1, which sits in the branch delay slot.  For an odd
   length the same ldub starts a single-byte copy, after which the
   two-byte loop handles the rest (or control goes to 209 if done).  */
355 208:    andcc           %o2, 1, %g0                     /* IEU1         Group           */
356         be,pt           %icc, 2f+4                      /* CTI                          */
357 1:       ldub           [%o1], %g5                      /* LOAD         Group           */
358         add             %o1, 1, %o1                     /* IEU0                         */
359         add             %o0, 1, %o0                     /* IEU1                         */
360         subcc           %o2, 1, %o2                     /* IEU1         Group           */
361         be,pn           %xcc, 209f                      /* CTI                          */
362          stb            %g5, [%o0 - 1]                  /* Store                        */
/* Two bytes per iteration until %o2 reaches zero.  */
363 2:      ldub            [%o1], %g5                      /* LOAD         Group           */
364         add             %o0, 2, %o0                     /* IEU0                         */
365         ldub            [%o1 + 1], %o5                  /* LOAD         Group           */
366         add             %o1, 2, %o1                     /* IEU0                         */
367         subcc           %o2, 2, %o2                     /* IEU1         Group           */
368         stb             %g5, [%o0 - 2]                  /* Store                        */
369         bne,pt          %xcc, 2b                        /* CTI                          */
370          stb            %o5, [%o0 - 1]                  /* Store                        */
/* Common return for the non-FP paths (also the target of memcpy's
   zero-length check): return the value saved in %g4.  */
371 209:    retl
372          mov            %g4, %o0
373 END(__memcpy_large)
/* __mempcpy: same arguments as memcpy (%o0 = dest, %o1 = src,
   %o2 = length).  Branches to the shared body at label 210 inside
   memcpy; the delay slot sets %g4 = dest + length, which the shared
   return paths move into %o0 as the result.  */
375 ENTRY(__mempcpy)
376         ba,pt           %xcc, 210f
377          add            %o0, %o2, %g4
378 END(__mempcpy)
380         .align          32
381 ENTRY(memcpy)
382          mov            %o0, %g4                        /* IEU0         Group           */
383 210:
384 #ifndef USE_BPR
385         srl             %o2, 0, %o2                     /* IEU1                         */
386 #endif
387         brz,pn          %o2, 209b                       /* CTI          Group           */
388 218:     cmp            %o2, 15                         /* IEU1                         */
389         bleu,pn         %xcc, 208b                      /* CTI          Group           */
390          cmp            %o2, (64 * 6)                   /* IEU1                         */
391         bgeu,pn         %xcc, 200b                      /* CTI          Group           */
392          andcc          %o0, 7, %g2                     /* IEU1                         */
393         sub             %o0, %o1, %g5                   /* IEU0                         */
394         andcc           %g5, 3, %o5                     /* IEU1         Group           */
395         bne,pn          %xcc, 212f                      /* CTI                          */
396          andcc          %o1, 3, %g0                     /* IEU1         Group           */
397         be,a,pt         %xcc, 216f                      /* CTI                          */
398          andcc          %o1, 4, %g0                     /* IEU1         Group           */
399         andcc           %o1, 1, %g0                     /* IEU1         Group           */
400         be,pn           %xcc, 4f                        /* CTI                          */
401          andcc          %o1, 2, %g0                     /* IEU1         Group           */
402         ldub            [%o1], %g2                      /* Load         Group           */
403         add             %o1, 1, %o1                     /* IEU0                         */
404         add             %o0, 1, %o0                     /* IEU1                         */
405         sub             %o2, 1, %o2                     /* IEU0         Group           */
406         bne,pn          %xcc, 5f                        /* CTI          Group           */
407          stb            %g2, [%o0 - 1]                  /* Store                        */
408 4:      lduh            [%o1], %g2                      /* Load         Group           */
409         add             %o1, 2, %o1                     /* IEU0                         */
410         add             %o0, 2, %o0                     /* IEU1                         */
411         sub             %o2, 2, %o2                     /* IEU0                         */
412         sth             %g2, [%o0 - 2]                  /* Store        Group + bubble  */
/* Word-copy path: bulk copy in 128-byte chunks, then whole 16-byte units
   reached through a computed jump, then 8/4/2/1-byte leftovers.
   Register roles as used below: %o1 = source pointer, %o0 = destination
   pointer, %o2 = bytes remaining, %g6 = byte counter for the current
   stage.  The code that routes control here is outside this excerpt.
   %g4 is returned at 211; presumably it holds the original destination
   saved at function entry -- confirm there.  */
/* Test bit 2 of the source address.  Label 216 is a second entry point
   that expects the condition codes of this test to be set already.  */
413 5:      andcc           %o1, 4, %g0                     /* IEU1                         */
/* Annulled branch: when bit 2 is clear the delay slot runs and computes
   %g6 = %o2 & -128 on the way to 2:.  When bit 2 is set the delay slot is
   skipped, one 32-bit word is copied, and %g6 is computed afterwards.  */
414 216:    be,a,pn         %xcc, 2f                        /* CTI                          */
415          andcc          %o2, -128, %g6                  /* IEU1         Group           */
416         lduw            [%o1], %g5                      /* Load         Group           */
417         add             %o1, 4, %o1                     /* IEU0                         */
418         add             %o0, 4, %o0                     /* IEU1                         */
419         sub             %o2, 4, %o2                     /* IEU0         Group           */
420         stw             %g5, [%o0 - 4]                  /* Store                        */
421         andcc           %o2, -128, %g6                  /* IEU1         Group           */
/* %g6 == 0 means there is no full 128-byte chunk: skip the bulk loop.
   The delay slot tests bit 2 of the destination address.  */
422 2:      be,pn           %xcc, 215f                      /* CTI                          */
423          andcc          %o0, 4, %g0                     /* IEU1         Group           */
/* Destination bit 2 clear: use the loop at 82 instead.  The target is
   82f + 4, one instruction past that label, and this branch has no
   explicit delay-slot line, so the first instruction of the
   MOVE_BIGCHUNK expansion below executes in the delay slot.
   NOTE(review): this relies on MOVE_BIGCHUNK and MOVE_BIGALIGNCHUNK
   starting with an equivalent instruction; the macro bodies are not
   visible in this excerpt -- confirm.  */
424         be,pn           %xcc, 82f + 4                   /* CTI          Group           */
/* Bulk loop: four MOVE_BIGCHUNK invocations at offsets 0x00..0x60 per
   pass; both pointers and %g6 move by 128 per pass.  */
425 5:      MOVE_BIGCHUNK(o1, o0, 0x00, g1, g3, g5, o5)
426         MOVE_BIGCHUNK(o1, o0, 0x20, g1, g3, g5, o5)
427         MOVE_BIGCHUNK(o1, o0, 0x40, g1, g3, g5, o5)
428         MOVE_BIGCHUNK(o1, o0, 0x60, g1, g3, g5, o5)
429 35:     subcc           %g6, 128, %g6                   /* IEU1         Group           */
430         add             %o1, 128, %o1                   /* IEU0                         */
431         bne,pt          %xcc, 5b                        /* CTI                          */
432          add            %o0, 128, %o0                   /* IEU0         Group           */
/* %g6 = bytes left in whole 16-byte units (bits 4-6 of %o2, 0..0x70).
   If zero, go straight to the leftovers at 80.  The delay slot sets the
   condition codes for the (%o2 & 8) test that 80: consumes.  */
433 215:    andcc           %o2, 0x70, %g6                  /* IEU1         Group           */
434 41:     be,pn           %xcc, 80f                       /* CTI                          */
435          andcc          %o2, 8, %g0                     /* IEU1         Group           */
436                                                         /* Clk1 8-(                     */
437                                                         /* Clk2 8-(                     */
438                                                         /* Clk3 8-(                     */
439                                                         /* Clk4 8-(                     */
/* Computed jump into the MOVE_LASTCHUNK ladder.  %o5 = address of 79,
   so the target is (address of 80) - 2 * %g6.  For that to land on a
   macro boundary, each MOVE_LASTCHUNK expansion must occupy 32 bytes of
   code per 16 bytes copied (macro body not visible here -- TODO
   confirm).  Both pointers are advanced by %g6 before the ladder runs.
   None of the explicit instructions here alter the condition codes set
   in the delay slot above; the macro is assumed not to either.  */
440 79:     rd              %pc, %o5                        /* PDU          Group           */
441         sll             %g6, 1, %g5                     /* IEU0         Group           */
442         add             %o1, %g6, %o1                   /* IEU1                         */
443         sub             %o5, %g5, %o5                   /* IEU0         Group           */
444         jmpl            %o5 + %lo(80f - 79b), %g0       /* CTI          Group brk forced*/
445          add            %o0, %g6, %o0                   /* IEU0         Group           */
446 36:     MOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g5, o5)
447         MOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g5, o5)
448         MOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g5, o5)
449         MOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g5, o5)
450         MOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g5, o5)
451         MOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g5, o5)
452         MOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g5, o5)
/* Leftover 8 bytes (bit 3 of %o2).  Each branch below tests the codes
   set by the previous delay slot, and its own delay slot sets up the
   next test.  The 64-bit value is loaded at once but stored as two
   32-bit words (upper half at the lower address); on this path the
   destination has not been shown to be 8-byte aligned.  */
453 80:     be,pt           %xcc, 81f                       /* CTI                          */
454          andcc          %o2, 4, %g0                     /* IEU1                         */
455         ldx             [%o1], %g2                      /* Load         Group           */
456         add             %o0, 8, %o0                     /* IEU0                         */
457         stw             %g2, [%o0 - 0x4]                /* Store        Group           */
458         add             %o1, 8, %o1                     /* IEU1                         */
459         srlx            %g2, 32, %g2                    /* IEU0         Group           */
460         stw             %g2, [%o0 - 0x8]                /* Store                        */
/* Leftover 4 bytes (bit 2 of %o2).  */
461 81:     be,pt           %xcc, 1f                        /* CTI                          */
462          andcc          %o2, 2, %g0                     /* IEU1         Group           */
463         lduw            [%o1], %g2                      /* Load         Group           */
464         add             %o1, 4, %o1                     /* IEU0                         */
465         stw             %g2, [%o0]                      /* Store        Group           */
466         add             %o0, 4, %o0                     /* IEU0                         */
/* Leftover 2 bytes (bit 1 of %o2).  */
467 1:      be,pt           %xcc, 1f                        /* CTI                          */
468          andcc          %o2, 1, %g0                     /* IEU1         Group           */
469         lduh            [%o1], %g2                      /* Load         Group           */
470         add             %o1, 2, %o1                     /* IEU0                         */
471         sth             %g2, [%o0]                      /* Store        Group           */
472         add             %o0, 2, %o0                     /* IEU0                         */
/* Leftover single byte (bit 0 of %o2).  */
473 1:      be,pt           %xcc, 211f                      /* CTI                          */
474          nop                                            /* IEU1                         */
475         ldub            [%o1], %g2                      /* Load         Group           */
476         stb             %g2, [%o0]                      /* Store        Group + bubble  */
/* Return to the caller; the delay slot moves %g4 into the result
   register %o0.  */
477 211:    retl
478          mov            %g4, %o0
/* Variant of the word-copy path that uses 64-bit stores.  Same register
   roles: %o1 = source pointer, %o0 = destination pointer, %o2 = bytes
   remaining, %g6 = byte counter for the current stage.  The branch
   `be,pn %xcc, 82f + 4` earlier in the file enters one instruction past
   label 82, after testing that bit 2 of the destination is clear; the
   loop back edge below re-enters at 82 itself.  %g4 is returned at the
   end; presumably it holds the original destination -- confirm at the
   function entry, which is outside this excerpt.  */
/* Bulk loop: two MOVE_BIGALIGNCHUNK invocations at offsets 0x00 and 0x40
   per pass; both pointers and %g6 move by 128 per pass.  */
480 82:     MOVE_BIGALIGNCHUNK(o1, o0, 0x00, g1, g3, g5, o5)
481         MOVE_BIGALIGNCHUNK(o1, o0, 0x40, g1, g3, g5, o5)
482 37:     subcc           %g6, 128, %g6                   /* IEU1         Group           */
483         add             %o1, 128, %o1                   /* IEU0                         */
484         bne,pt          %xcc, 82b                       /* CTI                          */
485          add            %o0, 128, %o0                   /* IEU0         Group           */
/* %g6 = bytes left in whole 16-byte units (bits 4-6 of %o2, 0..0x70).
   If zero, go straight to the leftovers at 84.  The delay slot sets the
   condition codes for the (%o2 & 8) test that 84: consumes.  */
486         andcc           %o2, 0x70, %g6                  /* IEU1                         */
487         be,pn           %xcc, 84f                       /* CTI                          */
488          andcc          %o2, 8, %g0                     /* IEU1         Group           */
489                                                         /* Clk1 8-(                     */
490                                                         /* Clk2 8-(                     */
491                                                         /* Clk3 8-(                     */
492                                                         /* Clk4 8-(                     */
/* Computed jump into the MOVE_LASTALIGNCHUNK ladder.  %o5 = address of
   83, so the target is (address of 84) - %g6.  No shift is applied, so
   each MOVE_LASTALIGNCHUNK expansion must occupy 16 bytes of code per 16
   bytes copied (macro body not visible here -- TODO confirm).  Both
   pointers are advanced by %g6 before the ladder runs.  None of the
   explicit instructions here alter the condition codes set in the delay
   slot above; the macro is assumed not to either.  */
493 83:     rd              %pc, %o5                        /* PDU          Group           */
494         add             %o1, %g6, %o1                   /* IEU0         Group           */
495         sub             %o5, %g6, %o5                   /* IEU1                         */
496         jmpl            %o5 + %lo(84f - 83b), %g0       /* CTI          Group brk forced*/
497          add            %o0, %g6, %o0                   /* IEU0         Group           */
498 38:     MOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2, g3)
499         MOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2, g3)
500         MOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2, g3)
501         MOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2, g3)
502         MOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2, g3)
503         MOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2, g3)
504         MOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2, g3)
/* Leftover 8 bytes (bit 3 of %o2), moved with a single 64-bit load and
   store.  Each branch below tests the codes set by the previous delay
   slot, and its own delay slot sets up the next test.  */
505 84:     be,pt           %xcc, 85f                       /* CTI          Group           */
506          andcc          %o2, 4, %g0                     /* IEU1                         */
507         ldx             [%o1], %g2                      /* Load         Group           */
508         add             %o0, 8, %o0                     /* IEU0                         */
509         add             %o1, 8, %o1                     /* IEU0         Group           */
510         stx             %g2, [%o0 - 0x8]                /* Store                        */
/* Leftover 4 bytes (bit 2 of %o2).  */
511 85:     be,pt           %xcc, 1f                        /* CTI                          */
512          andcc          %o2, 2, %g0                     /* IEU1         Group           */
513         lduw            [%o1], %g2                      /* Load         Group           */
514         add             %o0, 4, %o0                     /* IEU0                         */
515         add             %o1, 4, %o1                     /* IEU0         Group           */
516         stw             %g2, [%o0 - 0x4]                /* Store                        */
/* Leftover 2 bytes (bit 1 of %o2).  */
517 1:      be,pt           %xcc, 1f                        /* CTI                          */
518          andcc          %o2, 1, %g0                     /* IEU1         Group           */
519         lduh            [%o1], %g2                      /* Load         Group           */
520         add             %o0, 2, %o0                     /* IEU0                         */
521         add             %o1, 2, %o1                     /* IEU0         Group           */
522         sth             %g2, [%o0 - 0x2]                /* Store                        */
/* Leftover single byte (bit 0 of %o2).  */
523 1:      be,pt           %xcc, 1f                        /* CTI                          */
524          nop                                            /* IEU0         Group           */
525         ldub            [%o1], %g2                      /* Load         Group           */
526         stb             %g2, [%o0]                      /* Store        Group + bubble  */
/* Return to the caller; the delay slot moves %g4 into the result
   register %o0.  */
527 1:      retl
528          mov            %g4, %o0
/* Copy path built on the VIS alignaddr/faligndata pair: the source is
   read as aligned 8-byte doublewords and realigned in the FP registers,
   the destination is written with 8-byte stores.
   Register roles as used below: %o1 = source pointer, %o0 = destination
   pointer, %o2 = bytes remaining, %g5 = bytes to move in the 8-byte
   loop, %g1 = aligned source read pointer.
   %g2 is set outside this excerpt.  The arithmetic below treats it as
   the destination offset within an 8-byte word (presumably %o0 & 7 --
   confirm): when nonzero, 8 - %g2 bytes are copied one at a time first.  */
530 212:    brz,pt          %g2, 2f                         /* CTI          Group           */
531          mov            8, %g1                          /* IEU0                         */
532         sub             %g1, %g2, %g2                   /* IEU0         Group           */
533         sub             %o2, %g2, %o2                   /* IEU0         Group           */
/* Byte loop: copies %g2 bytes; the store sits in the branch delay slot
   and uses the already incremented destination pointer.  */
534 1:      ldub            [%o1], %g5                      /* Load         Group           */
535         add             %o1, 1, %o1                     /* IEU0                         */
536         add             %o0, 1, %o0                     /* IEU1                         */
537         subcc           %g2, 1, %g2                     /* IEU1         Group           */
538         bne,pt          %xcc, 1b                        /* CTI                          */
539          stb            %g5, [%o0 - 1]                  /* Store                        */
/* Split the remaining count: %g5 = whole 8-byte part, %o2 = 0..7 tail.  */
540 2:      andn            %o2, 7, %g5                     /* IEU0         Group           */
541         and             %o2, 7, %o2                     /* IEU1                         */
/* Copies %f0 to %f2.  %f2 is not read anywhere in the visible code.
   NOTE(review): purpose not evident from this excerpt.  */
542         fsrc2           %f0, %f2                        /* FPU                          */
/* %g1 = %o1 rounded down to an 8-byte boundary; the low three bits of
   %o1 are recorded as the alignment offset used by faligndata.  Then
   prime %f4 with the first aligned doubleword.  */
543         alignaddr       %o1, %g0, %g1                   /* GRU          Group           */
544         ldd             [%g1], %f4                      /* Load         Group           */
/* Main loop, unrolled twice with %f4 and %f6 alternating as the older
   and newer doubleword.  Each half loads the next aligned doubleword,
   extracts 8 realigned bytes into %f0 and stores them.  The count is
   tested only after a store, so the loop assumes %g5 is nonzero on
   entry (presumably guaranteed by size checks outside this excerpt).  */
545 1:      ldd             [%g1 + 0x8], %f6                /* Load         Group           */
546         add             %g1, 0x8, %g1                   /* IEU0         Group           */
547         subcc           %g5, 8, %g5                     /* IEU1                         */
548         faligndata      %f4, %f6, %f0                   /* GRU          Group           */
549         std             %f0, [%o0]                      /* Store                        */
550         add             %o1, 8, %o1                     /* IEU0         Group           */
551         be,pn           %xcc, 213f                      /* CTI                          */
552          add            %o0, 8, %o0                     /* IEU1                         */
553         ldd             [%g1 + 0x8], %f4                /* Load         Group           */
554         add             %g1, 8, %g1                     /* IEU0                         */
555         subcc           %g5, 8, %g5                     /* IEU1                         */
556         faligndata      %f6, %f4, %f0                   /* GRU          Group           */
557         std             %f0, [%o0]                      /* Store                        */
558         add             %o1, 8, %o1                     /* IEU0                         */
559         bne,pn          %xcc, 1b                        /* CTI          Group           */
560          add            %o0, 8, %o0                     /* IEU0                         */
/* Tail: nothing left when %o2 == 0, otherwise copy bytes singly.
   NOTE(review): the back edge targets label 206, which is defined
   earlier in the file and not visible here, rather than the ldub just
   below.  Presumably 206 is an equivalent byte loop that ends in the
   same %fprs write and return -- confirm.  */
561 213:    brz,pn          %o2, 214f                       /* CTI          Group           */
562          nop                                            /* IEU0                         */
563         ldub            [%o1], %g5                      /* LOAD                         */
564         add             %o1, 1, %o1                     /* IEU0                         */
565         add             %o0, 1, %o0                     /* IEU1                         */
566         subcc           %o2, 1, %o2                     /* IEU1                         */
567         bne,pt          %xcc, 206b                      /* CTI                          */
568          stb            %g5, [%o0 - 1]                  /* Store        Group           */
/* Write FPRS_FEF (defined as 4 at the top of the file) to %fprs, then
   return with %g4 moved into the result register %o0 in the delay slot.
   %g4 presumably holds the original destination -- confirm at the
   function entry.  */
569 214:    wr              %g0, FPRS_FEF, %fprs
570         retl
571          mov            %g4, %o0
/* Close the memcpy function opened earlier in the file, then emit the
   symbol bookkeeping for memcpy and mempcpy.  These macros are defined
   in glibc headers that are not visible in this excerpt.  Judging by
   their names, they create hidden internal definitions for memcpy,
   __mempcpy and mempcpy, and make mempcpy a weak alias of __mempcpy
   (presumably defined earlier in this file) -- confirm against the
   macro definitions.  */
572 END(memcpy)
574 libc_hidden_builtin_def (memcpy)
576 libc_hidden_def (__mempcpy)
577 weak_alias (__mempcpy, mempcpy)
578 libc_hidden_builtin_def (mempcpy)