(CFLAGS-tst-align.c): Add -mpreferred-stack-boundary=4.
[glibc.git] / sysdeps / sparc / sparc64 / memcpy.S
blob3742573248d89577ca9ff50f4a6119879900c620
1 /* Copy SIZE bytes from SRC to DEST.
2    For UltraSPARC.
3    Copyright (C) 1996, 97, 98, 99, 2003 Free Software Foundation, Inc.
4    This file is part of the GNU C Library.
5    Contributed by David S. Miller (davem@caip.rutgers.edu) and
6                   Jakub Jelinek (jakub@redhat.com).
8    The GNU C Library is free software; you can redistribute it and/or
9    modify it under the terms of the GNU Lesser General Public
10    License as published by the Free Software Foundation; either
11    version 2.1 of the License, or (at your option) any later version.
13    The GNU C Library is distributed in the hope that it will be useful,
14    but WITHOUT ANY WARRANTY; without even the implied warranty of
15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16    Lesser General Public License for more details.
18    You should have received a copy of the GNU Lesser General Public
19    License along with the GNU C Library; if not, write to the Free
20    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
21    02111-1307 USA.  */
23 #include <sysdep.h>
24 #include <asm/asi.h>
/* Build configuration.  When XCC has not been defined already, this block */
/* defines USE_BPR, tells the assembler that g2, g3 and g6 are scratch */
/* registers, and makes XCC expand to xcc (the 64-bit condition codes). */
/* USE_BPR is tested in bcopy below: without it the length is zero-extended */
/* from 32 bits before the register-compare branch. */
25 #ifndef XCC
26 #define USE_BPR
27         .register       %g2, #scratch
28         .register       %g3, #scratch
29         .register       %g6, #scratch
30 #define XCC     xcc
31 #endif
/* Value written to the fprs register just before the VIS path returns */
/* (label 207).  NOTE(review): presumably the FPRS.FEF enable bit - confirm. */
32 #define FPRS_FEF        4
/* FREG_FROB(f1..f9): realign one 64-byte block. */
/* Takes nine consecutive double registers and issues eight faligndata */
/* instructions, one per adjacent pair (f1,f2), (f2,f3) ... (f8,f9). */
/* The eight results always land in f48, f50 ... f62, which is the bank */
/* the store macros below write out. */
/* The byte offset used by faligndata is the one set up earlier by */
/* alignaddr (labels 201 and 202). */
34 #define FREG_FROB(f1, f2, f3, f4, f5, f6, f7, f8, f9)           \
35         faligndata      %f1, %f2, %f48;                         \
36         faligndata      %f2, %f3, %f50;                         \
37         faligndata      %f3, %f4, %f52;                         \
38         faligndata      %f4, %f5, %f54;                         \
39         faligndata      %f5, %f6, %f56;                         \
40         faligndata      %f6, %f7, %f58;                         \
41         faligndata      %f7, %f8, %f60;                         \
42         faligndata      %f8, %f9, %f62;
/* MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, len, jmptgt): one step of the */
/* VIS block loop. */
/*   - loads the next source block through the current asi into fdest */
/*   - advances src and dest by 0x40 and subtracts 0x40 from len */
/*   - branches to jmptgt when len reaches zero */
/*   - the stda in the delay slot stores fsrc at the old dest (dest - 0x40) */
/*     and is not annulled, so it runs whether or not the branch is taken */
/* The asi register is set to ASI_BLK_P at label 202. */
/* NOTE(review): presumably that makes each ldda/stda a 64-byte block */
/* transfer - confirm against the UltraSPARC manual. */
44 #define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, len, jmptgt)    \
45         ldda            [%src] %asi, %fdest;                    \
46         add             %src, 0x40, %src;                       \
47         add             %dest, 0x40, %dest;                     \
48         subcc           %len, 0x40, %len;                       \
49         be,pn           %xcc, jmptgt;                           \
50          stda           %fsrc, [%dest - 0x40] %asi;
/* LOOP_CHUNK1/2/3 are MAIN_LOOP_CHUNK with the load target fixed to the */
/* f0, f16 and f32 bank respectively, always storing from the f48 bank. */
52 #define LOOP_CHUNK1(src, dest, len, branch_dest)                \
53         MAIN_LOOP_CHUNK(src, dest, f0,  f48, len, branch_dest)
54 #define LOOP_CHUNK2(src, dest, len, branch_dest)                \
55         MAIN_LOOP_CHUNK(src, dest, f16, f48, len, branch_dest)
56 #define LOOP_CHUNK3(src, dest, len, branch_dest)                \
57         MAIN_LOOP_CHUNK(src, dest, f32, f48, len, branch_dest)
/* STORE_SYNC(dest, fsrc): store fsrc at dest through the current asi and */
/* advance dest by 0x40.  Every use in the dispatch table is followed by a */
/* membar Sync written on the same source line. */
59 #define STORE_SYNC(dest, fsrc)                                  \
60         stda            %fsrc, [%dest] %asi;                    \
61         add             %dest, 0x40, %dest;
/* STORE_JUMP(dest, fsrc, target): same store and advance, then an */
/* unconditional branch to target.  The macro ends on the branch, so the */
/* instruction the caller writes next (the membar in the dispatch table) */
/* occupies the branch delay slot. */
63 #define STORE_JUMP(dest, fsrc, target)                          \
64         stda            %fsrc, [%dest] %asi;                    \
65         add             %dest, 0x40, %dest;                     \
66         ba,pt           %xcc, target;
/* VISLOOP_PAD: fifteen nops appended to every dispatch-table entry. */
/* One entry is 3 * (8 + 6) + 2 loop instructions plus 3 * (8 + 3) and */
/* 3 * (8 + 4) exit instructions, 113 in total.  With the 15 nops that is */
/* 128 instructions of 4 bytes each, i.e. 512 bytes, which matches the */
/* shift by 9 used to index the table at label 203. */
/* Changing any macro length above requires adjusting this pad. */
68 #define VISLOOP_PAD nop; nop; nop; nop;                         \
69                     nop; nop; nop; nop;                         \
70                     nop; nop; nop; nop;                         \
71                     nop; nop; nop;
/* FINISH_VISCHUNK(dest, f0, f1, left): emit one more aligned 8-byte word */
/* from registers that are already loaded. */
/*   - subtracts 8 from left and leaves for label 205 if it went negative */
/*   - the faligndata in the delay slot always runs and fills f48 */
/*   - otherwise stores f48 at dest, advances dest by 8 and falls through */
/*     into whatever follows (the next entry of the finish table) */
73 #define FINISH_VISCHUNK(dest, f0, f1, left)                     \
74         subcc           %left, 8, %left;                        \
75         bl,pn           %xcc, 205f;                             \
76          faligndata     %f0, %f1, %f48;                         \
77         std             %f48, [%dest];                          \
78         add             %dest, 8, %dest;
/* UNEVEN_VISCHUNK(dest, f0, f1, left): used for the last register of a */
/* bank, where the second faligndata operand has not been loaded yet. */
/*   - subtracts 8 from left and leaves for label 205 if it went negative */
/*   - the delay slot copies register f0 into register f1 (fsrc1) */
/*   - otherwise jumps to the load-and-align loop at label 204 */
/* Every use passes the real register f0 as the f1 argument, which is the */
/* register loop 204 expects the carried-over data in. */
/* The dest argument is not used by this macro. */
80 #define UNEVEN_VISCHUNK(dest, f0, f1, left)                     \
81         subcc           %left, 8, %left;                        \
82         bl,pn           %xcc, 205f;                             \
83          fsrc1          %f0, %f1;                               \
84         ba,a,pt         %xcc, 204f;
86         /* Macros for non-VIS memcpy code. */
/* MOVE_BIGCHUNK(src, dst, offset, t0..t3): copy 32 bytes forward. */
/* Loads four 64-bit words from src + offset and writes each one as two */
/* 32-bit stores: the low half at +4 and, after a 32-bit right shift, the */
/* high half at +0.  Only word stores are issued to dst.  t0..t3 are */
/* clobbered. */
87 #define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3)         \
88         ldx             [%src + offset + 0x00], %t0;            \
89         ldx             [%src + offset + 0x08], %t1;            \
90         ldx             [%src + offset + 0x10], %t2;            \
91         ldx             [%src + offset + 0x18], %t3;            \
92         stw             %t0, [%dst + offset + 0x04];            \
93         srlx            %t0, 32, %t0;                           \
94         stw             %t0, [%dst + offset + 0x00];            \
95         stw             %t1, [%dst + offset + 0x0c];            \
96         srlx            %t1, 32, %t1;                           \
97         stw             %t1, [%dst + offset + 0x08];            \
98         stw             %t2, [%dst + offset + 0x14];            \
99         srlx            %t2, 32, %t2;                           \
100         stw             %t2, [%dst + offset + 0x10];            \
101         stw             %t3, [%dst + offset + 0x1c];            \
102         srlx            %t3, 32, %t3;                           \
103         stw             %t3, [%dst + offset + 0x18];
/* MOVE_BIGALIGNCHUNK(src, dst, offset, t0..t3): copy 64 bytes forward */
/* with 64-bit loads and stores, as two groups of four words that reuse */
/* the same four temporaries. */
105 #define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3)    \
106         ldx             [%src + offset + 0x00], %t0;            \
107         ldx             [%src + offset + 0x08], %t1;            \
108         ldx             [%src + offset + 0x10], %t2;            \
109         ldx             [%src + offset + 0x18], %t3;            \
110         stx             %t0, [%dst + offset + 0x00];            \
111         stx             %t1, [%dst + offset + 0x08];            \
112         stx             %t2, [%dst + offset + 0x10];            \
113         stx             %t3, [%dst + offset + 0x18];            \
114         ldx             [%src + offset + 0x20], %t0;            \
115         ldx             [%src + offset + 0x28], %t1;            \
116         ldx             [%src + offset + 0x30], %t2;            \
117         ldx             [%src + offset + 0x38], %t3;            \
118         stx             %t0, [%dst + offset + 0x20];            \
119         stx             %t1, [%dst + offset + 0x28];            \
120         stx             %t2, [%dst + offset + 0x30];            \
121         stx             %t3, [%dst + offset + 0x38];
/* MOVE_LASTCHUNK(src, dst, offset, t0..t3): copy the 16 bytes that end */
/* offset bytes before src and dst, using 64-bit loads and 32-bit stores. */
/* t2 and t3 receive the high halves, so t0 and t1 keep the loaded words. */
123 #define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3)        \
124         ldx             [%src - offset - 0x10], %t0;            \
125         ldx             [%src - offset - 0x08], %t1;            \
126         stw             %t0, [%dst - offset - 0x0c];            \
127         srlx            %t0, 32, %t2;                           \
128         stw             %t2, [%dst - offset - 0x10];            \
129         stw             %t1, [%dst - offset - 0x04];            \
130         srlx            %t1, 32, %t3;                           \
131         stw             %t3, [%dst - offset - 0x08];
/* MOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1): same 16-byte window as */
/* MOVE_LASTCHUNK but written with two 64-bit stores. */
133 #define MOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1)           \
134         ldx             [%src - offset - 0x10], %t0;            \
135         ldx             [%src - offset - 0x08], %t1;            \
136         stx             %t0, [%dst - offset - 0x10];            \
137         stx             %t1, [%dst - offset - 0x08];
139         /* Macros for non-VIS memmove code. */
/* These mirror the MOVE_ macros with the sign of every displacement */
/* flipped: the BIG variants address memory below src and dst, the LAST */
/* variants address memory at and above them. */
/* RMOVE_BIGCHUNK(src, dst, offset, t0..t3): copy the 32 bytes that end */
/* offset bytes before src and dst, using 64-bit loads and 32-bit stores */
/* (low half at +4, shifted high half at +0 of each word).  t0..t3 are */
/* clobbered. */
140 #define RMOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3)        \
141         ldx             [%src - offset - 0x20], %t0;            \
142         ldx             [%src - offset - 0x18], %t1;            \
143         ldx             [%src - offset - 0x10], %t2;            \
144         ldx             [%src - offset - 0x08], %t3;            \
145         stw             %t0, [%dst - offset - 0x1c];            \
146         srlx            %t0, 32, %t0;                           \
147         stw             %t0, [%dst - offset - 0x20];            \
148         stw             %t1, [%dst - offset - 0x14];            \
149         srlx            %t1, 32, %t1;                           \
150         stw             %t1, [%dst - offset - 0x18];            \
151         stw             %t2, [%dst - offset - 0x0c];            \
152         srlx            %t2, 32, %t2;                           \
153         stw             %t2, [%dst - offset - 0x10];            \
154         stw             %t3, [%dst - offset - 0x04];            \
155         srlx            %t3, 32, %t3;                           \
156         stw             %t3, [%dst - offset - 0x08];
/* RMOVE_BIGALIGNCHUNK(src, dst, offset, t0..t3): copy the 64 bytes that */
/* end offset bytes before src and dst with 64-bit loads and stores.  The */
/* upper 32 bytes are moved first, then the lower 32. */
158 #define RMOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3)   \
159         ldx             [%src - offset - 0x20], %t0;            \
160         ldx             [%src - offset - 0x18], %t1;            \
161         ldx             [%src - offset - 0x10], %t2;            \
162         ldx             [%src - offset - 0x08], %t3;            \
163         stx             %t0, [%dst - offset - 0x20];            \
164         stx             %t1, [%dst - offset - 0x18];            \
165         stx             %t2, [%dst - offset - 0x10];            \
166         stx             %t3, [%dst - offset - 0x08];            \
167         ldx             [%src - offset - 0x40], %t0;            \
168         ldx             [%src - offset - 0x38], %t1;            \
169         ldx             [%src - offset - 0x30], %t2;            \
170         ldx             [%src - offset - 0x28], %t3;            \
171         stx             %t0, [%dst - offset - 0x40];            \
172         stx             %t1, [%dst - offset - 0x38];            \
173         stx             %t2, [%dst - offset - 0x30];            \
174         stx             %t3, [%dst - offset - 0x28];
/* RMOVE_LASTCHUNK(src, dst, offset, t0..t3): copy the 16 bytes starting */
/* at src + offset, using 64-bit loads and 32-bit stores.  t2 and t3 */
/* receive the high halves, so t0 and t1 keep the loaded words. */
176 #define RMOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3)       \
177         ldx             [%src + offset + 0x00], %t0;            \
178         ldx             [%src + offset + 0x08], %t1;            \
179         stw             %t0, [%dst + offset + 0x04];            \
180         srlx            %t0, 32, %t2;                           \
181         stw             %t2, [%dst + offset + 0x00];            \
182         stw             %t1, [%dst + offset + 0x0c];            \
183         srlx            %t1, 32, %t3;                           \
184         stw             %t3, [%dst + offset + 0x08];
/* RMOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1): same 16-byte window as */
/* RMOVE_LASTCHUNK but written with two 64-bit stores. */
186 #define RMOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1)          \
187         ldx             [%src + offset + 0x00], %t0;            \
188         ldx             [%src + offset + 0x08], %t1;            \
189         stx             %t0, [%dst + offset + 0x00];            \
190         stx             %t1, [%dst + offset + 0x08];
192         .text
193         .align          32
/* bcopy: entry stub that swaps its first two arguments and dispatches. */
/* In:  o0 and o1 = the two pointers, o2 = byte count. */
/* NOTE(review): by the usual bcopy convention o0 is the source and o1 the */
/* destination - confirm against the C prototype. */
/* After the swap o0 holds the old o1 and o1 holds the old o0. */
/* Labels 210 and 220 are outside this part of the file. */
/* NOTE(review): presumably the forward and the backward copy paths. */
195 ENTRY(bcopy)
/* o4 = o1 - o0, compared unsigned against the length below. */
196         sub             %o1, %o0, %o4                   /* IEU0         Group           */
197         mov             %o0, %g3                        /* IEU1                         */
198         cmp             %o4, %o2                        /* IEU1         Group           */
199         mov             %o1, %o0                        /* IEU0                         */
/* Distance >= length (unsigned): go to 210.  The delay slot completes the */
/* argument swap on both paths. */
200         bgeu,pt         %XCC, 210f                      /* CTI                          */
201          mov            %g3, %o1                        /* IEU0         Group           */
/* Without USE_BPR the length is zero-extended from 32 bits first. */
202 #ifndef USE_BPR
203         srl             %o2, 0, %o2                     /* IEU1                         */
204 #endif
/* Non-zero length: go to 220 with o0 advanced by the length.  The add is */
/* in a non-annulled delay slot, so it also runs when falling through. */
205         brnz,pn         %o2, 220f                       /* CTI          Group           */
206          add            %o0, %o2, %o0                   /* IEU0                         */
/* Zero length: nothing to copy. */
207         retl
208          nop
209 END(bcopy)
/* Labels 200 and 201: bring the destination up to a 64-byte boundary */
/* before the VIS block loop at 202. */
/* In:  o0 = dest, o1 = src, o2 = remaining length. */
/* Entry at 200 expects g2 = dest & 7 with the condition codes set from */
/* that andcc (the delay slot of the branch in __align_cpy_4 does this). */
/* Entry at 201 expects g5 = dest & 0x38 with the condition codes set from */
/* that andcc.  Other callers may exist outside this part of the file. */
211         .align          32
/* Destination already 8-byte aligned: skip the byte loops. */
212 200:    be,pt           %xcc, 201f                      /* CTI                          */
213          andcc          %o0, 0x38, %g5                  /* IEU1         Group           */
/* g2 = 8 - (dest & 7): bytes needed to reach an 8-byte boundary. */
214         mov             8, %g1                          /* IEU0                         */
215         sub             %g1, %g2, %g2                   /* IEU0         Group           */
/* Even destination: start directly in the two-byte loop.  The delay slot */
/* takes the alignment bytes off the length on both paths. */
216         andcc           %o0, 1, %g0                     /* IEU1                         */
217         be,pt           %icc, 2f                        /* CTI                          */
218          sub            %o2, %g2, %o2                   /* IEU0         Group           */
/* Copy a single byte so that the destination becomes even. */
219 1:      ldub            [%o1], %o5                      /* Load         Group           */
220         add             %o1, 1, %o1                     /* IEU0                         */
221         add             %o0, 1, %o0                     /* IEU1                         */
222         subcc           %g2, 1, %g2                     /* IEU1         Group           */
223         be,pn           %xcc, 3f                        /* CTI                          */
224          stb            %o5, [%o0 - 1]                  /* Store                        */
/* Copy two bytes per iteration until g2 reaches zero. */
225 2:      ldub            [%o1], %o5                      /* Load         Group           */
226         add             %o0, 2, %o0                     /* IEU0                         */
227         ldub            [%o1 + 1], %g3                  /* Load         Group           */
228         subcc           %g2, 2, %g2                     /* IEU1         Group           */
229         stb             %o5, [%o0 - 2]                  /* Store                        */
230         add             %o1, 2, %o1                     /* IEU0                         */
231         bne,pt          %xcc, 2b                        /* CTI          Group           */
232          stb            %g3, [%o0 - 1]                  /* Store                        */
/* Destination is 8-byte aligned here.  Bits 3..5 clear means it is also */
/* 64-byte aligned, in which case the block loop can start right away. */
233 3:      andcc           %o0, 0x38, %g5                  /* IEU1         Group           */
234 201:    be,pt           %icc, 202f                      /* CTI                          */
235          mov            64, %g1                         /* IEU0                         */
/* NOTE(review): the purpose of this fmovd is not evident from the code */
/* shown - confirm before touching it. */
236         fmovd           %f0, %f2                        /* FPU                          */
/* g5 = 64 - (dest & 0x38): bytes up to the next 64-byte boundary. */
237         sub             %g1, %g5, %g5                   /* IEU0         Group           */
/* alignaddr leaves the 8-byte-aligned source address in g1 and records */
/* the source byte offset for the faligndata instructions that follow. */
238         alignaddr       %o1, %g0, %g1                   /* GRU          Group           */
239         ldd             [%g1], %f4                      /* Load         Group           */
240         sub             %o2, %g5, %o2                   /* IEU0                         */
/* Loop unrolled twice, 8 bytes per half.  f4 and f6 alternate as the */
/* previous and the newly loaded source word. */
241 1:      ldd             [%g1 + 0x8], %f6                /* Load         Group           */
242         add             %g1, 0x8, %g1                   /* IEU0         Group           */
243         subcc           %g5, 8, %g5                     /* IEU1                         */
244         faligndata      %f4, %f6, %f0                   /* GRU          Group           */
245         std             %f0, [%o0]                      /* Store                        */
246         add             %o1, 8, %o1                     /* IEU0         Group           */
247         be,pn           %xcc, 202f                      /* CTI                          */
248          add            %o0, 8, %o0                     /* IEU1                         */
249         ldd             [%g1 + 0x8], %f4                /* Load         Group           */
250         add             %g1, 8, %g1                     /* IEU0                         */
251         subcc           %g5, 8, %g5                     /* IEU1                         */
252         faligndata      %f6, %f4, %f0                   /* GRU          Group           */
253         std             %f0, [%o0]                      /* Store                        */
254         add             %o1, 8, %o1                     /* IEU0                         */
255         bne,pt          %xcc, 1b                        /* CTI          Group           */
256          add            %o0, 8, %o0                     /* IEU0                         */
/* Label 202: set up the VIS block loop and jump into the dispatch table. */
/* In:  o0 = dest, o1 = src, o2 = remaining length. */
/* Register roles established here, as used by the code that follows: */
/*   g6 = byte counter of the block loop (decremented by 0x40 per chunk) */
/*   g3 = byte counter of the 8-byte finish code (labels 400..446 and 204) */
/*   o2 = byte count left for the byte loop at 206 */
/*   g1 = source address that label 205 reloads into o1 for that byte loop */
/*   g2 = (src >> 3) & 7, the index of the dispatch-table entry */
/*   o1 = source rounded down to a 64-byte boundary */
257 202:    membar    #LoadStore | #StoreStore | #StoreLoad /* LSU          Group           */
/* All ldda and stda below and in the table go through ASI_BLK_P. */
258         wr              %g0, ASI_BLK_P, %asi            /* LSU          Group           */
/* g6 = (length - 0x40) rounded down to a multiple of 0x40. */
259         subcc           %o2, 0x40, %g6                  /* IEU1         Group           */
260         mov             %o1, %g1                        /* IEU0                         */
261         andncc          %g6, (0x40 - 1), %g6            /* IEU1         Group           */
262         srl             %g1, 3, %g2                     /* IEU0                         */
/* g3 = ((length - g6) rounded down to a multiple of 8) - 0x10. */
263         sub             %o2, %g6, %g3                   /* IEU0         Group           */
264         andn            %o1, (0x40 - 1), %o1            /* IEU1                         */
265         and             %g2, 7, %g2                     /* IEU0         Group           */
266         andncc          %g3, 0x7, %g3                   /* IEU1                         */
/* NOTE(review): the purpose of this fmovd is not evident from the code */
/* shown - confirm before touching it. */
267         fmovd           %f0, %f2                        /* FPU                          */
268         sub             %g3, 0x10, %g3                  /* IEU0         Group           */
269         sub             %o2, %g6, %o2                   /* IEU1                         */
/* The result goes to g0, so only the alignment offset for faligndata is */
/* recorded here. */
270         alignaddr       %g1, %g0, %g0                   /* GRU          Group           */
/* g1 = src + g6 + g3 and o2 = length - g6 - g3 after the next adds. */
271         add             %g1, %g6, %g1                   /* IEU0         Group           */
272         subcc           %o2, %g3, %o2                   /* IEU1                         */
/* Preload three source blocks into the f0, f16 and f32 banks. */
273         ldda            [%o1 + 0x00] %asi, %f0          /* LSU          Group           */
274         add             %g1, %g3, %g1                   /* IEU0                         */
275         ldda            [%o1 + 0x40] %asi, %f16         /* LSU          Group           */
276         sub             %g6, 0x80, %g6                  /* IEU0                         */
277         ldda            [%o1 + 0x80] %asi, %f32         /* LSU          Group           */
278                                                         /* Clk1         Group 8-(       */
279                                                         /* Clk2         Group 8-(       */
280                                                         /* Clk3         Group 8-(       */
281                                                         /* Clk4         Group 8-(       */
/* Computed jump: g5 = address of label 300, g2 = index * 512, so the */
/* target is entry number g2 of the table.  The delay slot moves o1 past */
/* the three blocks that were just preloaded. */
282 203:    rd              %pc, %g5                        /* PDU          Group 8-(       */
283         addcc           %g5, %lo(300f - 203b), %g5      /* IEU1         Group           */
284         sll             %g2, 9, %g2                     /* IEU0                         */
285         jmpl            %g5 + %g2, %g0                  /* CTI          Group brk forced*/
286          addcc          %o1, 0xc0, %o1                  /* IEU1         Group           */
/* Dispatch table of the VIS block loop: eight entries (300, 310 ... 370), */
/* each exactly 512 bytes, selected at label 203 by (src >> 3) & 7. */
/* Entry k starts its register rotation at f(2k). */
/* Registers: o1 = source block pointer, o0 = dest, g6 = loop byte count. */
/* Layout of one entry: */
/*   3x0      three FREG_FROB + LOOP_CHUNK steps rotating through the f0, */
/*            f16 and f32 banks, closed by a branch back to 3x0 + 4.  The */
/*            delay slot repeats the first faligndata of the entry, which */
/*            the + 4 skips. */
/*   3x1..3x3 exits taken when g6 reaches zero in step 1, 2 or 3.  Each */
/*            drains the two banks still holding data: FREG_FROB, */
/*            STORE_SYNC and membar, then FREG_FROB and STORE_JUMP into */
/*            the finish table with the membar in the branch delay slot. */
/*   VISLOOP_PAD fills the entry up to 512 bytes. */
/* Any change to instruction counts here breaks the computed jump. */
288         .align          512             /* OK, here comes the fun part... */
289 300:    FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)  LOOP_CHUNK1(o1, o0, g6, 301f)
290         FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)  LOOP_CHUNK2(o1, o0, g6, 302f)
291         FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)   LOOP_CHUNK3(o1, o0, g6, 303f)
292         b,pt            %xcc, 300b+4; faligndata %f0, %f2, %f48
293 301:    FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)  STORE_SYNC(o0, f48) membar #Sync
294         FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)   STORE_JUMP(o0, f48, 400f) membar #Sync
295 302:    FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)   STORE_SYNC(o0, f48) membar #Sync
296         FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)  STORE_JUMP(o0, f48, 416f) membar #Sync
297 303:    FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)  STORE_SYNC(o0, f48) membar #Sync
298         FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)  STORE_JUMP(o0, f48, 432f) membar #Sync
299         VISLOOP_PAD
300 310:    FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)  LOOP_CHUNK1(o1, o0, g6, 311f)
301         FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)  LOOP_CHUNK2(o1, o0, g6, 312f)
302         FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)   LOOP_CHUNK3(o1, o0, g6, 313f)
303         b,pt            %xcc, 310b+4; faligndata %f2, %f4, %f48
304 311:    FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)  STORE_SYNC(o0, f48) membar #Sync
305         FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)   STORE_JUMP(o0, f48, 402f) membar #Sync
306 312:    FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)   STORE_SYNC(o0, f48) membar #Sync
307         FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)  STORE_JUMP(o0, f48, 418f) membar #Sync
308 313:    FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)  STORE_SYNC(o0, f48) membar #Sync
309         FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)  STORE_JUMP(o0, f48, 434f) membar #Sync
310         VISLOOP_PAD
311 320:    FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)  LOOP_CHUNK1(o1, o0, g6, 321f)
312         FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)  LOOP_CHUNK2(o1, o0, g6, 322f)
313         FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)   LOOP_CHUNK3(o1, o0, g6, 323f)
314         b,pt            %xcc, 320b+4; faligndata %f4, %f6, %f48
315 321:    FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)  STORE_SYNC(o0, f48) membar #Sync
316         FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)   STORE_JUMP(o0, f48, 404f) membar #Sync
317 322:    FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)   STORE_SYNC(o0, f48) membar #Sync
318         FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)  STORE_JUMP(o0, f48, 420f) membar #Sync
319 323:    FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)  STORE_SYNC(o0, f48) membar #Sync
320         FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)  STORE_JUMP(o0, f48, 436f) membar #Sync
321         VISLOOP_PAD
322 330:    FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)  LOOP_CHUNK1(o1, o0, g6, 331f)
323         FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)  LOOP_CHUNK2(o1, o0, g6, 332f)
324         FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)   LOOP_CHUNK3(o1, o0, g6, 333f)
325         b,pt            %xcc, 330b+4; faligndata %f6, %f8, %f48
326 331:    FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)  STORE_SYNC(o0, f48) membar #Sync
327         FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)   STORE_JUMP(o0, f48, 406f) membar #Sync
328 332:    FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)   STORE_SYNC(o0, f48) membar #Sync
329         FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)  STORE_JUMP(o0, f48, 422f) membar #Sync
330 333:    FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)  STORE_SYNC(o0, f48) membar #Sync
331         FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)  STORE_JUMP(o0, f48, 438f) membar #Sync
332         VISLOOP_PAD
333 340:    FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)  LOOP_CHUNK1(o1, o0, g6, 341f)
334         FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)  LOOP_CHUNK2(o1, o0, g6, 342f)
335         FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)   LOOP_CHUNK3(o1, o0, g6, 343f)
336         b,pt            %xcc, 340b+4; faligndata %f8, %f10, %f48
337 341:    FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)  STORE_SYNC(o0, f48) membar #Sync
338         FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)   STORE_JUMP(o0, f48, 408f) membar #Sync
339 342:    FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)   STORE_SYNC(o0, f48) membar #Sync
340         FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)  STORE_JUMP(o0, f48, 424f) membar #Sync
341 343:    FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)  STORE_SYNC(o0, f48) membar #Sync
342         FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)  STORE_JUMP(o0, f48, 440f) membar #Sync
343         VISLOOP_PAD
344 350:    FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)  LOOP_CHUNK1(o1, o0, g6, 351f)
345         FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)  LOOP_CHUNK2(o1, o0, g6, 352f)
346         FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)  LOOP_CHUNK3(o1, o0, g6, 353f)
347         b,pt            %xcc, 350b+4; faligndata %f10, %f12, %f48
348 351:    FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)  STORE_SYNC(o0, f48) membar #Sync
349         FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)  STORE_JUMP(o0, f48, 410f) membar #Sync
350 352:    FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)  STORE_SYNC(o0, f48) membar #Sync
351         FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)  STORE_JUMP(o0, f48, 426f) membar #Sync
352 353:    FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)  STORE_SYNC(o0, f48) membar #Sync
353         FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)  STORE_JUMP(o0, f48, 442f) membar #Sync
354         VISLOOP_PAD
355 360:    FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)  LOOP_CHUNK1(o1, o0, g6, 361f)
356         FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)  LOOP_CHUNK2(o1, o0, g6, 362f)
357         FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)  LOOP_CHUNK3(o1, o0, g6, 363f)
358         b,pt            %xcc, 360b+4; faligndata %f12, %f14, %f48
359 361:    FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)  STORE_SYNC(o0, f48) membar #Sync
360         FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)  STORE_JUMP(o0, f48, 412f) membar #Sync
361 362:    FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)  STORE_SYNC(o0, f48) membar #Sync
362         FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)  STORE_JUMP(o0, f48, 428f) membar #Sync
363 363:    FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)  STORE_SYNC(o0, f48) membar #Sync
364         FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)  STORE_JUMP(o0, f48, 444f) membar #Sync
365         VISLOOP_PAD
366 370:    FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)  LOOP_CHUNK1(o1, o0, g6, 371f)
367         FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)  LOOP_CHUNK2(o1, o0, g6, 372f)
368         FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)  LOOP_CHUNK3(o1, o0, g6, 373f)
369         b,pt            %xcc, 370b+4; faligndata %f14, %f16, %f48
370 371:    FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)  STORE_SYNC(o0, f48) membar #Sync
371         FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)  STORE_JUMP(o0, f48, 414f) membar #Sync
372 372:    FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)  STORE_SYNC(o0, f48) membar #Sync
373         FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)  STORE_JUMP(o0, f48, 430f) membar #Sync
374 373:    FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)  STORE_SYNC(o0, f48) membar #Sync
375         FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)  STORE_JUMP(o0, f48, 446f) membar #Sync
376         VISLOOP_PAD
/* Finish table, entered from the STORE_JUMP exits of the dispatch table. */
/* Label number = 400 + number of the first register still to be written. */
/* Each FINISH_VISCHUNK stores one 8-byte word and falls through to the */
/* next entry, so a run continues until g3 goes negative (exit to 205) or */
/* the end of the current bank is reached. */
/* The last register of each bank (f14, f30, f46) uses UNEVEN_VISCHUNK, */
/* which copies it to f0 and continues in the loading loop at 204. */
/* UNEVEN_VISCHUNK always branches away, so 416 and 432 are reached only */
/* by a jump, never by fall-through. */
377 400:    FINISH_VISCHUNK(o0, f0,  f2,  g3)
378 402:    FINISH_VISCHUNK(o0, f2,  f4,  g3)
379 404:    FINISH_VISCHUNK(o0, f4,  f6,  g3)
380 406:    FINISH_VISCHUNK(o0, f6,  f8,  g3)
381 408:    FINISH_VISCHUNK(o0, f8,  f10, g3)
382 410:    FINISH_VISCHUNK(o0, f10, f12, g3)
383 412:    FINISH_VISCHUNK(o0, f12, f14, g3)
384 414:    UNEVEN_VISCHUNK(o0, f14, f0,  g3)
385 416:    FINISH_VISCHUNK(o0, f16, f18, g3)
386 418:    FINISH_VISCHUNK(o0, f18, f20, g3)
387 420:    FINISH_VISCHUNK(o0, f20, f22, g3)
388 422:    FINISH_VISCHUNK(o0, f22, f24, g3)
389 424:    FINISH_VISCHUNK(o0, f24, f26, g3)
390 426:    FINISH_VISCHUNK(o0, f26, f28, g3)
391 428:    FINISH_VISCHUNK(o0, f28, f30, g3)
392 430:    UNEVEN_VISCHUNK(o0, f30, f0,  g3)
393 432:    FINISH_VISCHUNK(o0, f32, f34, g3)
394 434:    FINISH_VISCHUNK(o0, f34, f36, g3)
395 436:    FINISH_VISCHUNK(o0, f36, f38, g3)
396 438:    FINISH_VISCHUNK(o0, f38, f40, g3)
397 440:    FINISH_VISCHUNK(o0, f40, f42, g3)
398 442:    FINISH_VISCHUNK(o0, f42, f44, g3)
399 444:    FINISH_VISCHUNK(o0, f44, f46, g3)
400 446:    UNEVEN_VISCHUNK(o0, f46, f0,  g3)
/* Labels 204..207: tail of the VIS path. */
/* In:  f0 = last source word already loaded, o1 = next source word to */
/*      load, o0 = dest, g3 = 8-byte-word byte counter, o2 = byte-loop */
/*      count, g1 = byte-loop source pointer, g4 = value to return. */
/* 204: unrolled twice, 8 bytes per half.  f0 and f2 alternate as the */
/* previous and the newly loaded word.  Runs until g3 goes negative. */
401 204:    ldd             [%o1], %f2                      /* Load         Group           */
402         add             %o1, 8, %o1                     /* IEU0                         */
403         subcc           %g3, 8, %g3                     /* IEU1                         */
404         faligndata      %f0, %f2, %f8                   /* GRU          Group           */
405         std             %f8, [%o0]                      /* Store                        */
406         bl,pn           %xcc, 205f                      /* CTI                          */
407          add            %o0, 8, %o0                     /* IEU0         Group           */
408         ldd             [%o1], %f0                      /* Load         Group           */
409         add             %o1, 8, %o1                     /* IEU0                         */
410         subcc           %g3, 8, %g3                     /* IEU1                         */
411         faligndata      %f2, %f0, %f8                   /* GRU          Group           */
412         std             %f8, [%o0]                      /* Store                        */
413         bge,pt          %xcc, 204b                      /* CTI                          */
414          add            %o0, 8, %o0                     /* IEU0         Group           */
/* 205: skip the byte loop when o2 is zero.  The delay slot switches o1 */
/* to the unaligned source position saved in g1 on both paths. */
415 205:    brz,pt          %o2, 207f                       /* CTI          Group           */
416          mov            %g1, %o1                        /* IEU0                         */
/* 206: copy the remaining o2 bytes one at a time. */
417 206:    ldub            [%o1], %g5                      /* LOAD                         */
418         add             %o1, 1, %o1                     /* IEU0                         */
419         add             %o0, 1, %o0                     /* IEU1                         */
420         subcc           %o2, 1, %o2                     /* IEU1                         */
421         bne,pt          %xcc, 206b                      /* CTI                          */
422          stb            %g5, [%o0 - 1]                  /* Store        Group           */
/* 207: order the stores, write FPRS_FEF to fprs and return the value */
/* saved in g4 (the entry points store the original dest there). */
423 207:    membar          #StoreLoad | #StoreStore        /* LSU          Group           */
424         wr              %g0, FPRS_FEF, %fprs
425         retl
426          mov            %g4, %o0
/* Labels 208 and 209: plain byte copy, reached from the entry points */
/* below when the length is at most 15. */
/* In:  o0 = dest, o1 = src, o2 = length, g4 = value to return. */
/* The length must not be zero: the loops only stop when o2 hits exactly */
/* zero (the entry points below document n == 0 as undefined). */
428 208:    andcc           %o2, 1, %g0                     /* IEU1         Group           */
/* Even length: enter the two-byte loop one instruction late (2f + 4). */
/* The skipped ldub is identical to the one in this delay slot, which is */
/* also the first instruction of the odd-length path at label 1. */
429         be,pt           %icc, 2f+4                      /* CTI                          */
430 1:       ldub           [%o1], %g5                      /* LOAD         Group           */
/* Odd length: finish copying the single byte loaded above. */
431         add             %o1, 1, %o1                     /* IEU0                         */
432         add             %o0, 1, %o0                     /* IEU1                         */
433         subcc           %o2, 1, %o2                     /* IEU1         Group           */
434         be,pn           %xcc, 209f                      /* CTI                          */
435          stb            %g5, [%o0 - 1]                  /* Store                        */
/* Two bytes per iteration until o2 reaches zero. */
436 2:      ldub            [%o1], %g5                      /* LOAD         Group           */
437         add             %o0, 2, %o0                     /* IEU0                         */
438         ldub            [%o1 + 1], %o5                  /* LOAD         Group           */
439         add             %o1, 2, %o1                     /* IEU0                         */
440         subcc           %o2, 2, %o2                     /* IEU1         Group           */
441         stb             %g5, [%o0 - 2]                  /* Store                        */
442         bne,pt          %xcc, 2b                        /* CTI                          */
443          stb            %o5, [%o0 - 1]                  /* Store                        */
/* Return the value saved in g4. */
444 209:    retl
445          mov            %g4, %o0
447 #ifdef USE_BPR
449         /* void *__align_cpy_4(void *dest, void *src, size_t n)
450          * SPARC v9 SYSV ABI
451          * Like memcpy, but results are undefined if (!n || ((dest | src | n) & 3))
452          */
/* Dispatch only: picks one of three copy paths by length. */
/* In:  o0 = dest, o1 = src, o2 = n.  g4 keeps dest for the return value. */
/*   n <= 15        go to 208 (byte copy) */
/*   n >= 64 * 6    go to 200 (VIS block copy) with g2 = dest & 7 and the */
/*                  condition codes set from that andcc */
/*   otherwise      go to 216, which is outside this part of the file, */
/*                  with the condition codes set from src & 4 */
/* None of the delay-slot instructions is annulled, so each one also runs */
/* when its branch falls through. */
454         .align          32
455 ENTRY(__align_cpy_4)
456         mov             %o0, %g4                        /* IEU0         Group           */
457         cmp             %o2, 15                         /* IEU1                         */
458         bleu,pn         %xcc, 208b                      /* CTI                          */
459          cmp            %o2, (64 * 6)                   /* IEU1         Group           */
460         bgeu,pn         %xcc, 200b                      /* CTI                          */
461          andcc          %o0, 7, %g2                     /* IEU1         Group           */
462         ba,pt           %xcc, 216f                      /* CTI                          */
463          andcc          %o1, 4, %g0                     /* IEU1         Group           */
464 END(__align_cpy_4)
466         /* void *__align_cpy_8(void *dest, void *src, size_t n)
467          * SPARC v9 SYSV ABI
468          * Like memcpy, but results are undefined if (!n || ((dest | src | n) & 7))
469          */
            /* This entry point only dispatches; the copying is done by code
               shared with memcpy.
               In:   %o0 = dest, %o1 = src, %o2 = n.
               Out:  %o0 = dest (kept in %g4 and restored by the shared exits).
               Dispatch: n <= 15 goes to the byte copy at 208; n >= 64 * 6
               goes to label 201 (defined earlier in the file, outside this
               excerpt); otherwise the 128-byte loop at 82 is used when at
               least one full 128-byte chunk exists, else the tail code at
               41.  */
471         .align          32
472 ENTRY(__align_cpy_8)
473         mov             %o0, %g4                        /* IEU0         Group           */
474         cmp             %o2, 15                         /* IEU1                         */
475         bleu,pn         %xcc, 208b                      /* CTI                          */
476          cmp            %o2, (64 * 6)                   /* IEU1         Group           */
            /* The delay slot leaves %g5 = dest & 0x38.  NOTE(review): this
               is presumably an input expected by the code at 201 - confirm
               there; nothing in this excerpt reads it.  */
477         bgeu,pn         %xcc, 201b                      /* CTI                          */
478          andcc          %o0, 0x38, %g5                  /* IEU1         Group           */
            /* %g6 = n rounded down to a multiple of 128 (loop byte count).  */
479         andcc           %o2, -128, %g6                  /* IEU1         Group           */
            /* Enter the aligned loop one instruction past label 82.  The
               annulled delay slot (executed only when the branch is taken)
               presumably stands in for the skipped first instruction of
               the macro expansion at 82 - confirm against
               MOVE_BIGALIGNCHUNK.  */
480         bne,a,pt        %xcc, 82f + 4                   /* CTI                          */
481          ldx            [%o1], %g1                      /* Load                         */
            /* Fewer than 128 bytes: join memcpy at 41 with %g6 = n & 0x70
               and the flags of that AND, exactly as label 215 sets them.  */
482         ba,pt           %xcc, 41f                       /* CTI          Group           */
483          andcc          %o2, 0x70, %g6                  /* IEU1                         */
484 END(__align_cpy_8)
486         /* void *__align_cpy_16(void *dest, void *src, size_t n)
487          * SPARC v9 SYSV ABI
488          * Like memcpy, but results are undefined if (!n || ((dest | src | n) & 15))
489          */
            /* This entry point only dispatches; the copying is done by code
               shared with memcpy.
               In:   %o0 = dest, %o1 = src, %o2 = n.
               Out:  %o0 = dest (kept in %g4 and restored by the shared exits).
               Unlike __align_cpy_4 and __align_cpy_8 there is no n <= 15
               test: under the contract above n is a nonzero multiple of
               16.  The rest of the dispatch is the same as in
               __align_cpy_8.  */
491         .align          32
492 ENTRY(__align_cpy_16)
493         mov             %o0, %g4                        /* IEU0         Group           */
494         cmp             %o2, (64 * 6)                   /* IEU1                         */
            /* n >= 64 * 6 goes to label 201 (outside this excerpt); the
               delay slot leaves %g5 = dest & 0x38.  NOTE(review): this is
               presumably an input expected at 201 - confirm there.  */
495         bgeu,pn         %xcc, 201b                      /* CTI                          */
496          andcc          %o0, 0x38, %g5                  /* IEU1         Group           */
            /* %g6 = n rounded down to a multiple of 128 (loop byte count).  */
497         andcc           %o2, -128, %g6                  /* IEU1         Group           */
            /* At least one 128-byte chunk: enter the aligned loop one
               instruction past label 82; the annulled delay slot presumably
               replaces the skipped first instruction of the expansion at
               82 - confirm against MOVE_BIGALIGNCHUNK.  */
498         bne,a,pt        %xcc, 82f + 4                   /* CTI                          */
499          ldx            [%o1], %g1                      /* Load                         */
            /* Otherwise join memcpy at 41 with %g6 = n & 0x70 and the flags
               of that AND.  */
500         ba,pt           %xcc, 41f                       /* CTI          Group           */
501          andcc          %o2, 0x70, %g6                  /* IEU1                         */
502 END(__align_cpy_16)
504 #endif
/* void *memcpy(void *dest, const void *src, size_t n)
   In:   %o0 = dest, %o1 = src, %o2 = n.
   Out:  %o0 = dest (saved in %g4 on entry and restored at every exit).
   Uses: %g1-%g6 and %o5 as temporaries; the path at 212 also uses
         %f0-%f6 and rewrites %fprs before returning.
   Strategy, as visible in this excerpt:
     n == 0         return at once (209).
     n <= 15        byte copy at 208.
     n >= 64 * 6    label 200, defined earlier in the file (not shown here).
     otherwise      if (dest - src) is a multiple of 4: integer copy in
                    128-byte chunks, then 16-byte chunks through a
                    computed jump, then an 8/4/2/1-byte tail.
                    else: align dest to 8 bytes and copy doublewords with
                    alignaddr/faligndata (label 212).
   The MOVE_* macros are defined outside this excerpt; statements about
   their size or contents below are assumptions to be confirmed there.  */
506         .align          32
507 ENTRY(memcpy)
508 210:
509 #ifndef USE_BPR
            /* Without USE_BPR only the low 32 bits of the length are kept
               (srl by 0 zero-extends the low word).  */
510         srl             %o2, 0, %o2                     /* IEU1         Group           */
511 #endif  
            /* Zero length: return through 209.  The delay slot saves dest
               in %g4 for every exit path.  */
512         brz,pn          %o2, 209b                       /* CTI          Group           */
513          mov            %o0, %g4                        /* IEU0                         */
            /* 218: memmove also enters here (with %g4 already set) when
               (dest - src) >= n as an unsigned compare.
               Dispatch on the length; the second delay slot leaves
               %g2 = dest & 7, which label 212 consumes.  */
514 218:    cmp             %o2, 15                         /* IEU1         Group           */
515         bleu,pn         %xcc, 208b                      /* CTI                          */
516          cmp            %o2, (64 * 6)                   /* IEU1         Group           */
517         bgeu,pn         %xcc, 200b                      /* CTI                          */
518          andcc          %o0, 7, %g2                     /* IEU1         Group           */
            /* If (dest - src) is not a multiple of 4 the two pointers can
               never both be word aligned: use the faligndata path at 212.  */
519         sub             %o0, %o1, %g5                   /* IEU0                         */
520         andcc           %g5, 3, %o5                     /* IEU1         Group           */
521         bne,pn          %xcc, 212f                      /* CTI                          */
522          andcc          %o1, 3, %g0                     /* IEU1         Group           */
            /* src already 4-byte aligned: go straight to 216.  The annulled
               delay slot sets the (src & 4) flags that 216 branches on.  */
523         be,a,pt         %xcc, 216f                      /* CTI                          */
524          andcc          %o1, 4, %g0                     /* IEU1         Group           */
            /* Bring src up to a 4-byte boundary: one byte if src is odd,
               then a halfword if still needed.  The delay slot tests bit 1
               of the ORIGINAL src; the plain sub below leaves the flags
               alone, so the bne to 5f skips the halfword exactly when the
               single byte already made src a multiple of 4.  */
525         andcc           %o1, 1, %g0                     /* IEU1         Group           */
526         be,pn           %xcc, 4f                        /* CTI                          */
527          andcc          %o1, 2, %g0                     /* IEU1         Group           */
528         ldub            [%o1], %g2                      /* Load         Group           */
529         add             %o1, 1, %o1                     /* IEU0                         */
530         add             %o0, 1, %o0                     /* IEU1                         */
531         sub             %o2, 1, %o2                     /* IEU0         Group           */
532         bne,pn          %xcc, 5f                        /* CTI          Group           */
533          stb            %g2, [%o0 - 1]                  /* Store                        */
534 4:      lduh            [%o1], %g2                      /* Load         Group           */
535         add             %o1, 2, %o1                     /* IEU0                         */
536         add             %o0, 2, %o0                     /* IEU1                         */
537         sub             %o2, 2, %o2                     /* IEU0                         */
538         sth             %g2, [%o0 - 2]                  /* Store        Group + bubble  */
            /* 216 is also the entry used by __align_cpy_4.  If bit 2 of src
               is clear, src is 8-byte aligned: go to 2 with
               %g6 = n & -128 from the annulled delay slot.  Otherwise copy
               one word first and compute %g6 afterwards.  */
539 5:      andcc           %o1, 4, %g0                     /* IEU1                         */
540 216:    be,a,pn         %xcc, 2f                        /* CTI                          */
541          andcc          %o2, -128, %g6                  /* IEU1         Group           */
542         lduw            [%o1], %g5                      /* Load         Group           */
543         add             %o1, 4, %o1                     /* IEU0                         */
544         add             %o0, 4, %o0                     /* IEU1                         */
545         sub             %o2, 4, %o2                     /* IEU0         Group           */
546         stw             %g5, [%o0 - 4]                  /* Store                        */
547         andcc           %o2, -128, %g6                  /* IEU1         Group           */
            /* %g6 == 0: no full 128-byte chunk, go to 215.  Otherwise pick
               the chunk loop from bit 2 of dest (tested in the delay
               slot): clear means dest is 8-byte aligned too, so use the
               loop at 82; set means use the MOVE_BIGCHUNK loop below.  */
548 2:      be,pn           %xcc, 215f                      /* CTI                          */
549          andcc          %o0, 4, %g0                     /* IEU1         Group           */
            /* The delay slot of this branch is the first instruction of the
               MOVE_BIGCHUNK expansion at 5:, and the target skips the first
               instruction of the expansion at 82.  Presumably both begin
               with the same load (compare the explicit ldx delay slot in
               __align_cpy_8) - confirm against the macro definitions.  */
550         be,pn           %xcc, 82f + 4                   /* CTI          Group           */
            /* 128-byte loop for dest not 8-byte aligned.  Each pass advances
               both pointers by 128 and counts %g6 down by 128; the four
               invocations presumably move 32 bytes each.  */
551 5:      MOVE_BIGCHUNK(o1, o0, 0x00, g1, g3, g5, o5)
552         MOVE_BIGCHUNK(o1, o0, 0x20, g1, g3, g5, o5)
553         MOVE_BIGCHUNK(o1, o0, 0x40, g1, g3, g5, o5)
554         MOVE_BIGCHUNK(o1, o0, 0x60, g1, g3, g5, o5)
555 35:     subcc           %g6, 128, %g6                   /* IEU1         Group           */
556         add             %o1, 128, %o1                   /* IEU0                         */
557         bne,pt          %xcc, 5b                        /* CTI                          */
558          add            %o0, 128, %o0                   /* IEU0         Group           */
            /* %g6 = n & 0x70: the bytes still to copy in 16-byte units.
               41: is also entered from __align_cpy_8 and __align_cpy_16.
               If there are none, go to the tail at 80; the delay slot
               pre-computes the (n & 8) flags that 80 branches on.  */
559 215:    andcc           %o2, 0x70, %g6                  /* IEU1         Group           */
560 41:     be,pn           %xcc, 80f                       /* CTI                          */
561          andcc          %o2, 8, %g0                     /* IEU1         Group           */
562                                                         /* Clk1 8-(                     */
563                                                         /* Clk2 8-(                     */
564                                                         /* Clk3 8-(                     */
565                                                         /* Clk4 8-(                     */
            /* Computed jump into the MOVE_LASTCHUNK table.  The target is
               (address of 80) - 2 * %g6, i.e. 32 bytes of code per 16 bytes
               of data, so this relies on every MOVE_LASTCHUNK expanding to
               exactly 8 instructions (presumably - confirm at the macro).
               Both pointers are advanced by %g6 before the table runs, so
               the table presumably addresses the data at negative offsets.
               The flags from the andcc above survive for label 80 only if
               the macros leave the condition codes alone - confirm.  */
566 79:     rd              %pc, %o5                        /* PDU          Group           */
567         sll             %g6, 1, %g5                     /* IEU0         Group           */
568         add             %o1, %g6, %o1                   /* IEU1                         */
569         sub             %o5, %g5, %o5                   /* IEU0         Group           */
570         jmpl            %o5 + %lo(80f - 79b), %g0       /* CTI          Group brk forced*/
571          add            %o0, %g6, %o0                   /* IEU0         Group           */
572 36:     MOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g5, o5)
573         MOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g5, o5)
574         MOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g5, o5)
575         MOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g5, o5)
576         MOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g5, o5)
577         MOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g5, o5)
578         MOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g5, o5)
            /* Tail: bits 3, 2, 1 and 0 of n select an 8-, 4-, 2- and 1-byte
               move in turn.  Each be consumes the flags set by the andcc in
               the previous branch delay slot.
               The 8-byte move loads with one ldx but stores two words, the
               upper 32 bits going to the lower address; presumably because
               dest may be only 4-byte aligned on this path.  */
579 80:     be,pt           %xcc, 81f                       /* CTI                          */
580          andcc          %o2, 4, %g0                     /* IEU1                         */
581         ldx             [%o1], %g2                      /* Load         Group           */
582         add             %o0, 8, %o0                     /* IEU0                         */
583         stw             %g2, [%o0 - 0x4]                /* Store        Group           */
584         add             %o1, 8, %o1                     /* IEU1                         */
585         srlx            %g2, 32, %g2                    /* IEU0         Group           */
586         stw             %g2, [%o0 - 0x8]                /* Store                        */
587 81:     be,pt           %xcc, 1f                        /* CTI                          */
588          andcc          %o2, 2, %g0                     /* IEU1         Group           */
589         lduw            [%o1], %g2                      /* Load         Group           */
590         add             %o1, 4, %o1                     /* IEU0                         */
591         stw             %g2, [%o0]                      /* Store        Group           */
592         add             %o0, 4, %o0                     /* IEU0                         */
593 1:      be,pt           %xcc, 1f                        /* CTI                          */
594          andcc          %o2, 1, %g0                     /* IEU1         Group           */
595         lduh            [%o1], %g2                      /* Load         Group           */
596         add             %o1, 2, %o1                     /* IEU0                         */
597         sth             %g2, [%o0]                      /* Store        Group           */
598         add             %o0, 2, %o0                     /* IEU0                         */
599 1:      be,pt           %xcc, 211f                      /* CTI                          */
600          nop                                            /* IEU1                         */
601         ldub            [%o1], %g2                      /* Load         Group           */
602         stb             %g2, [%o0]                      /* Store        Group + bubble  */
603 211:    retl
604          mov            %g4, %o0
            /* 82: 128-byte loop for src and dest both 8-byte aligned.  It is
               entered at 82 + 4 from above and from __align_cpy_8/16, and
               at 82 itself when looping.  The two invocations presumably
               move 64 bytes each.  */
606 82:     MOVE_BIGALIGNCHUNK(o1, o0, 0x00, g1, g3, g5, o5)
607         MOVE_BIGALIGNCHUNK(o1, o0, 0x40, g1, g3, g5, o5)
608 37:     subcc           %g6, 128, %g6                   /* IEU1         Group           */
609         add             %o1, 128, %o1                   /* IEU0                         */
610         bne,pt          %xcc, 82b                       /* CTI                          */
611          add            %o0, 128, %o0                   /* IEU0         Group           */
            /* %g6 = n & 0x70 again; none left means go to the tail at 84,
               with the (n & 8) flags prepared in the delay slot.  */
612         andcc           %o2, 0x70, %g6                  /* IEU1                         */
613         be,pn           %xcc, 84f                       /* CTI                          */
614          andcc          %o2, 8, %g0                     /* IEU1         Group           */
615                                                         /* Clk1 8-(                     */
616                                                         /* Clk2 8-(                     */
617                                                         /* Clk3 8-(                     */
618                                                         /* Clk4 8-(                     */
            /* Computed jump as at 79, but the target is
               (address of 84) - %g6: 16 bytes of code per 16 bytes of data,
               so every MOVE_LASTALIGNCHUNK must expand to exactly 4
               instructions (presumably - confirm at the macro).  */
619 83:     rd              %pc, %o5                        /* PDU          Group           */
620         add             %o1, %g6, %o1                   /* IEU0         Group           */
621         sub             %o5, %g6, %o5                   /* IEU1                         */
622         jmpl            %o5 + %lo(84f - 83b), %g0       /* CTI          Group brk forced*/
623          add            %o0, %g6, %o0                   /* IEU0         Group           */
624 38:     MOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2, g3)
625         MOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2, g3)
626         MOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2, g3)
627         MOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2, g3)
628         MOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2, g3)
629         MOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2, g3)
630         MOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2, g3)
            /* Tail for the aligned path, same bit-by-bit scheme as at 80.
               The 8-byte step is a single ldx/stx pair here; this path is
               entered only after dest tested 8-byte aligned above, or
               under the __align_cpy_8 and __align_cpy_16 contracts.  */
631 84:     be,pt           %xcc, 85f                       /* CTI          Group           */
632          andcc          %o2, 4, %g0                     /* IEU1                         */
633         ldx             [%o1], %g2                      /* Load         Group           */
634         add             %o0, 8, %o0                     /* IEU0                         */
635         add             %o1, 8, %o1                     /* IEU0         Group           */
636         stx             %g2, [%o0 - 0x8]                /* Store                        */
637 85:     be,pt           %xcc, 1f                        /* CTI                          */
638          andcc          %o2, 2, %g0                     /* IEU1         Group           */
639         lduw            [%o1], %g2                      /* Load         Group           */
640         add             %o0, 4, %o0                     /* IEU0                         */
641         add             %o1, 4, %o1                     /* IEU0         Group           */
642         stw             %g2, [%o0 - 0x4]                /* Store                        */
643 1:      be,pt           %xcc, 1f                        /* CTI                          */
644          andcc          %o2, 1, %g0                     /* IEU1         Group           */
645         lduh            [%o1], %g2                      /* Load         Group           */
646         add             %o0, 2, %o0                     /* IEU0                         */
647         add             %o1, 2, %o1                     /* IEU0         Group           */
648         sth             %g2, [%o0 - 0x2]                /* Store                        */
649 1:      be,pt           %xcc, 1f                        /* CTI                          */
650          nop                                            /* IEU0         Group           */
651         ldub            [%o1], %g2                      /* Load         Group           */
652         stb             %g2, [%o0]                      /* Store        Group + bubble  */
653 1:      retl
654          mov            %g4, %o0
            /* 212: src and dest differ in alignment modulo 4.
               %g2 = dest & 7 (set in the delay slot before the branch to
               200).  If it is nonzero, copy 8 - %g2 single bytes so that
               dest becomes 8-byte aligned, charging them to %o2 up front.  */
656 212:    brz,pt          %g2, 2f                         /* CTI          Group           */
657          mov            8, %g1                          /* IEU0                         */
658         sub             %g1, %g2, %g2                   /* IEU0         Group           */
659         sub             %o2, %g2, %o2                   /* IEU0         Group           */
660 1:      ldub            [%o1], %g5                      /* Load         Group           */
661         add             %o1, 1, %o1                     /* IEU0                         */
662         add             %o0, 1, %o0                     /* IEU1                         */
663         subcc           %g2, 1, %g2                     /* IEU1         Group           */
664         bne,pt          %xcc, 1b                        /* CTI                          */
665          stb            %g5, [%o0 - 1]                  /* Store                        */
            /* %g5 = bytes to move as whole doublewords, %o2 = 0..7 trailing
               bytes.  %g5 is at least 8 here: this path starts with more
               than 15 bytes and the alignment loop used at most 7.  */
666 2:      andn            %o2, 7, %g5                     /* IEU0         Group           */
667         and             %o2, 7, %o2                     /* IEU1                         */
            /* NOTE(review): %f2 is not read anywhere in this excerpt;
               presumably this fmovd only touches the FPU before the loop -
               confirm the intent.  */
668         fmovd           %f0, %f2                        /* FPU                          */
            /* alignaddr yields src rounded down to 8 bytes in %g1 and
               records the byte offset that faligndata uses to extract the
               wanted 8 bytes from a register pair.  */
669         alignaddr       %o1, %g0, %g1                   /* GRU          Group           */
670         ldd             [%g1], %f4                      /* Load         Group           */
            /* Doubleword loop, unrolled twice so that %f4 and %f6 swap the
               roles of previous and next source doubleword without a
               register move.  Each half stores 8 bytes and advances src
               and dest by 8.  */
671 1:      ldd             [%g1 + 0x8], %f6                /* Load         Group           */
672         add             %g1, 0x8, %g1                   /* IEU0         Group           */
673         subcc           %g5, 8, %g5                     /* IEU1                         */
674         faligndata      %f4, %f6, %f0                   /* GRU          Group           */
675         std             %f0, [%o0]                      /* Store                        */
676         add             %o1, 8, %o1                     /* IEU0         Group           */
677         be,pn           %xcc, 213f                      /* CTI                          */
678          add            %o0, 8, %o0                     /* IEU1                         */
679         ldd             [%g1 + 0x8], %f4                /* Load         Group           */
680         add             %g1, 8, %g1                     /* IEU0                         */
681         subcc           %g5, 8, %g5                     /* IEU1                         */
682         faligndata      %f6, %f4, %f0                   /* GRU          Group           */
683         std             %f0, [%o0]                      /* Store                        */
684         add             %o1, 8, %o1                     /* IEU0                         */
685         bne,pn          %xcc, 1b                        /* CTI          Group           */
686          add            %o0, 8, %o0                     /* IEU0                         */
            /* Trailing 0..7 bytes.  The first one is copied here; if more
               remain, control continues in the byte loop at 206, which is
               defined earlier in the file (outside this excerpt) and
               presumably finishes the copy and returns.  */
687 213:    brz,pn          %o2, 214f                       /* CTI          Group           */
688          nop                                            /* IEU0                         */
689         ldub            [%o1], %g5                      /* LOAD                         */
690         add             %o1, 1, %o1                     /* IEU0                         */
691         add             %o0, 1, %o0                     /* IEU1                         */
692         subcc           %o2, 1, %o2                     /* IEU1                         */
693         bne,pt          %xcc, 206b                      /* CTI                          */
694          stb            %g5, [%o0 - 1]                  /* Store        Group           */
            /* Write FPRS_FEF alone to %fprs before returning.
               NOTE(review): presumably this clears the FP dirty bits after
               using %f0-%f6 - confirm.  */
695 214:    wr              %g0, FPRS_FEF, %fprs
696         retl
697          mov            %g4, %o0
698 END(memcpy)
/* 228: short backward byte copy; memmove branches here when %o2 <= 15.
   In:   %o0 / %o1 = one past the last dest / src byte (memmove adds the
         length to both pointers before branching here), %o2 = byte
         count (nonzero), %g4 = value to return.
   Out:  %o0 = %g4.  %g5 and %o5 are used as data temporaries.
   This mirrors the forward copy at 208 with every step running towards
   lower addresses.  */
700         .align          32
701 228:    andcc           %o2, 1, %g0                     /* IEU1         Group           */
            /* Even count: enter the pair loop at 2f+4, skipping its first
               ldub, because the non-annulled delay slot below has already
               done the identical load into %o5.  */
702         be,pt           %icc, 2f+4                      /* CTI                          */
703 1:       ldub           [%o1 - 1], %o5                  /* LOAD         Group           */
            /* Odd count: move the last byte alone; if it was the only one,
               return through 229 (the store happens in the delay slot).  */
704         sub             %o1, 1, %o1                     /* IEU0                         */
705         sub             %o0, 1, %o0                     /* IEU1                         */
706         subcc           %o2, 1, %o2                     /* IEU1         Group           */
707         be,pn           %xcc, 229f                      /* CTI                          */
708          stb            %o5, [%o0]                      /* Store                        */
            /* Pair loop: two bytes per pass, highest address first, until
               the count reaches zero.  */
709 2:      ldub            [%o1 - 1], %o5                  /* LOAD         Group           */
710         sub             %o0, 2, %o0                     /* IEU0                         */
711         ldub            [%o1 - 2], %g5                  /* LOAD         Group           */
712         sub             %o1, 2, %o1                     /* IEU0                         */
713         subcc           %o2, 2, %o2                     /* IEU1         Group           */
714         stb             %o5, [%o0 + 1]                  /* Store                        */
715         bne,pt          %xcc, 2b                        /* CTI                          */
716          stb            %g5, [%o0]                      /* Store                        */
            /* 229: return the saved destination pointer.  */
717 229:    retl
718          mov            %g4, %o0
            /* 219: bare return that leaves %o0 untouched.  memmove branches
               here for a zero length, before it has copied dest to %g4.  */
719 219:    retl
720          nop
722         .align          32
723 ENTRY(memmove)
724 #ifndef USE_BPR
725         srl             %o2, 0, %o2                     /* IEU1         Group           */
726 #endif
727         brz,pn          %o2, 219b                       /* CTI          Group           */
728          sub            %o0, %o1, %o4                   /* IEU0                         */
729         cmp             %o4, %o2                        /* IEU1         Group           */
730         bgeu,pt         %XCC, 218b                      /* CTI                          */
731          mov            %o0, %g4                        /* IEU0                         */
732         add             %o0, %o2, %o0                   /* IEU0         Group           */
733 220:    add             %o1, %o2, %o1                   /* IEU1                         */
734         cmp             %o2, 15                         /* IEU1         Group           */
735         bleu,pn         %xcc, 228b                      /* CTI                          */
736          andcc          %o0, 7, %g2                     /* IEU1         Group           */
737         sub             %o0, %o1, %g5                   /* IEU0                         */
738         andcc           %g5, 3, %o5                     /* IEU1         Group           */
739         bne,pn          %xcc, 232f                      /* CTI                          */
740          andcc          %o1, 3, %g0                     /* IEU1         Group           */
741         be,a,pt         %xcc, 236f                      /* CTI                          */
742          andcc          %o1, 4, %g0                     /* IEU1         Group           */
743         andcc           %o1, 1, %g0                     /* IEU1         Group           */
744         be,pn           %xcc, 4f                        /* CTI                          */
745          andcc          %o1, 2, %g0                     /* IEU1         Group           */
746         ldub            [%o1 - 1], %g2                  /* Load         Group           */
747         sub             %o1, 1, %o1                     /* IEU0                         */
748         sub             %o0, 1, %o0                     /* IEU1                         */
749         sub             %o2, 1, %o2                     /* IEU0         Group           */
750         be,pn           %xcc, 5f                        /* CTI          Group           */
751          stb            %g2, [%o0]                      /* Store                        */
752 4:      lduh            [%o1 - 2], %g2                  /* Load         Group           */
753         sub             %o1, 2, %o1                     /* IEU0                         */
754         sub             %o0, 2, %o0                     /* IEU1                         */
755         sub             %o2, 2, %o2                     /* IEU0                         */
756         sth             %g2, [%o0]                      /* Store        Group + bubble  */
757 5:      andcc           %o1, 4, %g0                     /* IEU1                         */
758 236:    be,a,pn         %xcc, 2f                        /* CTI                          */
759          andcc          %o2, -128, %g6                  /* IEU1         Group           */
760         lduw            [%o1 - 4], %g5                  /* Load         Group           */
761         sub             %o1, 4, %o1                     /* IEU0                         */
762         sub             %o0, 4, %o0                     /* IEU1                         */
763         sub             %o2, 4, %o2                     /* IEU0         Group           */
764         stw             %g5, [%o0]                      /* Store                        */
765         andcc           %o2, -128, %g6                  /* IEU1         Group           */
766 2:      be,pn           %xcc, 235f                      /* CTI                          */
767          andcc          %o0, 4, %g0                     /* IEU1         Group           */
768         be,pn           %xcc, 282f + 4                  /* CTI          Group           */
769 5:      RMOVE_BIGCHUNK(o1, o0, 0x00, g1, g3, g5, o5)
770         RMOVE_BIGCHUNK(o1, o0, 0x20, g1, g3, g5, o5)
771         RMOVE_BIGCHUNK(o1, o0, 0x40, g1, g3, g5, o5)
772         RMOVE_BIGCHUNK(o1, o0, 0x60, g1, g3, g5, o5)
773         subcc           %g6, 128, %g6                   /* IEU1         Group           */
774         sub             %o1, 128, %o1                   /* IEU0                         */
775         bne,pt          %xcc, 5b                        /* CTI                          */
776          sub            %o0, 128, %o0                   /* IEU0         Group           */
777 235:    andcc           %o2, 0x70, %g6                  /* IEU1         Group           */
778 41:     be,pn           %xcc, 280f                      /* CTI                          */
779          andcc          %o2, 8, %g0                     /* IEU1         Group           */
780                                                         /* Clk1 8-(                     */
781                                                         /* Clk2 8-(                     */
782                                                         /* Clk3 8-(                     */
783                                                         /* Clk4 8-(                     */
784 279:    rd              %pc, %o5                        /* PDU          Group           */
785         sll             %g6, 1, %g5                     /* IEU0         Group           */
786         sub             %o1, %g6, %o1                   /* IEU1                         */
787         sub             %o5, %g5, %o5                   /* IEU0         Group           */
788         jmpl            %o5 + %lo(280f - 279b), %g0     /* CTI          Group brk forced*/
789          sub            %o0, %g6, %o0                   /* IEU0         Group           */
790         RMOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g5, o5)
791         RMOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g5, o5)
792         RMOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g5, o5)
793         RMOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g5, o5)
794         RMOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g5, o5)
795         RMOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g5, o5)
796         RMOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g5, o5)
797 280:    be,pt           %xcc, 281f                      /* CTI                          */
798          andcc          %o2, 4, %g0                     /* IEU1                         */
799         ldx             [%o1 - 8], %g2                  /* Load         Group           */
800         sub             %o0, 8, %o0                     /* IEU0                         */
801         stw             %g2, [%o0 + 4]                  /* Store        Group           */
802         sub             %o1, 8, %o1                     /* IEU1                         */
803         srlx            %g2, 32, %g2                    /* IEU0         Group           */
804         stw             %g2, [%o0]                      /* Store                        */
805 281:    be,pt           %xcc, 1f                        /* CTI                          */
806          andcc          %o2, 2, %g0                     /* IEU1         Group           */
807         lduw            [%o1 - 4], %g2                  /* Load         Group           */
808         sub             %o1, 4, %o1                     /* IEU0                         */
809         stw             %g2, [%o0 - 4]                  /* Store        Group           */
810         sub             %o0, 4, %o0                     /* IEU0                         */
811 1:      be,pt           %xcc, 1f                        /* CTI                          */
812          andcc          %o2, 1, %g0                     /* IEU1         Group           */
813         lduh            [%o1 - 2], %g2                  /* Load         Group           */
814         sub             %o1, 2, %o1                     /* IEU0                         */
815         sth             %g2, [%o0 - 2]                  /* Store        Group           */
816         sub             %o0, 2, %o0                     /* IEU0                         */
817 1:      be,pt           %xcc, 211f                      /* CTI                          */
818          nop                                            /* IEU1                         */
819         ldub            [%o1 - 1], %g2                  /* Load         Group           */
820         stb             %g2, [%o0 - 1]                  /* Store        Group + bubble  */
821 211:    retl
822          mov            %g4, %o0
824 282:    RMOVE_BIGALIGNCHUNK(o1, o0, 0x00, g1, g3, g5, o5)
825         RMOVE_BIGALIGNCHUNK(o1, o0, 0x40, g1, g3, g5, o5)
826         subcc           %g6, 128, %g6                   /* IEU1         Group           */
827         sub             %o1, 128, %o1                   /* IEU0                         */
828         bne,pt          %xcc, 282b                      /* CTI                          */
829          sub            %o0, 128, %o0                   /* IEU0         Group           */
830         andcc           %o2, 0x70, %g6                  /* IEU1                         */
831         be,pn           %xcc, 284f                      /* CTI                          */
832          andcc          %o2, 8, %g0                     /* IEU1         Group           */
833                                                         /* Clk1 8-(                     */
834                                                         /* Clk2 8-(                     */
835                                                         /* Clk3 8-(                     */
836                                                         /* Clk4 8-(                     */
837 283:    rd              %pc, %o5                        /* PDU          Group           */
838         sub             %o1, %g6, %o1                   /* IEU0         Group           */
839         sub             %o5, %g6, %o5                   /* IEU1                         */
840         jmpl            %o5 + %lo(284f - 283b), %g0     /* CTI          Group brk forced*/
841          sub            %o0, %g6, %o0                   /* IEU0         Group           */
842         RMOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2, g3)
843         RMOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2, g3)
844         RMOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2, g3)
845         RMOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2, g3)
846         RMOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2, g3)
847         RMOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2, g3)
848         RMOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2, g3)
849 284:    be,pt           %xcc, 285f                      /* CTI          Group           */
850          andcc          %o2, 4, %g0                     /* IEU1                         */
851         ldx             [%o1 - 8], %g2                  /* Load         Group           */
852         sub             %o0, 8, %o0                     /* IEU0                         */
853         sub             %o1, 8, %o1                     /* IEU0         Group           */
854         stx             %g2, [%o0]                      /* Store                        */
855 285:    be,pt           %xcc, 1f                        /* CTI                          */
856          andcc          %o2, 2, %g0                     /* IEU1         Group           */
857         lduw            [%o1 - 4], %g2                  /* Load         Group           */
858         sub             %o0, 4, %o0                     /* IEU0                         */
859         sub             %o1, 4, %o1                     /* IEU0         Group           */
860         stw             %g2, [%o0]                      /* Store                        */
861 1:      be,pt           %xcc, 1f                        /* CTI                          */
862          andcc          %o2, 1, %g0                     /* IEU1         Group           */
863         lduh            [%o1 - 2], %g2                  /* Load         Group           */
864         sub             %o0, 2, %o0                     /* IEU0                         */
865         sub             %o1, 2, %o1                     /* IEU0         Group           */
866         sth             %g2, [%o0]                      /* Store                        */
867 1:      be,pt           %xcc, 1f                        /* CTI                          */
868          nop                                            /* IEU0         Group           */
869         ldub            [%o1 - 1], %g2                  /* Load         Group           */
870         stb             %g2, [%o0 - 1]                  /* Store        Group + bubble  */
871 1:      retl
872          mov            %g4, %o0
874 232:    brz,pt          %g2, 2f                         /* CTI          Group           */
875          sub            %o2, %g2, %o2                   /* IEU0         Group           */
876 1:      ldub            [%o1 - 1], %g5                  /* Load         Group           */
877         sub             %o1, 1, %o1                     /* IEU0                         */
878         sub             %o0, 1, %o0                     /* IEU1                         */
879         subcc           %g2, 1, %g2                     /* IEU1         Group           */
880         bne,pt          %xcc, 1b                        /* CTI                          */
881          stb            %g5, [%o0]                      /* Store                        */
882 2:      andn            %o2, 7, %g5                     /* IEU0         Group           */
883         and             %o2, 7, %o2                     /* IEU1                         */
884         fmovd           %f0, %f2                        /* FPU                          */
885         alignaddr       %o1, %g0, %g1                   /* GRU          Group           */
886         ldd             [%g1], %f4                      /* Load         Group           */
887 1:      ldd             [%g1 - 8], %f6                  /* Load         Group           */
888         sub             %g1, 8, %g1                     /* IEU0         Group           */
889         subcc           %g5, 8, %g5                     /* IEU1                         */
890         faligndata      %f6, %f4, %f0                   /* GRU          Group           */
891         std             %f0, [%o0 - 8]                  /* Store                        */
892         sub             %o1, 8, %o1                     /* IEU0         Group           */
893         be,pn           %xcc, 233f                      /* CTI                          */
894          sub            %o0, 8, %o0                     /* IEU1                         */
895         ldd             [%g1 - 8], %f4                  /* Load         Group           */
896         sub             %g1, 8, %g1                     /* IEU0                         */
897         subcc           %g5, 8, %g5                     /* IEU1                         */
898         faligndata      %f4, %f6, %f0                   /* GRU          Group           */
899         std             %f0, [%o0 - 8]                  /* Store                        */
900         sub             %o1, 8, %o1                     /* IEU0                         */
901         bne,pn          %xcc, 1b                        /* CTI          Group           */
902          sub            %o0, 8, %o0                     /* IEU0                         */
903 233:    brz,pn          %o2, 234f                       /* CTI          Group           */
904          nop                                            /* IEU0                         */
905 237:    ldub            [%o1 - 1], %g5                  /* LOAD                         */
906         sub             %o1, 1, %o1                     /* IEU0                         */
907         sub             %o0, 1, %o0                     /* IEU1                         */
908         subcc           %o2, 1, %o2                     /* IEU1                         */
909         bne,pt          %xcc, 237b                      /* CTI                          */
910          stb            %g5, [%o0]                      /* Store        Group           */
911 234:    wr              %g0, FPRS_FEF, %fprs
912         retl
913          mov            %g4, %o0
914 END(memmove)
/* Additional names for memcpy, emitted only when USE_BPR is defined.
   At the top of this file USE_BPR is defined inside the `#ifndef XCC'
   block, i.e. only when nothing defined XCC before that point.
   NOTE(review): weak_alias is not defined in this file; presumably it
   makes __align_cpy_1 and __align_cpy_2 weak aliases of memcpy -- confirm
   against the header that provides it.  */
916 #ifdef USE_BPR
917 weak_alias(memcpy, __align_cpy_1)
918 weak_alias(memcpy, __align_cpy_2)
919 #endif
/* NOTE(review): libc_hidden_builtin_def is likewise defined outside this
   file; presumably it emits the library-internal (hidden) definitions of
   memcpy and memmove -- confirm.  These two lines are outside the
   USE_BPR conditional, so they are emitted in either configuration.  */
920 libc_hidden_builtin_def (memcpy)
921 libc_hidden_builtin_def (memmove)