[glibc/nacl-glibc.git] sysdeps/sparc/sparc64/memcpy.S
/* Copy SIZE bytes from SRC to DEST.
   For UltraSPARC.
   Copyright (C) 1996, 97, 98, 99, 2003 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by David S. Miller (davem@caip.rutgers.edu) and
                  Jakub Jelinek (jakub@redhat.com).

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */
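
/* Overview: a zero length returns immediately; lengths of 15 bytes or less
   go through the byte copier at label 208; lengths of at least 64*6 bytes
   take the VIS block-move path at label 200 (__memcpy_large), which streams
   64-byte blocks with ldda/stda and faligndata; everything in between is
   copied with the integer MOVE_*CHUNK macros, or through the short
   faligndata loop at label 212 when source and destination cannot be
   mutually word-aligned.  bcopy and memmove reuse these paths and switch to
   a descending copy when a forward copy would overwrite source bytes that
   have not been read yet.  */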

#include <sysdep.h>
#include <asm/asi.h>
#ifndef XCC
#define USE_BPR
        .register       %g2, #scratch
        .register       %g3, #scratch
        .register       %g6, #scratch
#define XCC     xcc
#endif
#define FPRS_FEF        4

#define FREG_FROB(f1, f2, f3, f4, f5, f6, f7, f8, f9)           \
        faligndata      %f1, %f2, %f48;                         \
        faligndata      %f2, %f3, %f50;                         \
        faligndata      %f3, %f4, %f52;                         \
        faligndata      %f4, %f5, %f54;                         \
        faligndata      %f5, %f6, %f56;                         \
        faligndata      %f6, %f7, %f58;                         \
        faligndata      %f7, %f8, %f60;                         \
        faligndata      %f8, %f9, %f62;

#define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, len, jmptgt)    \
        ldda            [%src] %asi, %fdest;                    \
        add             %src, 0x40, %src;                       \
        add             %dest, 0x40, %dest;                     \
        subcc           %len, 0x40, %len;                       \
        be,pn           %xcc, jmptgt;                           \
         stda           %fsrc, [%dest - 0x40] %asi;

#define LOOP_CHUNK1(src, dest, len, branch_dest)                \
        MAIN_LOOP_CHUNK(src, dest, f0,  f48, len, branch_dest)
#define LOOP_CHUNK2(src, dest, len, branch_dest)                \
        MAIN_LOOP_CHUNK(src, dest, f16, f48, len, branch_dest)
#define LOOP_CHUNK3(src, dest, len, branch_dest)                \
        MAIN_LOOP_CHUNK(src, dest, f32, f48, len, branch_dest)
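
/* FREG_FROB takes nine consecutive double registers holding 72 bytes of raw
   source data and runs faligndata over adjacent pairs, leaving 64
   destination-aligned bytes in %f48-%f62.  MAIN_LOOP_CHUNK block-loads the
   next 64 source bytes into the named register group, advances both
   pointers, and block-stores the just-aligned data from %f48; when the
   length reaches zero it branches out to the matching drain label.  The
   three LOOP_CHUNK variants rotate through the %f0, %f16 and %f32 groups,
   forming a software pipeline: one group is being loaded while another is
   aligned and a third is stored.  */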

#define STORE_SYNC(dest, fsrc)                                  \
        stda            %fsrc, [%dest] %asi;                    \
        add             %dest, 0x40, %dest;

#define STORE_JUMP(dest, fsrc, target)                          \
        stda            %fsrc, [%dest] %asi;                    \
        add             %dest, 0x40, %dest;                     \
        ba,pt           %xcc, target;

#define VISLOOP_PAD nop; nop; nop; nop;                         \
                    nop; nop; nop; nop;                         \
                    nop; nop; nop; nop;                         \
                    nop; nop; nop;

#define FINISH_VISCHUNK(dest, f0, f1, left)                     \
        subcc           %left, 8, %left;                        \
        bl,pn           %xcc, 205f;                             \
         faligndata     %f0, %f1, %f48;                         \
        std             %f48, [%dest];                          \
        add             %dest, 8, %dest;

#define UNEVEN_VISCHUNK(dest, f0, f1, left)                     \
        subcc           %left, 8, %left;                        \
        bl,pn           %xcc, 205f;                             \
         fsrc1          %f0, %f1;                               \
        ba,a,pt         %xcc, 204f;

        /* Macros for non-VIS memcpy code. */
#define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3)         \
        ldx             [%src + offset + 0x00], %t0;            \
        ldx             [%src + offset + 0x08], %t1;            \
        ldx             [%src + offset + 0x10], %t2;            \
        ldx             [%src + offset + 0x18], %t3;            \
        stw             %t0, [%dst + offset + 0x04];            \
        srlx            %t0, 32, %t0;                           \
        stw             %t0, [%dst + offset + 0x00];            \
        stw             %t1, [%dst + offset + 0x0c];            \
        srlx            %t1, 32, %t1;                           \
        stw             %t1, [%dst + offset + 0x08];            \
        stw             %t2, [%dst + offset + 0x14];            \
        srlx            %t2, 32, %t2;                           \
        stw             %t2, [%dst + offset + 0x10];            \
        stw             %t3, [%dst + offset + 0x1c];            \
        srlx            %t3, 32, %t3;                           \
        stw             %t3, [%dst + offset + 0x18];
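
/* MOVE_BIGCHUNK copies 32 bytes with four 8-byte loads, but stores them as
   eight 32-bit words because in this path the destination is only known to
   be 4-byte aligned.  SPARC is big-endian, so the low half of each loaded
   doubleword belongs at offset+4 and the high half (srlx by 32) at
   offset+0.  MOVE_BIGALIGNCHUNK below is the doubleword-aligned variant and
   moves 64 bytes per expansion.  */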

#define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3)    \
        ldx             [%src + offset + 0x00], %t0;            \
        ldx             [%src + offset + 0x08], %t1;            \
        ldx             [%src + offset + 0x10], %t2;            \
        ldx             [%src + offset + 0x18], %t3;            \
        stx             %t0, [%dst + offset + 0x00];            \
        stx             %t1, [%dst + offset + 0x08];            \
        stx             %t2, [%dst + offset + 0x10];            \
        stx             %t3, [%dst + offset + 0x18];            \
        ldx             [%src + offset + 0x20], %t0;            \
        ldx             [%src + offset + 0x28], %t1;            \
        ldx             [%src + offset + 0x30], %t2;            \
        ldx             [%src + offset + 0x38], %t3;            \
        stx             %t0, [%dst + offset + 0x20];            \
        stx             %t1, [%dst + offset + 0x28];            \
        stx             %t2, [%dst + offset + 0x30];            \
        stx             %t3, [%dst + offset + 0x38];

#define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3)        \
        ldx             [%src - offset - 0x10], %t0;            \
        ldx             [%src - offset - 0x08], %t1;            \
        stw             %t0, [%dst - offset - 0x0c];            \
        srlx            %t0, 32, %t2;                           \
        stw             %t2, [%dst - offset - 0x10];            \
        stw             %t1, [%dst - offset - 0x04];            \
        srlx            %t1, 32, %t3;                           \
        stw             %t3, [%dst - offset - 0x08];

#define MOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1)           \
        ldx             [%src - offset - 0x10], %t0;            \
        ldx             [%src - offset - 0x08], %t1;            \
        stx             %t0, [%dst - offset - 0x10];            \
        stx             %t1, [%dst - offset - 0x08];

        /* Macros for non-VIS memmove code. */
#define RMOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3)        \
        ldx             [%src - offset - 0x20], %t0;            \
        ldx             [%src - offset - 0x18], %t1;            \
        ldx             [%src - offset - 0x10], %t2;            \
        ldx             [%src - offset - 0x08], %t3;            \
        stw             %t0, [%dst - offset - 0x1c];            \
        srlx            %t0, 32, %t0;                           \
        stw             %t0, [%dst - offset - 0x20];            \
        stw             %t1, [%dst - offset - 0x14];            \
        srlx            %t1, 32, %t1;                           \
        stw             %t1, [%dst - offset - 0x18];            \
        stw             %t2, [%dst - offset - 0x0c];            \
        srlx            %t2, 32, %t2;                           \
        stw             %t2, [%dst - offset - 0x10];            \
        stw             %t3, [%dst - offset - 0x04];            \
        srlx            %t3, 32, %t3;                           \
        stw             %t3, [%dst - offset - 0x08];

#define RMOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3)   \
        ldx             [%src - offset - 0x20], %t0;            \
        ldx             [%src - offset - 0x18], %t1;            \
        ldx             [%src - offset - 0x10], %t2;            \
        ldx             [%src - offset - 0x08], %t3;            \
        stx             %t0, [%dst - offset - 0x20];            \
        stx             %t1, [%dst - offset - 0x18];            \
        stx             %t2, [%dst - offset - 0x10];            \
        stx             %t3, [%dst - offset - 0x08];            \
        ldx             [%src - offset - 0x40], %t0;            \
        ldx             [%src - offset - 0x38], %t1;            \
        ldx             [%src - offset - 0x30], %t2;            \
        ldx             [%src - offset - 0x28], %t3;            \
        stx             %t0, [%dst - offset - 0x40];            \
        stx             %t1, [%dst - offset - 0x38];            \
        stx             %t2, [%dst - offset - 0x30];            \
        stx             %t3, [%dst - offset - 0x28];

#define RMOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3)       \
        ldx             [%src + offset + 0x00], %t0;            \
        ldx             [%src + offset + 0x08], %t1;            \
        stw             %t0, [%dst + offset + 0x04];            \
        srlx            %t0, 32, %t2;                           \
        stw             %t2, [%dst + offset + 0x00];            \
        stw             %t1, [%dst + offset + 0x0c];            \
        srlx            %t1, 32, %t3;                           \
        stw             %t3, [%dst + offset + 0x08];

#define RMOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1)          \
        ldx             [%src + offset + 0x00], %t0;            \
        ldx             [%src + offset + 0x08], %t1;            \
        stx             %t0, [%dst + offset + 0x00];            \
        stx             %t1, [%dst + offset + 0x08];

        .text
        .align          32

ENTRY(bcopy)
        sub             %o1, %o0, %o4                   /* IEU0         Group           */
        mov             %o0, %g3                        /* IEU1                         */
        cmp             %o4, %o2                        /* IEU1         Group           */
        mov             %o1, %o0                        /* IEU0                         */
        bgeu,pt         %XCC, 210f                      /* CTI                          */
         mov            %g3, %o1                        /* IEU0         Group           */
#ifndef USE_BPR
        srl             %o2, 0, %o2                     /* IEU1                         */
#endif
        brnz,pn         %o2, 220f                       /* CTI          Group           */
         add            %o0, %o2, %o0                   /* IEU0                         */
        retl
         nop
END(bcopy)
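
/* bcopy swaps the argument order and then reuses the memcpy/memmove bodies.
   Roughly, in C (an illustrative sketch only; bcopy_sketch is a made-up
   name, and memcpy/memmove stand in for the jumps to labels 210 and 220):

       #include <stdint.h>
       #include <string.h>

       void bcopy_sketch (const void *src, void *dst, size_t n)
       {
         if ((uintptr_t) dst - (uintptr_t) src >= n)  // unsigned compare: forward copy is safe
           memcpy (dst, src, n);                      // path taken via label 210
         else if (n != 0)
           memmove (dst, src, n);                     // descending path via label 220
       }
*/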

        .align          32
ENTRY(__memcpy_large)
200:    be,pt           %xcc, 201f                      /* CTI                          */
         andcc          %o0, 0x38, %g5                  /* IEU1         Group           */
        mov             8, %g1                          /* IEU0                         */
        sub             %g1, %g2, %g2                   /* IEU0         Group           */
        andcc           %o0, 1, %g0                     /* IEU1                         */
        be,pt           %icc, 2f                        /* CTI                          */
         sub            %o2, %g2, %o2                   /* IEU0         Group           */
1:      ldub            [%o1], %o5                      /* Load         Group           */
        add             %o1, 1, %o1                     /* IEU0                         */
        add             %o0, 1, %o0                     /* IEU1                         */
        subcc           %g2, 1, %g2                     /* IEU1         Group           */
        be,pn           %xcc, 3f                        /* CTI                          */
         stb            %o5, [%o0 - 1]                  /* Store                        */
2:      ldub            [%o1], %o5                      /* Load         Group           */
        add             %o0, 2, %o0                     /* IEU0                         */
        ldub            [%o1 + 1], %g3                  /* Load         Group           */
        subcc           %g2, 2, %g2                     /* IEU1         Group           */
        stb             %o5, [%o0 - 2]                  /* Store                        */
        add             %o1, 2, %o1                     /* IEU0                         */
        bne,pt          %xcc, 2b                        /* CTI          Group           */
         stb            %g3, [%o0 - 1]                  /* Store                        */
3:      andcc           %o0, 0x38, %g5                  /* IEU1         Group           */
201:    be,pt           %icc, 202f                      /* CTI                          */
         mov            64, %g1                         /* IEU0                         */
        fmovd           %f0, %f2                        /* FPU                          */
        sub             %g1, %g5, %g5                   /* IEU0         Group           */
        alignaddr       %o1, %g0, %g1                   /* GRU          Group           */
        ldd             [%g1], %f4                      /* Load         Group           */
        sub             %o2, %g5, %o2                   /* IEU0                         */
1:      ldd             [%g1 + 0x8], %f6                /* Load         Group           */
        add             %g1, 0x8, %g1                   /* IEU0         Group           */
        subcc           %g5, 8, %g5                     /* IEU1                         */
        faligndata      %f4, %f6, %f0                   /* GRU          Group           */
        std             %f0, [%o0]                      /* Store                        */
        add             %o1, 8, %o1                     /* IEU0         Group           */
        be,pn           %xcc, 202f                      /* CTI                          */
         add            %o0, 8, %o0                     /* IEU1                         */
        ldd             [%g1 + 0x8], %f4                /* Load         Group           */
        add             %g1, 8, %g1                     /* IEU0                         */
        subcc           %g5, 8, %g5                     /* IEU1                         */
        faligndata      %f6, %f4, %f0                   /* GRU          Group           */
        std             %f0, [%o0]                      /* Store                        */
        add             %o1, 8, %o1                     /* IEU0                         */
        bne,pt          %xcc, 1b                        /* CTI          Group           */
         add            %o0, 8, %o0                     /* IEU0                         */
202:    membar    #LoadStore | #StoreStore | #StoreLoad /* LSU          Group           */
        wr              %g0, ASI_BLK_P, %asi            /* LSU          Group           */
        subcc           %o2, 0x40, %g6                  /* IEU1         Group           */
        mov             %o1, %g1                        /* IEU0                         */
        andncc          %g6, (0x40 - 1), %g6            /* IEU1         Group           */
        srl             %g1, 3, %g2                     /* IEU0                         */
        sub             %o2, %g6, %g3                   /* IEU0         Group           */
        andn            %o1, (0x40 - 1), %o1            /* IEU1                         */
        and             %g2, 7, %g2                     /* IEU0         Group           */
        andncc          %g3, 0x7, %g3                   /* IEU1                         */
        fmovd           %f0, %f2                        /* FPU                          */
        sub             %g3, 0x10, %g3                  /* IEU0         Group           */
        sub             %o2, %g6, %o2                   /* IEU1                         */
        alignaddr       %g1, %g0, %g0                   /* GRU          Group           */
        add             %g1, %g6, %g1                   /* IEU0         Group           */
        subcc           %o2, %g3, %o2                   /* IEU1                         */
        ldda            [%o1 + 0x00] %asi, %f0          /* LSU          Group           */
        add             %g1, %g3, %g1                   /* IEU0                         */
        ldda            [%o1 + 0x40] %asi, %f16         /* LSU          Group           */
        sub             %g6, 0x80, %g6                  /* IEU0                         */
        ldda            [%o1 + 0x80] %asi, %f32         /* LSU          Group           */
                                                        /* Clk1         Group 8-(       */
                                                        /* Clk2         Group 8-(       */
                                                        /* Clk3         Group 8-(       */
                                                        /* Clk4         Group 8-(       */
203:    rd              %pc, %g5                        /* PDU          Group 8-(       */
        addcc           %g5, %lo(300f - 203b), %g5      /* IEU1         Group           */
        sll             %g2, 9, %g2                     /* IEU0                         */
        jmpl            %g5 + %g2, %g0                  /* CTI          Group brk forced*/
         addcc          %o1, 0xc0, %o1                  /* IEU1         Group           */
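
        /* %g2 holds bits 5:3 of the original source address, i.e. which of
           the eight doublewords within its 64-byte block the source starts
           at.  Each loop body below (300, 310, ..., 370) is padded to
           exactly 512 bytes, so "sll %g2, 9" added to the address of 300f
           lands on the variant whose faligndata chain starts at the right
           register pair for that source phase.  The delay slot advances %o1
           past the three 64-byte blocks already loaded above.  */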

        .align          512             /* OK, here comes the fun part... */
300:    FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)  LOOP_CHUNK1(o1, o0, g6, 301f)
        FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)  LOOP_CHUNK2(o1, o0, g6, 302f)
        FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)   LOOP_CHUNK3(o1, o0, g6, 303f)
        b,pt            %xcc, 300b+4; faligndata %f0, %f2, %f48
301:    FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)  STORE_SYNC(o0, f48) membar #Sync
        FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)   STORE_JUMP(o0, f48, 400f) membar #Sync
302:    FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)   STORE_SYNC(o0, f48) membar #Sync
        FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)  STORE_JUMP(o0, f48, 416f) membar #Sync
303:    FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)  STORE_SYNC(o0, f48) membar #Sync
        FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)  STORE_JUMP(o0, f48, 432f) membar #Sync
        VISLOOP_PAD
310:    FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)  LOOP_CHUNK1(o1, o0, g6, 311f)
        FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)  LOOP_CHUNK2(o1, o0, g6, 312f)
        FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)   LOOP_CHUNK3(o1, o0, g6, 313f)
        b,pt            %xcc, 310b+4; faligndata %f2, %f4, %f48
311:    FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)  STORE_SYNC(o0, f48) membar #Sync
        FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)   STORE_JUMP(o0, f48, 402f) membar #Sync
312:    FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)   STORE_SYNC(o0, f48) membar #Sync
        FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)  STORE_JUMP(o0, f48, 418f) membar #Sync
313:    FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)  STORE_SYNC(o0, f48) membar #Sync
        FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)  STORE_JUMP(o0, f48, 434f) membar #Sync
        VISLOOP_PAD
320:    FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)  LOOP_CHUNK1(o1, o0, g6, 321f)
        FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)  LOOP_CHUNK2(o1, o0, g6, 322f)
        FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)   LOOP_CHUNK3(o1, o0, g6, 323f)
        b,pt            %xcc, 320b+4; faligndata %f4, %f6, %f48
321:    FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)  STORE_SYNC(o0, f48) membar #Sync
        FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)   STORE_JUMP(o0, f48, 404f) membar #Sync
322:    FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)   STORE_SYNC(o0, f48) membar #Sync
        FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)  STORE_JUMP(o0, f48, 420f) membar #Sync
323:    FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)  STORE_SYNC(o0, f48) membar #Sync
        FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)  STORE_JUMP(o0, f48, 436f) membar #Sync
        VISLOOP_PAD
330:    FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)  LOOP_CHUNK1(o1, o0, g6, 331f)
        FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)  LOOP_CHUNK2(o1, o0, g6, 332f)
        FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)   LOOP_CHUNK3(o1, o0, g6, 333f)
        b,pt            %xcc, 330b+4; faligndata %f6, %f8, %f48
331:    FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)  STORE_SYNC(o0, f48) membar #Sync
        FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)   STORE_JUMP(o0, f48, 406f) membar #Sync
332:    FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)   STORE_SYNC(o0, f48) membar #Sync
        FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)  STORE_JUMP(o0, f48, 422f) membar #Sync
333:    FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)  STORE_SYNC(o0, f48) membar #Sync
        FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)  STORE_JUMP(o0, f48, 438f) membar #Sync
        VISLOOP_PAD
340:    FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)  LOOP_CHUNK1(o1, o0, g6, 341f)
        FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)  LOOP_CHUNK2(o1, o0, g6, 342f)
        FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)   LOOP_CHUNK3(o1, o0, g6, 343f)
        b,pt            %xcc, 340b+4; faligndata %f8, %f10, %f48
341:    FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)  STORE_SYNC(o0, f48) membar #Sync
        FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)   STORE_JUMP(o0, f48, 408f) membar #Sync
342:    FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)   STORE_SYNC(o0, f48) membar #Sync
        FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)  STORE_JUMP(o0, f48, 424f) membar #Sync
343:    FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)  STORE_SYNC(o0, f48) membar #Sync
        FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)  STORE_JUMP(o0, f48, 440f) membar #Sync
        VISLOOP_PAD
350:    FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)  LOOP_CHUNK1(o1, o0, g6, 351f)
        FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)  LOOP_CHUNK2(o1, o0, g6, 352f)
        FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)  LOOP_CHUNK3(o1, o0, g6, 353f)
        b,pt            %xcc, 350b+4; faligndata %f10, %f12, %f48
351:    FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)  STORE_SYNC(o0, f48) membar #Sync
        FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)  STORE_JUMP(o0, f48, 410f) membar #Sync
352:    FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)  STORE_SYNC(o0, f48) membar #Sync
        FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)  STORE_JUMP(o0, f48, 426f) membar #Sync
353:    FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)  STORE_SYNC(o0, f48) membar #Sync
        FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)  STORE_JUMP(o0, f48, 442f) membar #Sync
        VISLOOP_PAD
360:    FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)  LOOP_CHUNK1(o1, o0, g6, 361f)
        FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)  LOOP_CHUNK2(o1, o0, g6, 362f)
        FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)  LOOP_CHUNK3(o1, o0, g6, 363f)
        b,pt            %xcc, 360b+4; faligndata %f12, %f14, %f48
361:    FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)  STORE_SYNC(o0, f48) membar #Sync
        FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)  STORE_JUMP(o0, f48, 412f) membar #Sync
362:    FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)  STORE_SYNC(o0, f48) membar #Sync
        FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)  STORE_JUMP(o0, f48, 428f) membar #Sync
363:    FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)  STORE_SYNC(o0, f48) membar #Sync
        FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)  STORE_JUMP(o0, f48, 444f) membar #Sync
        VISLOOP_PAD
370:    FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)  LOOP_CHUNK1(o1, o0, g6, 371f)
        FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)  LOOP_CHUNK2(o1, o0, g6, 372f)
        FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)  LOOP_CHUNK3(o1, o0, g6, 373f)
        b,pt            %xcc, 370b+4; faligndata %f14, %f16, %f48
371:    FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)  STORE_SYNC(o0, f48) membar #Sync
        FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)  STORE_JUMP(o0, f48, 414f) membar #Sync
372:    FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)  STORE_SYNC(o0, f48) membar #Sync
        FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)  STORE_JUMP(o0, f48, 430f) membar #Sync
373:    FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)  STORE_SYNC(o0, f48) membar #Sync
        FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)  STORE_JUMP(o0, f48, 446f) membar #Sync
        VISLOOP_PAD
400:    FINISH_VISCHUNK(o0, f0,  f2,  g3)
402:    FINISH_VISCHUNK(o0, f2,  f4,  g3)
404:    FINISH_VISCHUNK(o0, f4,  f6,  g3)
406:    FINISH_VISCHUNK(o0, f6,  f8,  g3)
408:    FINISH_VISCHUNK(o0, f8,  f10, g3)
410:    FINISH_VISCHUNK(o0, f10, f12, g3)
412:    FINISH_VISCHUNK(o0, f12, f14, g3)
414:    UNEVEN_VISCHUNK(o0, f14, f0,  g3)
416:    FINISH_VISCHUNK(o0, f16, f18, g3)
418:    FINISH_VISCHUNK(o0, f18, f20, g3)
420:    FINISH_VISCHUNK(o0, f20, f22, g3)
422:    FINISH_VISCHUNK(o0, f22, f24, g3)
424:    FINISH_VISCHUNK(o0, f24, f26, g3)
426:    FINISH_VISCHUNK(o0, f26, f28, g3)
428:    FINISH_VISCHUNK(o0, f28, f30, g3)
430:    UNEVEN_VISCHUNK(o0, f30, f0,  g3)
432:    FINISH_VISCHUNK(o0, f32, f34, g3)
434:    FINISH_VISCHUNK(o0, f34, f36, g3)
436:    FINISH_VISCHUNK(o0, f36, f38, g3)
438:    FINISH_VISCHUNK(o0, f38, f40, g3)
440:    FINISH_VISCHUNK(o0, f40, f42, g3)
442:    FINISH_VISCHUNK(o0, f42, f44, g3)
444:    FINISH_VISCHUNK(o0, f44, f46, g3)
446:    UNEVEN_VISCHUNK(o0, f46, f0,  g3)
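
        /* The labels above drain whatever aligned data is still sitting in
           the floating-point registers when the block loop ends: each
           FINISH_VISCHUNK emits one more destination-aligned doubleword and
           falls through to the next, until fewer than 8 bytes remain and it
           branches to 205.  UNEVEN_VISCHUNK handles the spot where the next
           doubleword is not available in registers; it moves the last
           source register into %f0 and drops into the ldd/faligndata loop
           at 204.  The final 0-7 bytes are copied a byte at a time at 206.  */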
204:    ldd             [%o1], %f2                      /* Load         Group           */
        add             %o1, 8, %o1                     /* IEU0                         */
        subcc           %g3, 8, %g3                     /* IEU1                         */
        faligndata      %f0, %f2, %f8                   /* GRU          Group           */
        std             %f8, [%o0]                      /* Store                        */
        bl,pn           %xcc, 205f                      /* CTI                          */
         add            %o0, 8, %o0                     /* IEU0         Group           */
        ldd             [%o1], %f0                      /* Load         Group           */
        add             %o1, 8, %o1                     /* IEU0                         */
        subcc           %g3, 8, %g3                     /* IEU1                         */
        faligndata      %f2, %f0, %f8                   /* GRU          Group           */
        std             %f8, [%o0]                      /* Store                        */
        bge,pt          %xcc, 204b                      /* CTI                          */
         add            %o0, 8, %o0                     /* IEU0         Group           */
205:    brz,pt          %o2, 207f                       /* CTI          Group           */
         mov            %g1, %o1                        /* IEU0                         */
206:    ldub            [%o1], %g5                      /* LOAD                         */
        add             %o1, 1, %o1                     /* IEU0                         */
        add             %o0, 1, %o0                     /* IEU1                         */
        subcc           %o2, 1, %o2                     /* IEU1                         */
        bne,pt          %xcc, 206b                      /* CTI                          */
         stb            %g5, [%o0 - 1]                  /* Store        Group           */
207:    membar          #StoreLoad | #StoreStore        /* LSU          Group           */
        wr              %g0, FPRS_FEF, %fprs
        retl
         mov            %g4, %o0

208:    andcc           %o2, 1, %g0                     /* IEU1         Group           */
        be,pt           %icc, 2f+4                      /* CTI                          */
1:       ldub           [%o1], %g5                      /* LOAD         Group           */
        add             %o1, 1, %o1                     /* IEU0                         */
        add             %o0, 1, %o0                     /* IEU1                         */
        subcc           %o2, 1, %o2                     /* IEU1         Group           */
        be,pn           %xcc, 209f                      /* CTI                          */
         stb            %g5, [%o0 - 1]                  /* Store                        */
2:      ldub            [%o1], %g5                      /* LOAD         Group           */
        add             %o0, 2, %o0                     /* IEU0                         */
        ldub            [%o1 + 1], %o5                  /* LOAD         Group           */
        add             %o1, 2, %o1                     /* IEU0                         */
        subcc           %o2, 2, %o2                     /* IEU1         Group           */
        stb             %g5, [%o0 - 2]                  /* Store                        */
        bne,pt          %xcc, 2b                        /* CTI                          */
         stb            %o5, [%o0 - 1]                  /* Store                        */
209:    retl
         mov            %g4, %o0
END(__memcpy_large)

#ifdef USE_BPR

        /* void *__align_cpy_4(void *dest, void *src, size_t n)
         * SPARC v9 SYSV ABI
         * Like memcpy, but results are undefined if (!n || ((dest | src | n) & 3))
         */
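
        /* The __align_cpy_N entry points are the aligned block-copy helpers
           described by the SPARC v9 SysV ABI; they are intended for callers
           (typically compilers) that can prove both pointers and the length
           are nonzero multiples of N.  An illustrative C caller (hypothetical
           names, not part of this file):

               #include <stddef.h>
               #include <stdint.h>

               extern void *__align_cpy_4 (void *dest, void *src, size_t n);

               void copy_words (uint32_t *dst, uint32_t *src, size_t nwords)
               {
                 if (nwords != 0)
                   __align_cpy_4 (dst, src, nwords * sizeof (uint32_t));
               }

           __align_cpy_8 and __align_cpy_16 below follow the same contract
           with 8- and 16-byte alignment.  */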

        .align          32
ENTRY(__align_cpy_4)
        mov             %o0, %g4                        /* IEU0         Group           */
        cmp             %o2, 15                         /* IEU1                         */
        bleu,pn         %xcc, 208b                      /* CTI                          */
         cmp            %o2, (64 * 6)                   /* IEU1         Group           */
        bgeu,pn         %xcc, 200b                      /* CTI                          */
         andcc          %o0, 7, %g2                     /* IEU1         Group           */
        ba,pt           %xcc, 216f                      /* CTI                          */
         andcc          %o1, 4, %g0                     /* IEU1         Group           */
END(__align_cpy_4)

        /* void *__align_cpy_8(void *dest, void *src, size_t n)
         * SPARC v9 SYSV ABI
         * Like memcpy, but results are undefined if (!n || ((dest | src | n) & 7))
         */
        .align          32
ENTRY(__align_cpy_8)
        mov             %o0, %g4                        /* IEU0         Group           */
        cmp             %o2, 15                         /* IEU1                         */
        bleu,pn         %xcc, 208b                      /* CTI                          */
         cmp            %o2, (64 * 6)                   /* IEU1         Group           */
        bgeu,pn         %xcc, 201b                      /* CTI                          */
         andcc          %o0, 0x38, %g5                  /* IEU1         Group           */
        andcc           %o2, -128, %g6                  /* IEU1         Group           */
        bne,a,pt        %xcc, 82f + 4                   /* CTI                          */
         ldx            [%o1], %g1                      /* Load                         */
        ba,pt           %xcc, 41f                       /* CTI          Group           */
         andcc          %o2, 0x70, %g6                  /* IEU1                         */
END(__align_cpy_8)

        /* void *__align_cpy_16(void *dest, void *src, size_t n)
         * SPARC v9 SYSV ABI
         * Like memcpy, but results are undefined if (!n || ((dest | src | n) & 15))
         */
        .align          32
ENTRY(__align_cpy_16)
        mov             %o0, %g4                        /* IEU0         Group           */
        cmp             %o2, (64 * 6)                   /* IEU1                         */
        bgeu,pn         %xcc, 201b                      /* CTI                          */
         andcc          %o0, 0x38, %g5                  /* IEU1         Group           */
        andcc           %o2, -128, %g6                  /* IEU1         Group           */
        bne,a,pt        %xcc, 82f + 4                   /* CTI                          */
         ldx            [%o1], %g1                      /* Load                         */
        ba,pt           %xcc, 41f                       /* CTI          Group           */
         andcc          %o2, 0x70, %g6                  /* IEU1                         */
END(__align_cpy_16)

#endif

        .align          32
ENTRY(memcpy)
210:
#ifndef USE_BPR
        srl             %o2, 0, %o2                     /* IEU1         Group           */
#endif
        brz,pn          %o2, 209b                       /* CTI          Group           */
         mov            %o0, %g4                        /* IEU0                         */
218:    cmp             %o2, 15                         /* IEU1         Group           */
        bleu,pn         %xcc, 208b                      /* CTI                          */
         cmp            %o2, (64 * 6)                   /* IEU1         Group           */
        bgeu,pn         %xcc, 200b                      /* CTI                          */
         andcc          %o0, 7, %g2                     /* IEU1         Group           */
        sub             %o0, %o1, %g5                   /* IEU0                         */
        andcc           %g5, 3, %o5                     /* IEU1         Group           */
        bne,pn          %xcc, 212f                      /* CTI                          */
         andcc          %o1, 3, %g0                     /* IEU1         Group           */
        be,a,pt         %xcc, 216f                      /* CTI                          */
         andcc          %o1, 4, %g0                     /* IEU1         Group           */
        andcc           %o1, 1, %g0                     /* IEU1         Group           */
        be,pn           %xcc, 4f                        /* CTI                          */
         andcc          %o1, 2, %g0                     /* IEU1         Group           */
        ldub            [%o1], %g2                      /* Load         Group           */
        add             %o1, 1, %o1                     /* IEU0                         */
        add             %o0, 1, %o0                     /* IEU1                         */
        sub             %o2, 1, %o2                     /* IEU0         Group           */
        bne,pn          %xcc, 5f                        /* CTI          Group           */
         stb            %g2, [%o0 - 1]                  /* Store                        */
4:      lduh            [%o1], %g2                      /* Load         Group           */
        add             %o1, 2, %o1                     /* IEU0                         */
        add             %o0, 2, %o0                     /* IEU1                         */
        sub             %o2, 2, %o2                     /* IEU0                         */
        sth             %g2, [%o0 - 2]                  /* Store        Group + bubble  */
5:      andcc           %o1, 4, %g0                     /* IEU1                         */
216:    be,a,pn         %xcc, 2f                        /* CTI                          */
         andcc          %o2, -128, %g6                  /* IEU1         Group           */
        lduw            [%o1], %g5                      /* Load         Group           */
        add             %o1, 4, %o1                     /* IEU0                         */
        add             %o0, 4, %o0                     /* IEU1                         */
        sub             %o2, 4, %o2                     /* IEU0         Group           */
        stw             %g5, [%o0 - 4]                  /* Store                        */
        andcc           %o2, -128, %g6                  /* IEU1         Group           */
2:      be,pn           %xcc, 215f                      /* CTI                          */
         andcc          %o0, 4, %g0                     /* IEU1         Group           */
        be,pn           %xcc, 82f + 4                   /* CTI          Group           */
5:      MOVE_BIGCHUNK(o1, o0, 0x00, g1, g3, g5, o5)
        MOVE_BIGCHUNK(o1, o0, 0x20, g1, g3, g5, o5)
        MOVE_BIGCHUNK(o1, o0, 0x40, g1, g3, g5, o5)
        MOVE_BIGCHUNK(o1, o0, 0x60, g1, g3, g5, o5)
35:     subcc           %g6, 128, %g6                   /* IEU1         Group           */
        add             %o1, 128, %o1                   /* IEU0                         */
        bne,pt          %xcc, 5b                        /* CTI                          */
         add            %o0, 128, %o0                   /* IEU0         Group           */
215:    andcc           %o2, 0x70, %g6                  /* IEU1         Group           */
41:     be,pn           %xcc, 80f                       /* CTI                          */
         andcc          %o2, 8, %g0                     /* IEU1         Group           */
                                                        /* Clk1 8-(                     */
                                                        /* Clk2 8-(                     */
                                                        /* Clk3 8-(                     */
                                                        /* Clk4 8-(                     */
79:     rd              %pc, %o5                        /* PDU          Group           */
        sll             %g6, 1, %g5                     /* IEU0         Group           */
        add             %o1, %g6, %o1                   /* IEU1                         */
        sub             %o5, %g5, %o5                   /* IEU0         Group           */
        jmpl            %o5 + %lo(80f - 79b), %g0       /* CTI          Group brk forced*/
         add            %o0, %g6, %o0                   /* IEU0         Group           */
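
        /* Computed jump into the MOVE_LASTCHUNK table below: %g6 holds the
           remaining 16-byte multiples (%o2 & 0x70).  Each MOVE_LASTCHUNK
           expands to 8 instructions (32 bytes of code) and copies 16 bytes,
           so jumping to 80f minus twice %g6 executes exactly the last
           %g6/16 chunks.  %o1 and %o0 are advanced by %g6 first because the
           chunks address their data with negative offsets.  The analogous
           dispatch at 83 below subtracts %g6 rather than 2*%g6, since
           MOVE_LASTALIGNCHUNK is only 4 instructions per 16 bytes.  */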
36:     MOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g5, o5)
        MOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g5, o5)
        MOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g5, o5)
        MOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g5, o5)
        MOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g5, o5)
        MOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g5, o5)
        MOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g5, o5)
80:     be,pt           %xcc, 81f                       /* CTI                          */
         andcc          %o2, 4, %g0                     /* IEU1                         */
        ldx             [%o1], %g2                      /* Load         Group           */
        add             %o0, 8, %o0                     /* IEU0                         */
        stw             %g2, [%o0 - 0x4]                /* Store        Group           */
        add             %o1, 8, %o1                     /* IEU1                         */
        srlx            %g2, 32, %g2                    /* IEU0         Group           */
        stw             %g2, [%o0 - 0x8]                /* Store                        */
81:     be,pt           %xcc, 1f                        /* CTI                          */
         andcc          %o2, 2, %g0                     /* IEU1         Group           */
        lduw            [%o1], %g2                      /* Load         Group           */
        add             %o1, 4, %o1                     /* IEU0                         */
        stw             %g2, [%o0]                      /* Store        Group           */
        add             %o0, 4, %o0                     /* IEU0                         */
1:      be,pt           %xcc, 1f                        /* CTI                          */
         andcc          %o2, 1, %g0                     /* IEU1         Group           */
        lduh            [%o1], %g2                      /* Load         Group           */
        add             %o1, 2, %o1                     /* IEU0                         */
        sth             %g2, [%o0]                      /* Store        Group           */
        add             %o0, 2, %o0                     /* IEU0                         */
1:      be,pt           %xcc, 211f                      /* CTI                          */
         nop                                            /* IEU1                         */
        ldub            [%o1], %g2                      /* Load         Group           */
        stb             %g2, [%o0]                      /* Store        Group + bubble  */
211:    retl
         mov            %g4, %o0

82:     MOVE_BIGALIGNCHUNK(o1, o0, 0x00, g1, g3, g5, o5)
        MOVE_BIGALIGNCHUNK(o1, o0, 0x40, g1, g3, g5, o5)
37:     subcc           %g6, 128, %g6                   /* IEU1         Group           */
        add             %o1, 128, %o1                   /* IEU0                         */
        bne,pt          %xcc, 82b                       /* CTI                          */
         add            %o0, 128, %o0                   /* IEU0         Group           */
        andcc           %o2, 0x70, %g6                  /* IEU1                         */
        be,pn           %xcc, 84f                       /* CTI                          */
         andcc          %o2, 8, %g0                     /* IEU1         Group           */
                                                        /* Clk1 8-(                     */
                                                        /* Clk2 8-(                     */
                                                        /* Clk3 8-(                     */
                                                        /* Clk4 8-(                     */
83:     rd              %pc, %o5                        /* PDU          Group           */
        add             %o1, %g6, %o1                   /* IEU0         Group           */
        sub             %o5, %g6, %o5                   /* IEU1                         */
        jmpl            %o5 + %lo(84f - 83b), %g0       /* CTI          Group brk forced*/
         add            %o0, %g6, %o0                   /* IEU0         Group           */
38:     MOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2, g3)
        MOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2, g3)
        MOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2, g3)
        MOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2, g3)
        MOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2, g3)
        MOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2, g3)
        MOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2, g3)
84:     be,pt           %xcc, 85f                       /* CTI          Group           */
         andcc          %o2, 4, %g0                     /* IEU1                         */
        ldx             [%o1], %g2                      /* Load         Group           */
        add             %o0, 8, %o0                     /* IEU0                         */
        add             %o1, 8, %o1                     /* IEU0         Group           */
        stx             %g2, [%o0 - 0x8]                /* Store                        */
85:     be,pt           %xcc, 1f                        /* CTI                          */
         andcc          %o2, 2, %g0                     /* IEU1         Group           */
        lduw            [%o1], %g2                      /* Load         Group           */
        add             %o0, 4, %o0                     /* IEU0                         */
        add             %o1, 4, %o1                     /* IEU0         Group           */
        stw             %g2, [%o0 - 0x4]                /* Store                        */
1:      be,pt           %xcc, 1f                        /* CTI                          */
         andcc          %o2, 1, %g0                     /* IEU1         Group           */
        lduh            [%o1], %g2                      /* Load         Group           */
        add             %o0, 2, %o0                     /* IEU0                         */
        add             %o1, 2, %o1                     /* IEU0         Group           */
        sth             %g2, [%o0 - 0x2]                /* Store                        */
1:      be,pt           %xcc, 1f                        /* CTI                          */
         nop                                            /* IEU0         Group           */
        ldub            [%o1], %g2                      /* Load         Group           */
        stb             %g2, [%o0]                      /* Store        Group + bubble  */
1:      retl
         mov            %g4, %o0

212:    brz,pt          %g2, 2f                         /* CTI          Group           */
         mov            8, %g1                          /* IEU0                         */
        sub             %g1, %g2, %g2                   /* IEU0         Group           */
        sub             %o2, %g2, %o2                   /* IEU0         Group           */
1:      ldub            [%o1], %g5                      /* Load         Group           */
        add             %o1, 1, %o1                     /* IEU0                         */
        add             %o0, 1, %o0                     /* IEU1                         */
        subcc           %g2, 1, %g2                     /* IEU1         Group           */
        bne,pt          %xcc, 1b                        /* CTI                          */
         stb            %g5, [%o0 - 1]                  /* Store                        */
2:      andn            %o2, 7, %g5                     /* IEU0         Group           */
        and             %o2, 7, %o2                     /* IEU1                         */
        fmovd           %f0, %f2                        /* FPU                          */
        alignaddr       %o1, %g0, %g1                   /* GRU          Group           */
        ldd             [%g1], %f4                      /* Load         Group           */
1:      ldd             [%g1 + 0x8], %f6                /* Load         Group           */
        add             %g1, 0x8, %g1                   /* IEU0         Group           */
        subcc           %g5, 8, %g5                     /* IEU1                         */
        faligndata      %f4, %f6, %f0                   /* GRU          Group           */
        std             %f0, [%o0]                      /* Store                        */
        add             %o1, 8, %o1                     /* IEU0         Group           */
        be,pn           %xcc, 213f                      /* CTI                          */
         add            %o0, 8, %o0                     /* IEU1                         */
        ldd             [%g1 + 0x8], %f4                /* Load         Group           */
        add             %g1, 8, %g1                     /* IEU0                         */
        subcc           %g5, 8, %g5                     /* IEU1                         */
        faligndata      %f6, %f4, %f0                   /* GRU          Group           */
        std             %f0, [%o0]                      /* Store                        */
        add             %o1, 8, %o1                     /* IEU0                         */
        bne,pn          %xcc, 1b                        /* CTI          Group           */
         add            %o0, 8, %o0                     /* IEU0                         */
213:    brz,pn          %o2, 214f                       /* CTI          Group           */
         nop                                            /* IEU0                         */
        ldub            [%o1], %g5                      /* LOAD                         */
        add             %o1, 1, %o1                     /* IEU0                         */
        add             %o0, 1, %o0                     /* IEU1                         */
        subcc           %o2, 1, %o2                     /* IEU1                         */
        bne,pt          %xcc, 206b                      /* CTI                          */
         stb            %g5, [%o0 - 1]                  /* Store        Group           */
214:    wr              %g0, FPRS_FEF, %fprs
        retl
         mov            %g4, %o0
END(memcpy)

        .align          32
ENTRY(__memmove_slowpath)
228:    andcc           %o2, 1, %g0                     /* IEU1         Group           */
        be,pt           %icc, 2f+4                      /* CTI                          */
1:       ldub           [%o1 - 1], %o5                  /* LOAD         Group           */
        sub             %o1, 1, %o1                     /* IEU0                         */
        sub             %o0, 1, %o0                     /* IEU1                         */
        subcc           %o2, 1, %o2                     /* IEU1         Group           */
        be,pn           %xcc, 229f                      /* CTI                          */
         stb            %o5, [%o0]                      /* Store                        */
2:      ldub            [%o1 - 1], %o5                  /* LOAD         Group           */
        sub             %o0, 2, %o0                     /* IEU0                         */
        ldub            [%o1 - 2], %g5                  /* LOAD         Group           */
        sub             %o1, 2, %o1                     /* IEU0                         */
        subcc           %o2, 2, %o2                     /* IEU1         Group           */
        stb             %o5, [%o0 + 1]                  /* Store                        */
        bne,pt          %xcc, 2b                        /* CTI                          */
         stb            %g5, [%o0]                      /* Store                        */
229:    retl
         mov            %g4, %o0
219:    retl
         nop
END(__memmove_slowpath)
726         .align          32
727 ENTRY(memmove)
728 #ifndef USE_BPR
729         srl             %o2, 0, %o2                     /* IEU1         Group           */
730 #endif
731         brz,pn          %o2, 219b                       /* CTI          Group           */
732          sub            %o0, %o1, %o4                   /* IEU0                         */
733         cmp             %o4, %o2                        /* IEU1         Group           */
734         bgeu,pt         %XCC, 218b                      /* CTI                          */
735          mov            %o0, %g4                        /* IEU0                         */
        add             %o0, %o2, %o0                   /* IEU0         Group           */
220:    add             %o1, %o2, %o1                   /* IEU1                         */
        cmp             %o2, 15                         /* IEU1         Group           */
        bleu,pn         %xcc, 228b                      /* CTI                          */
         andcc          %o0, 7, %g2                     /* IEU1         Group           */
        sub             %o0, %o1, %g5                   /* IEU0                         */
        andcc           %g5, 3, %o5                     /* IEU1         Group           */
        bne,pn          %xcc, 232f                      /* CTI                          */
         andcc          %o1, 3, %g0                     /* IEU1         Group           */
        be,a,pt         %xcc, 236f                      /* CTI                          */
         andcc          %o1, 4, %g0                     /* IEU1         Group           */
        andcc           %o1, 1, %g0                     /* IEU1         Group           */
        be,pn           %xcc, 4f                        /* CTI                          */
         andcc          %o1, 2, %g0                     /* IEU1         Group           */
        ldub            [%o1 - 1], %g2                  /* Load         Group           */
        sub             %o1, 1, %o1                     /* IEU0                         */
        sub             %o0, 1, %o0                     /* IEU1                         */
        sub             %o2, 1, %o2                     /* IEU0         Group           */
        be,pn           %xcc, 5f                        /* CTI          Group           */
         stb            %g2, [%o0]                      /* Store                        */
4:      lduh            [%o1 - 2], %g2                  /* Load         Group           */
        sub             %o1, 2, %o1                     /* IEU0                         */
        sub             %o0, 2, %o0                     /* IEU1                         */
        sub             %o2, 2, %o2                     /* IEU0                         */
        sth             %g2, [%o0]                      /* Store        Group + bubble  */
5:      andcc           %o1, 4, %g0                     /* IEU1                         */
236:    be,a,pn         %xcc, 2f                        /* CTI                          */
         andcc          %o2, -128, %g6                  /* IEU1         Group           */
        lduw            [%o1 - 4], %g5                  /* Load         Group           */
        sub             %o1, 4, %o1                     /* IEU0                         */
        sub             %o0, 4, %o0                     /* IEU1                         */
        sub             %o2, 4, %o2                     /* IEU0         Group           */
        stw             %g5, [%o0]                      /* Store                        */
        andcc           %o2, -128, %g6                  /* IEU1         Group           */
2:      be,pn           %xcc, 235f                      /* CTI                          */
         andcc          %o0, 4, %g0                     /* IEU1         Group           */
        be,pn           %xcc, 282f + 4                  /* CTI          Group           */
5:      RMOVE_BIGCHUNK(o1, o0, 0x00, g1, g3, g5, o5)
        RMOVE_BIGCHUNK(o1, o0, 0x20, g1, g3, g5, o5)
        RMOVE_BIGCHUNK(o1, o0, 0x40, g1, g3, g5, o5)
        RMOVE_BIGCHUNK(o1, o0, 0x60, g1, g3, g5, o5)
        subcc           %g6, 128, %g6                   /* IEU1         Group           */
        sub             %o1, 128, %o1                   /* IEU0                         */
        bne,pt          %xcc, 5b                        /* CTI                          */
         sub            %o0, 128, %o0                   /* IEU0         Group           */
235:    andcc           %o2, 0x70, %g6                  /* IEU1         Group           */
41:     be,pn           %xcc, 280f                      /* CTI                          */
         andcc          %o2, 8, %g0                     /* IEU1         Group           */
                                                        /* Clk1 8-(                     */
                                                        /* Clk2 8-(                     */
                                                        /* Clk3 8-(                     */
                                                        /* Clk4 8-(                     */
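/* Dispatch into the unrolled sequence of RMOVE_LASTCHUNK expansions
   below: read the current PC and jump backwards by an amount
   proportional to the remaining 16-byte chunk count in %g6, so that
   exactly the required chunks are copied before falling into 280.  */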
279:    rd              %pc, %o5                        /* PDU          Group           */
        sll             %g6, 1, %g5                     /* IEU0         Group           */
        sub             %o1, %g6, %o1                   /* IEU1                         */
        sub             %o5, %g5, %o5                   /* IEU0         Group           */
        jmpl            %o5 + %lo(280f - 279b), %g0     /* CTI          Group brk forced*/
         sub            %o0, %g6, %o0                   /* IEU0         Group           */
        RMOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g5, o5)
        RMOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g5, o5)
        RMOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g5, o5)
        RMOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g5, o5)
        RMOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g5, o5)
        RMOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g5, o5)
        RMOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g5, o5)
280:    be,pt           %xcc, 281f                      /* CTI                          */
         andcc          %o2, 4, %g0                     /* IEU1                         */
        ldx             [%o1 - 8], %g2                  /* Load         Group           */
        sub             %o0, 8, %o0                     /* IEU0                         */
        stw             %g2, [%o0 + 4]                  /* Store        Group           */
        sub             %o1, 8, %o1                     /* IEU1                         */
        srlx            %g2, 32, %g2                    /* IEU0         Group           */
        stw             %g2, [%o0]                      /* Store                        */
281:    be,pt           %xcc, 1f                        /* CTI                          */
         andcc          %o2, 2, %g0                     /* IEU1         Group           */
        lduw            [%o1 - 4], %g2                  /* Load         Group           */
        sub             %o1, 4, %o1                     /* IEU0                         */
        stw             %g2, [%o0 - 4]                  /* Store        Group           */
        sub             %o0, 4, %o0                     /* IEU0                         */
1:      be,pt           %xcc, 1f                        /* CTI                          */
         andcc          %o2, 1, %g0                     /* IEU1         Group           */
        lduh            [%o1 - 2], %g2                  /* Load         Group           */
        sub             %o1, 2, %o1                     /* IEU0                         */
        sth             %g2, [%o0 - 2]                  /* Store        Group           */
        sub             %o0, 2, %o0                     /* IEU0                         */
1:      be,pt           %xcc, 211f                      /* CTI                          */
         nop                                            /* IEU1                         */
        ldub            [%o1 - 1], %g2                  /* Load         Group           */
        stb             %g2, [%o0 - 1]                  /* Store        Group + bubble  */
211:    retl
         mov            %g4, %o0

282:    RMOVE_BIGALIGNCHUNK(o1, o0, 0x00, g1, g3, g5, o5)
        RMOVE_BIGALIGNCHUNK(o1, o0, 0x40, g1, g3, g5, o5)
        subcc           %g6, 128, %g6                   /* IEU1         Group           */
        sub             %o1, 128, %o1                   /* IEU0                         */
        bne,pt          %xcc, 282b                      /* CTI                          */
         sub            %o0, 128, %o0                   /* IEU0         Group           */
        andcc           %o2, 0x70, %g6                  /* IEU1                         */
        be,pn           %xcc, 284f                      /* CTI                          */
         andcc          %o2, 8, %g0                     /* IEU1         Group           */
                                                        /* Clk1 8-(                     */
                                                        /* Clk2 8-(                     */
                                                        /* Clk3 8-(                     */
                                                        /* Clk4 8-(                     */
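/* Same PC-relative dispatch as at 279, but for the doubleword-aligned
   tail: only RMOVE_LASTALIGNCHUNK expansions for the remaining
   16-byte chunks in %g6 are executed before falling into 284.  */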
283:    rd              %pc, %o5                        /* PDU          Group           */
        sub             %o1, %g6, %o1                   /* IEU0         Group           */
        sub             %o5, %g6, %o5                   /* IEU1                         */
        jmpl            %o5 + %lo(284f - 283b), %g0     /* CTI          Group brk forced*/
         sub            %o0, %g6, %o0                   /* IEU0         Group           */
        RMOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2, g3)
        RMOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2, g3)
        RMOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2, g3)
        RMOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2, g3)
        RMOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2, g3)
        RMOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2, g3)
        RMOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2, g3)
284:    be,pt           %xcc, 285f                      /* CTI          Group           */
         andcc          %o2, 4, %g0                     /* IEU1                         */
        ldx             [%o1 - 8], %g2                  /* Load         Group           */
        sub             %o0, 8, %o0                     /* IEU0                         */
        sub             %o1, 8, %o1                     /* IEU0         Group           */
        stx             %g2, [%o0]                      /* Store                        */
285:    be,pt           %xcc, 1f                        /* CTI                          */
         andcc          %o2, 2, %g0                     /* IEU1         Group           */
        lduw            [%o1 - 4], %g2                  /* Load         Group           */
        sub             %o0, 4, %o0                     /* IEU0                         */
        sub             %o1, 4, %o1                     /* IEU0         Group           */
        stw             %g2, [%o0]                      /* Store                        */
1:      be,pt           %xcc, 1f                        /* CTI                          */
         andcc          %o2, 1, %g0                     /* IEU1         Group           */
        lduh            [%o1 - 2], %g2                  /* Load         Group           */
        sub             %o0, 2, %o0                     /* IEU0                         */
        sub             %o1, 2, %o1                     /* IEU0         Group           */
        sth             %g2, [%o0]                      /* Store                        */
1:      be,pt           %xcc, 1f                        /* CTI                          */
         nop                                            /* IEU0         Group           */
        ldub            [%o1 - 1], %g2                  /* Load         Group           */
        stb             %g2, [%o0 - 1]                  /* Store        Group + bubble  */
1:      retl
         mov            %g4, %o0
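
/* Source and destination addresses differ modulo 4: copy single bytes
   backwards until the destination is doubleword aligned, then fall
   into the VIS alignaddr/faligndata loop at 2.  */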
232:    brz,pt          %g2, 2f                         /* CTI          Group           */
         sub            %o2, %g2, %o2                   /* IEU0         Group           */
1:      ldub            [%o1 - 1], %g5                  /* Load         Group           */
        sub             %o1, 1, %o1                     /* IEU0                         */
        sub             %o0, 1, %o0                     /* IEU1                         */
        subcc           %g2, 1, %g2                     /* IEU1         Group           */
        bne,pt          %xcc, 1b                        /* CTI                          */
         stb            %g5, [%o0]                      /* Store                        */
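/* Unaligned backward doubleword loop, unrolled twice: alignaddr sets
   up %g1 and the GSR alignment from the source address, and each
   iteration loads the next lower doubleword, merges it with the
   previous one via faligndata, and stores an aligned doubleword at
   %o0 - 8.  */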
2:      andn            %o2, 7, %g5                     /* IEU0         Group           */
        and             %o2, 7, %o2                     /* IEU1                         */
        fmovd           %f0, %f2                        /* FPU                          */
        alignaddr       %o1, %g0, %g1                   /* GRU          Group           */
        ldd             [%g1], %f4                      /* Load         Group           */
1:      ldd             [%g1 - 8], %f6                  /* Load         Group           */
        sub             %g1, 8, %g1                     /* IEU0         Group           */
        subcc           %g5, 8, %g5                     /* IEU1                         */
        faligndata      %f6, %f4, %f0                   /* GRU          Group           */
        std             %f0, [%o0 - 8]                  /* Store                        */
        sub             %o1, 8, %o1                     /* IEU0         Group           */
        be,pn           %xcc, 233f                      /* CTI                          */
         sub            %o0, 8, %o0                     /* IEU1                         */
        ldd             [%g1 - 8], %f4                  /* Load         Group           */
        sub             %g1, 8, %g1                     /* IEU0                         */
        subcc           %g5, 8, %g5                     /* IEU1                         */
        faligndata      %f4, %f6, %f0                   /* GRU          Group           */
        std             %f0, [%o0 - 8]                  /* Store                        */
        sub             %o1, 8, %o1                     /* IEU0                         */
        bne,pn          %xcc, 1b                        /* CTI          Group           */
         sub            %o0, 8, %o0                     /* IEU0                         */
233:    brz,pn          %o2, 234f                       /* CTI          Group           */
         nop                                            /* IEU0                         */
237:    ldub            [%o1 - 1], %g5                  /* Load                         */
        sub             %o1, 1, %o1                     /* IEU0                         */
        sub             %o0, 1, %o0                     /* IEU1                         */
        subcc           %o2, 1, %o2                     /* IEU1                         */
        bne,pt          %xcc, 237b                      /* CTI                          */
         stb            %g5, [%o0]                      /* Store        Group           */
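/* The FP registers are no longer needed: writing FPRS_FEF leaves the
   FPU enabled while clearing the dirty-lower/dirty-upper bits, then
   return the original destination pointer saved in %g4.  */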
234:    wr              %g0, FPRS_FEF, %fprs
        retl
         mov            %g4, %o0
END(memmove)

#ifdef USE_BPR
weak_alias (memcpy, __align_cpy_1)
weak_alias (memcpy, __align_cpy_2)
#endif
libc_hidden_builtin_def (memcpy)
libc_hidden_builtin_def (memmove)