Import 2.3.18pre1
[davej-history.git] / arch / sparc64 / lib / VIScopy.S
blob57cf6b0f1afc36cb20234485ea56ac12bd7ba213
1 /* $Id: VIScopy.S,v 1.21 1999/07/30 09:35:35 davem Exp $
2  * VIScopy.S: High speed copy operations utilizing the UltraSparc
3  *            Visual Instruction Set.
4  *
5  * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
6  * Copyright (C) 1996, 1997, 1998, 1999 Jakub Jelinek (jj@ultra.linux.cz)
7  */
9 #include "VIS.h"
11         /* VIS code can be used for numerous copy/set operation variants.
12          * It can be made to work in the kernel, one single instance,
13          * for all of memcpy, copy_to_user, and copy_from_user by setting
14          * the ASI src/dest globals correctly.  Furthermore it can
15          * be used for kernel-->kernel page copies as well, a hook label
16          * is put in here just for this purpose.
17          *
18          * For userland, compiling this without __KERNEL__ defined makes
19          * it work just fine as a generic libc bcopy and memcpy.
20          * If for userland it is compiled with a 32bit gcc (but you need
21          * -Wa,-Av9a for as), the code will just rely on lower 32bits of
22          * IEU registers, if you compile it with 64bit gcc (ie. define
23          * __sparc_v9__), the code will use full 64bit.
24          */
25          
26 #ifdef __KERNEL__
28 #include <asm/visasm.h>
29 #include <asm/asm_offsets.h>
/* FPU_CLEAN_RETL (__KERNEL__ build): return sequence.
 * Loads one byte from [%g6 + AOFF_task_thread + AOFF_thread_current_ds]
 * into %o1, expands VISExit (defined outside this file, presumably in
 * asm/visasm.h), clears %o0 and returns; the retl delay slot writes %o1
 * into %asi.  Overwrites at least %o0 and %o1.
 */
31 #define FPU_CLEAN_RETL                                                          \
32         ldub            [%g6 + AOFF_task_thread + AOFF_thread_current_ds], %o1; \
33         VISExit                                                                 \
34         clr             %o0;                                                    \
35         retl;                                                                   \
36          wr             %o1, %g0, %asi;
/* FPU_RETL (__KERNEL__ build): expands to the same instructions as
 * FPU_CLEAN_RETL in this branch: reload %o1 from the per-task byte at
 * [%g6 + AOFF_task_thread + AOFF_thread_current_ds], VISExit, clear %o0,
 * return, and write %o1 to %asi in the retl delay slot.
 */
37 #define FPU_RETL                                                                \
38         ldub            [%g6 + AOFF_task_thread + AOFF_thread_current_ds], %o1; \
39         VISExit                                                                 \
40         clr             %o0;                                                    \
41         retl;                                                                   \
42          wr             %o1, %g0, %asi;
/* NORMAL_RETL (__KERNEL__ build): return sequence without VISExit.
 * Loads the byte at [%g6 + AOFF_task_thread + AOFF_thread_current_ds] into
 * %o1, clears %o0 and returns, writing %o1 to %asi in the retl delay slot.
 */
43 #define NORMAL_RETL                                                             \
44         ldub            [%g6 + AOFF_task_thread + AOFF_thread_current_ds], %o1; \
45         clr             %o0;                                                    \
46         retl;                                                                   \
47          wr             %o1, %g0, %asi;
/* EX(x,y,a,b): emit the instruction "x,y" at local label 98 together with
 * a private fixup stub.  The stub at local label 99 is placed in .fixup
 * and branches to VIScopyfixup_ret with "a, b, %o1" in the delay slot
 * (e.g. a,b = "add %o2, %g2" gives "add %o2, %g2, %o1").  An __ex_table
 * record holding the pair (98b, 99b) is emitted, then assembly resumes in
 * .text with 4-byte alignment.
 * NOTE(review): presumably the fault handler uses the __ex_table pair to
 * resume at 99 when the access at 98 faults -- confirm against the
 * exception-table consumer.
 */
48 #define EX(x,y,a,b)                             \
49 98:     x,y;                                    \
50         .section .fixup;                        \
51         .align  4;                              \
52 99:     ba      VIScopyfixup_ret;               \
53          a, b, %o1;                             \
54         .section __ex_table;                    \
55         .align  4;                              \
56         .word   98b, 99b;                       \
57         .text;                                  \
58         .align  4;
/* EX2(x,y,c,d,e,a,b): like EX, but the .fixup stub at local label 99 first
 * executes the instruction "c, d, e" and only then branches to
 * VIScopyfixup_ret with "a, b, %o1" in the delay slot.  The callers in this
 * file use "c, d, e" to undo a register update made before the guarded
 * access (e.g. "add %g2, 1, %g2").  Emits the __ex_table pair (98b, 99b)
 * and returns to .text, 4-byte aligned.
 */
59 #define EX2(x,y,c,d,e,a,b)                      \
60 98:     x,y;                                    \
61         .section .fixup;                        \
62         .align  4;                              \
63 99:     c, d, e;                                \
64         ba      VIScopyfixup_ret;               \
65          a, b, %o1;                             \
66         .section __ex_table;                    \
67         .align  4;                              \
68         .word   98b, 99b;                       \
69         .text;                                  \
70         .align  4;
/* EXO2(x,y): emit the instruction "x,y" at local label 98 and an __ex_table
 * record pairing 98b with the shared handler VIScopyfixup_reto2 (defined
 * outside the visible part of this file).  No per-site stub is generated.
 */
71 #define EXO2(x,y)                               \
72 98:     x,y;                                    \
73         .section __ex_table;                    \
74         .align  4;                              \
75         .word   98b, VIScopyfixup_reto2;        \
76         .text;                                  \
77         .align  4;
/* EXVISN(x,y,n): emit the instruction "x,y" at local label 98 and an
 * __ex_table record pairing 98b with the handler VIScopyfixup_vis<n>; the
 * handler name is formed by pasting n onto "VIScopyfixup_vis".  The
 * handlers themselves are not in the visible part of this file.
 */
78 #define EXVISN(x,y,n)                           \
79 98:     x,y;                                    \
80         .section __ex_table;                    \
81         .align  4;                              \
82         .word   98b, VIScopyfixup_vis##n;       \
83         .text;                                  \
84         .align  4;
/* EXT(start,end,handler): emit no instruction, only a four-word __ex_table
 * record "start, 0, end, handler", then return to .text, 4-byte aligned.
 * NOTE(review): looks like a range entry mapping faults between start and
 * end to handler -- confirm the record layout with the table consumer.
 */
85 #define EXT(start,end,handler)                  \
86         .section __ex_table;                    \
87         .align  4;                              \
88         .word   start, 0, end, handler;         \
89         .text;                                  \
90         .align  4;
91 #else
/* Non-__KERNEL__ variants of the return and exception-wrapper macros.
 * All three return macros return with "mov %g6, %o0" in the retl delay
 * slot; when REGS_64BIT is not defined the two FPU_* variants first write
 * FPRS_FEF to %fprs.  The EX, EX2, EXO2 and EXVISN wrappers reduce to the
 * bare instruction "x,y" and EXT expands to nothing, so no fixup or
 * exception-table data is generated in this configuration.
 * NOTE(review): %g6 presumably holds the value to be returned, set up by
 * code outside the visible part of this file -- confirm.
 */
92 #ifdef REGS_64BIT
93 #define FPU_CLEAN_RETL                          \
94         retl;                                   \
95          mov    %g6, %o0;
96 #define FPU_RETL                                \
97         retl;                                   \
98          mov    %g6, %o0;
99 #else
100 #define FPU_CLEAN_RETL                          \
101         wr      %g0, FPRS_FEF, %fprs;           \
102         retl;                                   \
103          mov    %g6, %o0;
104 #define FPU_RETL                                \
105         wr      %g0, FPRS_FEF, %fprs;           \
106         retl;                                   \
107          mov    %g6, %o0;
108 #endif
109 #define NORMAL_RETL     \
110         retl;           \
111          mov    %g6, %o0;
112 #define EX(x,y,a,b)             x,y
113 #define EX2(x,y,c,d,e,a,b)      x,y
114 #define EXO2(x,y)               x,y
115 #define EXVISN(x,y,n)           x,y
116 #define EXT(a,b,c)
117 #endif
/* Shorthands for EXVISN with the handler index fixed to 0..5:
 * EXVIS selects index 0, EXVISk selects index k.
 */
118 #define EXVIS(insn,ops)  EXVISN(insn,ops,0)
119 #define EXVIS1(insn,ops) EXVISN(insn,ops,1)
120 #define EXVIS2(insn,ops) EXVISN(insn,ops,2)
121 #define EXVIS3(insn,ops) EXVISN(insn,ops,3)
122 #define EXVIS4(insn,ops) EXVISN(insn,ops,4)
123 #define EXVIS5(insn,ops) EXVISN(insn,ops,5)
/* FREG_FROB(f1..f9): eight faligndata operations over the adjacent register
 * pairs (f1,f2), (f2,f3), ... (f8,f9), writing the results to %f48, %f50,
 * ... %f62 in that order.  Arguments are register names without the
 * leading '%'; the macro adds it.
 */
125 #define FREG_FROB(f1, f2, f3, f4, f5, f6, f7, f8, f9)           \
126         faligndata              %f1, %f2, %f48;                 \
127         faligndata              %f2, %f3, %f50;                 \
128         faligndata              %f3, %f4, %f52;                 \
129         faligndata              %f4, %f5, %f54;                 \
130         faligndata              %f5, %f6, %f56;                 \
131         faligndata              %f6, %f7, %f58;                 \
132         faligndata              %f7, %f8, %f60;                 \
133         faligndata              %f8, %f9, %f62;
/* MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, len, jmptgt): one 0x40-byte step
 * of the block-copy loop.  Block-loads from [%src] into %fdest (fault
 * handler index 0), advances %src and %dest by 0x40, subtracts 0x40 from
 * %len and branches to jmptgt when the result is zero.  The block store of
 * %fsrc to [%dest - 0x40] (handler index 2) sits in the delay slot of that
 * non-annulled branch, so it is issued on both paths.  ASI_SETDST_BLK is
 * expanded before the store and ASI_SETSRC_BLK after it; both are defined
 * outside this file.  Register arguments are given without the '%'.
 */
135 #define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, len, jmptgt)    \
136         EXVIS(LDBLK             [%src] ASIBLK, %fdest);         \
137         ASI_SETDST_BLK                                          \
138         add                     %src, 0x40, %src;               \
139         add                     %dest, 0x40, %dest;             \
140         subcc                   %len, 0x40, %len;               \
141         be,pn                   %xcc, jmptgt;                   \
142          EXVIS2(STBLK           %fsrc, [%dest - 0x40] ASIBLK);  \
143         ASI_SETSRC_BLK
/* LOOP_CHUNK1/2/3: MAIN_LOOP_CHUNK with the store source fixed to f48 and
 * the load destination fixed to f0, f16 and f32 respectively.
 */
145 #define LOOP_CHUNK1(from, to, remaining, exit_label)            \
146         MAIN_LOOP_CHUNK(from, to, f0,  f48, remaining, exit_label)
147 #define LOOP_CHUNK2(from, to, remaining, exit_label)            \
148         MAIN_LOOP_CHUNK(from, to, f16, f48, remaining, exit_label)
149 #define LOOP_CHUNK3(from, to, remaining, exit_label)            \
150         MAIN_LOOP_CHUNK(from, to, f32, f48, remaining, exit_label)
/* STORE_SYNC(dest, fsrc): block-store %fsrc to [%dest] (fault handler
 * index 0) and advance %dest by 0x40.  The callers in this file follow it
 * with "membar #Sync".
 */
152 #define STORE_SYNC(dest, fsrc)                                  \
153         EXVIS(STBLK             %fsrc, [%dest] ASIBLK);         \
154         add                     %dest, 0x40, %dest;
/* STORE_JUMP(dest, fsrc, target): block-store %fsrc to [%dest] (fault
 * handler index 3), advance %dest by 0x40 and branch to target.  The macro
 * ends with the branch itself, so the instruction the caller writes next
 * ("membar #Sync" at every use in this file) fills the delay slot.
 * The __KERNEL__ variant additionally rewrites asi_dest around the store:
 * %g5 = asi_dest >> 3, then asi_dest ^= ASI_BLK_XOR1, then asi_dest ^= %g5.
 * NOTE(review): presumably this turns the block-transfer ASI in asi_dest
 * back into its non-block form -- confirm against the ASI_BLK_* values
 * in VIS.h.
 */
156 #ifdef __KERNEL__
157 #define STORE_JUMP(dest, fsrc, target)                          \
158         srl                     asi_dest, 3, %g5;               \
159         EXVIS3(STBLK            %fsrc, [%dest] ASIBLK);         \
160         xor                    asi_dest, ASI_BLK_XOR1, asi_dest;\
161         add                     %dest, 0x40, %dest;             \
162         xor                     asi_dest, %g5, asi_dest;        \
163         ba,pt                   %xcc, target;
164 #else
165 #define STORE_JUMP(dest, fsrc, target)                          \
166         EXVIS3(STBLK            %fsrc, [%dest] ASIBLK);         \
167         add                     %dest, 0x40, %dest;             \
168         ba,pt                   %xcc, target;
169 #endif
/* VISLOOP_PAD: fifteen nops in non-kernel builds, nothing in kernel builds.
 * It is placed after each visN0..visN3 group below.
 * NOTE(review): presumably this pads the shorter non-kernel groups up to
 * the 512-byte stride assumed by the computed jump at vispc -- confirm by
 * checking the assembled size of one group in each configuration.
 */
171 #ifndef __KERNEL__
172 #define VISLOOP_PAD nop; nop; nop; nop; \
173                     nop; nop; nop; nop; \
174                     nop; nop; nop; nop; \
175                     nop; nop; nop;
176 #else
177 #define VISLOOP_PAD
178 #endif
/* FINISH_VISCHUNK(dest, f0, f1, left): store one more aligned doubleword
 * if at least 8 bytes remain.  Expands ASI_SETDST_NOBLK, subtracts 8 from
 * %left and branches to vis_out when the result is negative.  The
 * faligndata of (%f0, %f1) into %f48 sits in the delay slot of that
 * non-annulled branch, so %f48 is written on both paths.  On fall-through
 * %f48 is stored to [%dest] (fault handler index 4) and %dest advances
 * by 8.
 */
180 #define FINISH_VISCHUNK(dest, f0, f1, left)                     \
181         ASI_SETDST_NOBLK                                        \
182         subcc                   %left, 8, %left;                \
183         bl,pn                   %xcc, vis_out;                  \
184          faligndata             %f0, %f1, %f48;                 \
185         EXVIS4(STDF             %f48, [%dest] ASINORMAL);       \
186         add                     %dest, 8, %dest;
/* UNEVEN_VISCHUNK_LAST(dest, f0, f1, left): subtract 8 from %left and
 * branch to vis_out when the result is negative; "fsrc1 %f0, %f1" (copy
 * %f0 into %f1) sits in the delay slot of that non-annulled branch and so
 * runs on both paths.  The dest argument is not used by the expansion.
 * UNEVEN_VISCHUNK is the same sequence followed by an annulled
 * unconditional branch to vis_out_slk.  vis_out and vis_out_slk are
 * defined outside the visible part of this file.
 */
188 #define UNEVEN_VISCHUNK_LAST(dest, f0, f1, left)                \
189         subcc                   %left, 8, %left;                \
190         bl,pn                   %xcc, vis_out;                  \
191          fsrc1                  %f0, %f1;
192 #define UNEVEN_VISCHUNK(dest, f0, f1, left)                     \
193         UNEVEN_VISCHUNK_LAST(dest, f0, f1, left)                \
194         ba,a,pt                 %xcc, vis_out_slk;
196         /* Macros for non-VIS memcpy code. */
197 #ifdef REGS_64BIT
/* MOVE_BIGCHUNK (REGS_64BIT): copy 0x20 bytes from %src + offset to
 * %dst + offset.  Four 8-byte LDX loads fill t0..t3; each register is then
 * written with two 4-byte STW stores: the low word at the higher address
 * (+4) first, then, after "srlx reg, 32", the high word at +0.
 * t0..t3 are overwritten.  ASI_SETSRC_NOBLK / ASI_SETDST_NOBLK are
 * expanded before the loads and stores respectively.
 * NOTE(review): splitting the stores presumably allows a destination that
 * is only 4-byte aligned -- confirm at the call sites.
 */
199 #define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3)                 \
200         ASI_SETSRC_NOBLK                                                \
201         LDX                     [%src + offset + 0x00] ASINORMAL, %t0;  \
202         LDX                     [%src + offset + 0x08] ASINORMAL, %t1;  \
203         LDX                     [%src + offset + 0x10] ASINORMAL, %t2;  \
204         LDX                     [%src + offset + 0x18] ASINORMAL, %t3;  \
205         ASI_SETDST_NOBLK                                                \
206         STW                     %t0, [%dst + offset + 0x04] ASINORMAL;  \
207         srlx                    %t0, 32, %t0;                           \
208         STW                     %t0, [%dst + offset + 0x00] ASINORMAL;  \
209         STW                     %t1, [%dst + offset + 0x0c] ASINORMAL;  \
210         srlx                    %t1, 32, %t1;                           \
211         STW                     %t1, [%dst + offset + 0x08] ASINORMAL;  \
212         STW                     %t2, [%dst + offset + 0x14] ASINORMAL;  \
213         srlx                    %t2, 32, %t2;                           \
214         STW                     %t2, [%dst + offset + 0x10] ASINORMAL;  \
215         STW                     %t3, [%dst + offset + 0x1c] ASINORMAL;  \
216         srlx                    %t3, 32, %t3;                           \
217         STW                     %t3, [%dst + offset + 0x18] ASINORMAL;
/* MOVE_BIGALIGNCHUNK (REGS_64BIT): copy 0x40 bytes from %src + offset to
 * %dst + offset in two 0x20-byte halves.  Each half is four 8-byte LDX
 * loads into t0..t3 followed by four 8-byte STX stores, with
 * ASI_SETSRC_NOBLK expanded before the loads and ASI_SETDST_NOBLK before
 * the stores.  t0..t3 are overwritten.
 */
219 #define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3)            \
220         ASI_SETSRC_NOBLK                                                \
221         LDX                     [%src + offset + 0x00] ASINORMAL, %t0;  \
222         LDX                     [%src + offset + 0x08] ASINORMAL, %t1;  \
223         LDX                     [%src + offset + 0x10] ASINORMAL, %t2;  \
224         LDX                     [%src + offset + 0x18] ASINORMAL, %t3;  \
225         ASI_SETDST_NOBLK                                                \
226         STX                     %t0, [%dst + offset + 0x00] ASINORMAL;  \
227         STX                     %t1, [%dst + offset + 0x08] ASINORMAL;  \
228         STX                     %t2, [%dst + offset + 0x10] ASINORMAL;  \
229         STX                     %t3, [%dst + offset + 0x18] ASINORMAL;  \
230         ASI_SETSRC_NOBLK                                                \
231         LDX                     [%src + offset + 0x20] ASINORMAL, %t0;  \
232         LDX                     [%src + offset + 0x28] ASINORMAL, %t1;  \
233         LDX                     [%src + offset + 0x30] ASINORMAL, %t2;  \
234         LDX                     [%src + offset + 0x38] ASINORMAL, %t3;  \
235         ASI_SETDST_NOBLK                                                \
236         STX                     %t0, [%dst + offset + 0x20] ASINORMAL;  \
237         STX                     %t1, [%dst + offset + 0x28] ASINORMAL;  \
238         STX                     %t2, [%dst + offset + 0x30] ASINORMAL;  \
239         STX                     %t3, [%dst + offset + 0x38] ASINORMAL;
/* MOVE_LASTCHUNK (REGS_64BIT): copy the 0x10 bytes that end at
 * %src - offset to the 0x10 bytes that end at %dst - offset.  Two LDX
 * loads fill t0 and t1; each is written as two STW stores, low word at
 * the higher address first, then the high word (shifted into t2 / t3 by
 * srlx) at the lower address.  t0..t3 are overwritten.
 */
241 #define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3)                \
242         ASI_SETSRC_NOBLK                                                \
243         LDX                     [%src - offset - 0x10] ASINORMAL, %t0;  \
244         LDX                     [%src - offset - 0x08] ASINORMAL, %t1;  \
245         ASI_SETDST_NOBLK                                                \
246         STW                     %t0, [%dst - offset - 0x0c] ASINORMAL;  \
247         srlx                    %t0, 32, %t2;                           \
248         STW                     %t2, [%dst - offset - 0x10] ASINORMAL;  \
249         STW                     %t1, [%dst - offset - 0x04] ASINORMAL;  \
250         srlx                    %t1, 32, %t3;                           \
251         STW                     %t3, [%dst - offset - 0x08] ASINORMAL;
/* MOVE_LASTALIGNCHUNK (REGS_64BIT): copy the 0x10 bytes that end at
 * %src - offset to the 0x10 bytes that end at %dst - offset using two
 * 8-byte LDX loads into t0/t1 and two 8-byte STX stores.
 */
253 #define MOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1)                   \
254         ASI_SETSRC_NOBLK                                                \
255         LDX                     [%src - offset - 0x10] ASINORMAL, %t0;  \
256         LDX                     [%src - offset - 0x08] ASINORMAL, %t1;  \
257         ASI_SETDST_NOBLK                                                \
258         STX                     %t0, [%dst - offset - 0x10] ASINORMAL;  \
259         STX                     %t1, [%dst - offset - 0x08] ASINORMAL;
261 #else /* !REGS_64BIT */
/* MOVE_BIGCHUNK (!REGS_64BIT): copy 0x20 bytes from %src + offset to
 * %dst + offset with plain 4-byte lduw/stw, in two groups of four loads
 * followed by four stores.  No ASI macros are used in this variant.
 * t0..t3 are overwritten.
 */
263 #define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3)                 \
264         lduw                    [%src + offset + 0x00], %t0;            \
265         lduw                    [%src + offset + 0x04], %t1;            \
266         lduw                    [%src + offset + 0x08], %t2;            \
267         lduw                    [%src + offset + 0x0c], %t3;            \
268         stw                     %t0, [%dst + offset + 0x00];            \
269         stw                     %t1, [%dst + offset + 0x04];            \
270         stw                     %t2, [%dst + offset + 0x08];            \
271         stw                     %t3, [%dst + offset + 0x0c];            \
272         lduw                    [%src + offset + 0x10], %t0;            \
273         lduw                    [%src + offset + 0x14], %t1;            \
274         lduw                    [%src + offset + 0x18], %t2;            \
275         lduw                    [%src + offset + 0x1c], %t3;            \
276         stw                     %t0, [%dst + offset + 0x10];            \
277         stw                     %t1, [%dst + offset + 0x14];            \
278         stw                     %t2, [%dst + offset + 0x18];            \
279         stw                     %t3, [%dst + offset + 0x1c];
/* MOVE_LASTCHUNK (!REGS_64BIT): copy the 0x10 bytes that end at
 * %src - offset to the 0x10 bytes that end at %dst - offset with four
 * 4-byte lduw loads into t0..t3 followed by four stw stores.
 */
281 #define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3)                \
282         lduw                    [%src - offset - 0x10], %t0;            \
283         lduw                    [%src - offset - 0x0c], %t1;            \
284         lduw                    [%src - offset - 0x08], %t2;            \
285         lduw                    [%src - offset - 0x04], %t3;            \
286         stw                     %t0, [%dst - offset - 0x10];            \
287         stw                     %t1, [%dst - offset - 0x0c];            \
288         stw                     %t2, [%dst - offset - 0x08];            \
289         stw                     %t3, [%dst - offset - 0x04];
291 #endif /* !REGS_64BIT */
293 #ifdef __KERNEL__
294                 .section        __ex_table,#alloc
295                 .section        .fixup,#alloc,#execinstr
296 #endif
298                 .text
299                 .align                  32
300                 .globl                  memcpy
301                 .type                   memcpy,@function
303                 .globl                  bcopy
304                 .type                   bcopy,@function
306 #ifdef __KERNEL__
307                 .globl                  __memcpy
308                 .type                   __memcpy,@function
310                 .globl                  __memcpy_384plus
311                 .type                   __memcpy_384plus,@function
313                 .globl                  __memcpy_16plus
314                 .type                   __memcpy_16plus,@function
316                 .globl                  __memcpy_short
317                 .type                   __memcpy_short,@function
319                 .globl                  __memcpy_entry
320                 .type                   __memcpy_entry,@function
/* memcpy / __memcpy / memcpy_private (__KERNEL__ build): three labels for
 * one entry point.  Sets both asi_src and asi_dest to ASI_P, then branches
 * to __memcpy_entry when %o2 is non-zero (the second mov is in the delay
 * slot).  When %o2 is zero it returns immediately with %o0 cleared.
 * NOTE(review): asi_src / asi_dest are presumably register aliases from
 * VIS.h and %o2 the byte count -- confirm.
 */
321 memcpy_private:
322 __memcpy:
323 memcpy:         mov             ASI_P, asi_src                  ! IEU0  Group
324                 brnz,pt         %o2, __memcpy_entry             ! CTI
325                  mov            ASI_P, asi_dest                 ! IEU1
326                 retl
327                  clr            %o0
/* __copy_from_user: source ASI is taken from the current %asi register,
 * destination ASI is ASI_P; branches to __memcpy_entry when %o2 is
 * non-zero.  There is no return sequence here: when %o2 is zero execution
 * falls through into __copy_to_user below, whose own brnz is then not
 * taken either and whose retl / clr %o0 performs the return.  The
 * fall-through overwrites asi_src and asi_dest on the way.
 * NOTE(review): the fall-through looks deliberate (it saves two
 * instructions) -- keep the two entry points adjacent.
 */
329                 .align                  32
330                 .globl                  __copy_from_user
331                 .type                   __copy_from_user,@function
332 __copy_from_user:rd             %asi, asi_src                   ! IEU0  Group
333                 brnz,pt         %o2, __memcpy_entry             ! CTI
334                  mov            ASI_P, asi_dest                 ! IEU1
/* __copy_to_user: source ASI is ASI_P, destination ASI is read from the
 * current %asi register (in the branch delay slot); branches to
 * __memcpy_entry when %o2 is non-zero, otherwise returns with %o0 cleared.
 * Also serves as the zero-length return path of __copy_from_user, which
 * falls through into this label.
 */
336                 .globl                  __copy_to_user
337                 .type                   __copy_to_user,@function
338 __copy_to_user: mov             ASI_P, asi_src                  ! IEU0  Group
339                 brnz,pt         %o2, __memcpy_entry             ! CTI
340                  rd             %asi, asi_dest                  ! IEU1
341                 retl                                            ! CTI   Group
342                  clr            %o0                             ! IEU0  Group
/* __copy_in_user: both ASIs come from the current %asi register (asi_src
 * is read from %asi, asi_dest is copied from asi_src in the delay slot);
 * branches to __memcpy_entry when %o2 is non-zero, otherwise returns with
 * %o0 cleared.
 */
344                 .globl                  __copy_in_user
345                 .type                   __copy_in_user,@function
346 __copy_in_user: rd              %asi, asi_src                   ! IEU0  Group
347                 brnz,pt         %o2, __memcpy_entry             ! CTI
348                  mov            asi_src, asi_dest               ! IEU1
349                 retl                                            ! CTI   Group
350                  clr            %o0                             ! IEU0  Group
351 #endif
/* bcopy: exchange %o0 and %o1 through %g3 and hand over to memcpy_private
 * when %o2 is greater than or equal to zero (signed register test).  The
 * second half of the exchange is in the delay slot of the non-annulled
 * branch, so it runs on both paths.  When %o2 is negative the routine
 * returns with %o0 cleared and copies nothing.
 * NOTE(review): memcpy_private is defined above only under __KERNEL__; a
 * non-kernel build needs a definition outside the visible part of this
 * file -- confirm.
 */
353 bcopy:          or              %o0, 0, %g3                     ! IEU0  Group
354                 addcc           %o1, 0, %o0                     ! IEU1
355                 brgez,pt        %o2, memcpy_private             ! CTI
356                  or             %g3, 0, %o1                     ! IEU0  Group
357                 retl                                            ! CTI   Group brk forced
358                  clr            %o0                             ! IEU0
/* Entry to the VIS copy path.  __memcpy_384plus (kernel builds only)
 * computes %g2 = %o0 & 7 and sets the condition codes from it.  VIS_enter
 * then branches to dest_is_8byte_aligned when the zero flag is set and
 * otherwise falls into do_dest_8byte_align.  The delay slot is a nop in
 * kernel builds; in non-kernel builds it already computes
 * %g5 = %o0 & 0x38, the first instruction of the aligned path.
 * NOTE(review): code reaching VIS_enter directly must have set %g2 and the
 * condition codes the same way -- the callers are outside the visible part
 * of this file.
 */
361         .align                  32
362 #ifdef __KERNEL__
363 __memcpy_384plus:
364         andcc                   %o0, 7, %g2                     ! IEU1  Group
365 #endif
366 VIS_enter:
367         be,pt                   %xcc, dest_is_8byte_aligned     ! CTI
368 #ifdef __KERNEL__
369          nop                                                    ! IEU0  Group
370 #else
371          andcc                  %o0, 0x38, %g5                  ! IEU1  Group
372 #endif
/* do_dest_8byte_align: copy single bytes until %o0 is 8-byte aligned.
 * Expects %g2 = %o0 & 7 on entry.  %g2 is replaced by 8 - %g2, the number
 * of bytes to move, and that amount is taken off %o2 up front (delay slot
 * of the first branch).  If %o0 is odd, the code at local label 1 moves one
 * byte and leaves through local label 3 when %g2 has reached zero;
 * otherwise, and afterwards, the loop at local label 2 moves two bytes per
 * iteration until %g2 is zero and then falls out of the bottom.
 * Each guarded access passes "add %o2, %g2" to its fixup, i.e. the stub
 * computes %o1 = %o2 + %g2; the EX2 forms first undo the register update
 * that preceded the access.
 */
373 do_dest_8byte_align:
374         mov                     8, %g1                          ! IEU0
375         sub                     %g1, %g2, %g2                   ! IEU0  Group
376         andcc                   %o0, 1, %g0                     ! IEU1
377         be,pt                   %icc, 2f                        ! CTI
378          sub                    %o2, %g2, %o2                   ! IEU0  Group
/* Odd destination address: move one byte. */
379 1:      ASI_SETSRC_NOBLK                                        ! LSU   Group
380         EX(LDUB                 [%o1] ASINORMAL, %o5, 
381                                 add %o2, %g2)                   ! Load  Group
382         add                     %o1, 1, %o1                     ! IEU0
383         add                     %o0, 1, %o0                     ! IEU1
384         ASI_SETDST_NOBLK                                        ! LSU   Group
385         subcc                   %g2, 1, %g2                     ! IEU1  Group
386         be,pn                   %xcc, 3f                        ! CTI
387          EX2(STB                %o5, [%o0 - 1] ASINORMAL,
388                                 add %g2, 1, %g2,
389                                 add %o2, %g2)                   ! Store
/* Even destination address: move two bytes per iteration. */
390 2:      ASI_SETSRC_NOBLK                                        ! LSU   Group
391         EX(LDUB                 [%o1] ASINORMAL, %o5, 
392                                 add %o2, %g2)                   ! Load  Group
393         add                     %o0, 2, %o0                     ! IEU0
394         EX2(LDUB                [%o1 + 1] ASINORMAL, %g3,
395                                 sub %o0, 2, %o0,
396                                 add %o2, %g2)                   ! Load  Group
397         ASI_SETDST_NOBLK                                        ! LSU   Group
398         subcc                   %g2, 2, %g2                     ! IEU1  Group
399         EX2(STB                 %o5, [%o0 - 2] ASINORMAL,
400                                 add %g2, 2, %g2,
401                                 add %o2, %g2)                   ! Store
402         add                     %o1, 2, %o1                     ! IEU0
403         bne,pt                  %xcc, 2b                        ! CTI   Group
404          EX2(STB                %g3, [%o0 - 1] ASINORMAL,
405                                 add %g2, 1, %g2,
406                                 add %o2, %g2)                   ! Store
/* Join point once the destination is 8-byte aligned; computes
 * %g5 = %o0 & 0x38 and sets the condition codes for the branch that
 * follows.
 * Local label 3 is the target of the "be,pn %xcc, 3f" taken by the
 * single-byte alignment step when %g2 reaches zero, so it has to exist in
 * both build variants.  The kernel variant previously lacked it, which
 * would have bound that branch to whatever "3:" comes next in the file.
 * In the kernel build the label sits ahead of VISEntry (defined outside
 * this file) so that this path runs VISEntry exactly like the fall-through
 * from the two-byte loop and the direct branch from VIS_enter.  In the
 * non-kernel build the direct branch from VIS_enter has already executed
 * the andcc in its delay slot, hence dest_is_8byte_aligned follows the
 * instruction there.
 */
407 #ifdef __KERNEL__
3:
409 dest_is_8byte_aligned:
410         VISEntry
411         andcc                   %o0, 0x38, %g5                  ! IEU1  Group
412 #else
413 3:      andcc                   %o0, 0x38, %g5                  ! IEU1  Group
414 dest_is_8byte_aligned:
415 #endif
/* Bring the (already 8-byte aligned) destination up to a 64-byte boundary.
 * The condition codes come from the preceding "andcc %o0, 0x38, %g5": if
 * that result is zero, branch straight to dest_is_64byte_aligned.
 * Otherwise %g5 = 64 - %g5 bytes are moved, 8 per step, and %o2 is reduced
 * by %g5 up front.  alignaddr rounds the source address down into %g1 and
 * each step loads the next source doubleword, merges it with the previous
 * one through faligndata into %f0 and stores %f0 to [%o0].  The loop body
 * is written out twice so that %f4 and %f6 alternate as "previous" and
 * "next" without a register move.
 * Fixups: every stub computes %o1 = %o2 + %g5; the store stubs first add
 * back the 8 already subtracted from %g5.
 */
416         be,pt                   %icc, dest_is_64byte_aligned    ! CTI
417          mov                    64, %g1                         ! IEU0
418         fmovd                   %f0, %f2                        ! FPU
419         sub                     %g1, %g5, %g5                   ! IEU0  Group
420         ASI_SETSRC_NOBLK                                        ! LSU   Group
421         alignaddr               %o1, %g0, %g1                   ! GRU   Group
422         EXO2(LDDF               [%g1] ASINORMAL, %f4)           ! Load  Group
423         sub                     %o2, %g5, %o2                   ! IEU0
/* First half of the unrolled step: previous = %f4, next = %f6. */
424 1:      EX(LDDF                 [%g1 + 0x8] ASINORMAL, %f6,
425                                 add %o2, %g5)                   ! Load  Group
426         add                     %g1, 0x8, %g1                   ! IEU0  Group
427         subcc                   %g5, 8, %g5                     ! IEU1
428         ASI_SETDST_NOBLK                                        ! LSU   Group
429         faligndata              %f4, %f6, %f0                   ! GRU   Group
430         EX2(STDF                %f0, [%o0] ASINORMAL,
431                                 add %g5, 8, %g5,
432                                 add %o2, %g5)                   ! Store
433         add                     %o1, 8, %o1                     ! IEU0  Group
434         be,pn                   %xcc, dest_is_64byte_aligned    ! CTI
435          add                    %o0, 8, %o0                     ! IEU1
/* Second half of the unrolled step: previous = %f6, next = %f4. */
436         ASI_SETSRC_NOBLK                                        ! LSU   Group
437         EX(LDDF                 [%g1 + 0x8] ASINORMAL, %f4,
438                                 add %o2, %g5)                   ! Load  Group
439         add                     %g1, 8, %g1                     ! IEU0
440         subcc                   %g5, 8, %g5                     ! IEU1
441         ASI_SETDST_NOBLK                                        ! LSU   Group
442         faligndata              %f6, %f4, %f0                   ! GRU   Group
443         EX2(STDF                %f0, [%o0] ASINORMAL,
444                                 add %g5, 8, %g5,
445                                 add %o2, %g5)                   ! Store
446         add                     %o1, 8, %o1                     ! IEU0
447         ASI_SETSRC_NOBLK                                        ! LSU   Group
448         bne,pt                  %xcc, 1b                        ! CTI   Group
449          add                    %o0, 8, %o0                     ! IEU0
/* dest_is_64byte_aligned: set up the block-copy loop and dispatch into it.
 * Register results of the arithmetic below:
 *   %g7 = (%o2 - 0x40) & ~0x3f, then reduced by a further 0x80 after the
 *         preloads; it is the "len" argument of the LOOP_CHUNK macros
 *   %g2 = (%o1 >> 3) & 7, the doubleword index of the source inside its
 *         64-byte block; it selects the visN0 group
 *   %g3 = ((%o2 - %g7) & ~7) - 0x10
 *   %o2 = %o2 - %g7 - %g3
 *   %g1 = original %o1 + %g7 + %g3
 *   %o1 = original %o1 rounded down to a 64-byte boundary
 * "alignaddr %g1, %g0, %g0" is executed only for its effect on the
 * alignment state used by faligndata; its register result is discarded.
 * Three 64-byte blocks are then preloaded into %f0, %f16 and %f32.
 */
450 dest_is_64byte_aligned:
451         membar            #LoadStore | #StoreStore | #StoreLoad ! LSU   Group
452 #ifndef __KERNEL__
453         wr                      %g0, ASI_BLK_P, %asi            ! LSU   Group
454 #endif
455         subcc                   %o2, 0x40, %g7                  ! IEU1  Group
456         mov                     %o1, %g1                        ! IEU0
457         andncc                  %g7, (0x40 - 1), %g7            ! IEU1  Group
458         srl                     %g1, 3, %g2                     ! IEU0
459         sub                     %o2, %g7, %g3                   ! IEU0  Group
460         andn                    %o1, (0x40 - 1), %o1            ! IEU1
461         and                     %g2, 7, %g2                     ! IEU0  Group
462         andncc                  %g3, 0x7, %g3                   ! IEU1
463         fmovd                   %f0, %f2                        ! FPU
464         sub                     %g3, 0x10, %g3                  ! IEU0  Group
465         sub                     %o2, %g7, %o2                   ! IEU1
466 #ifdef __KERNEL__
467         or                      asi_src, ASI_BLK_OR, asi_src    ! IEU0  Group
468         or                      asi_dest, ASI_BLK_OR, asi_dest  ! IEU1
469 #endif
470         alignaddr               %g1, %g0, %g0                   ! GRU   Group
471         add                     %g1, %g7, %g1                   ! IEU0  Group
472         subcc                   %o2, %g3, %o2                   ! IEU1
473         ASI_SETSRC_BLK                                          ! LSU   Group
474         EXVIS1(LDBLK            [%o1 + 0x00] ASIBLK, %f0)       ! LSU   Group
475         add                     %g1, %g3, %g1                   ! IEU0
476         EXVIS1(LDBLK            [%o1 + 0x40] ASIBLK, %f16)      ! LSU   Group
477         sub                     %g7, 0x80, %g7                  ! IEU0
478         EXVIS(LDBLK             [%o1 + 0x80] ASIBLK, %f32)      ! LSU   Group
/* Computed jump to vis00 + (%g2 << 9): one 512-byte slot per source
 * doubleword offset.  The kernel build forms the absolute address of vis00
 * with sethi/or; the non-kernel build derives it from %pc.  The delay slot
 * moves %o1 past the three preloaded blocks (0xc0 bytes).
 */
479 #ifdef __KERNEL__
480 vispc:  sll                     %g2, 9, %g2                     ! IEU0  Group
481         sethi                   %hi(vis00), %g5                 ! IEU1
482         or                      %g5, %lo(vis00), %g5            ! IEU0  Group
483         jmpl                    %g5 + %g2, %g0                  ! CTI   Group brk forced
484          addcc                  %o1, 0xc0, %o1                  ! IEU1  Group
485 #else
486                                                                 ! Clk1  Group 8-(
487                                                                 ! Clk2  Group 8-(
488                                                                 ! Clk3  Group 8-(
489                                                                 ! Clk4  Group 8-(
490 vispc:  rd                      %pc, %g5                        ! PDU   Group 8-(
491         addcc                   %g5, %lo(vis00 - vispc), %g5    ! IEU1  Group
492         sll                     %g2, 9, %g2                     ! IEU0
493         jmpl                    %g5 + %g2, %g0                  ! CTI   Group brk forced
494          addcc                  %o1, 0xc0, %o1                  ! IEU1  Group
495 #endif
/* vis00 group: jump-table slot for source doubleword offset 0.
 * The first three rows are the steady-state loop.  Each row aligns one
 * eight-doubleword window of the preloaded registers into %f48..%f62
 * (FREG_FROB), then LOOP_CHUNKn block-loads the next source block into the
 * register bank that window started in (%f0, %f16, %f32 in turn) and
 * block-stores %f48.  The fourth row branches back to vis00+4, i.e. past
 * the first faligndata of vis00, which is repeated in the delay slot.
 * vis01 / vis02 / vis03 are the exits taken when the length counter
 * reaches zero in row 1 / 2 / 3: they align and store the two register
 * windows still pending and branch to finish_f0 / finish_f16 / finish_f32
 * (defined outside the visible part of this file) with "membar #Sync" in
 * the delay slot.
 * NOTE(review): the computed jump at vispc assumes every group occupies
 * exactly 512 bytes -- re-check after any change to the macros used here.
 */
496         .align                  512             /* OK, here comes the fun part... */
497 vis00:FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16) LOOP_CHUNK1(o1, o0, g7, vis01)
498       FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) LOOP_CHUNK2(o1, o0, g7, vis02)
499       FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)  LOOP_CHUNK3(o1, o0, g7, vis03)
500       b,pt                      %xcc, vis00+4; faligndata %f0, %f2, %f48
501 vis01:FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) STORE_SYNC(o0, f48) membar #Sync
502       FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)  STORE_JUMP(o0, f48, finish_f0) membar #Sync
503 vis02:FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)  STORE_SYNC(o0, f48) membar #Sync
504       FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16) STORE_JUMP(o0, f48, finish_f16) membar #Sync
505 vis03:FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16) STORE_SYNC(o0, f48) membar #Sync
506       FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) STORE_JUMP(o0, f48, finish_f32) membar #Sync
507       VISLOOP_PAD
508 vis10:FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) LOOP_CHUNK1(o1, o0, g7, vis11)
509       FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) LOOP_CHUNK2(o1, o0, g7, vis12)
510       FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)  LOOP_CHUNK3(o1, o0, g7, vis13)
511       b,pt                      %xcc, vis10+4; faligndata %f2, %f4, %f48
512 vis11:FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) STORE_SYNC(o0, f48) membar #Sync
513       FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)  STORE_JUMP(o0, f48, finish_f2) membar #Sync
514 vis12:FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)  STORE_SYNC(o0, f48) membar #Sync
515       FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) STORE_JUMP(o0, f48, finish_f18) membar #Sync
516 vis13:FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) STORE_SYNC(o0, f48) membar #Sync
517       FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) STORE_JUMP(o0, f48, finish_f34) membar #Sync
518       VISLOOP_PAD
519 vis20:FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) LOOP_CHUNK1(o1, o0, g7, vis21)
520       FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) LOOP_CHUNK2(o1, o0, g7, vis22)
521       FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)  LOOP_CHUNK3(o1, o0, g7, vis23)
522       b,pt                      %xcc, vis20+4; faligndata %f4, %f6, %f48
523 vis21:FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) STORE_SYNC(o0, f48) membar #Sync
524       FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)  STORE_JUMP(o0, f48, finish_f4) membar #Sync
525 vis22:FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)  STORE_SYNC(o0, f48) membar #Sync
526       FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) STORE_JUMP(o0, f48, finish_f20) membar #Sync
527 vis23:FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) STORE_SYNC(o0, f48) membar #Sync
528       FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) STORE_JUMP(o0, f48, finish_f36) membar #Sync
529       VISLOOP_PAD
/* vis30: unrolled VIS copy loop for the variant whose register window starts
 * at %f6.  Three FREG_FROB steps of nine consecutive even registers each,
 * every step starting on the previous step's last register, walk once around
 * the %f0..%f46 ring and return to %f6.
 * LOOP_CHUNK1/2/3 are given the exit labels vis31/vis32/vis33; each exit path
 * does a STORE_SYNC, one more FREG_FROB step, and a STORE_JUMP to the
 * finish_fN stub named after that step's last register.
 * The back-branch targets vis30+4 with faligndata %f6, %f8, %f48 in its delay
 * slot -- presumably standing in for the skipped first instruction of
 * FREG_FROB (macro defined outside this chunk; confirm).
 */
530 vis30:FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) LOOP_CHUNK1(o1, o0, g7, vis31)
531       FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) LOOP_CHUNK2(o1, o0, g7, vis32)
532       FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)  LOOP_CHUNK3(o1, o0, g7, vis33)
533       b,pt                      %xcc, vis30+4; faligndata %f6, %f8, %f48
/* Exit paths, one per LOOP_CHUNK above. */
534 vis31:FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) STORE_SYNC(o0, f48) membar #Sync
535       FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)  STORE_JUMP(o0, f48, finish_f6) membar #Sync
536 vis32:FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)  STORE_SYNC(o0, f48) membar #Sync
537       FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) STORE_JUMP(o0, f48, finish_f22) membar #Sync
538 vis33:FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) STORE_SYNC(o0, f48) membar #Sync
539       FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) STORE_JUMP(o0, f48, finish_f38) membar #Sync
540       VISLOOP_PAD
/* vis40: unrolled VIS copy loop for the variant whose register window starts
 * at %f8.  Three FREG_FROB steps of nine consecutive even registers each,
 * every step starting on the previous step's last register, walk once around
 * the %f0..%f46 ring and return to %f8.
 * LOOP_CHUNK1/2/3 are given the exit labels vis41/vis42/vis43; each exit path
 * does a STORE_SYNC, one more FREG_FROB step, and a STORE_JUMP to the
 * finish_fN stub named after that step's last register.
 * The back-branch targets vis40+4 with faligndata %f8, %f10, %f48 in its
 * delay slot -- presumably standing in for the skipped first instruction of
 * FREG_FROB (macro defined outside this chunk; confirm).
 */
541 vis40:FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) LOOP_CHUNK1(o1, o0, g7, vis41)
542       FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) LOOP_CHUNK2(o1, o0, g7, vis42)
543       FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)  LOOP_CHUNK3(o1, o0, g7, vis43)
544       b,pt                      %xcc, vis40+4; faligndata %f8, %f10, %f48
/* Exit paths, one per LOOP_CHUNK above. */
545 vis41:FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) STORE_SYNC(o0, f48) membar #Sync
546       FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)  STORE_JUMP(o0, f48, finish_f8) membar #Sync
547 vis42:FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)  STORE_SYNC(o0, f48) membar #Sync
548       FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) STORE_JUMP(o0, f48, finish_f24) membar #Sync
549 vis43:FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) STORE_SYNC(o0, f48) membar #Sync
550       FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) STORE_JUMP(o0, f48, finish_f40) membar #Sync
551       VISLOOP_PAD
/* vis50: unrolled VIS copy loop for the variant whose register window starts
 * at %f10.  Three FREG_FROB steps of nine consecutive even registers each,
 * every step starting on the previous step's last register, walk once around
 * the %f0..%f46 ring and return to %f10.
 * LOOP_CHUNK1/2/3 are given the exit labels vis51/vis52/vis53; each exit path
 * does a STORE_SYNC, one more FREG_FROB step, and a STORE_JUMP to the
 * finish_fN stub named after that step's last register.
 * The back-branch targets vis50+4 with faligndata %f10, %f12, %f48 in its
 * delay slot -- presumably standing in for the skipped first instruction of
 * FREG_FROB (macro defined outside this chunk; confirm).
 */
552 vis50:FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) LOOP_CHUNK1(o1, o0, g7, vis51)
553       FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) LOOP_CHUNK2(o1, o0, g7, vis52)
554       FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10) LOOP_CHUNK3(o1, o0, g7, vis53)
555       b,pt                      %xcc, vis50+4; faligndata %f10, %f12, %f48
/* Exit paths, one per LOOP_CHUNK above. */
556 vis51:FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) STORE_SYNC(o0, f48) membar #Sync
557       FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10) STORE_JUMP(o0, f48, finish_f10) membar #Sync
558 vis52:FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10) STORE_SYNC(o0, f48) membar #Sync
559       FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) STORE_JUMP(o0, f48, finish_f26) membar #Sync
560 vis53:FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) STORE_SYNC(o0, f48) membar #Sync
561       FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) STORE_JUMP(o0, f48, finish_f42) membar #Sync
562       VISLOOP_PAD
/* vis60: unrolled VIS copy loop for the variant whose register window starts
 * at %f12.  Three FREG_FROB steps of nine consecutive even registers each,
 * every step starting on the previous step's last register, walk once around
 * the %f0..%f46 ring and return to %f12.
 * LOOP_CHUNK1/2/3 are given the exit labels vis61/vis62/vis63; each exit path
 * does a STORE_SYNC, one more FREG_FROB step, and a STORE_JUMP to the
 * finish_fN stub named after that step's last register.
 * The back-branch targets vis60+4 with faligndata %f12, %f14, %f48 in its
 * delay slot -- presumably standing in for the skipped first instruction of
 * FREG_FROB (macro defined outside this chunk; confirm).
 */
563 vis60:FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) LOOP_CHUNK1(o1, o0, g7, vis61)
564       FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) LOOP_CHUNK2(o1, o0, g7, vis62)
565       FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12) LOOP_CHUNK3(o1, o0, g7, vis63)
566       b,pt                      %xcc, vis60+4; faligndata %f12, %f14, %f48
/* Exit paths, one per LOOP_CHUNK above. */
567 vis61:FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) STORE_SYNC(o0, f48) membar #Sync
568       FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12) STORE_JUMP(o0, f48, finish_f12) membar #Sync
569 vis62:FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12) STORE_SYNC(o0, f48) membar #Sync
570       FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) STORE_JUMP(o0, f48, finish_f28) membar #Sync
571 vis63:FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) STORE_SYNC(o0, f48) membar #Sync
572       FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) STORE_JUMP(o0, f48, finish_f44) membar #Sync
573       VISLOOP_PAD
/* vis70: unrolled VIS copy loop for the variant whose register window starts
 * at %f14.  Three FREG_FROB steps of nine consecutive even registers each,
 * every step starting on the previous step's last register, walk once around
 * the %f0..%f46 ring and return to %f14.
 * LOOP_CHUNK1/2/3 are given the exit labels vis71/vis72/vis73; each exit path
 * does a STORE_SYNC, one more FREG_FROB step, and a STORE_JUMP to the
 * finish_fN stub named after that step's last register.
 * The back-branch targets vis70+4 with faligndata %f14, %f16, %f48 in its
 * delay slot -- presumably standing in for the skipped first instruction of
 * FREG_FROB (macro defined outside this chunk; confirm).
 */
574 vis70:FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) LOOP_CHUNK1(o1, o0, g7, vis71)
575       FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) LOOP_CHUNK2(o1, o0, g7, vis72)
576       FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14) LOOP_CHUNK3(o1, o0, g7, vis73)
577       b,pt                      %xcc, vis70+4; faligndata %f14, %f16, %f48
/* Exit paths, one per LOOP_CHUNK above. */
578 vis71:FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) STORE_SYNC(o0, f48) membar #Sync
579       FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14) STORE_JUMP(o0, f48, finish_f14) membar #Sync
580 vis72:FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14) STORE_SYNC(o0, f48) membar #Sync
581       FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) STORE_JUMP(o0, f48, finish_f30) membar #Sync
582 vis73:FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) STORE_SYNC(o0, f48) membar #Sync
583       FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) STORE_JUMP(o0, f48, finish_f46) membar #Sync
584       VISLOOP_PAD
/* Exit stubs for the unrolled VIS loops: one label per even double register
 * %f0..%f46.  The STORE_JUMP invocations on the loop exit paths name these
 * labels as their targets.  Every entry passes (o0, fN, fN+2, g3) to
 * FINISH_VISCHUNK, except the entries at %f14 and %f30, which use
 * UNEVEN_VISCHUNK with %f0 as the second register, and the last entry
 * (%f46), which uses UNEVEN_VISCHUNK_LAST.
 * NOTE(review): the three macros are defined outside this chunk, so whether
 * a stub falls through into the next one or branches away cannot be told
 * from here -- check the macro bodies before reordering entries.
 */
585 finish_f0:      FINISH_VISCHUNK(o0, f0,  f2,  g3)
586 finish_f2:      FINISH_VISCHUNK(o0, f2,  f4,  g3)
587 finish_f4:      FINISH_VISCHUNK(o0, f4,  f6,  g3)
588 finish_f6:      FINISH_VISCHUNK(o0, f6,  f8,  g3)
589 finish_f8:      FINISH_VISCHUNK(o0, f8,  f10, g3)
590 finish_f10:     FINISH_VISCHUNK(o0, f10, f12, g3)
591 finish_f12:     FINISH_VISCHUNK(o0, f12, f14, g3)
592 finish_f14:     UNEVEN_VISCHUNK(o0, f14, f0,  g3)
593 finish_f16:     FINISH_VISCHUNK(o0, f16, f18, g3)
594 finish_f18:     FINISH_VISCHUNK(o0, f18, f20, g3)
595 finish_f20:     FINISH_VISCHUNK(o0, f20, f22, g3)
596 finish_f22:     FINISH_VISCHUNK(o0, f22, f24, g3)
597 finish_f24:     FINISH_VISCHUNK(o0, f24, f26, g3)
598 finish_f26:     FINISH_VISCHUNK(o0, f26, f28, g3)
599 finish_f28:     FINISH_VISCHUNK(o0, f28, f30, g3)
600 finish_f30:     UNEVEN_VISCHUNK(o0, f30, f0,  g3)
601 finish_f32:     FINISH_VISCHUNK(o0, f32, f34, g3)
602 finish_f34:     FINISH_VISCHUNK(o0, f34, f36, g3)
603 finish_f36:     FINISH_VISCHUNK(o0, f36, f38, g3)
604 finish_f38:     FINISH_VISCHUNK(o0, f38, f40, g3)
605 finish_f40:     FINISH_VISCHUNK(o0, f40, f42, g3)
606 finish_f42:     FINISH_VISCHUNK(o0, f42, f44, g3)
607 finish_f44:     FINISH_VISCHUNK(o0, f44, f46, g3)
608 finish_f46:     UNEVEN_VISCHUNK_LAST(o0, f46, f0,  g3)
/* vis_out_slk / vis_slk: eight-bytes-per-step copy loop using faligndata.
 * Registers: %o1 = source pointer, %o0 = destination pointer, %g3 = byte
 * counter that is decremented by 8 per step.
 * The loop body is unrolled twice so that the freshly loaded double
 * alternates between %f2 and %f0; faligndata combines the previous and the
 * new double into %f8, which is then stored at [%o0].
 * Kernel builds entering at vis_out_slk first rewrite asi_src
 * (asi_src ^= ASI_BLK_XOR1 ^ (old asi_src >> 3)); presumably this turns a
 * block-transfer ASI into its non-block counterpart -- confirm against the
 * ASI_BLK_XOR1 definition, which is outside this chunk.
 */
609 vis_out_slk:
610 #ifdef __KERNEL__
611         srl             asi_src, 3, %g5                         ! IEU0  Group
612         xor             asi_src, ASI_BLK_XOR1, asi_src          ! IEU1
613         xor             asi_src, %g5, asi_src                   ! IEU0  Group
614 #endif
/* First half: new data goes to %f2.  Leaves for vis_out_slp once %g3 has
 * gone negative; the %o0 advance in the delay slot runs on both paths.
 */
615 vis_slk:ASI_SETSRC_NOBLK                                        ! LSU   Group
616         EXVIS4(LDDF     [%o1] ASINORMAL, %f2)                   ! Load  Group
617         add             %o1, 8, %o1                             ! IEU0
618         subcc           %g3, 8, %g3                             ! IEU1
619         ASI_SETDST_NOBLK                                        ! LSU   Group
620         faligndata      %f0, %f2, %f8                           ! GRU   Group
621         EXVIS5(STDF     %f8, [%o0] ASINORMAL)                   ! Store
622         bl,pn           %xcc, vis_out_slp                       ! CTI
623          add            %o0, 8, %o0                             ! IEU0  Group
/* Second half: new data goes to %f0.  Loops while %g3 >= 0, otherwise falls
 * through to the code that follows this loop.
 */
624         ASI_SETSRC_NOBLK                                        ! LSU   Group
625         EXVIS4(LDDF     [%o1] ASINORMAL, %f0)                   ! Load  Group
626         add             %o1, 8, %o1                             ! IEU0
627         subcc           %g3, 8, %g3                             ! IEU1
628         ASI_SETDST_NOBLK                                        ! LSU   Group
629         faligndata      %f2, %f0, %f8                           ! GRU   Group
630         EXVIS5(STDF     %f8, [%o0] ASINORMAL)                   ! Store
631         bge,pt          %xcc, vis_slk                           ! CTI
632          add            %o0, 8, %o0                             ! IEU0  Group
/* Byte-at-a-time tail of the VIS copy and the common VIS return.
 * %o2 = bytes still to copy, %o0 = destination pointer; the source pointer
 * is reloaded into %o1 from %g1 (what %g1 holds is set up outside this
 * chunk -- presumably the true, unrounded source address; confirm).
 *
 * vis_out_slp: in kernel builds, return via vis_ret when %o2 is zero,
 * otherwise enter the byte loop at vis_slp+4 with ASI_SETSRC_NOBLK in the
 * branch delay slot (vis_slp's own first line is that same macro, so the +4
 * skips a copy that has just been executed; this relies on the macro being
 * a single instruction there -- confirm).  In non-kernel builds
 * vis_out_slp simply falls into vis_out.
 */
633 vis_out_slp:
634 #ifdef __KERNEL__
635         brz,pt          %o2, vis_ret                            ! CTI   Group
636          mov            %g1, %o1                                ! IEU0
637         ba,pt           %xcc, vis_slp+4                         ! CTI   Group
638          ASI_SETSRC_NOBLK                                       ! LSU   Group
639 #endif
/* vis_out: same zero-length test; kernel builds then apply the asi_src
 * rewrite (asi_src ^= ASI_BLK_XOR1 ^ (old asi_src >> 3)) before the loop.
 */
640 vis_out:brz,pt          %o2, vis_ret                            ! CTI   Group
641          mov            %g1, %o1                                ! IEU0
642 #ifdef __KERNEL__
643         srl             asi_src, 3, %g5                         ! IEU0  Group
644         xor             asi_src, ASI_BLK_XOR1, asi_src          ! IEU1
645         xor             asi_src, %g5, asi_src                   ! IEU0  Group
646 #endif
/* vis_slp: copy one byte per iteration until %o2 reaches zero.  Both
 * pointers are advanced before the store, hence the [%o0 - 1] address in
 * the delay slot.
 */
647 vis_slp:ASI_SETSRC_NOBLK                                        ! LSU   Group
648         EXO2(LDUB       [%o1] ASINORMAL, %g5)                   ! LOAD
649         add             %o1, 1, %o1                             ! IEU0
650         add             %o0, 1, %o0                             ! IEU1
651         ASI_SETDST_NOBLK                                        ! LSU   Group
652         subcc           %o2, 1, %o2                             ! IEU1
653         bne,pt          %xcc, vis_slp                           ! CTI
654          EX(STB         %g5, [%o0 - 1] ASINORMAL,
655                         add %o2, 1)                             ! Store Group
/* vis_ret: order the stores, then leave through FPU_CLEAN_RETL (in kernel
 * builds that macro runs VISExit, clears %o0 and restores %asi from the
 * thread's current_ds).
 */
656 vis_ret:membar          #StoreLoad | #StoreStore                ! LSU   Group
657         FPU_CLEAN_RETL
/* __memcpy_short: byte copy for short lengths; __memcpy_entry sends every
 * length of 15 or less here.
 * %o0 = destination, %o1 = source, %o2 = length.
 * An odd length is made even by copying a single byte first (label 1, which
 * starts in the delay slot of the parity branch); the loop at label 2 then
 * moves two bytes per iteration until %o2 reaches zero.
 * NOTE(review): with %o2 == 0 the parity branch goes straight to label 2,
 * where "subcc %o2, 2" yields a non-zero result and the loop keeps running.
 * The userland memcpy entry filters a zero length before getting here;
 * kernel callers presumably guarantee a non-zero length -- confirm.
 */
660 __memcpy_short:
661         andcc           %o2, 1, %g0                             ! IEU1  Group
662         be,pt           %icc, 2f                                ! CTI
663 1:       ASI_SETSRC_NOBLK                                       ! LSU   Group
664         EXO2(LDUB       [%o1] ASINORMAL, %g5)                   ! LOAD  Group
665         add             %o1, 1, %o1                             ! IEU0
666         add             %o0, 1, %o0                             ! IEU1
667         ASI_SETDST_NOBLK                                        ! LSU   Group
668         subcc           %o2, 1, %o2                             ! IEU1  Group
669         be,pn           %xcc, short_ret                         ! CTI
670          EX(STB         %g5, [%o0 - 1] ASINORMAL,
671                         add %o2, 1)                             ! Store
/* Two bytes per iteration: both loads first (%g5, %o5), then both stores
 * relative to the already advanced %o0.
 */
672 2:      ASI_SETSRC_NOBLK                                        ! LSU   Group
673         EXO2(LDUB       [%o1] ASINORMAL, %g5)                   ! LOAD  Group
674         add             %o0, 2, %o0                             ! IEU0
675         EX2(LDUB        [%o1 + 1] ASINORMAL, %o5,
676                         sub %o0, 2, %o0,
677                         add %o2, %g0)                           ! LOAD  Group
678         add             %o1, 2, %o1                             ! IEU0
679         ASI_SETDST_NOBLK                                        ! LSU   Group
680         subcc           %o2, 2, %o2                             ! IEU1  Group
681         EX(STB          %g5, [%o0 - 2] ASINORMAL,
682                         add %o2, 2)                             ! Store
683         bne,pt          %xcc, 2b                                ! CTI
684          EX(STB         %o5, [%o0 - 1] ASINORMAL,
685                         add %o2, 1)                             ! Store
/* Common return for the short path (NORMAL_RETL is defined outside this
 * chunk).
 */
686 short_ret:
687         NORMAL_RETL
/* Userland entry points (only assembled without __KERNEL__).
 * %o0 = destination, %o1 = source, %o2 = length.
 * When the integer registers are only used as 32-bit values (REGS_64BIT not
 * defined) the length is first zero-extended with "srl %o2, 0".
 * A zero length returns immediately through short_ret.  The delay slot
 * copies the destination pointer to %g6 on both paths -- presumably so the
 * return macros can hand back the original destination; confirm against
 * NORMAL_RETL, which is defined outside this chunk.
 * Execution then falls through into __memcpy_entry.
 */
689 #ifndef __KERNEL__
690 memcpy_private:
691 memcpy:
692 #ifndef REGS_64BIT
693         srl             %o2, 0, %o2                             ! IEU1  Group
694 #endif  
695         brz,pn          %o2, short_ret                          ! CTI   Group
696          mov            %o0, %g6                                ! IEU0
697 #endif
/* __memcpy_entry: choose a copy strategy from the length and the pointers.
 * %o0 = destination, %o1 = source, %o2 = length.
 *   length <= 15                 -> __memcpy_short
 *   length >= 64 * 6             -> VIS_enter (defined outside this chunk)
 *   (dst - src) & 3 != 0         -> memcpy_noVIS_misaligned
 *   otherwise                    -> fall through to the integer copy below
 * Side results that later code relies on: %g2 = dst & 7 (computed in the
 * delay slot of the VIS_enter branch, so it is also valid on that path) and
 * the condition codes of "src & 3" from the last delay slot.
 * In kernel builds __memcpy_16plus labels that delay-slot instruction, so
 * the code can also be entered there without the two length checks.
 */
698 __memcpy_entry:
699         cmp             %o2, 15                                 ! IEU1  Group
700         bleu,pn         %xcc, __memcpy_short                    ! CTI
701          cmp            %o2, (64 * 6)                           ! IEU1  Group
702         bgeu,pn         %xcc, VIS_enter                         ! CTI
703 #ifdef __KERNEL__
704 __memcpy_16plus:
705 #endif
706          andcc          %o0, 7, %g2                             ! IEU1  Group
707         sub             %o0, %o1, %g5                           ! IEU0
708         andcc           %g5, 3, %o5                             ! IEU1  Group
709         bne,pn          %xcc, memcpy_noVIS_misaligned           ! CTI
710          andcc          %o1, 3, %g0                             ! IEU1  Group
/* Alignment prologue of the integer copy path.  It is entered with the
 * condition codes of "src & 3"; source and destination are congruent modulo
 * 4 here (the other case branched to memcpy_noVIS_misaligned earlier).
 * Up to one byte and one halfword are copied to make the source 4-byte
 * aligned; with REGS_64BIT one further word makes it 8-byte aligned.
 * The code then sets %g7 = length & -128 (bytes to move in 128-byte chunks)
 * and dispatches to the chunk loop or, if %g7 is zero, to the tail code.
 */
711 #ifdef REGS_64BIT
712         be,a,pt         %xcc, 3f                                ! CTI
713          andcc          %o1, 4, %g0                             ! IEU1  Group
714         andcc           %o1, 1, %g0                             ! IEU1  Group
715 #else /* !REGS_64BIT */
716         be,pt           %xcc, 5f                                ! CTI
717          andcc          %o1, 1, %g0                             ! IEU1  Group
718 #endif /* !REGS_64BIT */
/* Source is not 4-byte aligned.  An even source only needs the halfword
 * step at label 4.  Otherwise one byte is copied; the "src & 2" flags set
 * in the delay slot below are taken from the pointer before that byte, so a
 * set bit means the source is now 4-byte aligned and the halfword step is
 * skipped.
 */
719         be,pn           %xcc, 4f                                ! CTI
720          andcc          %o1, 2, %g0                             ! IEU1  Group
721         ASI_SETSRC_NOBLK                                        ! LSU   Group
722         EXO2(LDUB       [%o1] ASINORMAL, %g2)                   ! Load  Group
723         add             %o1, 1, %o1                             ! IEU0
724         add             %o0, 1, %o0                             ! IEU1
725         sub             %o2, 1, %o2                             ! IEU0  Group
726         ASI_SETDST_NOBLK                                        ! LSU   Group
727         bne,pn          %xcc, 5f                                ! CTI   Group
728          EX(STB         %g2, [%o0 - 1] ASINORMAL,
729                         add %o2, 1)                             ! Store
730 4:      ASI_SETSRC_NOBLK                                        ! LSU   Group
731         EXO2(LDUH       [%o1] ASINORMAL, %g2)                   ! Load  Group
732         add             %o1, 2, %o1                             ! IEU0
733         add             %o0, 2, %o0                             ! IEU1
734         ASI_SETDST_NOBLK                                        ! LSU   Group
735         sub             %o2, 2, %o2                             ! IEU0
736         EX(STH          %g2, [%o0 - 2] ASINORMAL,
737                         add %o2, 2)                             ! Store Group + bubble
/* 64-bit variant: copy one word if the source is not yet 8-byte aligned,
 * then compute %g7.  With no full 128-byte chunk, go to the tail at label 3.
 * With an 8-byte aligned destination, enter the aligned chunk loop at
 * 82f + 4; that branch has no delay-slot instruction of its own here, so
 * the instruction that follows it (the first one of the chunk loop after
 * this prologue) executes in its delay slot -- presumably standing in for
 * the instruction skipped at label 82; the macros are defined outside this
 * chunk, confirm.
 * 32-bit variant: only compute %g7 and skip to label 41 (with
 * %g7 = length & 0x70) when there is no full chunk.
 */
738 #ifdef REGS_64BIT
739 5:      andcc           %o1, 4, %g0                             ! IEU1
740 3:      be,a,pn         %xcc, 2f                                ! CTI
741          andcc          %o2, -128, %g7                          ! IEU1  Group
742         ASI_SETSRC_NOBLK                                        ! LSU   Group
743         EXO2(LDUW       [%o1] ASINORMAL, %g5)                   ! Load  Group
744         add             %o1, 4, %o1                             ! IEU0
745         add             %o0, 4, %o0                             ! IEU1
746         ASI_SETDST_NOBLK                                        ! LSU   Group
747         sub             %o2, 4, %o2                             ! IEU0  Group
748         EX(STW          %g5, [%o0 - 4] ASINORMAL,
749                         add %o2, 4)                             ! Store
750         andcc           %o2, -128, %g7                          ! IEU1  Group
751 2:      be,pn           %xcc, 3f                                ! CTI
752          andcc          %o0, 4, %g0                             ! IEU1  Group
753         be,pn           %xcc, 82f + 4                           ! CTI   Group
754 #else /* !REGS_64BIT */
755 5:      andcc           %o2, -128, %g7                          ! IEU1
756         be,a,pn         %xcc, 41f                               ! CTI
757          andcc          %o2, 0x70, %g7                          ! IEU1  Group
758 #endif /* !REGS_64BIT */
/* Main loop of the integer copy path: four MOVE_BIGCHUNK invocations at
 * offsets 0x00/0x20/0x40/0x60 per iteration, after which both pointers are
 * advanced by 128 and %g7 (bytes left to move in 128-byte chunks) is reduced
 * by 128 until it reaches zero.
 * EXT(5b,35f,VIScopyfixup1) names the range of the four chunks together
 * with VIScopyfixup1 -- presumably an exception-table entry that sends a
 * fault inside the range to that fixup; EXT is defined outside this chunk,
 * confirm.
 */
759 5:      MOVE_BIGCHUNK(o1, o0, 0x00, g1, g3, g5, o5)
760         MOVE_BIGCHUNK(o1, o0, 0x20, g1, g3, g5, o5)
761         MOVE_BIGCHUNK(o1, o0, 0x40, g1, g3, g5, o5)
762         MOVE_BIGCHUNK(o1, o0, 0x60, g1, g3, g5, o5)
763         EXT(5b,35f,VIScopyfixup1)
764 35:     subcc           %g7, 128, %g7                           ! IEU1  Group
765         add             %o1, 128, %o1                           ! IEU0
766         bne,pt          %xcc, 5b                                ! CTI
767          add            %o0, 128, %o0                           ! IEU0  Group
/* Copy the remaining whole 16-byte pieces: %g7 = length & 0x70.
 * If %g7 is zero, go straight to label 80.  Otherwise both pointers are
 * advanced by %g7 up front and a computed jump enters the table of seven
 * MOVE_LASTCHUNK invocations part-way, so that exactly %g7 / 16 of them run
 * before control reaches label 80.
 * The "length & 8" flags set in the delay slot at label 41 are consumed at
 * label 80; this relies on MOVE_LASTCHUNK leaving the condition codes alone
 * (macro defined outside this chunk -- confirm).
 */
768 3:      andcc           %o2, 0x70, %g7                          ! IEU1  Group
769 41:     be,pn           %xcc, 80f                               ! CTI
770          andcc          %o2, 8, %g0                             ! IEU1  Group
/* Kernel build: target = 80f - 2.5 * %g7, i.e. 40 bytes of code (10
 * instructions) for every 16 bytes to copy.
 */
771 #ifdef __KERNEL__
772 79:     sethi           %hi(80f), %o5                           ! IEU0
773         sll             %g7, 1, %g5                             ! IEU0  Group
774         add             %o1, %g7, %o1                           ! IEU1
775         srl             %g7, 1, %g2                             ! IEU0  Group
776         sub             %o5, %g5, %o5                           ! IEU1
777         sub             %o5, %g2, %o5                           ! IEU0  Group
778         jmpl            %o5 + %lo(80f), %g0                     ! CTI   Group brk forced
779          add            %o0, %g7, %o0                           ! IEU0  Group
780 #else
/* Userland build: PC-relative form of the same jump; target =
 * 80f - 2 * %g7, i.e. 32 bytes of code (8 instructions) for every 16 bytes
 * to copy.
 */
781                                                                 ! Clk1 8-(
782                                                                 ! Clk2 8-(
783                                                                 ! Clk3 8-(
784                                                                 ! Clk4 8-(
785 79:     rd              %pc, %o5                                ! PDU   Group
786         sll             %g7, 1, %g5                             ! IEU0  Group
787         add             %o1, %g7, %o1                           ! IEU1
788         sub             %o5, %g5, %o5                           ! IEU0  Group
789         jmpl            %o5 + %lo(80f - 79b), %g0               ! CTI   Group brk forced
790          add            %o0, %g7, %o0                           ! IEU0  Group
791 #endif
/* Jump table body.  The per-chunk code size assumed by the jump arithmetic
 * above must match the expansion of MOVE_LASTCHUNK in each build.
 */
792 36:     MOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g5, o5)
793         MOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g5, o5)
794         MOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g5, o5)
795         MOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g5, o5)
796         MOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g5, o5)
797         MOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g5, o5)
798         MOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g5, o5)
799         EXT(36b,80f,VIScopyfixup2)
/* Final 0..15 bytes of the integer copy path, selected by bits 8, 4, 2 and
 * 1 of the length in %o2.  Each test is made in the delay slot of the
 * previous branch, so every block is entered with the flags for its own
 * bit already set.
 * The second argument of the EX()/EX2() wrappers is an expression in %o2;
 * presumably it is what the fault fixup uses to compute the number of
 * bytes not copied -- the macros are defined outside this chunk, confirm.
 */
800 80:     be,pt           %xcc, 81f                               ! CTI
801          andcc          %o2, 4, %g0                             ! IEU1
/* 8 bytes.  The 64-bit variant loads them at once but stores two separate
 * words: the low half at [%o0 - 4], then the high half (after the srlx by
 * 32) at [%o0 - 8].
 */
802 #ifdef REGS_64BIT
803         ASI_SETSRC_NOBLK                                        ! LSU   Group
804         EX(LDX          [%o1] ASINORMAL, %g2,
805                         and %o2, 0xf)                           ! Load  Group
806         add             %o0, 8, %o0                             ! IEU0
807         ASI_SETDST_NOBLK                                        ! LSU   Group
808         EX(STW          %g2, [%o0 - 0x4] ASINORMAL,
809                         and %o2, 0xf)                           ! Store Group
810         add             %o1, 8, %o1                             ! IEU1
811         srlx            %g2, 32, %g2                            ! IEU0  Group
812         EX2(STW         %g2, [%o0 - 0x8] ASINORMAL,
813                         and %o2, 0xf, %o2,
814                         sub %o2, 4)                             ! Store
815 #else /* !REGS_64BIT */
816         lduw            [%o1], %g2                              ! Load  Group
817         add             %o0, 8, %o0                             ! IEU0
818         lduw            [%o1 + 0x4], %g3                        ! Load  Group
819         add             %o1, 8, %o1                             ! IEU0
820         stw             %g2, [%o0 - 0x8]                        ! Store Group
821         stw             %g3, [%o0 - 0x4]                        ! Store Group
822 #endif /* !REGS_64BIT */
/* 4 bytes. */
823 81:     be,pt           %xcc, 1f                                ! CTI
824          andcc          %o2, 2, %g0                             ! IEU1  Group
825         ASI_SETSRC_NOBLK                                        ! LSU   Group
826         EX(LDUW         [%o1] ASINORMAL, %g2,
827                         and %o2, 0x7)                           ! Load  Group
828         add             %o1, 4, %o1                             ! IEU0
829         ASI_SETDST_NOBLK                                        ! LSU   Group
830         EX(STW          %g2, [%o0] ASINORMAL,
831                         and %o2, 0x7)                           ! Store Group
832         add             %o0, 4, %o0                             ! IEU0
/* 2 bytes. */
833 1:      be,pt           %xcc, 1f                                ! CTI
834          andcc          %o2, 1, %g0                             ! IEU1  Group
835         ASI_SETSRC_NOBLK                                        ! LSU   Group
836         EX(LDUH         [%o1] ASINORMAL, %g2,
837                         and %o2, 0x3)                           ! Load  Group
838         add             %o1, 2, %o1                             ! IEU0
839         ASI_SETDST_NOBLK                                        ! LSU   Group
840         EX(STH          %g2, [%o0] ASINORMAL,
841                         and %o2, 0x3)                           ! Store Group
842         add             %o0, 2, %o0                             ! IEU0
/* 1 byte; the pointers are not advanced any more. */
843 1:      be,pt           %xcc, normal_retl                       ! CTI
844          nop                                                    ! IEU1
845         ASI_SETSRC_NOBLK                                        ! LSU   Group
846         EX(LDUB         [%o1] ASINORMAL, %g2,
847                         add %g0, 1)                             ! Load  Group
848         ASI_SETDST_NOBLK                                        ! LSU   Group
849         EX(STB          %g2, [%o0] ASINORMAL,
850                         add %g0, 1)                             ! Store Group + bubble
851 normal_retl:
852         NORMAL_RETL
/* REGS_64BIT only: copy path for an 8-byte aligned destination (the
 * alignment prologue branches to 82f + 4 in that case, after making the
 * source 8-byte aligned).
 * Structure mirrors the other integer path: a 128-bytes-per-iteration loop
 * (two MOVE_BIGALIGNCHUNK invocations at offsets 0x00 and 0x40) counted by
 * %g7, a computed jump into seven MOVE_LASTALIGNCHUNK invocations for
 * length & 0x70, and a tail for bits 8, 4, 2 and 1 of the length.
 */
854 #ifdef REGS_64BIT
855 82:     MOVE_BIGALIGNCHUNK(o1, o0, 0x00, g1, g3, g5, o5)
856         MOVE_BIGALIGNCHUNK(o1, o0, 0x40, g1, g3, g5, o5)
857         EXT(82b,37f,VIScopyfixup3)
858 37:     subcc           %g7, 128, %g7                           ! IEU1  Group
859         add             %o1, 128, %o1                           ! IEU0
860         bne,pt          %xcc, 82b                               ! CTI
861          add            %o0, 128, %o0                           ! IEU0  Group
/* %g7 = length & 0x70; nothing to do in 16-byte pieces -> label 84.  The
 * "length & 8" flags from the delay slot are consumed at label 84, which
 * relies on MOVE_LASTALIGNCHUNK not touching the condition codes (macro
 * defined outside this chunk -- confirm).
 */
862         andcc           %o2, 0x70, %g7                          ! IEU1
863         be,pn           %xcc, 84f                               ! CTI
864          andcc          %o2, 8, %g0                             ! IEU1  Group
/* Kernel build: target = 84f - 1.5 * %g7, i.e. 24 bytes of code (6
 * instructions) for every 16 bytes to copy.
 */
865 #ifdef __KERNEL__
866 83:     srl             %g7, 1, %g5                             ! IEU0
867         sethi           %hi(84f), %o5                           ! IEU0  Group
868         add             %g7, %g5, %g5                           ! IEU1
869         add             %o1, %g7, %o1                           ! IEU0  Group
870         sub             %o5, %g5, %o5                           ! IEU1
871         jmpl            %o5 + %lo(84f), %g0                     ! CTI   Group brk forced
872          add            %o0, %g7, %o0                           ! IEU0  Group
873 #else
/* Userland build: target = 84f - %g7, i.e. 16 bytes of code (4
 * instructions) for every 16 bytes to copy.
 */
874                                                                 ! Clk1 8-(
875                                                                 ! Clk2 8-(
876                                                                 ! Clk3 8-(
877                                                                 ! Clk4 8-(
878 83:     rd              %pc, %o5                                ! PDU   Group
879         add             %o1, %g7, %o1                           ! IEU0  Group
880         sub             %o5, %g7, %o5                           ! IEU1
881         jmpl            %o5 + %lo(84f - 83b), %g0               ! CTI   Group brk forced
882          add            %o0, %g7, %o0                           ! IEU0  Group
883 #endif
/* Jump table body; both pointers were already advanced by %g7 above. */
884 38:     MOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2, g3)
885         MOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2, g3)
886         MOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2, g3)
887         MOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2, g3)
888         MOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2, g3)
889         MOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2, g3)
890         MOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2, g3)
891         EXT(38b,84f,VIScopyfixup4)
/* Tail: 8 bytes (a single 64-bit load and store), then 4, 2 and 1.  Each
 * bit test sits in the delay slot of the previous branch.
 */
892 84:     be,pt           %xcc, 85f                               ! CTI   Group
893          andcc          %o2, 4, %g0                             ! IEU1
894         ASI_SETSRC_NOBLK                                        ! LSU   Group
895         EX(LDX          [%o1] ASINORMAL, %g2,
896                         and %o2, 0xf)                           ! Load  Group
897         add             %o0, 8, %o0                             ! IEU0
898         ASI_SETDST_NOBLK                                        ! LSU   Group
899         add             %o1, 8, %o1                             ! IEU0  Group
900         EX(STX          %g2, [%o0 - 0x8] ASINORMAL,
901                         and %o2, 0xf)                           ! Store
902 85:     be,pt           %xcc, 1f                                ! CTI
903          andcc          %o2, 2, %g0                             ! IEU1  Group
904         ASI_SETSRC_NOBLK                                        ! LSU   Group
905         EX(LDUW         [%o1] ASINORMAL, %g2,
906                         and %o2, 0x7)                           ! Load  Group
907         add             %o0, 4, %o0                             ! IEU0
908         ASI_SETDST_NOBLK                                        ! LSU   Group
909         add             %o1, 4, %o1                             ! IEU0  Group
910         EX(STW          %g2, [%o0 - 0x4] ASINORMAL,
911                         and %o2, 0x7)                           ! Store
912 1:      be,pt           %xcc, 1f                                ! CTI
913          andcc          %o2, 1, %g0                             ! IEU1  Group
914         ASI_SETSRC_NOBLK                                        ! LSU   Group
915         EX(LDUH         [%o1] ASINORMAL, %g2,
916                         and %o2, 0x3)                           ! Load  Group
917         add             %o0, 2, %o0                             ! IEU0
918         ASI_SETDST_NOBLK                                        ! LSU   Group
919         add             %o1, 2, %o1                             ! IEU0  Group
920         EX(STH          %g2, [%o0 - 0x2] ASINORMAL,
921                         and %o2, 0x3)                           ! Store
922 1:      be,pt           %xcc, 1f                                ! CTI
923          nop                                                    ! IEU0  Group
924         ASI_SETSRC_NOBLK                                        ! LSU   Group
925         EX(LDUB         [%o1] ASINORMAL, %g2,
926                         add %g0, 1)                             ! Load  Group
927         ASI_SETDST_NOBLK                                        ! LSU   Group
928         EX(STB          %g2, [%o0] ASINORMAL,
929                         add %g0, 1)                             ! Store Group + bubble
930 1:      NORMAL_RETL
931 #endif  /* REGS_64BIT */
/* memcpy_noVIS_misaligned: copy path taken from __memcpy_entry when
 * (dst - src) is not a multiple of 4.
 * %o0 = destination, %o1 = source, %o2 = length; when reached from
 * __memcpy_entry, %g2 = dst & 7.
 *
 * Step 1: if the destination is not 8-byte aligned, copy 8 - (dst & 7)
 * single bytes so that it is; %o2 is reduced by that count up front.  An
 * already aligned destination (%g2 == 0) skips the byte loop and continues
 * at local label 2.  The "mov 8, %g1" in the delay slot runs on both paths.
 * Step 2 (label 2 onwards): %g5 = whole 8-byte units left, %o2 = trailing
 * 0..7 bytes.  alignaddr rounds the source down into %g1 and records the
 * misalignment for faligndata; the loop then reads aligned doubles,
 * alternating between %f6 and %f4, merges each with its predecessor and
 * stores 8 bytes per step.  When %g5 reaches zero control goes to end_cruft
 * (by branch from the first half, by fall-through from the second).
 */
933 memcpy_noVIS_misaligned:
934         brz,pt                  %g2, 2f                         ! CTI   Group
935          mov                    8, %g1                          ! IEU0
936         sub                     %g1, %g2, %g2                   ! IEU0  Group
937         sub                     %o2, %g2, %o2                   ! IEU0  Group
938 1:      ASI_SETSRC_NOBLK                                        ! LSU   Group
939         EX(LDUB                 [%o1] ASINORMAL, %g5,
940                                 add %o2, %g2)                   ! Load  Group
941         add                     %o1, 1, %o1                     ! IEU0
942         add                     %o0, 1, %o0                     ! IEU1
943         ASI_SETDST_NOBLK                                        ! LSU   Group
944         subcc                   %g2, 1, %g2                     ! IEU1  Group
945         bne,pt                  %xcc, 1b                        ! CTI
946          EX2(STB                %g5, [%o0 - 1] ASINORMAL,
947                                 add %o2, %g2, %o2,
948                                 add %o2, 1)                     ! Store
/* Target of the "brz %g2, 2f" above: the destination is 8-byte aligned. */
949 2:
950 #ifdef __KERNEL__
951         VISEntry
952 #endif
953         andn                    %o2, 7, %g5                     ! IEU0  Group
954         and                     %o2, 7, %o2                     ! IEU1
955         fmovd                   %f0, %f2                        ! FPU
956         ASI_SETSRC_NOBLK                                        ! LSU   Group
957         alignaddr               %o1, %g0, %g1                   ! GRU   Group
958         EXO2(LDDF               [%g1] ASINORMAL, %f4)           ! Load  Group
/* First half of the unrolled loop: new double in %f6, merge %f4:%f6. */
959 1:      EX(LDDF                 [%g1 + 0x8] ASINORMAL, %f6,
960                                 add %o2, %g5)                   ! Load  Group
961         add                     %g1, 0x8, %g1                   ! IEU0  Group
962         subcc                   %g5, 8, %g5                     ! IEU1
963         ASI_SETDST_NOBLK                                        ! LSU   Group
964         faligndata              %f4, %f6, %f0                   ! GRU   Group
965         EX2(STDF                %f0, [%o0] ASINORMAL,
966                                 add %o2, %g5, %o2,
967                                 add %o2, 8)                     ! Store
968         add                     %o1, 8, %o1                     ! IEU0  Group
969         be,pn                   %xcc, end_cruft                 ! CTI
970          add                    %o0, 8, %o0                     ! IEU1
/* Second half: new double in %f4, merge %f6:%f4. */
971         ASI_SETSRC_NOBLK                                        ! LSU   Group
972         EX(LDDF                 [%g1 + 0x8] ASINORMAL, %f4,
973                                 add %o2, %g5)                   ! Load  Group
974         add                     %g1, 8, %g1                     ! IEU0
975         subcc                   %g5, 8, %g5                     ! IEU1
976         ASI_SETDST_NOBLK                                        ! LSU   Group
977         faligndata              %f6, %f4, %f0                   ! GRU   Group
978         EX2(STDF                %f0, [%o0] ASINORMAL,
979                                 add %o2, %g5, %o2,
980                                 add %o2, 8)                     ! Store
981         add                     %o1, 8, %o1                     ! IEU0
982         ASI_SETSRC_NOBLK                                        ! LSU   Group
983         bne,pn                  %xcc, 1b                        ! CTI   Group
984          add                    %o0, 8, %o0                     ! IEU0
/* end_cruft: trailing 0..7 bytes after the faligndata loop of the
 * misaligned path; %o2 = bytes left.
 * Nothing left -> return through fpu_retl.  Otherwise one byte is copied
 * here; if more remain, the shared byte loop at vis_slp finishes the job
 * (and returns through vis_ret), else control falls into fpu_retl.
 * The delay slot of the brz holds a nop in userland builds and
 * ASI_SETSRC_NOBLK in kernel builds.
 */
985 end_cruft:
986         brz,pn                  %o2, fpu_retl                   ! CTI   Group
987 #ifndef __KERNEL__
988          nop                                                    ! IEU0
989 #else
990          ASI_SETSRC_NOBLK                                       ! LSU   Group
991 #endif
992         EXO2(LDUB       [%o1] ASINORMAL, %g5)                   ! LOAD
993         add             %o1, 1, %o1                             ! IEU0
994         add             %o0, 1, %o0                             ! IEU1
995         ASI_SETDST_NOBLK                                        ! LSU   Group
996         subcc           %o2, 1, %o2                             ! IEU1
997         bne,pt          %xcc, vis_slp                           ! CTI
998          EX(STB         %g5, [%o0 - 1] ASINORMAL,
999                         add %o2, 1)                             ! Store Group
/* Return for paths that used the FPU (FPU_RETL is only partly visible in
 * this view of the file).
 */
1000 fpu_retl:
1001         FPU_RETL
/* Kernel-only fault fixup code, placed in the .fixup section.
 *
 * VIScopyfixup_reto2: entry that first moves %o2 into %o1, then falls into
 * VIScopyfixup_ret.
 * VIScopyfixup_ret: common exit.  %o1 is the value handed back to the
 * caller in %o0 (presumably the number of bytes not copied -- confirm with
 * the fixup routines that branch here).  %asi is restored from the thread's
 * current_ds on the way out.
 * __bzero(%o0, %o1) is called first only when bit 0 of asi_src is set and
 * bit 0 of asi_dest is clear; the call is wrapped in save/restore so the
 * caller's registers survive it.
 * NOTE(review): VISExit sits in the delay slot of the first branch; if it
 * expands to more than one instruction only the first is in the slot --
 * check the macro in <asm/visasm.h>.
 */
1003 #ifdef __KERNEL__
1004                 .section        .fixup
1005                 .align          4
1006 VIScopyfixup_reto2:
1007                 mov             %o2, %o1
1008 VIScopyfixup_ret:
1009                 /* If this is copy_from_user(), zero out the rest of the
1010                  * kernel buffer.
1011                  */
1012                 ldub            [%g6 + AOFF_task_thread + AOFF_thread_current_ds], %o4
1013                 andcc           asi_src, 0x1, %g0
1014                 be,pt           %icc, 1f
1015                  VISExit
1016                 andcc           asi_dest, 0x1, %g0
1017                 bne,pn          %icc, 1f
1018                  nop
1019                 save            %sp, -160, %sp
1020                 mov             %i0, %o0
1021                 call            __bzero
1022                  mov            %i1, %o1
1023                 restore
1024 1:              mov             %o1, %o0
1025                 retl
1026                  wr             %o4, %g0, %asi
/* VIScopyfixup1: fault fixup that works in steps of 32 bytes per 18
 * counts of %g2.
 *
 * NOTE(review): %g2 presumably arrives as the index of the faulting
 * instruction inside an unrolled copy sequence, set up by the
 * exception-table handler -- that code is not visible here, confirm.
 *
 * For every whole 18 contained in %g2, %o0 advances by 32 and %g7
 * drops by 32.  The remainder (0..17) then selects one byte from the
 * table below; control continues at the next local label 2 with that
 * byte in %g2.
 */
1027 VIScopyfixup1:  subcc           %g2, 18, %g2
1028                 add             %o0, 32, %o0
1029                 bgeu,a,pt       %icc, VIScopyfixup1
                     /* Annulled delay slot: executed only when the branch loops. */
1030                  sub            %g7, 32, %g7
                     /* Undo the extra +32 applied by the final, non-looping pass. */
1031                 sub             %o0, 32, %o0
                     /* %g5 = address of the rd itself.  The table starts 16 bytes
                      * later (rd, add, ldub, ba = 4 instructions).  %g2 is
                      * (remainder - 18) at this point, so adding 18 + 16 turns it
                      * into the table entry's offset from %g5.
                      */
1032                 rd              %pc, %g5
1033                 add             %g2, (18 + 16), %g2
1034                 ldub            [%g5 + %g2], %g2
1035                 ba,a,pt         %xcc, 2f
                     /* 18-entry lookup table, indexed by the remainder of %g2. */
1036 .byte           0, 0, 0, 0, 0, 0, 0, 4, 4, 8, 12, 12, 16, 20, 20, 24, 28, 28
1037                 .align          4
/* VIScopyfixup2: fault fixup that works in steps of 16 bytes per 10
 * counts of %g2.
 *
 * NOTE(review): %g2 presumably arrives as the index of the faulting
 * instruction inside an unrolled copy sequence -- not visible here,
 * confirm against the exception-table handler.
 *
 * %g7 starts at 7 * 16 and drops by 16 for every whole 10 contained
 * in %g2; %o0 is then moved back by the resulting %g7.  The remainder
 * (0..9) selects one byte from the table below; control continues at
 * the next local label 4 with that byte in %g2.
 */
1038 VIScopyfixup2:  mov             (7 * 16), %g7
1039 1:              subcc           %g2, 10, %g2
1040                 bgeu,a,pt       %icc, 1b
                     /* Annulled delay slot: executed only when the branch loops. */
1041                  sub            %g7, 16, %g7
1042                 sub             %o0, %g7, %o0
                     /* %g5 = address of the rd itself.  The table starts 16 bytes
                      * later (rd, add, ldub, ba).  %g2 is (remainder - 10) here, so
                      * adding 10 + 16 gives the table entry's offset from %g5.
                      */
1043                 rd              %pc, %g5
1044                 add             %g2, (10 + 16), %g2
1045                 ldub            [%g5 + %g2], %g2
1046                 ba,a,pt         %xcc, 4f
                     /* 10-entry lookup table, indexed by the remainder of %g2. */
1047 .byte           0, 0, 0, 0, 0, 4, 4, 8, 12, 12
1048                 .align          4
/* VIScopyfixup3: fault fixup that works in steps of 32 bytes per 10
 * counts of %g2.
 *
 * NOTE(review): %g2 presumably arrives as the index of the faulting
 * instruction inside an unrolled copy sequence -- not visible here,
 * confirm against the exception-table handler.
 *
 * For every whole 10 contained in %g2, %o0 advances by 32 and %g7
 * drops by 32.  The remainder (0..9) selects one byte from the table
 * below, which is left in %g2 for the tail at local label 2.
 */
1049 VIScopyfixup3:  subcc           %g2, 10, %g2
1050                 add             %o0, 32, %o0
1051                 bgeu,a,pt       %icc, VIScopyfixup3
                     /* Annulled delay slot: executed only when the branch loops. */
1052                  sub            %g7, 32, %g7
                     /* Undo the extra +32 applied by the final, non-looping pass. */
1053                 sub             %o0, 32, %o0
                     /* %g5 = address of the rd itself.  The table starts 16 bytes
                      * later (rd, add, ldub, ba).  %g2 is (remainder - 10) here, so
                      * adding 10 + 16 gives the table entry's offset from %g5.
                      */
1054                 rd              %pc, %g5
1055                 add             %g2, (10 + 16), %g2
1056                 ldub            [%g5 + %g2], %g2
1057                 ba,a,pt         %xcc, 2f
                     /* 10-entry lookup table, indexed by the remainder of %g2. */
1058 .byte           0, 0, 0, 0, 0, 0, 0, 8, 16, 24
1059                 .align          4
                     /* Local label 2: tail reached from VIScopyfixup1 and
                      * VIScopyfixup3 via "2f".  Computes
                      *     %o1 = (%g7 - %g2) + (%o2 & 0x7f)
                      * in the branch delay slot and leaves through
                      * VIScopyfixup_ret.
                      */
1060 2:              and             %o2, 0x7f, %o2
1061                 sub             %g7, %g2, %g7
1062                 ba,pt           %xcc, VIScopyfixup_ret
1063                  add            %g7, %o2, %o1
/* VIScopyfixup4: fault fixup that works in steps of 16 bytes per 6
 * counts of %g2.
 *
 * NOTE(review): %g2 presumably arrives as the index of the faulting
 * instruction inside an unrolled copy sequence -- not visible here,
 * confirm against the exception-table handler.
 *
 * %g7 starts at 7 * 16 and drops by 16 for every whole 6 contained
 * in %g2; %o0 is then moved back by the resulting %g7.  The remainder
 * (0..5) selects one byte from the table below, which is left in %g2
 * for the tail at local label 4.
 */
1064 VIScopyfixup4:  mov             (7 * 16), %g7
1065 3:              subcc           %g2, 6, %g2
1066                 bgeu,a,pt       %icc, 3b
                     /* Annulled delay slot: executed only when the branch loops. */
1067                  sub            %g7, 16, %g7
1068                 sub             %o0, %g7, %o0
                     /* %g5 = address of the rd itself.  The table starts 16 bytes
                      * later (rd, add, ldub, ba).  %g2 is (remainder - 6) here, so
                      * adding 6 + 16 gives the table entry's offset from %g5.
                      */
1069                 rd              %pc, %g5
1070                 add             %g2, (6 + 16), %g2
1071                 ldub            [%g5 + %g2], %g2
1072                 ba,a,pt         %xcc, 4f
                     /* 6-entry lookup table, indexed by the remainder of %g2. */
1073 .byte           0, 0, 0, 0, 0, 8
1074                 .align          4
                     /* Local label 4: tail reached from VIScopyfixup2 and
                      * VIScopyfixup4 via "4f".  Computes
                      *     %o1 = (%g7 - %g2) + (%o2 & 0xf)
                      * in the branch delay slot and leaves through
                      * VIScopyfixup_ret.
                      */
1075 4:              and             %o2, 0xf, %o2
1076                 sub             %g7, %g2, %g7
1077                 ba,pt           %xcc, VIScopyfixup_ret
1078                  add            %g7, %o2, %o1
/* VIScopyfixup_vis3 / _vis2 / _vis0 / _vis1: four entry points that fall
 * through one another, each applying a different net adjustment to %o2
 * before the shared tail:
 *
 *     _vis3:  %o2 += 0x40   (-0x80 + 0x40 + 0x80)
 *     _vis2:  %o2 += 0xc0   (+0x40 + 0x80)
 *     _vis0:  %o2 += 0x80
 *     _vis1:  %o2 unchanged
 *
 * The tail then adds %g3 into %g7, sets %o1 = %o2 + %g7 in the branch
 * delay slot and leaves through VIScopyfixup_ret.
 * NOTE(review): which copy phase each entry serves, and what %g3/%g7
 * hold there, is decided by code outside this section -- confirm.
 */
1079 VIScopyfixup_vis3:
1080                 sub             %o2, 0x80, %o2
1081 VIScopyfixup_vis2:
1082                 add             %o2, 0x40, %o2
1083 VIScopyfixup_vis0:
1084                 add             %o2, 0x80, %o2
1085 VIScopyfixup_vis1:
1086                 add             %g7, %g3, %g7
1087                 ba,pt           %xcc, VIScopyfixup_ret
1088                  add            %o2, %g7, %o1
/* VIScopyfixup_vis5 / _vis4: two entry points that fall through one
 * another:
 *
 *     _vis5:  %g3 += 16   (8 here, 8 more in _vis4)
 *     _vis4:  %g3 += 8
 *
 * The tail sets %o1 = %o2 + %g3 in the branch delay slot and leaves
 * through VIScopyfixup_ret.
 * NOTE(review): what %g3 counts at the faulting sites is decided by
 * code outside this section -- confirm.
 */
1089 VIScopyfixup_vis5:
1090                 add             %g3, 8, %g3
1091 VIScopyfixup_vis4:
1092                 add             %g3, 8, %g3
1093                 ba,pt           %xcc, VIScopyfixup_ret
1094                  add            %o2, %g3, %o1
1095 #endif
1097 #ifdef __KERNEL__
1098                 .text
1099                 .align          32
                     /* memmove / __memmove (kernel build only)
                      *
                      * In:   %o0 = destination, %o1 = source, %o2 = byte count
                      * Out:  %o0 = 0 on the backward-copy path below; the other
                      *       paths branch to memcpy_private, which returns on
                      *       their behalf.
                      * Uses: %g3, %g5, %o5 as scratch; condition codes.
                      *
                      * When the destination lies below the source, or the source
                      * ends at or before the destination, the work is handed to
                      * memcpy_private.  Otherwise the regions overlap with the
                      * destination above the source, and the bytes are copied one
                      * at a time from the last byte down to the first.
                      * NOTE(review): this returns 0 rather than the destination
                      * pointer; presumably a C-level wrapper supplies the pointer
                      * -- confirm in the string header.
                      */
1101                 .globl          __memmove
1102                 .type           __memmove,@function
1104                 .globl          memmove
1105                 .type           memmove,@function
1107 memmove:
1108 __memmove:      cmp             %o0, %o1
1109                 blu,pt          %xcc, memcpy_private
                     /* Delay slot; on fall-through %g5 is overwritten before it is
                      * read.  Kept as-is because memcpy_private is not visible here.
                      */
1110                  sub            %o0, %o1, %g5
                     /* %g3 = source end; no overlap if it is at or below dest. */
1111                 add             %o1, %o2, %g3
1112                 cmp             %g3, %o0
1113                 bleu,pt         %xcc, memcpy_private
1114                  add            %o1, %o2, %g5
1115                 add             %o0, %o2, %o5
                     /* Point %o1 / %o0 at the last byte of source / destination. */
1117                 sub             %g5, 1, %o1
1118                 sub             %o5, 1, %o0
1119 1:              ldub            [%o1], %g5
1120                 subcc           %o2, 1, %o2
1121                 sub             %o1, 1, %o1
1122                 stb             %g5, [%o0]
                     /* Test the full 64-bit count (%xcc), matching the 64-bit
                      * compares above.  With %icc the loop would stop as soon as
                      * the low 32 bits of the count reached zero, truncating any
                      * copy of 4GB or more.
                      */
1123                 bne,pt          %xcc, 1b
1124                  sub            %o0, 1, %o0
1126                 retl
1127                  clr            %o0
1128 #endif