sysdeps/sparc/sparc64/sparcv9v/memcpy.S

   1 /* Copy SIZE bytes from SRC to DEST.  For SUN4V Niagara.
   2    Copyright (C) 2006 Free Software Foundation, Inc.
   3    This file is part of the GNU C Library.
   4    Contributed by David S. Miller (davem@davemloft.net)
   5
   6    The GNU C Library is free software; you can redistribute it and/or
   7    modify it under the terms of the GNU Lesser General Public
   8    License as published by the Free Software Foundation; either
   9    version 2.1 of the License, or (at your option) any later version.
  10
  11    The GNU C Library is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14    Lesser General Public License for more details.
  15
  16    You should have received a copy of the GNU Lesser General Public
  17    License along with the GNU C Library; if not, write to the Free
  18    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  19    02111-1307 USA.  */
  20
  21 #include <sysdep.h>
  22
  23 #define ASI_BLK_INIT_QUAD_LDD_P 0xe2
  24 #define ASI_P                   0x80
  25 #define ASI_PNF                 0x82
     /* Address space identifiers used in this file:
        ASI_BLK_INIT_QUAD_LDD_P - named by LOAD_TWIN() and written to %asi
                                  before the block copy loops, so that
                                  STORE_INIT() goes through it.
        ASI_P                   - named by every LOAD().
        ASI_PNF                 - written to %asi once the block copy loops
                                  are finished.
        NOTE(review): 0x80 / 0x82 are ASI_PRIMARY / ASI_PRIMARY_NOFAULT in
        SPARC V9; 0xe2 is taken to be the sun4v block-init twin-load ASI --
        confirm against the CPU supplement.  */
  26
     /* LOAD(type,addr,dest): emit the alternate-space form of TYPE (the
        macro pastes an "a" suffix, e.g. ldx -> ldxa, prefetch -> prefetcha)
        reading [addr] through ASI_P into DEST.  */
  27 #define LOAD(type,addr,dest)    type##a [addr] ASI_P, dest
     /* LOAD_TWIN(addr_reg,dest0,dest1): one ldda through
        ASI_BLK_INIT_QUAD_LDD_P.  Only DEST0 appears in the expansion; DEST1
        is not referenced and only names the second register of the pair the
        caller expects to be filled.  */
  28 #define LOAD_TWIN(addr_reg,dest0,dest1) \
  29         ldda [addr_reg] ASI_BLK_INIT_QUAD_LDD_P, dest0
  30
     /* STORE(type,src,addr): plain store "type src, [addr]".
        STORE_INIT(src,addr): 64-bit stxa through whatever ASI is currently
        held in %asi.  */
  31 #define STORE(type,src,addr)    type src, [addr]
  32 #define STORE_INIT(src,addr)    stxa src, [addr] %asi
  33
     /* If the includer has not already defined XCC, use the 64-bit condition
        codes (xcc) and define USE_BPR.  When USE_BPR is not defined the entry
        points below zero-extend the length with "srl %o2, 0, %o2" before
        using a branch-on-register.  (Presumably a 32-bit wrapper predefines
        XCC -- verify against the including file.)  */
  34 #ifndef XCC
  35 #define USE_BPR
  36 #define XCC xcc
  37 #endif
  38
     /* Declare %g2, %g3 and %g6 as scratch registers to the assembler; all
        three are used as temporaries below.  */
  39         .register       %g2,#scratch
  40         .register       %g3,#scratch
  41         .register       %g6,#scratch
  42
  43         .text
  44         .align          32
  45
  46 ENTRY(bcopy)
     /* bcopy: on entry %o0 = src, %o1 = dst, %o2 = len (source first, the
        reverse of memcpy).  The two pointers are swapped into memcpy order
        and then:
          - if (dst - src) >= len as an unsigned compare, a forward copy is
            safe and control enters memcpy at label 100;
          - otherwise, if len != 0, control enters the backward copy in
            memmove at label 220 with %o0 = dst + len;
          - otherwise return immediately.
        %g4 keeps the original %o0; the backward copy path returns it in
        %o0.  */
  47         sub             %o1, %o0, %o4          ! %o4 = dst - src
  48         mov             %o0, %g4               ! %g4 = original %o0 (src)
  49         cmp             %o4, %o2
  50         mov             %o1, %o0               ! %o0 = dst
  51         bgeu,pt         %XCC, 100f             ! forward copy via memcpy
  52          mov            %g4, %o1               ! delay slot, always run: %o1 = src
  53 #ifndef USE_BPR
  54         srl             %o2, 0, %o2            ! zero-extend len to 64 bits
  55 #endif
  56         brnz,pn         %o2, 220f              ! len != 0: backward copy
  57          add            %o0, %o2, %o0          ! delay slot: %o0 = dst + len
  58         retl
  59          nop
  60 END(bcopy)
  61
  62         .align          32
  63 ENTRY(memcpy)
             /* memcpy: %o0 = dst, %o1 = src, %o2 = len.  Returns the
              * original dst (saved in %g5) in %o0.
              *
              * Dispatch on length:
              *   len == 0          -> 85  (return)
              *   0 < len < 16      -> 80  (word loop if dst, src and len
              *                             are all multiples of 4, else
              *                             the byte loop at 90)
              *   16 <= len < 128   -> 70  (8-byte loads/stores, with
              *                             shift-and-merge for a
              *                             misaligned source)
              *   len >= 128        -> block copy using twin loads and
              *                        block-init stores, 64 bytes per
              *                        iteration
              *
              * bcopy enters at 100 and memmove enters at 218 (the delay
              * slot below) once they have established that a forward copy
              * is safe.
              *
              * Clobbers %o0-%o5, %g1-%g3, %g5, %asi and the condition codes.
              */
  64 100:    /* %o0=dst, %o1=src, %o2=len */
  65         mov             %o0, %g5
  66         cmp             %o2, 0
  67         be,pn           %XCC, 85f
  68 218:     or             %o0, %o1, %o3
  69         cmp             %o2, 16
  70         blu,a,pn        %XCC, 80f
  71          or             %o3, %o2, %o3
  72
  73         /* 2 blocks (128 bytes) is the minimum we can do the block
  74          * copy with.  We need to ensure that we'll iterate at least
  75          * once in the block copy loop.  At worst we'll need to align
  76          * the destination to a 64-byte boundary which can chew up
  77          * to (64 - 1) bytes from the length before we perform the
  78          * block copy loop.
  79          */
  80         cmp             %o2, (2 * 64)
  81         blu,pt          %XCC, 70f
  82          andcc          %o3, 0x7, %g0
  83
  84         /* %o0: dst
  85          * %o1: src
  86          * %o2: len  (known to be >= 128)
  87          *
  88          * The block copy loops will use %o4/%o5,%g2/%g3 as
  89          * temporaries while copying the data.
  90          */
  91
  92         LOAD(prefetch, %o1, #one_read)
  93         wr              %g0, ASI_BLK_INIT_QUAD_LDD_P, %asi
  94
  95         /* Align destination on 64-byte boundary.  */
  96         andcc           %o0, (64 - 1), %o4
  97         be,pt           %XCC, 2f
  98          sub            %o4, 64, %o4
  99         sub             %g0, %o4, %o4   ! bytes to align dst
 100         sub             %o2, %o4, %o2
 101 1:      subcc           %o4, 1, %o4
 102         LOAD(ldub, %o1, %g1)
 103         STORE(stb, %g1, %o0)
 104         add             %o1, 1, %o1
 105         bne,pt          %XCC, 1b
 106         add             %o0, 1, %o0     ! delay slot
 107
 108         /* If the source is on a 16-byte boundary we can do
 109          * the direct block copy loop.  If it is 8-byte aligned
 110          * we can do the 16-byte loads offset by -8 bytes and the
 111          * init stores offset by one register.
 112          *
 113          * If the source is not even 8-byte aligned, we need to do
 114          * shifting and masking (basically integer faligndata).
 115          *
 116          * The careful bit with init stores is that if we store
 117          * to any part of the cache line we have to store the whole
 118          * cacheline else we can end up with corrupt L2 cache line
 119          * contents.  Since the loop works on 64-bytes of 64-byte
 120          * aligned store data at a time, this is easy to ensure.
 121          */
 122 2:
 123         andcc           %o1, (16 - 1), %o4
 124         andn            %o2, (64 - 1), %g1      ! block copy loop iterator
 125         sub             %o2, %g1, %o2           ! final sub-block copy bytes
 126         be,pt           %XCC, 50f
 127          cmp            %o4, 8
 128         be,a,pt         %XCC, 10f
 129          sub            %o1, 0x8, %o1
 130
 131         /* Neither 8-byte nor 16-byte aligned, shift and mask.
              *
              * %o4 = bytes left for the block loop, %g1 = left shift in
              * bits (8 * (src & 7)), %o3 = 64 - %g1 = right shift, and
              * %g2 carries the already-shifted bits of the previous
              * source doubleword into the next output doubleword.
              */
 132         mov             %g1, %o4
 133         and             %o1, 0x7, %g1
 134         sll             %g1, 3, %g1
 135         mov             64, %o3
 136         andn            %o1, 0x7, %o1
 137         LOAD(ldx, %o1, %g2)
 138         sub             %o3, %g1, %o3
 139         sllx            %g2, %g1, %g2
 140
 141 #define SWIVEL_ONE_DWORD(SRC, TMP1, TMP2, PRE_VAL, PRE_SHIFT, POST_SHIFT, DST)\
 142         LOAD(ldx, SRC, TMP1); \
 143         srlx            TMP1, PRE_SHIFT, TMP2; \
 144         or              TMP2, PRE_VAL, TMP2; \
 145         STORE_INIT(TMP2, DST); \
 146         sllx            TMP1, POST_SHIFT, PRE_VAL;
 147
 148 1:      add             %o1, 0x8, %o1
 149         SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x00)
 150         add             %o1, 0x8, %o1
 151         SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x08)
 152         add             %o1, 0x8, %o1
 153         SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x10)
 154         add             %o1, 0x8, %o1
 155         SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x18)
 156         add             %o1, 32, %o1
 157         LOAD(prefetch, %o1, #one_read)
 158         sub             %o1, 32 - 8, %o1
 159         SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x20)
 160         add             %o1, 8, %o1
 161         SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x28)
 162         add             %o1, 8, %o1
 163         SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x30)
 164         add             %o1, 8, %o1
 165         SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x38)
 166         subcc           %o4, 64, %o4
 167         bne,pt          %XCC, 1b
 168          add            %o0, 64, %o0
 169
 170 #undef SWIVEL_ONE_DWORD
 171
             /* Turn the bit shift back into a byte offset and re-apply it
              * so that %o1 is the true (unaligned) source pointer again.
              */
 172         srl             %g1, 3, %g1
 173         ba,pt           %XCC, 60f
 174          add            %o1, %g1, %o1
 175
 176 10:     /* Destination is 64-byte aligned, source was only 8-byte
 177          * aligned but it has been subtracted by 8 and we perform
 178          * one twin load ahead, then add 8 back into source when
 179          * we finish the loop.
 180          */
 181         LOAD_TWIN(%o1, %o4, %o5)
 182 1:      add             %o1, 16, %o1
 183         LOAD_TWIN(%o1, %g2, %g3)
 184         add             %o1, 16 + 32, %o1
 185         LOAD(prefetch, %o1, #one_read)
 186         sub             %o1, 32, %o1
 187         STORE_INIT(%o5, %o0 + 0x00)             ! initializes cache line
 188         STORE_INIT(%g2, %o0 + 0x08)
 189         LOAD_TWIN(%o1, %o4, %o5)
 190         add             %o1, 16, %o1
 191         STORE_INIT(%g3, %o0 + 0x10)
 192         STORE_INIT(%o4, %o0 + 0x18)
 193         LOAD_TWIN(%o1, %g2, %g3)
 194         add             %o1, 16, %o1
 195         STORE_INIT(%o5, %o0 + 0x20)
 196         STORE_INIT(%g2, %o0 + 0x28)
 197         LOAD_TWIN(%o1, %o4, %o5)
 198         STORE_INIT(%g3, %o0 + 0x30)
 199         STORE_INIT(%o4, %o0 + 0x38)
 200         subcc           %g1, 64, %g1
 201         bne,pt          %XCC, 1b
 202          add            %o0, 64, %o0
 203
 204         ba,pt           %XCC, 60f
 205          add            %o1, 0x8, %o1
 206
 207 50:     /* Destination is 64-byte aligned, and source is 16-byte
 208          * aligned.
 209          */
 210 1:      LOAD_TWIN(%o1, %o4, %o5)
 211         add     %o1, 16, %o1
 212         LOAD_TWIN(%o1, %g2, %g3)
 213         add     %o1, 16 + 32, %o1
 214         LOAD(prefetch, %o1, #one_read)
 215         sub     %o1, 32, %o1
 216         STORE_INIT(%o4, %o0 + 0x00)             ! initializes cache line
 217         STORE_INIT(%o5, %o0 + 0x08)
 218         LOAD_TWIN(%o1, %o4, %o5)
 219         add     %o1, 16, %o1
 220         STORE_INIT(%g2, %o0 + 0x10)
 221         STORE_INIT(%g3, %o0 + 0x18)
 222         LOAD_TWIN(%o1, %g2, %g3)
 223         add     %o1, 16, %o1
 224         STORE_INIT(%o4, %o0 + 0x20)
 225         STORE_INIT(%o5, %o0 + 0x28)
 226         STORE_INIT(%g2, %o0 + 0x30)
 227         STORE_INIT(%g3, %o0 + 0x38)
 228         subcc   %g1, 64, %g1
 229         bne,pt  %XCC, 1b
 230          add    %o0, 64, %o0
 231         /* fall through */
 232
 233 60:
             /* All three block copy loops end up here.  The block-init
              * stores they issued are not ordered like normal stores, so
              * force them to complete before the ordinary stores of the
              * tail copy below and before returning to the caller.
              */
             membar          #Sync

 234         /* %o2 contains any final bytes still needed to be copied
 235          * over. If anything is left, we copy it one byte at a time.
 236          */
 237         wr              %g0, ASI_PNF, %asi
 238         brz,pt          %o2, 85f
 239          sub            %o0, %o1, %o3
 240         ba,a,pt         %XCC, 90f
 241
 242         .align          64
 243 70: /* 16 <= len < 128 */
             /* The condition codes on entry come from the
              * "andcc %o3, 0x7, %g0" in the delay slot of the branch
              * that got us here: not-equal means dst or src is not
              * 8-byte aligned.  %o3 becomes dst - src so stores can be
              * addressed as [%o1 + %o3].
              */
 244         bne,pn          %XCC, 75f
 245          sub            %o0, %o1, %o3
 246
 247 72:
 248         andn            %o2, 0xf, %o4
 249         and             %o2, 0xf, %o2
 250 1:      subcc           %o4, 0x10, %o4
 251         LOAD(ldx, %o1, %o5)
 252         add             %o1, 0x08, %o1
 253         LOAD(ldx, %o1, %g1)
 254         sub             %o1, 0x08, %o1
 255         STORE(stx, %o5, %o1 + %o3)
 256         add             %o1, 0x8, %o1
 257         STORE(stx, %g1, %o1 + %o3)
 258         bgu,pt          %XCC, 1b
 259          add            %o1, 0x8, %o1
 260 73:     andcc           %o2, 0x8, %g0
 261         be,pt           %XCC, 1f
 262          nop
 263         sub             %o2, 0x8, %o2
 264         LOAD(ldx, %o1, %o5)
 265         STORE(stx, %o5, %o1 + %o3)
 266         add             %o1, 0x8, %o1
 267 1:      andcc           %o2, 0x4, %g0
 268         be,pt           %XCC, 1f
 269          nop
 270         sub             %o2, 0x4, %o2
 271         LOAD(lduw, %o1, %o5)
 272         STORE(stw, %o5, %o1 + %o3)
 273         add             %o1, 0x4, %o1
 274 1:      cmp             %o2, 0
 275         be,pt           %XCC, 85f
 276          nop
 277         ba,pt           %XCC, 90f
 278          nop
 279
 280 75:
             /* dst and/or src not 8-byte aligned.  Copy
              * 8 - (dst & 7) bytes one at a time to align dst (skipped
              * when dst is already aligned), then either rejoin the
              * aligned code at 72/73 or use the shift-and-merge loop at 8.
              */
 281         andcc           %o0, 0x7, %g1
 282         sub             %g1, 0x8, %g1
 283         be,pn           %icc, 2f
 284          sub            %g0, %g1, %g1
 285         sub             %o2, %g1, %o2
 286
 287 1:      subcc           %g1, 1, %g1
 288         LOAD(ldub, %o1, %o5)
 289         STORE(stb, %o5, %o1 + %o3)
 290         bgu,pt          %icc, 1b
 291          add            %o1, 1, %o1
 292
 293 2:      add             %o1, %o3, %o0
 294         andcc           %o1, 0x7, %g1
 295         bne,pt          %icc, 8f
 296          sll            %g1, 3, %g1
 297
 298         cmp             %o2, 16
 299         bgeu,pt         %icc, 72b
 300          nop
 301         ba,a,pt         %XCC, 73b
 302
 303 8:      mov             64, %o3
 304         andn            %o1, 0x7, %o1
 305         LOAD(ldx, %o1, %g2)
 306         sub             %o3, %g1, %o3
 307         andn            %o2, 0x7, %o4
 308         sllx            %g2, %g1, %g2
 309 1:      add             %o1, 0x8, %o1
 310         LOAD(ldx, %o1, %g3)
 311         subcc           %o4, 0x8, %o4
 312         srlx            %g3, %o3, %o5
 313         or              %o5, %g2, %o5
 314         STORE(stx, %o5, %o0)
 315         add             %o0, 0x8, %o0
 316         bgu,pt          %icc, 1b
 317          sllx           %g3, %g1, %g2
 318
 319         srl             %g1, 3, %g1
 320         andcc           %o2, 0x7, %o2
 321         be,pn           %icc, 85f
 322          add            %o1, %g1, %o1
 323         ba,pt           %XCC, 90f
 324          sub            %o0, %o1, %o3
 325
 326         .align          64
 327 80: /* 0 < len < 16 */
             /* %o3 = dst | src | len here.  If any of the low two bits
              * is set, fall back to the byte loop at 90; otherwise copy
              * a word at a time.
              */
 328         andcc           %o3, 0x3, %g0
 329         bne,pn          %XCC, 90f
 330          sub            %o0, %o1, %o3
 331
 332 1:
 333         subcc           %o2, 4, %o2
 334         LOAD(lduw, %o1, %g1)
 335         STORE(stw, %g1, %o1 + %o3)
 336         bgu,pt          %XCC, 1b
 337          add            %o1, 4, %o1
 338
 339 85:     retl
 340          mov            %g5, %o0
 341
 342         .align          32
 343 90:
             /* Byte-at-a-time copy of %o2 (> 0) bytes; %o3 = dst - src.  */
 344         subcc           %o2, 1, %o2
 345         LOAD(ldub, %o1, %g1)
 346         STORE(stb, %g1, %o1 + %o3)
 347         bgu,pt          %XCC, 90b
 348          add            %o1, 1, %o1
 349         retl
 350          mov            %g5, %o0
 351
 352 END(memcpy)
 353
     /* Helper macros for the backward (high-to-low address) copy in memmove.
        SRC and DST are register names given WITHOUT the leading '%' (the
        macros add it), OFFSET is a constant and the T arguments are scratch
        registers, likewise without '%'.  All loads are 8-byte ldx.

        RMOVE_BIGCHUNK copies the 32 bytes at [src - offset - 0x20,
        src - offset) to the same offsets from dst.  Each doubleword is
        written as two 4-byte stw: low half first, then the high half after
        an srlx by 32.  t0-t3 are clobbered.  */
 354 #define RMOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3)        \
 355         ldx             [%src - offset - 0x20], %t0;            \
 356         ldx             [%src - offset - 0x18], %t1;            \
 357         ldx             [%src - offset - 0x10], %t2;            \
 358         ldx             [%src - offset - 0x08], %t3;            \
 359         stw             %t0, [%dst - offset - 0x1c];            \
 360         srlx            %t0, 32, %t0;                           \
 361         stw             %t0, [%dst - offset - 0x20];            \
 362         stw             %t1, [%dst - offset - 0x14];            \
 363         srlx            %t1, 32, %t1;                           \
 364         stw             %t1, [%dst - offset - 0x18];            \
 365         stw             %t2, [%dst - offset - 0x0c];            \
 366         srlx            %t2, 32, %t2;                           \
 367         stw             %t2, [%dst - offset - 0x10];            \
 368         stw             %t3, [%dst - offset - 0x04];            \
 369         srlx            %t3, 32, %t3;                           \
 370         stw             %t3, [%dst - offset - 0x08];
 371
     /* RMOVE_BIGALIGNCHUNK copies the 64 bytes at [src - offset - 0x40,
        src - offset) using 8-byte stx stores: the upper 32 bytes first, then
        the lower 32 bytes.  t0-t3 are clobbered.  */
 372 #define RMOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3)   \
 373         ldx             [%src - offset - 0x20], %t0;            \
 374         ldx             [%src - offset - 0x18], %t1;            \
 375         ldx             [%src - offset - 0x10], %t2;            \
 376         ldx             [%src - offset - 0x08], %t3;            \
 377         stx             %t0, [%dst - offset - 0x20];            \
 378         stx             %t1, [%dst - offset - 0x18];            \
 379         stx             %t2, [%dst - offset - 0x10];            \
 380         stx             %t3, [%dst - offset - 0x08];            \
 381         ldx             [%src - offset - 0x40], %t0;            \
 382         ldx             [%src - offset - 0x38], %t1;            \
 383         ldx             [%src - offset - 0x30], %t2;            \
 384         ldx             [%src - offset - 0x28], %t3;            \
 385         stx             %t0, [%dst - offset - 0x40];            \
 386         stx             %t1, [%dst - offset - 0x38];            \
 387         stx             %t2, [%dst - offset - 0x30];            \
 388         stx             %t3, [%dst - offset - 0x28];
 389
     /* RMOVE_LASTCHUNK copies the 16 bytes at [src + offset,
        src + offset + 0x10) with two ldx and four stw; t2/t3 receive the
        shifted-down high halves.  It expands to exactly 8 instructions, which
        the computed jump at label 279 in memmove depends on.  */
 390 #define RMOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3)       \
 391         ldx             [%src + offset + 0x00], %t0;            \
 392         ldx             [%src + offset + 0x08], %t1;            \
 393         stw             %t0, [%dst + offset + 0x04];            \
 394         srlx            %t0, 32, %t2;                           \
 395         stw             %t2, [%dst + offset + 0x00];            \
 396         stw             %t1, [%dst + offset + 0x0c];            \
 397         srlx            %t1, 32, %t3;                           \
 398         stw             %t3, [%dst + offset + 0x08];
 399
     /* RMOVE_LASTALIGNCHUNK copies the same 16 bytes with two ldx and two
        stx.  It expands to exactly 4 instructions, which the computed jump at
        label 283 in memmove depends on.  */
 400 #define RMOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1)          \
 401         ldx             [%src + offset + 0x00], %t0;            \
 402         ldx             [%src + offset + 0x08], %t1;            \
 403         stx             %t0, [%dst + offset + 0x00];            \
 404         stx             %t1, [%dst + offset + 0x08];
 405
 406         .align          32
             /* 228: backward byte copy used by memmove/bcopy when
              * len <= 15.  %o0 and %o1 point one past the end of dst and
              * src, %o2 = len (non-zero).  If len is odd one byte is
              * copied first; the loop at 2 then copies two bytes per
              * iteration.  The branch to "2f+4" skips the first ldub of
              * the loop because the same load has already been done in
              * the delay slot at label 1.  Returns %g4 in %o0 (%g5 is
              * used as a data temporary here).
              */
 407 228:    andcc           %o2, 1, %g0
 408         be,pt           %icc, 2f+4
 409 1:       ldub           [%o1 - 1], %o5
 410         sub             %o1, 1, %o1
 411         sub             %o0, 1, %o0
 412         subcc           %o2, 1, %o2
 413         be,pn           %xcc, 229f
 414          stb            %o5, [%o0]
 415 2:      ldub            [%o1 - 1], %o5
 416         sub             %o0, 2, %o0
 417         ldub            [%o1 - 2], %g5
 418         sub             %o1, 2, %o1
 419         subcc           %o2, 2, %o2
 420         stb             %o5, [%o0 + 1]
 421         bne,pt          %xcc, 2b
 422          stb            %g5, [%o0]
 423 229:    retl
 424          mov            %g4, %o0
             /* out: return path for a zero-length memmove; returns the
              * dst pointer saved in %g5.
              */
 425 out:    retl
 426          mov            %g5, %o0
 427
 428         .align          32
 429 ENTRY(memmove)
             /* memmove: %o0 = dst, %o1 = src, %o2 = len; returns dst.
              *
              * If (dst - src) >= len as an unsigned compare, a forward
              * copy cannot overwrite unread source bytes, so control
              * jumps into memcpy at label 218 (which returns %g5 = dst).
              * Otherwise both pointers are advanced to one past the end
              * and the buffer is copied from high addresses to low;
              * that path returns %g4.  bcopy also enters the backward
              * path at label 220 with %o0 already advanced.
              */
 430         mov             %o0, %g5
 431 #ifndef USE_BPR
 432         srl             %o2, 0, %o2
 433 #endif
 434         brz,pn          %o2, out
 435          sub            %o0, %o1, %o4
 436         cmp             %o4, %o2
 437         bgeu,pt         %XCC, 218b
 438          mov            %o0, %g4
 439         add             %o0, %o2, %o0
 440 220:    add             %o1, %o2, %o1
 441         cmp             %o2, 15
 442         bleu,pn         %xcc, 228b
 443          andcc          %o0, 7, %g2
             /* len >= 16.  If dst and src differ in their low two bits,
              * no common word alignment exists: use the byte loop at
              * 232.  Otherwise copy 1 and/or 2 bytes until the source
              * end pointer is 4-byte aligned.
              */
 444         sub             %o0, %o1, %g5
 445         andcc           %g5, 3, %o5
 446         bne,pn          %xcc, 232f
 447          andcc          %o1, 3, %g0
 448         be,a,pt         %xcc, 236f
 449          andcc          %o1, 4, %g0
 450         andcc           %o1, 1, %g0
 451         be,pn           %xcc, 4f
 452          andcc          %o1, 2, %g0
 453         ldub            [%o1 - 1], %g2
 454         sub             %o1, 1, %o1
 455         sub             %o0, 1, %o0
 456         sub             %o2, 1, %o2
 457         be,pn           %xcc, 5f                ! tests the "andcc %o1, 2" above
 458          stb            %g2, [%o0]
 459 4:      lduh            [%o1 - 2], %g2
 460         sub             %o1, 2, %o1
 461         sub             %o0, 2, %o0
 462         sub             %o2, 2, %o2
 463         sth             %g2, [%o0]
             /* Source end is now 4-byte aligned; copy one more word if
              * needed to make it 8-byte aligned.  %g6 = len & -128 is
              * the byte count for the 128-byte chunk loops.
              */
 464 5:      andcc           %o1, 4, %g0
 465 236:    be,a,pn         %xcc, 2f
 466          andcc          %o2, -128, %g6
 467         lduw            [%o1 - 4], %g5
 468         sub             %o1, 4, %o1
 469         sub             %o0, 4, %o0
 470         sub             %o2, 4, %o2
 471         stw             %g5, [%o0]
 472         andcc           %o2, -128, %g6
 473 2:      be,pn           %xcc, 235f
 474          andcc          %o0, 4, %g0
             /* If bit 2 of dst is clear, use the stx-based loop at 282.
              * The target is "282f + 4" because the delay slot below (the
              * first ldx of RMOVE_BIGCHUNK) is the same instruction as
              * the first one at 282.
              */
 475         be,pn           %xcc, 282f + 4
 476 5:      RMOVE_BIGCHUNK(o1, o0, 0x00, g1, g3, g5, o5)
 477         RMOVE_BIGCHUNK(o1, o0, 0x20, g1, g3, g5, o5)
 478         RMOVE_BIGCHUNK(o1, o0, 0x40, g1, g3, g5, o5)
 479         RMOVE_BIGCHUNK(o1, o0, 0x60, g1, g3, g5, o5)
 480         subcc           %g6, 128, %g6
 481         sub             %o1, 128, %o1
 482         bne,pt          %xcc, 5b
 483          sub            %o0, 128, %o0
 484 235:    andcc           %o2, 0x70, %g6
 485 41:     be,pn           %xcc, 280f
 486          andcc          %o2, 8, %g0
 487
             /* Copy the remaining (len & 0x70) bytes by jumping into the
              * middle of the RMOVE_LASTCHUNK sequence below.  Each
              * RMOVE_LASTCHUNK copies 16 bytes and is 8 instructions
              * (32 bytes of code), so the jump target is
              * 280 - 2 * %g6.  Both pointers are first moved down by
              * %g6.  None of the instructions on the way to 280 change
              * the condition codes set by "andcc %o2, 8" above.
              */
 488 279:    rd              %pc, %o5
 489         sll             %g6, 1, %g5
 490         sub             %o1, %g6, %o1
 491         sub             %o5, %g5, %o5
 492         jmpl            %o5 + %lo(280f - 279b), %g0
 493          sub            %o0, %g6, %o0
 494         RMOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g5, o5)
 495         RMOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g5, o5)
 496         RMOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g5, o5)
 497         RMOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g5, o5)
 498         RMOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g5, o5)
 499         RMOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g5, o5)
 500         RMOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g5, o5)
             /* Tail: copy 8, 4, 2 and 1 bytes according to the low bits
              * of len.  Each test is performed in the delay slot of the
              * preceding branch.
              */
 501 280:    be,pt           %xcc, 281f
 502          andcc          %o2, 4, %g0
 503         ldx             [%o1 - 8], %g2
 504         sub             %o0, 8, %o0
 505         stw             %g2, [%o0 + 4]
 506         sub             %o1, 8, %o1
 507         srlx            %g2, 32, %g2
 508         stw             %g2, [%o0]
 509 281:    be,pt           %xcc, 1f
 510          andcc          %o2, 2, %g0
 511         lduw            [%o1 - 4], %g2
 512         sub             %o1, 4, %o1
 513         stw             %g2, [%o0 - 4]
 514         sub             %o0, 4, %o0
 515 1:      be,pt           %xcc, 1f
 516          andcc          %o2, 1, %g0
 517         lduh            [%o1 - 2], %g2
 518         sub             %o1, 2, %o1
 519         sth             %g2, [%o0 - 2]
 520         sub             %o0, 2, %o0
 521 1:      be,pt           %xcc, 211f
 522          nop
 523         ldub            [%o1 - 1], %g2
 524         stb             %g2, [%o0 - 1]
 525 211:    retl
 526          mov            %g4, %o0
 527
             /* 282: same structure as the loop at 5 and the tail at
              * 279-211 above, but using 8-byte stx stores.
              * RMOVE_LASTALIGNCHUNK is 4 instructions (16 bytes of code)
              * per 16 bytes copied, so the computed jump at 283 uses
              * %g6 unscaled.
              */
 528 282:    RMOVE_BIGALIGNCHUNK(o1, o0, 0x00, g1, g3, g5, o5)
 529         RMOVE_BIGALIGNCHUNK(o1, o0, 0x40, g1, g3, g5, o5)
 530         subcc           %g6, 128, %g6
 531         sub             %o1, 128, %o1
 532         bne,pt          %xcc, 282b
 533          sub            %o0, 128, %o0
 534         andcc           %o2, 0x70, %g6
 535         be,pn           %xcc, 284f
 536          andcc          %o2, 8, %g0
 537
 538 283:    rd              %pc, %o5
 539         sub             %o1, %g6, %o1
 540         sub             %o5, %g6, %o5
 541         jmpl            %o5 + %lo(284f - 283b), %g0
 542          sub            %o0, %g6, %o0
 543         RMOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2, g3)
 544         RMOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2, g3)
 545         RMOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2, g3)
 546         RMOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2, g3)
 547         RMOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2, g3)
 548         RMOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2, g3)
 549         RMOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2, g3)
 550 284:    be,pt           %xcc, 285f
 551          andcc          %o2, 4, %g0
 552         ldx             [%o1 - 8], %g2
 553         sub             %o0, 8, %o0
 554         sub             %o1, 8, %o1
 555         stx             %g2, [%o0]
 556 285:    be,pt           %xcc, 1f
 557          andcc          %o2, 2, %g0
 558         lduw            [%o1 - 4], %g2
 559         sub             %o0, 4, %o0
 560         sub             %o1, 4, %o1
 561         stw             %g2, [%o0]
 562 1:      be,pt           %xcc, 1f
 563          andcc          %o2, 1, %g0
 564         lduh            [%o1 - 2], %g2
 565         sub             %o0, 2, %o0
 566         sub             %o1, 2, %o1
 567         sth             %g2, [%o0]
 568 1:      be,pt           %xcc, 1f
 569          nop
 570         ldub            [%o1 - 1], %g2
 571         stb             %g2, [%o0 - 1]
 572 1:      retl
 573          mov            %g4, %o0
 574
             /* 232: backward byte-at-a-time copy, used when dst and src
              * do not share the same alignment modulo 4.
              */
 575 232:    ldub            [%o1 - 1], %g5
 576         sub             %o1, 1, %o1
 577         sub             %o0, 1, %o0
 578         subcc           %o2, 1, %o2
 579         bne,pt          %xcc, 232b
 580          stb            %g5, [%o0]
 581 234:    retl
 582          mov            %g4, %o0
 583 END(memmove)
 584
 585 #ifdef USE_BPR
     /* Only when USE_BPR is defined (i.e. XCC was not predefined by the
        includer): publish memcpy under the __align_cpy_{1,2,4,8,16} names as
        well.  NOTE(review): weak_alias comes from the glibc headers and is
        presumed to create weak symbol aliases -- confirm there.  */
 586 weak_alias (memcpy, __align_cpy_1)
 587 weak_alias (memcpy, __align_cpy_2)
 588 weak_alias (memcpy, __align_cpy_4)
 589 weak_alias (memcpy, __align_cpy_8)
 590 weak_alias (memcpy, __align_cpy_16)
 591 #endif
     /* glibc-internal hidden definitions for memcpy and memmove (macro
        supplied by the glibc headers).  */
 592 libc_hidden_builtin_def (memcpy)
 593 libc_hidden_builtin_def (memmove)