arch/arm/lib/memset.S

   1 /*
   2  *  linux/arch/arm/lib/memset.S
   3  *
   4  *  Copyright (C) 1995-2000 Russell King
   5  *
   6  * This program is free software; you can redistribute it and/or modify
   7  * it under the terms of the GNU General Public License version 2 as
   8  * published by the Free Software Foundation.
   9  *
  10  *  ASM optimised string functions
  11  */
  12 #include <linux/linkage.h>
  13 #include <asm/assembler.h>
  14
  15         .text
  16         .align  5
  17
  18 /*
  19  * void *memset(void *s, int c, size_t n)
  20  *
  21  * Fill the n bytes starting at s with the byte value (unsigned char)c.
  22  *
  23  * In:   r0 = s (any alignment), r1 = c, r2 = n
  24  * Out:  r0 = s, unchanged
  25  * Uses: r1-r3, ip and the flags are clobbered.  lr and r8 (r4-r8 in the
  26  *       CALGN variant) are pushed around the bulk loop and restored.
  27  *
  28  * ip, not r0, is the running store pointer, so r0 still holds s on
  29  * return as the C prototype requires; callers, including
  30  * compiler-generated ones, may rely on that.
  31  */
  32 ENTRY(memset)
  33         and     r1, r1, #255            @ c is used as an unsigned char
  34         ands    r3, r0, #3              @ 1 unaligned?
  35         mov     ip, r0                  @ preserve r0 as return value
  36         bne     6f                      @ 1
  37 /*
  38  * we know that the pointer in ip is aligned to a word boundary.
  39  * Replicate the fill byte into all four bytes of r1 and r3.
  40  */
  41 1:      orr     r1, r1, r1, lsl #8
  42         orr     r1, r1, r1, lsl #16
  43         mov     r3, r1
  44         cmp     r2, #16
  45         blt     4f
  46
  47 #if ! CALGN(1)+0
  48
  49 /*
  50  * We need 2 extra registers for this loop - use r8 and the LR
  51  */
  52         stmfd   sp!, {r8, lr}
  53         mov     r8, r1
  54         mov     lr, r1
  55
  56 2:      subs    r2, r2, #64
  57         stmgeia ip!, {r1, r3, r8, lr}   @ 64 bytes at a time.
  58         stmgeia ip!, {r1, r3, r8, lr}
  59         stmgeia ip!, {r1, r3, r8, lr}
  60         stmgeia ip!, {r1, r3, r8, lr}
  61         bgt     2b
  62         ldmeqfd sp!, {r8, pc}           @ Now <64 bytes to go.
  63 /*
  64  * No need to correct the count; we're only testing bits from now on
  65  */
  66         tst     r2, #32
  67         stmneia ip!, {r1, r3, r8, lr}
  68         stmneia ip!, {r1, r3, r8, lr}
  69         tst     r2, #16
  70         stmneia ip!, {r1, r3, r8, lr}
  71         ldmfd   sp!, {r8, lr}
  72
  73 #else
  74
  75 /*
  76  * This version aligns the destination pointer in order to write
  77  * whole cache lines at once.
  78  */
  79
  80         stmfd   sp!, {r4-r8, lr}
  81         mov     r4, r1
  82         mov     r5, r1
  83         mov     r6, r1
  84         mov     r7, r1
  85         mov     r8, r1
  86         mov     lr, r1
  87
  88         cmp     r2, #96
  89         tstgt   ip, #31
  90         ble     3f
  91
  92         and     r8, ip, #31
  93         rsb     r8, r8, #32             @ r8 = bytes to 32-byte boundary
  94         sub     r2, r2, r8
  95         movs    r8, r8, lsl #(32 - 4)   @ C = bit 4, N = bit 3 of r8
  96         stmcsia ip!, {r4, r5, r6, r7}   @ 16 bytes
  97         stmmiia ip!, {r4, r5}           @ 8 bytes
  98         tst     r8, #(1 << 30)          @ was bit 2 of the byte count
  99         mov     r8, r1                  @ r8 is a fill register again
 100         strne   r1, [ip], #4            @ 4 bytes
 101
 102 3:      subs    r2, r2, #64
 103         stmgeia ip!, {r1, r3-r8, lr}
 104         stmgeia ip!, {r1, r3-r8, lr}
 105         bgt     3b
 106         ldmeqfd sp!, {r4-r8, pc}
 107
 108         tst     r2, #32
 109         stmneia ip!, {r1, r3-r8, lr}
 110         tst     r2, #16
 111         stmneia ip!, {r4-r7}
 112         ldmfd   sp!, {r4-r8, lr}
 113
 114 #endif
 115
 116 4:      tst     r2, #8
 117         stmneia ip!, {r1, r3}
 118         tst     r2, #4
 119         strne   r1, [ip], #4
 120 /*
 121  * When we get here, we've got less than 4 bytes to set.  We
 122  * may have an unaligned pointer as well.
 123  */
 124 5:      tst     r2, #2
 125         strneb  r1, [ip], #1
 126         strneb  r1, [ip], #1
 127         tst     r2, #1
 128         strneb  r1, [ip], #1
 129         mov     pc, lr
 130
 131 /*
 132  * Unaligned start: store the 4 - r3 bytes up to the next word boundary,
 133  * then rejoin the aligned path at 1 (after the entry code, so that ip
 134  * is not reloaded from r0).  If fewer than 4 bytes were requested, go
 135  * straight to the byte tail: r2 - 4 has the same low two bits as r2,
 136  * and those are all the tail tests.
 137  */
 138 6:      subs    r2, r2, #4              @ 1 do we have enough
 139         blt     5b                      @ 1 bytes to align with?
 140         cmp     r3, #2                  @ 1
 141         strltb  r1, [ip], #1            @ 1
 142         strleb  r1, [ip], #1            @ 1
 143         strb    r1, [ip], #1            @ 1
 144         add     r2, r2, r3              @ 1 (r2 = r2 - (4 - r3))
 145         b       1b
 146 ENDPROC(memset)