libmpeg2/motion_comp_arm_s.S

   1 @ motion_comp_arm_s.S
   2 @ Copyright (C) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
   3 @
   4 @ This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
   5 @ See http://libmpeg2.sourceforge.net/ for updates.
   6 @
   7 @ mpeg2dec is free software; you can redistribute it and/or modify
   8 @ it under the terms of the GNU General Public License as published by
   9 @ the Free Software Foundation; either version 2 of the License, or
  10 @ (at your option) any later version.
  11 @
  12 @ mpeg2dec is distributed in the hope that it will be useful,
  13 @ but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 @ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 @ GNU General Public License for more details.
  16 @
  17 @ You should have received a copy of the GNU General Public License
  18 @ along with mpeg2dec; if not, write to the Free Software
  19 @ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  20
  21
  22 #ifndef HAVE_PLD
  23 .macro pld reg
  24 .endm
  25 #endif
  26
  27         .text
  28
  29 @ ----------------------------------------------------------------
  30         .align
  31         .global MC_put_o_16_arm
  32 MC_put_o_16_arm:
  33         @@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height)
  34         pld [r1]
  35         stmfd sp!, {r4-r11, lr} @ R14 is also called LR
  36         and r4, r1, #3
  37         adr r5, MC_put_o_16_arm_align_jt
  38         add r5, r5, r4, lsl #2
  39         ldr pc, [r5]
  40
  41 MC_put_o_16_arm_align0:
  42         ldmia r1, {r4-r7}
  43         add r1, r1, r2
  44         pld [r1]
  45         stmia r0, {r4-r7}
  46         subs r3, r3, #1
  47         add r0, r0, r2
  48         bne MC_put_o_16_arm_align0
  49         ldmfd sp!, {r4-r11, pc} @@ update PC with LR content.
  50
  51 .macro  PROC shift
  52         ldmia r1, {r4-r8}
  53         add r1, r1, r2
  54         mov r9, r4, lsr #(\shift)
  55         pld [r1]
  56         mov r10, r5, lsr #(\shift)
  57         orr r9, r9, r5, lsl #(32-\shift)
  58         mov r11, r6, lsr #(\shift)
  59         orr r10, r10, r6, lsl #(32-\shift)
  60         mov r12, r7, lsr #(\shift)
  61         orr r11, r11, r7, lsl #(32-\shift)
  62         orr r12, r12, r8, lsl #(32-\shift)
  63         stmia r0, {r9-r12}
  64         subs r3, r3, #1
  65         add r0, r0, r2
  66 .endm
  67
  68 MC_put_o_16_arm_align1:
  69         and r1, r1, #0xFFFFFFFC
  70 1:      PROC(8)
  71         bne 1b
  72         ldmfd sp!, {r4-r11, pc} @@ update PC with LR content.
  73 MC_put_o_16_arm_align2:
  74         and r1, r1, #0xFFFFFFFC
  75 1:      PROC(16)
  76         bne 1b
  77         ldmfd sp!, {r4-r11, pc} @@ update PC with LR content.
  78 MC_put_o_16_arm_align3:
  79         and r1, r1, #0xFFFFFFFC
  80 1:      PROC(24)
  81         bne 1b
  82         ldmfd sp!, {r4-r11, pc} @@ update PC with LR content.
  83 MC_put_o_16_arm_align_jt:
  84         .word MC_put_o_16_arm_align0
  85         .word MC_put_o_16_arm_align1
  86         .word MC_put_o_16_arm_align2
  87         .word MC_put_o_16_arm_align3
  88
  89 @ ----------------------------------------------------------------
  90         .align
  91         .global MC_put_o_8_arm
  92 MC_put_o_8_arm:
  93         @@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height)
  94         pld [r1]
  95         stmfd sp!, {r4-r10, lr} @ R14 is also called LR
  96         and r4, r1, #3
  97         adr r5, MC_put_o_8_arm_align_jt
  98         add r5, r5, r4, lsl #2
  99         ldr pc, [r5]
 100 MC_put_o_8_arm_align0:
 101         ldmia r1, {r4-r5}
 102         add r1, r1, r2
 103         pld [r1]
 104         stmia r0, {r4-r5}
 105         add r0, r0, r2
 106         subs r3, r3, #1
 107         bne MC_put_o_8_arm_align0
 108         ldmfd sp!, {r4-r10, pc} @@ update PC with LR content.
 109
 110 .macro  PROC8 shift
 111         ldmia r1, {r4-r6}
 112         add r1, r1, r2
 113         mov r9, r4, lsr #(\shift)
 114         pld [r1]
 115         mov r10, r5, lsr #(\shift)
 116         orr r9, r9, r5, lsl #(32-\shift)
 117         orr r10, r10, r6, lsl #(32-\shift)
 118         stmia r0, {r9-r10}
 119         subs r3, r3, #1
 120         add r0, r0, r2
 121 .endm
 122
 123 MC_put_o_8_arm_align1:
 124         and r1, r1, #0xFFFFFFFC
 125 1:      PROC8(8)
 126         bne 1b
 127         ldmfd sp!, {r4-r10, pc} @@ update PC with LR content.
 128
 129 MC_put_o_8_arm_align2:
 130         and r1, r1, #0xFFFFFFFC
 131 1:      PROC8(16)
 132         bne 1b
 133         ldmfd sp!, {r4-r10, pc} @@ update PC with LR content.
 134
 135 MC_put_o_8_arm_align3:
 136         and r1, r1, #0xFFFFFFFC
 137 1:      PROC8(24)
 138         bne 1b
 139         ldmfd sp!, {r4-r10, pc} @@ update PC with LR content.
 140
 141 MC_put_o_8_arm_align_jt:
 142         .word MC_put_o_8_arm_align0
 143         .word MC_put_o_8_arm_align1
 144         .word MC_put_o_8_arm_align2
 145         .word MC_put_o_8_arm_align3
 146
 147 @ ----------------------------------------------------------------
 148 .macro  AVG_PW rW1, rW2
 149         mov \rW2, \rW2, lsl #24
 150         orr \rW2, \rW2, \rW1, lsr #8
 151         eor r9, \rW1, \rW2
 152         and \rW2, \rW1, \rW2
 153         and r10, r9, r12
 154         add \rW2, \rW2, r10, lsr #1
 155         and r10, r9, r11
 156         add \rW2, \rW2, r10
 157 .endm
 158
 159         .align
 160         .global MC_put_x_16_arm
 161 MC_put_x_16_arm:
 162         @@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height)
 163         pld [r1]
 164         stmfd sp!, {r4-r11,lr} @ R14 is also called LR
 165         and r4, r1, #3
 166         adr r5, MC_put_x_16_arm_align_jt
 167         ldr r11, [r5]
 168         mvn r12, r11
 169         add r5, r5, r4, lsl #2
 170         ldr pc, [r5, #4]
 171
 172 .macro  ADJ_ALIGN_QW shift, R0, R1, R2, R3, R4
 173         mov \R0, \R0, lsr #(\shift)
 174         orr \R0, \R0, \R1, lsl #(32 - \shift)
 175         mov \R1, \R1, lsr #(\shift)
 176         orr \R1, \R1, \R2, lsl #(32 - \shift)
 177         mov \R2, \R2, lsr #(\shift)
 178         orr \R2, \R2, \R3, lsl #(32 - \shift)
 179         mov \R3, \R3, lsr #(\shift)
 180         orr \R3, \R3, \R4, lsl #(32 - \shift)
 181         mov \R4, \R4, lsr #(\shift)
 182 @       and \R4, \R4, #0xFF
 183 .endm
 184
 185 MC_put_x_16_arm_align0:
 186         ldmia r1, {r4-r8}
 187         add r1, r1, r2
 188         pld [r1]
 189         AVG_PW r7, r8
 190         AVG_PW r6, r7
 191         AVG_PW r5, r6
 192         AVG_PW r4, r5
 193         stmia r0, {r5-r8}
 194         subs r3, r3, #1
 195         add r0, r0, r2
 196         bne MC_put_x_16_arm_align0
 197         ldmfd sp!, {r4-r11,pc} @@ update PC with LR content.
 198 MC_put_x_16_arm_align1:
 199         and r1, r1, #0xFFFFFFFC
 200 1:      ldmia r1, {r4-r8}
 201         add r1, r1, r2
 202         pld [r1]
 203         ADJ_ALIGN_QW 8, r4, r5, r6, r7, r8
 204         AVG_PW r7, r8
 205         AVG_PW r6, r7
 206         AVG_PW r5, r6
 207         AVG_PW r4, r5
 208         stmia r0, {r5-r8}
 209         subs r3, r3, #1
 210         add r0, r0, r2
 211         bne 1b
 212         ldmfd sp!, {r4-r11,pc} @@ update PC with LR content.
 213 MC_put_x_16_arm_align2:
 214         and r1, r1, #0xFFFFFFFC
 215 1:      ldmia r1, {r4-r8}
 216         add r1, r1, r2
 217         pld [r1]
 218         ADJ_ALIGN_QW 16, r4, r5, r6, r7, r8
 219         AVG_PW r7, r8
 220         AVG_PW r6, r7
 221         AVG_PW r5, r6
 222         AVG_PW r4, r5
 223         stmia r0, {r5-r8}
 224         subs r3, r3, #1
 225         add r0, r0, r2
 226         bne 1b
 227         ldmfd sp!, {r4-r11,pc} @@ update PC with LR content.
 228 MC_put_x_16_arm_align3:
 229         and r1, r1, #0xFFFFFFFC
 230 1:      ldmia r1, {r4-r8}
 231         add r1, r1, r2
 232         pld [r1]
 233         ADJ_ALIGN_QW 24, r4, r5, r6, r7, r8
 234         AVG_PW r7, r8
 235         AVG_PW r6, r7
 236         AVG_PW r5, r6
 237         AVG_PW r4, r5
 238         stmia r0, {r5-r8}
 239         subs r3, r3, #1
 240         add r0, r0, r2
 241         bne 1b
 242         ldmfd sp!, {r4-r11,pc} @@ update PC with LR content.
 243 MC_put_x_16_arm_align_jt:
 244         .word 0x01010101
 245         .word MC_put_x_16_arm_align0
 246         .word MC_put_x_16_arm_align1
 247         .word MC_put_x_16_arm_align2
 248         .word MC_put_x_16_arm_align3
 249
 250 @ ----------------------------------------------------------------
 251         .align
 252         .global MC_put_x_8_arm
 253 MC_put_x_8_arm:
 254         @@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height)
 255         pld [r1]
 256         stmfd sp!, {r4-r11,lr} @ R14 is also called LR
 257         and r4, r1, #3
 258         adr r5, MC_put_x_8_arm_align_jt
 259         ldr r11, [r5]
 260         mvn r12, r11
 261         add r5, r5, r4, lsl #2
 262         ldr pc, [r5, #4]
 263
 264 .macro  ADJ_ALIGN_DW shift, R0, R1, R2
 265         mov \R0, \R0, lsr #(\shift)
 266         orr \R0, \R0, \R1, lsl #(32 - \shift)
 267         mov \R1, \R1, lsr #(\shift)
 268         orr \R1, \R1, \R2, lsl #(32 - \shift)
 269         mov \R2, \R2, lsr #(\shift)
 270 @       and \R4, \R4, #0xFF
 271 .endm
 272
 273 MC_put_x_8_arm_align0:
 274         ldmia r1, {r4-r6}
 275         add r1, r1, r2
 276         pld [r1]
 277         AVG_PW r5, r6
 278         AVG_PW r4, r5
 279         stmia r0, {r5-r6}
 280         subs r3, r3, #1
 281         add r0, r0, r2
 282         bne MC_put_x_8_arm_align0
 283         ldmfd sp!, {r4-r11,pc} @@ update PC with LR content.
 284 MC_put_x_8_arm_align1:
 285         and r1, r1, #0xFFFFFFFC
 286 1:      ldmia r1, {r4-r6}
 287         add r1, r1, r2
 288         pld [r1]
 289         ADJ_ALIGN_DW 8, r4, r5, r6
 290         AVG_PW r5, r6
 291         AVG_PW r4, r5
 292         stmia r0, {r5-r6}
 293         subs r3, r3, #1
 294         add r0, r0, r2
 295         bne 1b
 296         ldmfd sp!, {r4-r11,pc} @@ update PC with LR content.
 297 MC_put_x_8_arm_align2:
 298         and r1, r1, #0xFFFFFFFC
 299 1:      ldmia r1, {r4-r6}
 300         add r1, r1, r2
 301         pld [r1]
 302         ADJ_ALIGN_DW 16, r4, r5, r6
 303         AVG_PW r5, r6
 304         AVG_PW r4, r5
 305         stmia r0, {r5-r6}
 306         subs r3, r3, #1
 307         add r0, r0, r2
 308         bne 1b
 309         ldmfd sp!, {r4-r11,pc} @@ update PC with LR content.
 310 MC_put_x_8_arm_align3:
 311         and r1, r1, #0xFFFFFFFC
 312 1:      ldmia r1, {r4-r6}
 313         add r1, r1, r2
 314         pld [r1]
 315         ADJ_ALIGN_DW 24, r4, r5, r6
 316         AVG_PW r5, r6
 317         AVG_PW r4, r5
 318         stmia r0, {r5-r6}
 319         subs r3, r3, #1
 320         add r0, r0, r2
 321         bne 1b
 322         ldmfd sp!, {r4-r11,pc} @@ update PC with LR content.
 323 MC_put_x_8_arm_align_jt:
 324         .word 0x01010101
 325         .word MC_put_x_8_arm_align0
 326         .word MC_put_x_8_arm_align1
 327         .word MC_put_x_8_arm_align2
 328         .word MC_put_x_8_arm_align3