apps/plugins/mpegplayer/motion_comp_arm_s.S

   1 @ motion_comp_arm_s.S
   2 @ Copyright (C) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
   3 @
   4 @ This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
   5 @ See http://libmpeg2.sourceforge.net/ for updates.
   6 @
   7 @ mpeg2dec is free software; you can redistribute it and/or modify
   8 @ it under the terms of the GNU General Public License as published by
   9 @ the Free Software Foundation; either version 2 of the License, or
  10 @ (at your option) any later version.
  11 @
  12 @ mpeg2dec is distributed in the hope that it will be useful,
  13 @ but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 @ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 @ GNU General Public License for more details.
  16 @
  17 @ You should have received a copy of the GNU General Public License
  18 @ along with this program; if not, write to the Free Software
  19 @ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  20
  21         .text
  22
  23 @ ----------------------------------------------------------------
  24         .align
  25         .global MC_put_o_16_arm
  26 MC_put_o_16_arm:
  27         @@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height)
  28         @@ pld [r1]
  29         stmfd sp!, {r4-r11, lr} @ R14 is also called LR
  30         and r4, r1, #3
  31         adr r5, MC_put_o_16_arm_align_jt
  32         add r5, r5, r4, lsl #2
  33         ldr pc, [r5]
  34
  35 MC_put_o_16_arm_align0:
  36         ldmia r1, {r4-r7}
  37         add r1, r1, r2
  38         @@ pld [r1]
  39         stmia r0, {r4-r7}
  40         subs r3, r3, #1
  41         add r0, r0, r2
  42         bne MC_put_o_16_arm_align0
  43         ldmfd sp!, {r4-r11, pc} @@ update PC with LR content.
  44
  45 .macro  PROC shift
  46         ldmia r1, {r4-r8}
  47         add r1, r1, r2
  48         mov r9, r4, lsr #(\shift)
  49         @@ pld [r1]
  50         mov r10, r5, lsr #(\shift)
  51         orr r9, r9, r5, lsl #(32-\shift)
  52         mov r11, r6, lsr #(\shift)
  53         orr r10, r10, r6, lsl #(32-\shift)
  54         mov r12, r7, lsr #(\shift)
  55         orr r11, r11, r7, lsl #(32-\shift)
  56         orr r12, r12, r8, lsl #(32-\shift)
  57         stmia r0, {r9-r12}
  58         subs r3, r3, #1
  59         add r0, r0, r2
  60 .endm
  61
  62 MC_put_o_16_arm_align1:
  63         and r1, r1, #0xFFFFFFFC
  64 1:      PROC(8)
  65         bne 1b
  66         ldmfd sp!, {r4-r11, pc} @@ update PC with LR content.
  67 MC_put_o_16_arm_align2:
  68         and r1, r1, #0xFFFFFFFC
  69 1:      PROC(16)
  70         bne 1b
  71         ldmfd sp!, {r4-r11, pc} @@ update PC with LR content.
  72 MC_put_o_16_arm_align3:
  73         and r1, r1, #0xFFFFFFFC
  74 1:      PROC(24)
  75         bne 1b
  76         ldmfd sp!, {r4-r11, pc} @@ update PC with LR content.
  77 MC_put_o_16_arm_align_jt:
  78         .word MC_put_o_16_arm_align0
  79         .word MC_put_o_16_arm_align1
  80         .word MC_put_o_16_arm_align2
  81         .word MC_put_o_16_arm_align3
  82
  83 @ ----------------------------------------------------------------
  84         .align
  85         .global MC_put_o_8_arm
  86 MC_put_o_8_arm:
  87         @@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height)
  88         @@ pld [r1]
  89         stmfd sp!, {r4-r10, lr} @ R14 is also called LR
  90         and r4, r1, #3
  91         adr r5, MC_put_o_8_arm_align_jt
  92         add r5, r5, r4, lsl #2
  93         ldr pc, [r5]
  94 MC_put_o_8_arm_align0:
  95         ldmia r1, {r4-r5}
  96         add r1, r1, r2
  97         @@ pld [r1]
  98         stmia r0, {r4-r5}
  99         add r0, r0, r2
 100         subs r3, r3, #1
 101         bne MC_put_o_8_arm_align0
 102         ldmfd sp!, {r4-r10, pc} @@ update PC with LR content.
 103
 104 .macro  PROC8 shift
 105         ldmia r1, {r4-r6}
 106         add r1, r1, r2
 107         mov r9, r4, lsr #(\shift)
 108         @@ pld [r1]
 109         mov r10, r5, lsr #(\shift)
 110         orr r9, r9, r5, lsl #(32-\shift)
 111         orr r10, r10, r6, lsl #(32-\shift)
 112         stmia r0, {r9-r10}
 113         subs r3, r3, #1
 114         add r0, r0, r2
 115 .endm
 116
 117 MC_put_o_8_arm_align1:
 118         and r1, r1, #0xFFFFFFFC
 119 1:      PROC8(8)
 120         bne 1b
 121         ldmfd sp!, {r4-r10, pc} @@ update PC with LR content.
 122
 123 MC_put_o_8_arm_align2:
 124         and r1, r1, #0xFFFFFFFC
 125 1:      PROC8(16)
 126         bne 1b
 127         ldmfd sp!, {r4-r10, pc} @@ update PC with LR content.
 128
 129 MC_put_o_8_arm_align3:
 130         and r1, r1, #0xFFFFFFFC
 131 1:      PROC8(24)
 132         bne 1b
 133         ldmfd sp!, {r4-r10, pc} @@ update PC with LR content.
 134
 135 MC_put_o_8_arm_align_jt:
 136         .word MC_put_o_8_arm_align0
 137         .word MC_put_o_8_arm_align1
 138         .word MC_put_o_8_arm_align2
 139         .word MC_put_o_8_arm_align3
 140
 141 @ ----------------------------------------------------------------
 142 .macro  AVG_PW rW1, rW2
 143         mov \rW2, \rW2, lsl #24
 144         orr \rW2, \rW2, \rW1, lsr #8
 145         eor r9, \rW1, \rW2
 146         and \rW2, \rW1, \rW2
 147         and r10, r9, r12
 148         add \rW2, \rW2, r10, lsr #1
 149         and r10, r9, r11
 150         add \rW2, \rW2, r10
 151 .endm
 152
 153         .align
 154         .global MC_put_x_16_arm
 155 MC_put_x_16_arm:
 156         @@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height)
 157         @@ pld [r1]
 158         stmfd sp!, {r4-r11,lr} @ R14 is also called LR
 159         and r4, r1, #3
 160         adr r5, MC_put_x_16_arm_align_jt
 161         ldr r11, [r5]
 162         mvn r12, r11
 163         add r5, r5, r4, lsl #2
 164         ldr pc, [r5, #4]
 165
 166 .macro  ADJ_ALIGN_QW shift, R0, R1, R2, R3, R4
 167         mov \R0, \R0, lsr #(\shift)
 168         orr \R0, \R0, \R1, lsl #(32 - \shift)
 169         mov \R1, \R1, lsr #(\shift)
 170         orr \R1, \R1, \R2, lsl #(32 - \shift)
 171         mov \R2, \R2, lsr #(\shift)
 172         orr \R2, \R2, \R3, lsl #(32 - \shift)
 173         mov \R3, \R3, lsr #(\shift)
 174         orr \R3, \R3, \R4, lsl #(32 - \shift)
 175         mov \R4, \R4, lsr #(\shift)
 176 @       and \R4, \R4, #0xFF
 177 .endm
 178
 179 MC_put_x_16_arm_align0:
 180         ldmia r1, {r4-r8}
 181         add r1, r1, r2
 182         @@ pld [r1]
 183         AVG_PW r7, r8
 184         AVG_PW r6, r7
 185         AVG_PW r5, r6
 186         AVG_PW r4, r5
 187         stmia r0, {r5-r8}
 188         subs r3, r3, #1
 189         add r0, r0, r2
 190         bne MC_put_x_16_arm_align0
 191         ldmfd sp!, {r4-r11,pc} @@ update PC with LR content.
 192 MC_put_x_16_arm_align1:
 193         and r1, r1, #0xFFFFFFFC
 194 1:      ldmia r1, {r4-r8}
 195         add r1, r1, r2
 196         @@ pld [r1]
 197         ADJ_ALIGN_QW 8, r4, r5, r6, r7, r8
 198         AVG_PW r7, r8
 199         AVG_PW r6, r7
 200         AVG_PW r5, r6
 201         AVG_PW r4, r5
 202         stmia r0, {r5-r8}
 203         subs r3, r3, #1
 204         add r0, r0, r2
 205         bne 1b
 206         ldmfd sp!, {r4-r11,pc} @@ update PC with LR content.
 207 MC_put_x_16_arm_align2:
 208         and r1, r1, #0xFFFFFFFC
 209 1:      ldmia r1, {r4-r8}
 210         add r1, r1, r2
 211         @@ pld [r1]
 212         ADJ_ALIGN_QW 16, r4, r5, r6, r7, r8
 213         AVG_PW r7, r8
 214         AVG_PW r6, r7
 215         AVG_PW r5, r6
 216         AVG_PW r4, r5
 217         stmia r0, {r5-r8}
 218         subs r3, r3, #1
 219         add r0, r0, r2
 220         bne 1b
 221         ldmfd sp!, {r4-r11,pc} @@ update PC with LR content.
 222 MC_put_x_16_arm_align3:
 223         and r1, r1, #0xFFFFFFFC
 224 1:      ldmia r1, {r4-r8}
 225         add r1, r1, r2
 226         @@ pld [r1]
 227         ADJ_ALIGN_QW 24, r4, r5, r6, r7, r8
 228         AVG_PW r7, r8
 229         AVG_PW r6, r7
 230         AVG_PW r5, r6
 231         AVG_PW r4, r5
 232         stmia r0, {r5-r8}
 233         subs r3, r3, #1
 234         add r0, r0, r2
 235         bne 1b
 236         ldmfd sp!, {r4-r11,pc} @@ update PC with LR content.
 237 MC_put_x_16_arm_align_jt:
 238         .word 0x01010101
 239         .word MC_put_x_16_arm_align0
 240         .word MC_put_x_16_arm_align1
 241         .word MC_put_x_16_arm_align2
 242         .word MC_put_x_16_arm_align3
 243
 244 @ ----------------------------------------------------------------
 245         .align
 246         .global MC_put_x_8_arm
 247 MC_put_x_8_arm:
 248         @@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height)
 249         @@ pld [r1]
 250         stmfd sp!, {r4-r11,lr} @ R14 is also called LR
 251         and r4, r1, #3
 252         adr r5, MC_put_x_8_arm_align_jt
 253         ldr r11, [r5]
 254         mvn r12, r11
 255         add r5, r5, r4, lsl #2
 256         ldr pc, [r5, #4]
 257
 258 .macro  ADJ_ALIGN_DW shift, R0, R1, R2
 259         mov \R0, \R0, lsr #(\shift)
 260         orr \R0, \R0, \R1, lsl #(32 - \shift)
 261         mov \R1, \R1, lsr #(\shift)
 262         orr \R1, \R1, \R2, lsl #(32 - \shift)
 263         mov \R2, \R2, lsr #(\shift)
 264 @       and \R4, \R4, #0xFF
 265 .endm
 266
 267 MC_put_x_8_arm_align0:
 268         ldmia r1, {r4-r6}
 269         add r1, r1, r2
 270         @@ pld [r1]
 271         AVG_PW r5, r6
 272         AVG_PW r4, r5
 273         stmia r0, {r5-r6}
 274         subs r3, r3, #1
 275         add r0, r0, r2
 276         bne MC_put_x_8_arm_align0
 277         ldmfd sp!, {r4-r11,pc} @@ update PC with LR content.
 278 MC_put_x_8_arm_align1:
 279         and r1, r1, #0xFFFFFFFC
 280 1:      ldmia r1, {r4-r6}
 281         add r1, r1, r2
 282         @@ pld [r1]
 283         ADJ_ALIGN_DW 8, r4, r5, r6
 284         AVG_PW r5, r6
 285         AVG_PW r4, r5
 286         stmia r0, {r5-r6}
 287         subs r3, r3, #1
 288         add r0, r0, r2
 289         bne 1b
 290         ldmfd sp!, {r4-r11,pc} @@ update PC with LR content.
 291 MC_put_x_8_arm_align2:
 292         and r1, r1, #0xFFFFFFFC
 293 1:      ldmia r1, {r4-r6}
 294         add r1, r1, r2
 295         @@ pld [r1]
 296         ADJ_ALIGN_DW 16, r4, r5, r6
 297         AVG_PW r5, r6
 298         AVG_PW r4, r5
 299         stmia r0, {r5-r6}
 300         subs r3, r3, #1
 301         add r0, r0, r2
 302         bne 1b
 303         ldmfd sp!, {r4-r11,pc} @@ update PC with LR content.
 304 MC_put_x_8_arm_align3:
 305         and r1, r1, #0xFFFFFFFC
 306 1:      ldmia r1, {r4-r6}
 307         add r1, r1, r2
 308         @@ pld [r1]
 309         ADJ_ALIGN_DW 24, r4, r5, r6
 310         AVG_PW r5, r6
 311         AVG_PW r4, r5
 312         stmia r0, {r5-r6}
 313         subs r3, r3, #1
 314         add r0, r0, r2
 315         bne 1b
 316         ldmfd sp!, {r4-r11,pc} @@ update PC with LR content.
 317 MC_put_x_8_arm_align_jt:
 318         .word 0x01010101
 319         .word MC_put_x_8_arm_align0
 320         .word MC_put_x_8_arm_align1
 321         .word MC_put_x_8_arm_align2
 322         .word MC_put_x_8_arm_align3