Branch optimisation in both C (giving hints to gcc - verified using -fprofile-arcs...
[kugel-rb.git] / apps / codecs / demac / libdemac / predictor-arm.S
blobd62b6186f8d8f29728b99dccc6718964ed9a1707
/*

libdemac - A Monkey's Audio decoder

$Id$

Copyright (C) Dave Chapman 2007

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA

*/
/* Syntax: GNU as (ARM, 32-bit), passed through the C preprocessor. */

#include "demac_config.h"

/* ICODE_SECTION_DEMAC_ARM is not defined in this file; presumably it is
   supplied by demac_config.h - confirm there. */
    .section    ICODE_SECTION_DEMAC_ARM,"ax",%progbits

    .align      2               /* on ARM this is 2^2 = 4-byte alignment */

    .global     predictor_decode_stereo
    .type       predictor_decode_stereo,%function

/* NOTE: The following need to be kept in sync with parser.h */

/* Byte offsets relative to p->buf (element index * 4).  The code below
   uses them as e.g. "#YDELAYA-12" to address p->buf[YDELAYA-3]. */
#define YDELAYA        200
#define YDELAYB        168
#define XDELAYA        136
#define XDELAYB        104
#define YADAPTCOEFFSA   72
#define XADAPTCOEFFSA   56
#define YADAPTCOEFFSB   40
#define XADAPTCOEFFSB   20

/* struct predictor_t members: byte offsets from the start of the struct.
   The layout implied here is 4 bytes per field/element with no padding.
   YfilterB/XfilterA and XfilterB/YfilterA must stay adjacent, in this
   order: each pair is fetched with a single two-register ldmia. */
#define buf              0    /* int32_t* buf */

#define YlastA           4    /* int32_t YlastA; */
#define XlastA           8    /* int32_t XlastA; */

#define YfilterB        12    /* int32_t YfilterB; */
#define XfilterA        16    /* int32_t XfilterA; */

#define XfilterB        20    /* int32_t XfilterB; */
#define YfilterA        24    /* int32_t YfilterA; */

#define YcoeffsA        28    /* int32_t YcoeffsA[4]; */
#define XcoeffsA        44    /* int32_t XcoeffsA[4]; */
#define YcoeffsB        60    /* int32_t YcoeffsB[5]; */
#define XcoeffsB        80    /* int32_t XcoeffsB[5]; */

#define historybuffer  100    /* int32_t historybuffer[] */
@ void predictor_decode_stereo(struct predictor_t* p,
@                              int32_t* decoded0,
@                              int32_t* decoded1,
@                              int count)
@
@ For each of count sample pairs: runs predictor stage Y on *decoded0 and
@ stage X on *decoded1, overwrites each input word in place with the new
@ p->YfilterA / p->XfilterA, adapts the four coefficient arrays according
@ to the sign of the input word, and advances p->buf by one element.
@ When p->buf reaches historybuffer + PREDICTOR_HISTORY_SIZE, the 200
@ bytes at p->buf are copied back to the start of historybuffer.
@ (PREDICTOR_HISTORY_SIZE is not defined in this file - presumably it
@ comes from demac_config.h.)
@
@ In:    r0 = p, r1 = decoded0, r2 = decoded1, r3 = count
@ Out:   no return value is set up
@ Saved: r4-r11 are pushed on entry and restored on exit
@ Stack: 12 words; the lowest three hold decoded0, decoded1 and count
@ count <= 0 returns immediately without decoding anything.
@
@ Register usage inside the loop:
@
@ r0-r11 - scratch
@ r12 - struct predictor_t* p
@ r14 - int32_t* p->buf (only written back to p->buf at exit)

predictor_decode_stereo:
    stmdb   sp!, {r1-r11, lr}

    @ r1 (decoded0) is [sp]
    @ r2 (decoded1) is [sp, #4]
    @ r3 (count)    is [sp, #8]

    mov     r12, r0       @ r12 := p
    ldr     r14, [r0]     @ r14 := p->buf

    @ The loop below tests the counter only after processing a sample, so
    @ a count of zero (or less) must be rejected here.
    cmp     r3, #0
    ble     .Ldone

.Lloop:

@@@@@@@@@@@@@@@@@@@@@@@@@@@ PREDICTOR Y

@ Predictor Y, Filter A

    ldr     r10, [r12, #YlastA]     @ r10 := p->YlastA
    add     r11, r14, #YDELAYA-12   @ r11 := &p->buf[YDELAYA-3]

    ldmia   r11, { r2 - r4 }        @ r2 := p->buf[YDELAYA-3]
                                    @ r3 := p->buf[YDELAYA-2]
                                    @ r4 := p->buf[YDELAYA-1]

    subs    r4, r10, r4             @ r4 := r10 - r4 (flags consumed below)

    add     r1, r12, #YcoeffsA
    ldmia   r1, {r6 - r9}           @ r6 := p->YcoeffsA[0]
                                    @ r7 := p->YcoeffsA[1]
                                    @ r8 := p->YcoeffsA[2]
                                    @ r9 := p->YcoeffsA[3]

    mul     r0, r10, r6             @ r0 := p->buf[YDELAYA] * p->YcoeffsA[0]
    mla     r0, r4, r7, r0          @ r0 += p->buf[YDELAYA-1] * p->YcoeffsA[1]
    mla     r0, r3, r8, r0          @ r0 += p->buf[YDELAYA-2] * p->YcoeffsA[2]
    mla     r0, r2, r9, r0          @ r0 += p->buf[YDELAYA-3] * p->YcoeffsA[3]

    add     r11, r14, #YDELAYA-4
    stmia   r11, { r4, r10 }        @ p->buf[YDELAYA-1] = r4
                                    @ p->buf[YDELAYA] = r10

    @ The flags are still those of the subs above (mul/mla/add/stmia leave
    @ them alone).  gt/lt test the signed comparison of r10 with the old
    @ buffer value.  Result: -1 if positive, 1 if negative, 0 stays 0.
    mvngt   r4, #0
    movlt   r4, #1                  @ r4 := SIGN(r4)

    cmp     r10, #0
    mvngt   r10, #0
    movlt   r10, #1                 @ r10 := SIGN(r10)

    add     r1, r14, #YADAPTCOEFFSA-4
    stmia   r1, {r4, r10}           @ p->buf[YADAPTCOEFFSA-1] := r4
                                    @ p->buf[YADAPTCOEFFSA] := r10

    @ NOTE: r0 now contains predictionA - do not overwrite.

@ Predictor Y, Filter B

    add     r2, r12, #YfilterB
    ldmia   r2, {r2, r11}           @ r2 := p->YfilterB
                                    @ r11 := p->XfilterA

    rsb     r2, r2, r2, lsl #5      @ r2 := r2 * 32 - r2 ( == r2*31)
    sub     r10, r11, r2, asr #5    @ r10 (p->buf[YDELAYB]) := r11 - (r2 >> 5)

    str     r11, [r12, #YfilterB]   @ p->YfilterB := r11 (p->XfilterA)

    add     r11, r14, #YDELAYB-16   @ r11 := &p->buf[YDELAYB-4]

    ldmia   r11, { r2 - r5 }        @ r2 := p->buf[YDELAYB-4]
                                    @ r3 := p->buf[YDELAYB-3]
                                    @ r4 := p->buf[YDELAYB-2]
                                    @ r5 := p->buf[YDELAYB-1]

    subs    r5, r10, r5             @ r5 := r10 - r5 (flags consumed below)

    add     r1, r12, #YcoeffsB
    ldmia   r1, {r6,r7,r8,r9,r11}   @ r6 := p->YcoeffsB[0]
                                    @ r7 := p->YcoeffsB[1]
                                    @ r8 := p->YcoeffsB[2]
                                    @ r9 := p->YcoeffsB[3]
                                    @ r11 := p->YcoeffsB[4]

    mul     r1, r10, r6             @ r1 := p->buf[YDELAYB] * p->YcoeffsB[0]
    mla     r1, r5, r7, r1          @ r1 += p->buf[YDELAYB-1] * p->YcoeffsB[1]
    mla     r1, r4, r8, r1          @ r1 += p->buf[YDELAYB-2] * p->YcoeffsB[2]
    mla     r1, r3, r9, r1          @ r1 += p->buf[YDELAYB-3] * p->YcoeffsB[3]
    mla     r1, r2, r11, r1         @ r1 += p->buf[YDELAYB-4] * p->YcoeffsB[4]

    add     r2, r14, #YDELAYB-4     @ r2 := &p->buf[YDELAYB-1]
    stmia   r2, { r5, r10 }         @ p->buf[YDELAYB-1] = r5
                                    @ p->buf[YDELAYB] = r10

    @ flags are still those of the subs above
    mvngt   r5, #0
    movlt   r5, #1                  @ r5 := SIGN(r5)

    cmp     r10, #0
    mvngt   r10, #0
    movlt   r10, #1                 @ r10 := SIGN(r10)

    add     r2, r14, #YADAPTCOEFFSB-4
    stmia   r2, {r5, r10}           @ p->buf[YADAPTCOEFFSB-1] := r5
                                    @ p->buf[YADAPTCOEFFSB] := r10

    @ r0 still contains predictionA
    @ r1 contains predictionB

    @ Finish Predictor Y

    ldr     r2, [sp]                @ r2 := decoded0
    add     r0, r0, r1, asr #1      @ r0 := r0 + (r1 >> 1)
    ldr     r3, [r2]                @ r3 := *decoded0
    add     r1, r3, r0, asr #10     @ r1 := r3 + (r0 >> 10)
    str     r1, [r12, #YlastA]      @ p->YlastA := r1

    ldr     r4, [r12, #YfilterA]    @ r4 := p->YfilterA
    rsb     r4, r4, r4, lsl #5      @ r4 := r4 * 32 - r4 ( == r4*31)
    add     r1, r1, r4, asr #5      @ r1 := r1 + (r4 >> 5)
    str     r1, [r12, #YfilterA]    @ p->YfilterA := r1

    @ r1 contains p->YfilterA
    @ r2 contains decoded0
    @ r3 contains *decoded0 (the input word, read before it is overwritten)

    @ r6, r7, r8, r9, r11 contain p->YcoeffsB[0..4]
    @ r5, r10 contain p->buf[YADAPTCOEFFSB-1] and p->buf[YADAPTCOEFFSB]

    cmp     r3, #0                  @ these flags drive both beq and blt below
    stmia   r2!, {r1}               @ *(decoded0++) := r1  (p->YfilterA)
    str     r2, [sp]                @ save decoded0
    beq     .Ly_adapted             @ input was zero: no coefficient update

    add     r1, r14, #YADAPTCOEFFSB-16
    ldmia   r1, { r2, r3, r4 }      @ r2 := p->buf[YADAPTCOEFFSB-4]
                                    @ r3 := p->buf[YADAPTCOEFFSB-3]
                                    @ r4 := p->buf[YADAPTCOEFFSB-2]
    blt     .Ly_negative

    @ *decoded0 > 0

    sub     r6, r6, r10       @ r6 := p->YcoeffsB[0] - p->buf[YADAPTCOEFFSB]
    sub     r7, r7, r5        @ r7 := p->YcoeffsB[1] - p->buf[YADAPTCOEFFSB-1]
    sub     r8, r8, r4        @ r8 := p->YcoeffsB[2] - p->buf[YADAPTCOEFFSB-2]
    sub     r9, r9, r3        @ r9 := p->YcoeffsB[3] - p->buf[YADAPTCOEFFSB-3]
    sub     r11, r11, r2      @ r11 := p->YcoeffsB[4] - p->buf[YADAPTCOEFFSB-4]

    add     r0, r12, #YcoeffsB
    stmia   r0, {r6,r7,r8,r9,r11}   @ Save p->YcoeffsB[]

    add     r1, r12, #YcoeffsA
    ldmia   r1, { r2-r5 }           @ r2 := p->YcoeffsA[0]
                                    @ r3 := p->YcoeffsA[1]
                                    @ r4 := p->YcoeffsA[2]
                                    @ r5 := p->YcoeffsA[3]

    add     r0, r14, #YADAPTCOEFFSA-12
    ldmia   r0, { r6-r9}            @ r6 := p->buf[YADAPTCOEFFSA-3]
                                    @ r7 := p->buf[YADAPTCOEFFSA-2]
                                    @ r8 := p->buf[YADAPTCOEFFSA-1]
                                    @ r9 := p->buf[YADAPTCOEFFSA]

    sub     r2, r2, r9        @ r2 := p->YcoeffsA[0] - p->buf[YADAPTCOEFFSA]
    sub     r3, r3, r8        @ r3 := p->YcoeffsA[1] - p->buf[YADAPTCOEFFSA-1]
    sub     r4, r4, r7        @ r4 := p->YcoeffsA[2] - p->buf[YADAPTCOEFFSA-2]
    sub     r5, r5, r6        @ r5 := p->YcoeffsA[3] - p->buf[YADAPTCOEFFSA-3]

    stmia   r1, {r2-r5}             @ Save p->YcoeffsA
    b       .Ly_adapted

.Ly_negative:  @ *decoded0 < 0

    add     r6, r6, r10       @ r6 := p->YcoeffsB[0] + p->buf[YADAPTCOEFFSB]
    add     r7, r7, r5        @ r7 := p->YcoeffsB[1] + p->buf[YADAPTCOEFFSB-1]
    add     r8, r8, r4        @ r8 := p->YcoeffsB[2] + p->buf[YADAPTCOEFFSB-2]
    add     r9, r9, r3        @ r9 := p->YcoeffsB[3] + p->buf[YADAPTCOEFFSB-3]
    add     r11, r11, r2      @ r11 := p->YcoeffsB[4] + p->buf[YADAPTCOEFFSB-4]

    add     r0, r12, #YcoeffsB
    stmia   r0, {r6,r7,r8,r9,r11}   @ Save p->YcoeffsB[]

    add     r1, r12, #YcoeffsA
    ldmia   r1, { r2-r5 }           @ r2 := p->YcoeffsA[0]
                                    @ r3 := p->YcoeffsA[1]
                                    @ r4 := p->YcoeffsA[2]
                                    @ r5 := p->YcoeffsA[3]

    add     r0, r14, #YADAPTCOEFFSA-12
    ldmia   r0, { r6-r9}            @ r6 := p->buf[YADAPTCOEFFSA-3]
                                    @ r7 := p->buf[YADAPTCOEFFSA-2]
                                    @ r8 := p->buf[YADAPTCOEFFSA-1]
                                    @ r9 := p->buf[YADAPTCOEFFSA]

    add     r2, r2, r9        @ r2 := p->YcoeffsA[0] + p->buf[YADAPTCOEFFSA]
    add     r3, r3, r8        @ r3 := p->YcoeffsA[1] + p->buf[YADAPTCOEFFSA-1]
    add     r4, r4, r7        @ r4 := p->YcoeffsA[2] + p->buf[YADAPTCOEFFSA-2]
    add     r5, r5, r6        @ r5 := p->YcoeffsA[3] + p->buf[YADAPTCOEFFSA-3]

    stmia   r1, {r2-r5}       @ Save p->YcoeffsA

.Ly_adapted:   @ join point for the zero / positive / negative cases of stage Y

@@@@@@@@@@@@@@@@@@@@@@@@@@@ PREDICTOR X

@ Predictor X, Filter A

    ldr     r10, [r12, #XlastA]     @ r10 := p->XlastA
    add     r11, r14, #XDELAYA-12   @ r11 := &p->buf[XDELAYA-3]

    ldmia   r11, { r2 - r4 }        @ r2 := p->buf[XDELAYA-3]
                                    @ r3 := p->buf[XDELAYA-2]
                                    @ r4 := p->buf[XDELAYA-1]

    subs    r4, r10, r4             @ r4 := r10 - r4 (flags consumed below)

    add     r1, r12, #XcoeffsA
    ldmia   r1, {r6 - r9}           @ r6 := p->XcoeffsA[0]
                                    @ r7 := p->XcoeffsA[1]
                                    @ r8 := p->XcoeffsA[2]
                                    @ r9 := p->XcoeffsA[3]

    mul     r0, r10, r6             @ r0 := p->buf[XDELAYA] * p->XcoeffsA[0]
    mla     r0, r4, r7, r0          @ r0 += p->buf[XDELAYA-1] * p->XcoeffsA[1]
    mla     r0, r3, r8, r0          @ r0 += p->buf[XDELAYA-2] * p->XcoeffsA[2]
    mla     r0, r2, r9, r0          @ r0 += p->buf[XDELAYA-3] * p->XcoeffsA[3]

    add     r11, r14, #XDELAYA-4
    stmia   r11, { r4, r10 }        @ p->buf[XDELAYA-1] = r4
                                    @ p->buf[XDELAYA] = r10

    @ flags are still those of the subs above
    mvngt   r4, #0
    movlt   r4, #1                  @ r4 := SIGN(r4)

    cmp     r10, #0
    mvngt   r10, #0
    movlt   r10, #1                 @ r10 := SIGN(r10)

    add     r1, r14, #XADAPTCOEFFSA-4
    stmia   r1, {r4, r10}           @ p->buf[XADAPTCOEFFSA-1] := r4
                                    @ p->buf[XADAPTCOEFFSA] := r10

    @ NOTE: r0 now contains predictionA - do not overwrite.

@ Predictor X, Filter B

    add     r2, r12, #XfilterB
    ldmia   r2, {r2, r11}           @ r2 := p->XfilterB
                                    @ r11 := p->YfilterA (as updated by stage Y)

    rsb     r2, r2, r2, lsl #5      @ r2 := r2 * 32 - r2 ( == r2*31)
    sub     r10, r11, r2, asr #5    @ r10 (p->buf[XDELAYB]) := r11 - (r2 >> 5)

    str     r11, [r12, #XfilterB]   @ p->XfilterB := r11 (p->YfilterA)

    add     r11, r14, #XDELAYB-16   @ r11 := &p->buf[XDELAYB-4]

    ldmia   r11, { r2 - r5 }        @ r2 := p->buf[XDELAYB-4]
                                    @ r3 := p->buf[XDELAYB-3]
                                    @ r4 := p->buf[XDELAYB-2]
                                    @ r5 := p->buf[XDELAYB-1]

    subs    r5, r10, r5             @ r5 := r10 - r5 (flags consumed below)

    add     r1, r12, #XcoeffsB
    ldmia   r1, {r6,r7,r8,r9,r11}   @ r6 := p->XcoeffsB[0]
                                    @ r7 := p->XcoeffsB[1]
                                    @ r8 := p->XcoeffsB[2]
                                    @ r9 := p->XcoeffsB[3]
                                    @ r11 := p->XcoeffsB[4]

    mul     r1, r10, r6             @ r1 := p->buf[XDELAYB] * p->XcoeffsB[0]
    mla     r1, r5, r7, r1          @ r1 += p->buf[XDELAYB-1] * p->XcoeffsB[1]
    mla     r1, r4, r8, r1          @ r1 += p->buf[XDELAYB-2] * p->XcoeffsB[2]
    mla     r1, r3, r9, r1          @ r1 += p->buf[XDELAYB-3] * p->XcoeffsB[3]
    mla     r1, r2, r11, r1         @ r1 += p->buf[XDELAYB-4] * p->XcoeffsB[4]

    add     r2, r14, #XDELAYB-4     @ r2 := &p->buf[XDELAYB-1]
    stmia   r2, { r5, r10 }         @ p->buf[XDELAYB-1] = r5
                                    @ p->buf[XDELAYB] = r10

    @ flags are still those of the subs above
    mvngt   r5, #0
    movlt   r5, #1                  @ r5 := SIGN(r5)

    cmp     r10, #0
    mvngt   r10, #0
    movlt   r10, #1                 @ r10 := SIGN(r10)

    add     r2, r14, #XADAPTCOEFFSB-4
    stmia   r2, {r5, r10}           @ p->buf[XADAPTCOEFFSB-1] := r5
                                    @ p->buf[XADAPTCOEFFSB] := r10

    @ r0 still contains predictionA
    @ r1 contains predictionB

    @ Finish Predictor X

    ldr     r2, [sp, #4]            @ r2 := decoded1
    add     r0, r0, r1, asr #1      @ r0 := r0 + (r1 >> 1)
    ldr     r3, [r2]                @ r3 := *decoded1
    add     r1, r3, r0, asr #10     @ r1 := r3 + (r0 >> 10)
    str     r1, [r12, #XlastA]      @ p->XlastA := r1

    ldr     r4, [r12, #XfilterA]    @ r4 := p->XfilterA
    rsb     r4, r4, r4, lsl #5      @ r4 := r4 * 32 - r4 ( == r4*31)
    add     r1, r1, r4, asr #5      @ r1 := r1 + (r4 >> 5)
    str     r1, [r12, #XfilterA]    @ p->XfilterA := r1

    @ r1 contains p->XfilterA
    @ r2 contains decoded1
    @ r3 contains *decoded1 (the input word, read before it is overwritten)

    @ r6, r7, r8, r9, r11 contain p->XcoeffsB[0..4]
    @ r5, r10 contain p->buf[XADAPTCOEFFSB-1] and p->buf[XADAPTCOEFFSB]

    cmp     r3, #0                  @ these flags drive both beq and blt below
    stmia   r2!, {r1}               @ *(decoded1++) := r1  (p->XfilterA)
    str     r2, [sp, #4]            @ save decoded1
    beq     .Lx_adapted             @ input was zero: no coefficient update

    add     r1, r14, #XADAPTCOEFFSB-16
    ldmia   r1, { r2, r3, r4 }      @ r2 := p->buf[XADAPTCOEFFSB-4]
                                    @ r3 := p->buf[XADAPTCOEFFSB-3]
                                    @ r4 := p->buf[XADAPTCOEFFSB-2]
    blt     .Lx_negative

    @ *decoded1 > 0

    sub     r6, r6, r10       @ r6 := p->XcoeffsB[0] - p->buf[XADAPTCOEFFSB]
    sub     r7, r7, r5        @ r7 := p->XcoeffsB[1] - p->buf[XADAPTCOEFFSB-1]
    sub     r8, r8, r4        @ r8 := p->XcoeffsB[2] - p->buf[XADAPTCOEFFSB-2]
    sub     r9, r9, r3        @ r9 := p->XcoeffsB[3] - p->buf[XADAPTCOEFFSB-3]
    sub     r11, r11, r2      @ r11 := p->XcoeffsB[4] - p->buf[XADAPTCOEFFSB-4]

    add     r0, r12, #XcoeffsB
    stmia   r0, {r6,r7,r8,r9,r11}   @ Save p->XcoeffsB[]

    add     r1, r12, #XcoeffsA
    ldmia   r1, { r2-r5 }           @ r2 := p->XcoeffsA[0]
                                    @ r3 := p->XcoeffsA[1]
                                    @ r4 := p->XcoeffsA[2]
                                    @ r5 := p->XcoeffsA[3]

    add     r0, r14, #XADAPTCOEFFSA-12
    ldmia   r0, { r6-r9}            @ r6 := p->buf[XADAPTCOEFFSA-3]
                                    @ r7 := p->buf[XADAPTCOEFFSA-2]
                                    @ r8 := p->buf[XADAPTCOEFFSA-1]
                                    @ r9 := p->buf[XADAPTCOEFFSA]

    sub     r2, r2, r9        @ r2 := p->XcoeffsA[0] - p->buf[XADAPTCOEFFSA]
    sub     r3, r3, r8        @ r3 := p->XcoeffsA[1] - p->buf[XADAPTCOEFFSA-1]
    sub     r4, r4, r7        @ r4 := p->XcoeffsA[2] - p->buf[XADAPTCOEFFSA-2]
    sub     r5, r5, r6        @ r5 := p->XcoeffsA[3] - p->buf[XADAPTCOEFFSA-3]

    stmia   r1, {r2-r5}             @ Save p->XcoeffsA
    b       .Lx_adapted

.Lx_negative:  @ *decoded1 < 0

    add     r6, r6, r10       @ r6 := p->XcoeffsB[0] + p->buf[XADAPTCOEFFSB]
    add     r7, r7, r5        @ r7 := p->XcoeffsB[1] + p->buf[XADAPTCOEFFSB-1]
    add     r8, r8, r4        @ r8 := p->XcoeffsB[2] + p->buf[XADAPTCOEFFSB-2]
    add     r9, r9, r3        @ r9 := p->XcoeffsB[3] + p->buf[XADAPTCOEFFSB-3]
    add     r11, r11, r2      @ r11 := p->XcoeffsB[4] + p->buf[XADAPTCOEFFSB-4]

    add     r0, r12, #XcoeffsB
    stmia   r0, {r6,r7,r8,r9,r11}   @ Save p->XcoeffsB[]

    add     r1, r12, #XcoeffsA
    ldmia   r1, { r2-r5 }           @ r2 := p->XcoeffsA[0]
                                    @ r3 := p->XcoeffsA[1]
                                    @ r4 := p->XcoeffsA[2]
                                    @ r5 := p->XcoeffsA[3]

    add     r0, r14, #XADAPTCOEFFSA-12
    ldmia   r0, { r6-r9}            @ r6 := p->buf[XADAPTCOEFFSA-3]
                                    @ r7 := p->buf[XADAPTCOEFFSA-2]
                                    @ r8 := p->buf[XADAPTCOEFFSA-1]
                                    @ r9 := p->buf[XADAPTCOEFFSA]

    add     r2, r2, r9        @ r2 := p->XcoeffsA[0] + p->buf[XADAPTCOEFFSA]
    add     r3, r3, r8        @ r3 := p->XcoeffsA[1] + p->buf[XADAPTCOEFFSA-1]
    add     r4, r4, r7        @ r4 := p->XcoeffsA[2] + p->buf[XADAPTCOEFFSA-2]
    add     r5, r5, r6        @ r5 := p->XcoeffsA[3] + p->buf[XADAPTCOEFFSA-3]

    stmia   r1, {r2-r5}             @ Save p->XcoeffsA

.Lx_adapted:   @ join point for the zero / positive / negative cases of stage X

@@@@@@@@@@@@@@@@@@@@@@@@@@@ COMMON

    add     r14, r14, #4                @ p->buf++

    add     r11, r12, #historybuffer    @ r11 := &p->historybuffer[0]

    sub     r10, r14, #PREDICTOR_HISTORY_SIZE*4
                                       @ r10 := p->buf - PREDICTOR_HISTORY_SIZE

    cmp     r10, r11
    beq     .Lmove_hist   @ The history buffer is full, we need to do a memmove

    @ Check loop count (only a non-zero remainder is written back)
    ldr     r0, [sp, #8]
    subs    r0, r0, #1
    strne   r0, [sp, #8]
    bne     .Lloop

.Ldone:
    str     r14, [r12]              @ Save value of p->buf
    add     sp, sp, #12             @ Skip the saved r1-r3; no need to restore
    ldmia   sp!, {r4-r11, pc}       @ restore r4-r11 and return

.Lmove_hist:
    @ dest = r11 (p->historybuffer)
    @ src = r14 (p->buf)
    @ n = 200 bytes, moved as 5 bursts of 10 registers

    ldmia   r14!, {r0-r9}    @ 40 bytes
    stmia   r11!, {r0-r9}
    ldmia   r14!, {r0-r9}    @ 40 bytes
    stmia   r11!, {r0-r9}
    ldmia   r14!, {r0-r9}    @ 40 bytes
    stmia   r11!, {r0-r9}
    ldmia   r14!, {r0-r9}    @ 40 bytes
    stmia   r11!, {r0-r9}
    ldmia   r14!, {r0-r9}    @ 40 bytes
    stmia   r11!, {r0-r9}

    add     r14, r12, #historybuffer    @ p->buf = &p->historybuffer[0]

    @ Check loop count (same test as on the common path, repeated here so
    @ that the usual case needs only one taken branch)
    ldr     r0, [sp, #8]
    subs    r0, r0, #1
    strne   r0, [sp, #8]
    bne     .Lloop

    b       .Ldone