1 /*****************************************************************************
2 * Copyright (C) 2000-2001 Andre McCurdy <armccurdy@yahoo.co.uk>
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 *****************************************************************************
23 *****************************************************************************
27 * 2001/03/24: Andre McCurdy <armccurdy@yahoo.co.uk>
28 * - Corrected PIC unsafe loading of address of 'imdct36_long_karray'
30 * 2000/09/20: Robert Leslie <rob@mars.org>
31 * - Added a global symbol with leading underscore per suggestion of
32 * Simon Burge to support linking with the a.out format.
34 * 2000/09/15: Robert Leslie <rob@mars.org>
35 * - Fixed a small bug where flags were changed before a conditional branch.
37 * 2000/09/15: Andre McCurdy <armccurdy@yahoo.co.uk>
38 * - Applied Nicolas Pitre's rounding optimisation in all remaining places.
40 * 2000/09/09: Nicolas Pitre <nico@cam.org>
41 * - Optimized rounding + scaling operations.
43 * 2000/08/09: Andre McCurdy <armccurdy@yahoo.co.uk>
46 ****************************************************************************/
52 r0 = pointer to 18 element input array
53 r1 = pointer to 36 element output array
54 r2 = windowing block type
57 Stack frame created during execution of the function:
75 40 r2 : windowing block type
@ Values of the windowing block type argument (passed in r2 and re-read from
@ the stack by the windowing stage). The windowing code compares explicitly
@ against BLOCK_MODE_STOP and BLOCK_MODE_START; any other value takes the
@ normal block paths.
99 #define BLOCK_MODE_NORMAL 0
100 #define BLOCK_MODE_START 1
101 #define BLOCK_MODE_STOP 3
@ Multiplier constants K00..K17 for the IMDCT stage. They are loaded with
@ ldr rN, =Knn and also emitted (with sign) as .word entries of the coefficient
@ table. Results are taken from bits [59..28] of the 64-bit sums, so these are
@ presumably fixed point values scaled by 2^28 (TODO confirm against the
@ reference implementation).
160 #define K00 0x0ffc19fd
161 #define K01 0x00b2aa3e
162 #define K02 0x0fdcf549
163 #define K03 0x0216a2a2
164 #define K04 0x0f9ee890
165 #define K05 0x03768962
166 #define K06 0x0f426cb5
167 #define K07 0x04cfb0e2
168 #define K08 0x0ec835e8
169 #define K09 0x061f78aa
170 #define K10 0x0e313245
171 #define K11 0x07635284
172 #define K12 0x0d7e8807
173 #define K13 0x0898c779
174 #define K14 0x0cb19346
175 #define K15 0x09bd7ca0
176 #define K16 0x0bcbe352
177 #define K17 0x0acf37ad
@ minus_K02 is the 32-bit twos complement negation of K02
@ (0x0fdcf549 + 0xf0230ab7 == 0x100000000), so -K02 can be loaded directly.
179 #define minus_K02 0xf0230ab7
@ Long block window coefficients window_l[0..17], loaded one at a time by the
@ windowing code. The values are the same 18 constants as K00..K17 listed in
@ ascending order: WL0..WL8 == K01, K03, ..., K17 and WL9..WL17 == K16, K14,
@ ..., K00. The stop and start block paths reuse WL1, WL4, WL7, WL10, WL13 and
@ WL16 as the short window window_s[0..5].
181 #define WL0 0x00b2aa3e
182 #define WL1 0x0216a2a2
183 #define WL2 0x03768962
184 #define WL3 0x04cfb0e2
185 #define WL4 0x061f78aa
186 #define WL5 0x07635284
187 #define WL6 0x0898c779
188 #define WL7 0x09bd7ca0
189 #define WL8 0x0acf37ad
190 #define WL9 0x0bcbe352
191 #define WL10 0x0cb19346
192 #define WL11 0x0d7e8807
193 #define WL12 0x0e313245
194 #define WL13 0x0ec835e8
195 #define WL14 0x0f426cb5
196 #define WL15 0x0f9ee890
197 #define WL16 0x0fdcf549
198 #define WL17 0x0ffc19fd
201 @*****************************************************************************
@ IMDCT stage.
@
@ In:  r0 = pointer to the 18 word input array (X0..X17)
@      r1 = pointer to the 36 word output array (x0..x35)
@      r2 = windowing block type
@
@ The prologue pushes r2 together with the callee saved registers so that the
@ windowing stage can re-read the block type from the stack. Intermediate
@ 64-bit sums (ctNN, low word in the lower numbered register) are built with
@ smull/smlal and pushed on the stack for reuse. A finished output is reduced
@ to 32 bits as bits [59..28] of its sum: the low word is shifted right by 28
@ with movs, which leaves the last bit shifted out (bit 27) in the carry flag,
@ and adc then adds (high word << 4) plus that carry, rounding the result.
@
@ NOTE(review): several steps below depend on state that no preceding
@ instruction in this listing establishes: each rsc needs the borrow produced
@ by an rsbs on the matching low word, every adc rounding other than the x4
@ one needs a movs rN, rN, lsr #28 on its low word, and r6..r12 must already
@ hold the X / ct terms named in the comments. Confirm those instructions are
@ present.
213 stmdb sp!, { r2, r4 - r11, lr } @ all callee saved regs, plus arg3
215 ldr r4, =K08 @ r4 = K08
216 ldr r5, =K09 @ r5 = K09
217 ldr r8, [r0, #X4] @ r8 = X4
218 ldr r9, [r0, #X13] @ r9 = X13
219 rsb r6, r4, #0 @ r6 = -K08
220 rsb r7, r5, #0 @ r7 = -K09
222 smull r2, r3, r4, r8 @ r2..r3 = (X4 * K08)
223 smlal r2, r3, r5, r9 @ r2..r3 = (X4 * K08) + (X13 * K09) = ct01
225 smull r10, lr, r8, r5 @ r10..lr = (X4 * K09)
226 smlal r10, lr, r9, r6 @ r10..lr = (X4 * K09) + (X13 * -K08) = ct00
228 ldr r8, [r0, #X7] @ r8 = X7
229 ldr r9, [r0, #X16] @ r9 = X16
@ stmdb stores the lowest numbered register at the lowest address, so ct01
@ (r2, r3) ends up below ct00 (r10, lr)
231 stmdb sp!, { r2, r3, r10, lr } @ stack ct00_h, ct00_l, ct01_h, ct01_l
233 add r8, r8, r9 @ r8 = (X7 + X16)
234 ldr r9, [r0, #X1] @ r9 = X1
236 smlal r2, r3, r6, r8 @ r2..r3 = ct01 + ((X7 + X16) * -K08)
237 smlal r2, r3, r7, r9 @ r2..r3 += (X1 * -K09)
239 ldr r7, [r0, #X10] @ r7 = X10
242 rsc lr, lr, #0 @ r10..lr = -ct00
244 smlal r2, r3, r5, r7 @ r2..r3 += (X10 * K09) = ct06
246 smlal r10, lr, r9, r6 @ r10..lr = -ct00 + ( X1 * -K08)
247 smlal r10, lr, r8, r5 @ r10..lr += ((X7 + X16) * K09)
248 smlal r10, lr, r7, r4 @ r10..lr += ( X10 * K08) = ct04
250 stmdb sp!, { r2, r3, r10, lr } @ stack ct04_h, ct04_l, ct06_h, ct06_l
258 sub r7, r7, r9 @ r7 = (X0 - X11 -X12) = ct14
264 add r8, r8, r11 @ r8 = (X8 - X3 + X15) = ct16
266 add r11, r7, r8 @ r11 = ct14 + ct16 = ct18
268 smlal r2, r3, r6, r11 @ r2..r3 = ct06 + ((X0 - X11 - X3 + X15 + X8 - X12) * -K08)
274 sub r6, r6, r12 @ r6 = (X2 - X9 - X14) = ct15
280 sub r9, r9, r12 @ r9 = (X5 - X6 - X17) = ct17
282 add r12, r9, r6 @ r12 = ct15 + ct17 = ct19
284 smlal r2, r3, r5, r12 @ r2..r3 += ((X2 - X9 + X5 - X6 - X17 - X14) * K09)
286 smlal r10, lr, r11, r5 @ r10..lr = ct04 + (ct18 * K09)
287 smlal r10, lr, r12, r4 @ r10..lr = ct04 + (ct18 * K09) + (ct19 * K08)
290 adc r2, r2, r3, lsl #4 @ r2 = bits[59..28] of r2..r3
291 str r2, [r1, #x22] @ store result x22
293 movs r10, r10, lsr #28
294 adc r10, r10, lr, lsl #4 @ r10 = bits[59..28] of r10..lr
295 str r10, [r1, #x4] @ store result x4
299 ldmia sp, { r2, r3, r4, r5 } @ r2..r3 = ct06, r4..r5 = ct04 (dont update sp)
312 ldr r10, =K03 @ r10 = K03
313 ldr lr, =K15 @ lr = K15
315 smlal r2, r3, r10, r7 @ r2..r3 = ct06 + (ct14 * K03)
316 smlal r4, r5, lr, r7 @ r4..r5 = ct04 + (ct14 * K15)
318 ldr r12, =K14 @ r12 = K14
319 rsb r10, r10, #0 @ r10 = -K03
321 smlal r2, r3, lr, r6 @ r2..r3 += (ct15 * K15)
322 smlal r4, r5, r10, r6 @ r4..r5 += (ct15 * -K03)
323 smlal r2, r3, r12, r8 @ r2..r3 += (ct16 * K14)
325 ldr r11, =minus_K02 @ r11 = -K02
326 rsb r12, r12, #0 @ r12 = -K14
328 smlal r4, r5, r12, r9 @ r4..r5 += (ct17 * -K14)
329 smlal r2, r3, r11, r9 @ r2..r3 += (ct17 * -K02)
330 smlal r4, r5, r11, r8 @ r4..r5 += (ct16 * -K02)
333 adc r2, r2, r3, lsl #4 @ r2 = bits[59..28] of r2..r3
334 str r2, [r1, #x7] @ store result x7
337 adc r4, r4, r5, lsl #4 @ r4 = bits[59..28] of r4..r5
338 str r4, [r1, #x1] @ store result x1
342 ldmia sp, { r2, r3, r4, r5 } @ r2..r3 = ct06, r4..r5 = ct04 (dont update sp)
356 rsc r3, r3, #0 @ r2..r3 = -ct06
358 smlal r2, r3, r12, r7 @ r2..r3 = -ct06 + (ct14 * -K14)
359 smlal r2, r3, r10, r8 @ r2..r3 += (ct16 * -K03)
361 smlal r4, r5, r12, r6 @ r4..r5 = ct04 + (ct15 * -K14)
362 smlal r4, r5, r10, r9 @ r4..r5 += (ct17 * -K03)
363 smlal r4, r5, lr, r8 @ r4..r5 += (ct16 * K15)
364 smlal r4, r5, r11, r7 @ r4..r5 += (ct14 * -K02)
366 rsb lr, lr, #0 @ lr = -K15
367 rsb r11, r11, #0 @ r11 = K02
369 smlal r2, r3, lr, r9 @ r2..r3 += (ct17 * -K15)
370 smlal r2, r3, r11, r6 @ r2..r3 += (ct15 * K02)
373 adc r4, r4, r5, lsl #4 @ r4 = bits[59..28] of r4..r5
374 str r4, [r1, #x25] @ store result x25
377 adc r2, r2, r3, lsl #4 @ r2 = bits[59..28] of r2..r3
378 str r2, [r1, #x19] @ store result x19
@ ct01 sits above the 4 words (ct06, ct04) pushed after it, hence offset 16
382 ldr r2, [sp, #16] @ r2 = ct01_l
383 ldr r3, [sp, #20] @ r3 = ct01_h
391 rsc r3, r3, #0 @ r2..r3 = -ct01
394 mov r5, r3 @ r4..r5 = -ct01
407 smlal r4, r5, r12, r7 @ r4..r5 = -ct01 + (X16 * -K14)
408 smlal r2, r3, lr, r9 @ r2..r3 = -ct01 + (X10 * -K15)
410 smlal r4, r5, r10, r8 @ r4..r5 += (X7 * -K03)
411 smlal r2, r3, r10, r7 @ r2..r3 += (X16 * -K03)
413 smlal r4, r5, r11, r9 @ r4..r5 += (X10 * K02)
414 smlal r2, r3, r12, r8 @ r2..r3 += (X7 * -K14)
416 rsb lr, lr, #0 @ lr = K15
417 rsb r11, r11, #0 @ r11 = -K02
419 smlal r4, r5, lr, r6 @ r4..r5 += (X1 * K15) = ct05
420 smlal r2, r3, r11, r6 @ r2..r3 += (X1 * -K02) = ct03
422 stmdb sp!, { r2, r3, r4, r5 } @ stack ct05_h, ct05_l, ct03_h, ct03_l
425 rsc r5, r5, #0 @ r4..r5 = -ct05
427 stmdb sp!, { r4, r5 } @ stack -ct05_h, -ct05_l
@ 10 words (-ct05, ct03, ct05, ct06, ct04) now lie below ct01; ct00 is 2 words
@ above ct01, hence offset 48
429 ldr r2, [sp, #48] @ r2 = ct00_l
430 ldr r3, [sp, #52] @ r3 = ct00_h
432 rsb r10, r10, #0 @ r10 = K03
435 rsc r5, r3, #0 @ r4..r5 = -ct00
448 smlal r4, r5, r10, r6 @ r4..r5 = -ct00 + (X1 * K03)
449 smlal r2, r3, r10, r9 @ r2..r3 = ct00 + (X10 * K03)
451 smlal r4, r5, r12, r9 @ r4..r5 += (X10 * -K14)
452 smlal r2, r3, r12, r6 @ r2..r3 += (X1 * -K14)
454 smlal r4, r5, r11, r7 @ r4..r5 += (X16 * -K02)
455 smlal r4, r5, lr, r8 @ r4..r5 += (X7 * K15) = ct07
457 rsb lr, lr, #0 @ lr = -K15
458 rsb r11, r11, #0 @ r11 = K02
460 smlal r2, r3, r11, r8 @ r2..r3 += (X7 * K02)
461 smlal r2, r3, lr, r7 @ r2..r3 += (X16 * -K15) = ct02
464 rsc r7, r5, #0 @ r6..r7 = -ct07
466 stmdb sp!, { r2 - r7 } @ stack -ct07_h, -ct07_l, ct07_h, ct07_l, ct02_h, ct02_l
@ Stack layout from here on (byte offsets from sp, 8 bytes per entry):
@   0 ct02, 8 ct07, 16 -ct07, 24 -ct05, 32 ct03, 40 ct05,
@   48 ct06, 56 ct04, 64 ct01, 72 ct00, 80 saved r2 (block type)
@
@ Table driven part: each output is a 12 term dot product of table
@ coefficients with input samples, plus one of the stacked ctxx terms selected
@ by the control word that ends the table row.
471 add r2, pc, #(imdct36_long_karray-.-8) @ r2 = base address of Knn array (PIC safe ?)
477 ldmia r2!, { r5 - r11 } @ first 7 words from Karray element
479 smull r3, r4, r5, r12 @ sum = (Kxx * X0)
482 smlal r3, r4, r6, r12 @ sum += (Kxx * X2)
485 smlal r3, r4, r7, r5 @ sum += (Kxx * X3)
486 smlal r3, r4, r8, r12 @ sum += (Kxx * X5)
489 smlal r3, r4, r9, r6 @ sum += (Kxx * X6)
490 smlal r3, r4, r10, r12 @ sum += (Kxx * X8)
491 smlal r3, r4, r11, r5 @ sum += (Kxx * X9)
@ r5..r9 = remaining 5 coefficients, r10 = control word of this row
493 ldmia r2!, { r5 - r10 } @ final 6 words from Karray element
497 smlal r3, r4, r5, r11 @ sum += (Kxx * X11)
500 smlal r3, r4, r6, r12 @ sum += (Kxx * X12)
501 smlal r3, r4, r7, r11 @ sum += (Kxx * X14)
503 smlal r3, r4, r8, r5 @ sum += (Kxx * X15)
504 smlal r3, r4, r9, r11 @ sum += (Kxx * X17)
@ control word bits [31..16] = byte offset of the ctxx term from sp
506 add r5, sp, r10, lsr #16 @ create index back into stack for required ctxx
508 ldmia r5, { r6, r7 } @ r6..r7 = ctxx
510 mov r8, r10, lsl #16 @ push ctxx index off the top end
512 adds r3, r3, r6 @ add low words
513 adc r4, r4, r7 @ add high words, with carry
515 adc r3, r3, r4, lsl #4 @ r3 = bits[59..28] of r3..r4
@ control word bits [15..8] (now bits [31..24] of r8) = byte offset of the
@ result within the output array
517 str r3, [r1, r8, lsr #24] @ push completion flag off the bottom end
@ what remains in r8 is control word bits [7..0]; Z is set when it is zero
519 movs r8, r8, lsl #8 @ push result location index off the top end
520 beq loop @ loop back if completion flag not set
521 b imdct_l_windowing @ branch to windowing stage if looping finished
@ Coefficient table consumed by the loop above: 12 rows of 13 words, one row
@ per computed output. The first 12 words of a row are the signed multipliers
@ fetched by the two ldmia instructions (7 words, then 5 of the next 6). The
@ 13th word is a control word that the loop decodes as:
@   bits [31..16] : byte offset from sp of the stacked 64-bit ctxx term that is
@                   added to the sum (0x00, 0x08, 0x10, 0x18, 0x20 or 0x28)
@   bits [15..8]  : byte offset from r1 at which the 32-bit result is stored
@   bits [7..0]   : completion flag, non-zero only in the final row
525 .word K17, -K13, K10, -K06, -K05, K01, -K00, K04, -K07, K11, K12, -K16, 0x00000000
526 .word K13, K07, K16, K01, K10, -K05, K04, -K11, K00, -K17, K06, -K12, 0x00200800
527 .word K11, K17, K05, K12, -K01, K06, -K07, K00, -K13, K04, -K16, K10, 0x00200c00
528 .word K07, K00, -K12, K05, -K16, -K10, K11, -K17, K04, K13, K01, K06, 0x00001400
529 .word K05, K10, -K00, -K17, K07, -K13, K12, K06, -K16, K01, -K11, -K04, 0x00181800
530 .word K01, K05, -K07, -K11, K13, K17, -K16, -K12, K10, K06, -K04, -K00, 0x00102000
531 .word -K16, K12, -K11, K07, K04, -K00, -K01, K05, -K06, K10, K13, -K17, 0x00284800
532 .word -K12, K06, K17, -K00, -K11, K04, K05, -K10, K01, K16, -K07, -K13, 0x00085000
533 .word -K10, K16, K04, -K13, -K00, K07, K06, -K01, -K12, -K05, K17, K11, 0x00105400
534 .word -K06, -K01, K13, K04, K17, -K11, -K10, -K16, -K05, K12, K00, K07, 0x00185c00
535 .word -K04, -K11, -K01, K16, K06, K12, K13, -K07, -K17, -K00, -K10, -K05, 0x00006000
536 .word -K00, -K04, -K06, -K10, -K12, -K16, -K17, -K13, -K11, -K07, -K05, -K01, 0x00206801
540 @-------------------------------------------------------------------------
@ Windowing stage entry. The block type pushed by the prologue lies above the
@ 20 words of stacked ctxx terms, hence offset 80. x[0]..x[8] are loaded into
@ r0, r2..r9 (r0 = x0, r2 = x1, ..., r9 = x8) and the writeback leaves r1
@ pointing at x[9]. Stop blocks branch away; every other block type falls
@ through to the normal path.
545 ldr r11, [sp, #80] @ fetch function parameter 3 from out of the stack
546 ldmia r1!, { r0, r2 - r9 } @ load 9 words from x0, update pointer
559 @ r11 = window mode: (0 == normal), (1 == start block), (3 == stop block)
563 cmp r11, #BLOCK_MODE_STOP @ setup flags
@ rsb without the s suffix leaves the flags from cmp intact for the beq below
564 rsb r10, r0, #0 @ r10 = -x0 (DONT change flags !!)
565 beq stop_block_x0_to_x17
568 @ start and normal blocks are treated the same for x[0]..x[17]
@ Normal and start blocks, x[0]..x[17].
@
@ On entry: r0 = x0, r2..r9 = x1..x8, r10 = -x0, r1 = &x[9], r11 = block type.
@ First x[9]..x[17] are formed as the negated mirror of x[8]..x[0]
@ (x[9+i] = -x[8-i]), multiplied by window_l[9..17] and stored. Then
@ x[0]..x[8] are reloaded (the registers were reused), multiplied by
@ window_l[0..8] and stored. On exit r1 = &x[0]; start blocks branch to their
@ own x[18]..x[35] handling, all others fall through.
@
@ NOTE(review): in the x[9]..x[17] half each adc is not preceded by a visible
@ movs r5, r5, lsr #28, unlike the x[0]..x[8] half below; the adc needs both
@ the shifted low word and the carry from that shift. Confirm it is present.
570 normal_block_x0_to_x17:
572 ldr r12, =WL9 @ r12 = window_l[9]
574 rsb r0, r9, #0 @ r0 = -x8
575 rsb r9, r2, #0 @ r9 = -x1
576 rsb r2, r8, #0 @ r2 = -x7
577 rsb r8, r3, #0 @ r8 = -x2
578 rsb r3, r7, #0 @ r3 = -x6
579 rsb r7, r4, #0 @ r7 = -x3
580 rsb r4, r6, #0 @ r4 = -x5
581 rsb r6, r5, #0 @ r6 = -x4
594 @ r11 = window mode: (0 == normal), (1 == start block), (3 == stop block)
@ r5 and lr are free from here on and serve as the 64-bit product scratch pair
598 smull r5, lr, r12, r0 @ r5..lr = (window_l[9] * (x[9] == -x[8]))
599 ldr r12, =WL10 @ r12 = window_l[10]
601 adc r0, r5, lr, lsl #4 @ r0 = bits[59..28] of windowed x9
603 smull r5, lr, r12, r2 @ r5..lr = (window_l[10] * (x[10] == -x[7]))
604 ldr r12, =WL11 @ r12 = window_l[11]
606 adc r2, r5, lr, lsl #4 @ r2 = bits[59..28] of windowed x10
608 smull r5, lr, r12, r3 @ r5..lr = (window_l[11] * (x[11] == -x[6]))
609 ldr r12, =WL12 @ r12 = window_l[12]
611 adc r3, r5, lr, lsl #4 @ r3 = bits[59..28] of windowed x11
613 smull r5, lr, r12, r4 @ r5..lr = (window_l[12] * (x[12] == -x[5]))
614 ldr r12, =WL13 @ r12 = window_l[13]
616 adc r4, r5, lr, lsl #4 @ r4 = bits[59..28] of windowed x12
618 smull r5, lr, r12, r6 @ r5..lr = (window_l[13] * (x[13] == -x[4]))
619 ldr r12, =WL14 @ r12 = window_l[14]
621 adc r6, r5, lr, lsl #4 @ r6 = bits[59..28] of windowed x13
623 smull r5, lr, r12, r7 @ r5..lr = (window_l[14] * (x[14] == -x[3]))
624 ldr r12, =WL15 @ r12 = window_l[15]
626 adc r7, r5, lr, lsl #4 @ r7 = bits[59..28] of windowed x14
628 smull r5, lr, r12, r8 @ r5..lr = (window_l[15] * (x[15] == -x[2]))
629 ldr r12, =WL16 @ r12 = window_l[16]
631 adc r8, r5, lr, lsl #4 @ r8 = bits[59..28] of windowed x15
633 smull r5, lr, r12, r9 @ r5..lr = (window_l[16] * (x[16] == -x[1]))
634 ldr r12, =WL17 @ r12 = window_l[17]
636 adc r9, r5, lr, lsl #4 @ r9 = bits[59..28] of windowed x16
638 smull r5, lr, r12, r10 @ r5..lr = (window_l[17] * (x[17] == -x[0]))
639 ldr r12, =WL0 @ r12 = window_l[0]
641 adc r10, r5, lr, lsl #4 @ r10 = bits[59..28] of windowed x17
@ registers are stored in ascending register number order, which matches
@ x[9]..x[17]; r5 is skipped because it only held scratch
644 stmia r1, { r0, r2 - r4, r6 - r10 } @ store windowed x[9] .. x[17]
645 ldmdb r1!, { r0, r2 - r9 } @ load 9 words downto (and including) x0
648 smull r10, lr, r12, r0 @ r10..lr = (window_l[0] * x[0])
649 ldr r12, =WL1 @ r12 = window_l[1]
650 movs r10, r10, lsr #28
651 adc r0, r10, lr, lsl #4 @ r0 = bits[59..28] of windowed x0
653 smull r10, lr, r12, r2 @ r10..lr = (window_l[1] * x[1])
654 ldr r12, =WL2 @ r12 = window_l[2]
655 movs r10, r10, lsr #28
656 adc r2, r10, lr, lsl #4 @ r2 = bits[59..28] of windowed x1
658 smull r10, lr, r12, r3 @ r10..lr = (window_l[2] * x[2])
659 ldr r12, =WL3 @ r12 = window_l[3]
660 movs r10, r10, lsr #28
661 adc r3, r10, lr, lsl #4 @ r3 = bits[59..28] of windowed x2
663 smull r10, lr, r12, r4 @ r10..lr = (window_l[3] * x[3])
664 ldr r12, =WL4 @ r12 = window_l[4]
665 movs r10, r10, lsr #28
666 adc r4, r10, lr, lsl #4 @ r4 = bits[59..28] of windowed x3
668 smull r10, lr, r12, r5 @ r10..lr = (window_l[4] * x[4])
669 ldr r12, =WL5 @ r12 = window_l[5]
670 movs r10, r10, lsr #28
671 adc r5, r10, lr, lsl #4 @ r5 = bits[59..28] of windowed x4
673 smull r10, lr, r12, r6 @ r10..lr = (window_l[5] * x[5])
674 ldr r12, =WL6 @ r12 = window_l[6]
675 movs r10, r10, lsr #28
676 adc r6, r10, lr, lsl #4 @ r6 = bits[59..28] of windowed x5
678 smull r10, lr, r12, r7 @ r10..lr = (window_l[6] * x[6])
679 ldr r12, =WL7 @ r12 = window_l[7]
680 movs r10, r10, lsr #28
681 adc r7, r10, lr, lsl #4 @ r7 = bits[59..28] of windowed x6
683 smull r10, lr, r12, r8 @ r10..lr = (window_l[7] * x[7])
684 ldr r12, =WL8 @ r12 = window_l[8]
685 movs r10, r10, lsr #28
686 adc r8, r10, lr, lsl #4 @ r8 = bits[59..28] of windowed x7
688 smull r10, lr, r12, r9 @ r10..lr = (window_l[8] * x[8])
689 movs r10, r10, lsr #28
690 adc r9, r10, lr, lsl #4 @ r9 = bits[59..28] of windowed x8
692 stmia r1, { r0, r2 - r9 } @ store windowed x[0] .. x[8]
@ r11 still holds the block type fetched at the start of the windowing stage
694 cmp r11, #BLOCK_MODE_START
695 beq start_block_x18_to_x35
@ Normal and stop blocks, x[18]..x[35].
@
@ On entry r1 = &x[0]. x[18]..x[26] are loaded and x[27]..x[35] are formed as
@ their (non-negated) mirror, x[27+i] = x[26-i], multiplied by window_l[8-i].
@ The products are computed in an order that leaves x27..x35 in r0, r2..r9 so
@ that one stmia stores them. x[18]..x[26] are then reloaded, multiplied by
@ window_l[17]..window_l[9] and stored. This path ends with a function return.
@
@ NOTE(review): in the x[27]..x[35] half each adc is not preceded by a visible
@ movs on the low word of the product, unlike the x[18]..x[26] half; confirm
@ the shift (and the carry it produces) is present.
701 normal_block_x18_to_x35:
703 ldr r11, =WL3 @ r11 = window_l[3]
704 ldr r12, =WL4 @ r12 = window_l[4]
706 add r1, r1, #(18*4) @ r1 = &x[18]
@ r0 = x18, r2..r4 = x19..x21, r6..r10 = x22..x26; r5 is left free to take
@ the first result; writeback leaves r1 = &x[27]
708 ldmia r1!, { r0, r2 - r4, r6 - r10 } @ load 9 words from x18, update pointer
725 smull r5, lr, r12, r6 @ r5..lr = (window_l[4] * (x[22] == x[31]))
727 adc r5, r5, lr, lsl #4 @ r5 = bits[59..28] of windowed x31
729 smull r6, lr, r11, r4 @ r6..lr = (window_l[3] * (x[21] == x[32]))
730 ldr r12, =WL5 @ r12 = window_l[5]
732 adc r6, r6, lr, lsl #4 @ r6 = bits[59..28] of windowed x32
734 smull r4, lr, r12, r7 @ r4..lr = (window_l[5] * (x[23] == x[30]))
735 ldr r11, =WL1 @ r11 = window_l[1]
736 ldr r12, =WL2 @ r12 = window_l[2]
738 adc r4, r4, lr, lsl #4 @ r4 = bits[59..28] of windowed x30
740 smull r7, lr, r12, r3 @ r7..lr = (window_l[2] * (x[20] == x[33]))
741 ldr r12, =WL6 @ r12 = window_l[6]
743 adc r7, r7, lr, lsl #4 @ r7 = bits[59..28] of windowed x33
745 smull r3, lr, r12, r8 @ r3..lr = (window_l[6] * (x[24] == x[29]))
747 adc r3, r3, lr, lsl #4 @ r3 = bits[59..28] of windowed x29
749 smull r8, lr, r11, r2 @ r8..lr = (window_l[1] * (x[19] == x[34]))
750 ldr r12, =WL7 @ r12 = window_l[7]
751 ldr r11, =WL8 @ r11 = window_l[8]
753 adc r8, r8, lr, lsl #4 @ r8 = bits[59..28] of windowed x34
755 smull r2, lr, r12, r9 @ r2..lr = (window_l[7] * (x[25] == x[28]))
756 ldr r12, =WL0 @ r12 = window_l[0]
758 adc r2, r2, lr, lsl #4 @ r2 = bits[59..28] of windowed x28
760 smull r9, lr, r12, r0 @ r9..lr = (window_l[0] * (x[18] == x[35]))
762 adc r9, r9, lr, lsl #4 @ r9 = bits[59..28] of windowed x35
764 smull r0, lr, r11, r10 @ r0..lr = (window_l[8] * (x[26] == x[27]))
765 ldr r11, =WL16 @ r11 = window_l[16]
766 ldr r12, =WL17 @ r12 = window_l[17]
768 adc r0, r0, lr, lsl #4 @ r0 = bits[59..28] of windowed x27
771 stmia r1, { r0, r2 - r9 } @ store windowed x[27] .. x[35]
772 ldmdb r1!, { r0, r2 - r9 } @ load 9 words downto (and including) x18
775 smull r10, lr, r12, r0 @ r10..lr = (window_l[17] * x[18])
776 movs r10, r10, lsr #28
777 adc r0, r10, lr, lsl #4 @ r0 = bits[59..28] of windowed x18
779 smull r10, lr, r11, r2 @ r10..lr = (window_l[16] * x[19])
780 ldr r11, =WL14 @ r11 = window_l[14]
781 ldr r12, =WL15 @ r12 = window_l[15]
782 movs r10, r10, lsr #28
783 adc r2, r10, lr, lsl #4 @ r2 = bits[59..28] of windowed x19
785 smull r10, lr, r12, r3 @ r10..lr = (window_l[15] * x[20])
786 movs r10, r10, lsr #28
787 adc r3, r10, lr, lsl #4 @ r3 = bits[59..28] of windowed x20
789 smull r10, lr, r11, r4 @ r10..lr = (window_l[14] * x[21])
790 ldr r11, =WL12 @ r11 = window_l[12]
791 ldr r12, =WL13 @ r12 = window_l[13]
792 movs r10, r10, lsr #28
793 adc r4, r10, lr, lsl #4 @ r4 = bits[59..28] of windowed x21
795 smull r10, lr, r12, r5 @ r10..lr = (window_l[13] * x[22])
796 movs r10, r10, lsr #28
797 adc r5, r10, lr, lsl #4 @ r5 = bits[59..28] of windowed x22
799 smull r10, lr, r11, r6 @ r10..lr = (window_l[12] * x[23])
800 ldr r11, =WL10 @ r11 = window_l[10]
801 ldr r12, =WL11 @ r12 = window_l[11]
802 movs r10, r10, lsr #28
803 adc r6, r10, lr, lsl #4 @ r6 = bits[59..28] of windowed x23
805 smull r10, lr, r12, r7 @ r10..lr = (window_l[11] * x[24])
806 movs r10, r10, lsr #28
807 adc r7, r10, lr, lsl #4 @ r7 = bits[59..28] of windowed x24
809 smull r10, lr, r11, r8 @ r10..lr = (window_l[10] * x[25])
810 ldr r12, =WL9 @ r12 = window_l[9]
811 movs r10, r10, lsr #28
812 adc r8, r10, lr, lsl #4 @ r8 = bits[59..28] of windowed x25
814 smull r10, lr, r12, r9 @ r10..lr = (window_l[9] * x[26])
816 movs r10, r10, lsr #28
817 adc r9, r10, lr, lsl #4 @ r9 = bits[59..28] of windowed x26
819 stmia r1, { r0, r2 - r9 } @ store windowed x[18] .. x[26]
822 @ NB there are 2 possible exits from this function - this is only one of them
@ 21 words = 20 words of stacked ctxx terms plus the saved copy of r2; the
@ ldmia then pops the registers pushed by the prologue, loading the saved lr
@ into pc to return
825 add sp, sp, #(21*4) @ return stack frame
826 ldmia sp!, { r4 - r11, pc } @ restore callee saved regs, and return
@ Stop blocks, x[0]..x[17].
@
@ On entry: r0 = x0, r2..r9 = x1..x8, r10 = -x0, r1 = &x[9].
@ x[12]..x[17] are written as the plain negated mirror -x5..-x0 with no window
@ applied. x[6]..x[8] and their negated mirror x[9]..x[11] are multiplied by
@ the six short window coefficients (window_l[1], [4], [7], [10], [13], [16]).
@ x[0]..x[5] are written by the final stmdb, which leaves r1 = &x[0] before
@ joining the common x[18]..x[35] code.
@
@ NOTE(review): no adc below is preceded by a visible movs r5, r5, lsr #28;
@ confirm the shift (and the carry it produces) is present.
831 stop_block_x0_to_x17:
844 @ r11 = window mode: (0 == normal), (1 == start block), (3 == stop block)
848 rsb r0, r6, #0 @ r0 = -x5
849 rsb r6, r2, #0 @ r6 = -x1
850 rsb r2, r5, #0 @ r2 = -x4
851 rsb r5, r3, #0 @ r5 = -x2
852 rsb r3, r4, #0 @ r3 = -x3
854 add r1, r1, #(3*4) @ r1 = &x[12]
@ store order is ascending register number: -x5, -x4, -x3, -x2, -x1, -x0
855 stmia r1, { r0, r2, r3, r5, r6, r10 } @ store unchanged x[12] .. x[17]
857 ldr r0, =WL1 @ r0 = window_l[1] == window_s[0]
859 rsb r10, r9, #0 @ r10 = -x8
860 rsb r12, r8, #0 @ r12 = -x7
861 rsb lr, r7, #0 @ lr = -x6
874 @ r11 = window mode: (0 == normal), (1 == start block), (3 == stop block)
878 smull r5, r6, r0, r7 @ r5..r6 = (window_l[1] * x[6])
879 ldr r2, =WL4 @ r2 = window_l[4] == window_s[1]
881 adc r7, r5, r6, lsl #4 @ r7 = bits[59..28] of windowed x6
883 smull r5, r6, r2, r8 @ r5..r6 = (window_l[4] * x[7])
884 ldr r3, =WL7 @ r3 = window_l[7] == window_s[2]
886 adc r8, r5, r6, lsl #4 @ r8 = bits[59..28] of windowed x7
888 smull r5, r6, r3, r9 @ r5..r6 = (window_l[7] * x[8])
889 ldr r4, =WL10 @ r4 = window_l[10] == window_s[3]
891 adc r9, r5, r6, lsl #4 @ r9 = bits[59..28] of windowed x8
893 smull r5, r6, r4, r10 @ r5..r6 = (window_l[10] * (x[9] == -x[8]))
894 ldr r0, =WL13 @ r0 = window_l[13] == window_s[4]
896 adc r10, r5, r6, lsl #4 @ r10 = bits[59..28] of windowed x9
898 smull r5, r6, r0, r12 @ r5..r6 = (window_l[13] * (x[10] == -x[7]))
899 ldr r2, =WL16 @ r2 = window_l[16] == window_s[5]
901 adc r12, r5, r6, lsl #4 @ r12 = bits[59..28] of windowed x10
903 smull r5, r6, r2, lr @ r5..r6 = (window_l[16] * (x[11] == -x[6]))
908 adc lr, r5, r6, lsl #4 @ lr = bits[59..28] of windowed x11
@ r1 = &x[12] here, so the decrementing store fills x[6]..x[11], r1 = &x[6]
910 stmdb r1!, { r7 - r10, r12, lr } @ store windowed x[6] .. x[11]
@ NOTE(review): no visible instruction sets r0, r2..r6 to the values meant
@ for x[0]..x[5]; at this point they still hold window constants and multiply
@ scratch. Presumably they are cleared first - confirm.
918 stmdb r1!, { r0, r2 - r6 } @ store windowed x[0] .. x[5]
920 b normal_block_x18_to_x35
@ Start blocks, x[18]..x[35].
@
@ On entry r1 = &x[0]. x[18]..x[23] are not touched by this path. x[24]..x[26]
@ are loaded; x[27]..x[29] are formed as their mirror (x[27] = x[26],
@ x[28] = x[25], x[29] = x[24]) multiplied by window_l[7], [4], [1], and
@ x[24]..x[26] themselves are multiplied by window_l[16], [13], [10]. The
@ second stmia writes x[30]..x[35]. This path ends with a function return.
926 start_block_x18_to_x35:
928 ldr r4, =WL1 @ r4 = window_l[1] == window_s[0]
930 add r1, r1, #(24*4) @ r1 = &x[24]
@ r0 = x24, r2 = x25, r3 = x26
932 ldmia r1, { r0, r2, r3 } @ load 3 words from x24, dont update pointer
949 ldr r5, =WL4 @ r5 = window_l[4] == window_s[1]
951 smull r10, r11, r4, r0 @ r10..r11 = (window_l[1] * (x[24] == x[29]))
952 ldr r6, =WL7 @ r6 = window_l[7] == window_s[2]
953 movs r10, r10, lsr #28
954 adc lr, r10, r11, lsl #4 @ lr = bits[59..28] of windowed x29
956 smull r10, r11, r5, r2 @ r10..r11 = (window_l[4] * (x[25] == x[28]))
957 ldr r7, =WL10 @ r7 = window_l[10] == window_s[3]
958 movs r10, r10, lsr #28
959 adc r12, r10, r11, lsl #4 @ r12 = bits[59..28] of windowed x28
961 smull r10, r11, r6, r3 @ r10..r11 = (window_l[7] * (x[26] == x[27]))
962 ldr r8, =WL13 @ r8 = window_l[13] == window_s[4]
963 movs r10, r10, lsr #28
964 adc r4, r10, r11, lsl #4 @ r4 = bits[59..28] of windowed x27
966 smull r10, r11, r7, r3 @ r10..r11 = (window_l[10] * x[26])
967 ldr r9, =WL16 @ r9 = window_l[16] == window_s[5]
968 movs r10, r10, lsr #28
969 adc r3, r10, r11, lsl #4 @ r3 = bits[59..28] of windowed x26
971 smull r10, r11, r8, r2 @ r10..r11 = (window_l[13] * x[25])
973 movs r10, r10, lsr #28
974 adc r2, r10, r11, lsl #4 @ r2 = bits[59..28] of windowed x25
976 smull r10, r11, r9, r0 @ r10..r11 = (window_l[16] * x[24])
978 movs r10, r10, lsr #28
979 adc r0, r10, r11, lsl #4 @ r0 = bits[59..28] of windowed x24
981 stmia r1!, { r0, r2, r3, r4, r12, lr } @ store windowed x[24] .. x[29]
@ NOTE(review): no visible instruction sets r5..r10 to the values meant for
@ x[30]..x[35]; at this point they still hold window constants and multiply
@ scratch. Presumably they are cleared first - confirm.
988 stmia r1!, { r5 - r10 } @ store windowed x[30] .. x[35]
991 @ NB there are 2 possible exits from this function - this is only one of them
@ 21 words = 20 words of stacked ctxx terms plus the saved copy of r2; the
@ ldmia then pops the registers pushed by the prologue, loading the saved lr
@ into pc to return
994 add sp, sp, #(21*4) @ return stack frame
995 ldmia sp!, { r4 - r11, pc } @ restore callee saved regs, and return