From c1869b591f363b3e2cbb7e7f6b51a7fbe3890771 Mon Sep 17 00:00:00 2001 From: Buschel Date: Mon, 13 Dec 2010 20:56:53 +0000 Subject: [PATCH] Speedup of iPod nano 2G YUV blitting by 3%. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@28825 a1c6a512-1295-4272-9138-f99709370657 --- .../target/arm/s5l8700/ipodnano2g/lcd-asm-nano2g.S | 72 +++++++++++----------- 1 file changed, 35 insertions(+), 37 deletions(-) diff --git a/firmware/target/arm/s5l8700/ipodnano2g/lcd-asm-nano2g.S b/firmware/target/arm/s5l8700/ipodnano2g/lcd-asm-nano2g.S index 3902d3413..7fd703972 100755 --- a/firmware/target/arm/s5l8700/ipodnano2g/lcd-asm-nano2g.S +++ b/firmware/target/arm/s5l8700/ipodnano2g/lcd-asm-nano2g.S @@ -100,7 +100,7 @@ lcd_write_yuv420_lines: mov r7, r2 /* r7 = loop count */ add r8, sp, #16 /* chroma buffer */ - mov lr, r1 /* LCD data port = LCD_BASE */ + add lr, r1, #0x40 /* LCD data port = LCD_BASE + 0x40 */ /* 1st loop start */ 10: /* loop start */ @@ -153,21 +153,21 @@ lcd_write_yuv420_lines: andhi r4, r4, #31 15: /* no clamp */ - /* calculate pixel_1 and save to r5 for later pixel packing */ + /* calculate pixel_1 and save to r4 for later pixel packing */ orr r4, r4, r3, lsl #5 /* pixel_1 = r<<11 | g<<5 | b */ - orr r5, r4, r6, lsl #11 /* r5 = pixel_1 */ + orr r4, r4, r6, lsl #11 /* r4 = pixel_1 */ /* 1st loop, second pixel */ - ldrb r4, [r9], #1 /* r4 = *ysrc++ = *Y'_p++ */ - sub r4, r4, #16 /* r4 = (Y'-16) * 74 */ - add r3, r4, r4, asl #2 - add r4, r3, r4, asl #5 + ldrb r5, [r9], #1 /* r5 = *ysrc++ = *Y'_p++ */ + sub r5, r5, #16 /* r5 = (Y'-16) * 74 */ + add r3, r5, r5, asl #2 + add r5, r3, r5, asl #5 - add r6, r1, r4, asr #8 /* r6 = r = (Y >> 9) + rv */ - add r3, r2, r4, asr #7 /* r3 = g = (Y >> 8) + guv */ - add r4, r0, r4, asr #8 /* r4 = b = (Y >> 9) + bu */ + add r6, r1, r5, asr #8 /* r6 = r = (Y >> 9) + rv */ + add r3, r2, r5, asr #7 /* r3 = g = (Y >> 8) + guv */ + add r5, r0, r5, asr #8 /* r5 = b = (Y >> 9) + bu */ - orr r0, r6, r4 /* check if clamping is needed... */ + orr r0, r6, r5 /* check if clamping is needed... */ orr r0, r0, r3, asr #1 /* ...at all */ cmp r0, #31 bls 15f /* -> no clamp */ @@ -177,23 +177,22 @@ lcd_write_yuv420_lines: cmp r3, #63 /* clamp g */ mvnhi r3, r3, asr #31 andhi r3, r3, #63 - cmp r4, #31 /* clamp b */ - mvnhi r4, r4, asr #31 - andhi r4, r4, #31 + cmp r5, #31 /* clamp b */ + mvnhi r5, r5, asr #31 + andhi r5, r5, #31 15: /* no clamp */ /* calculate pixel_2 and pack with pixel_1 before writing */ - orr r4, r4, r3, lsl #5 /* pixel_2 = r<<11 | g<<5 | b */ - orr r4, r4, r6, lsl #11 /* r4 = pixel_2 */ + orr r5, r5, r3, lsl #5 /* pixel_2 = r<<11 | g<<5 | b */ + orr r5, r5, r6, lsl #11 /* r5 = pixel_2 */ /* wait for FIFO half full */ .fifo_wait1: - ldr r3, [lr, #0x1C] /* while (LCD_STATUS & 0x08); */ + ldr r3, [lr, #-0x24] /* while (LCD_STATUS & 0x08); */ tst r3, #0x8 bgt .fifo_wait1 - str r5, [lr, #0x40] /* write pixel_1 */ - str r4, [lr, #0x40] /* write pixel_2 */ + stmia lr, {r4,r5} /* write pixel_1 and pixel_2 */ subs r7, r7, #2 /* check for loop end */ bgt 10b /* back to beginning */ @@ -234,21 +233,21 @@ lcd_write_yuv420_lines: mvnhi r4, r4, asr #31 andhi r4, r4, #31 15: /* no clamp */ - /* calculate pixel_1 and save to r5 for later pixel packing */ + /* calculate pixel_1 and save to r4 for later pixel packing */ orr r4, r4, r3, lsl #5 /* pixel_1 = r<<11 | g<<5 | b */ - orr r5, r4, r6, lsl #11 /* r5 = pixel_1 */ + orr r4, r4, r6, lsl #11 /* r4 = pixel_1 */ /* 2nd loop, second pixel */ - ldrb r4, [r9], #1 /* r4 = *ysrc++ = *Y'_p++ */ - sub r4, r4, #16 /* r4 = (Y'-16) * 74 */ - add r3, r4, r4, asl #2 - add r4, r3, r4, asl #5 + ldrb r5, [r9], #1 /* r5 = *ysrc++ = *Y'_p++ */ + sub r5, r5, #16 /* r5 = (Y'-16) * 74 */ + add r3, r5, r5, asl #2 + add r5, r3, r5, asl #5 - add r6, r1, r4, asr #8 /* r6 = r = (Y >> 9) + rv */ - add r3, r2, r4, asr #7 /* r3 = g = (Y >> 8) + guv */ - add r4, r0, r4, asr #8 /* r4 = b = (Y >> 9) + bu */ + add r6, r1, r5, asr #8 /* r6 = r = (Y >> 9) + rv */ + add r3, r2, r5, asr #7 /* r3 = g = (Y >> 8) + guv */ + add r5, r0, r5, asr #8 /* r5 = b = (Y >> 9) + bu */ - orr r0, r6, r4 /* check if clamping is needed... */ + orr r0, r6, r5 /* check if clamping is needed... */ orr r0, r0, r3, asr #1 /* ...at all */ cmp r0, #31 bls 15f /* -> no clamp */ @@ -258,23 +257,22 @@ lcd_write_yuv420_lines: cmp r3, #63 /* clamp g */ mvnhi r3, r3, asr #31 andhi r3, r3, #63 - cmp r4, #31 /* clamp b */ - mvnhi r4, r4, asr #31 - andhi r4, r4, #31 + cmp r5, #31 /* clamp b */ + mvnhi r5, r5, asr #31 + andhi r5, r5, #31 15: /* no clamp */ /* calculate pixel_2 and pack with pixel_1 before writing */ - orr r4, r4, r3, lsl #5 /* pixel_2 = r<<11 | g<<5 | b */ - orr r4, r4, r6, lsl #11 /* r4 = pixel_2 */ + orr r5, r5, r3, lsl #5 /* pixel_2 = r<<11 | g<<5 | b */ + orr r5, r5, r6, lsl #11 /* r5 = pixel_2 */ /* wait for FIFO half full */ .fifo_wait2: - ldr r3, [lr, #0x1C] /* while (LCD_STATUS & 0x08); */ + ldr r3, [lr, #-0x24] /* while (LCD_STATUS & 0x08); */ tst r3, #0x8 bgt .fifo_wait2 - str r5, [lr, #0x40] /* write pixel_1 */ - str r4, [lr, #0x40] /* write pixel_2 */ + stmia lr, {r4,r5} /* write pixel_1 and pixel_2 */ subs r7, r7, #2 /* check for loop end */ bgt 20b /* back to beginning */ -- 2.11.4.GIT