From f9cfce5cd0528abf989c182d294aad0a3c29c922 Mon Sep 17 00:00:00 2001 From: Buschel Date: Wed, 29 Dec 2010 23:17:47 +0000 Subject: [PATCH] Speed up of iPod nano 1G and iPod color LCD. Use HDD6330 asm part for YUV blitting, introduce special handling for full width screen updates. Speed up is about +30% for YUV on both color/nano1G. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@28930 a1c6a512-1295-4272-9138-f99709370657 --- firmware/target/arm/ipod/lcd-as-color-nano.S | 152 +++++++++++++++++++++++++++ firmware/target/arm/ipod/lcd-color_nano.c | 132 ++++++----------------- 2 files changed, 182 insertions(+), 102 deletions(-) create mode 100755 firmware/target/arm/ipod/lcd-as-color-nano.S diff --git a/firmware/target/arm/ipod/lcd-as-color-nano.S b/firmware/target/arm/ipod/lcd-as-color-nano.S new file mode 100755 index 0000000000..d4df4d496a --- /dev/null +++ b/firmware/target/arm/ipod/lcd-as-color-nano.S @@ -0,0 +1,152 @@ +/*************************************************************************** + * __________ __ ___. + * Open \______ \ ____ ____ | | _\_ |__ _______ ___ + * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / + * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < + * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ + * \/ \/ \/ \/ \/ + * $Id:$ + * + * Copyright (C) 2010 by Andree Buschmann + * + * Generic asm helper function used by YUV blitting. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY + * KIND, either express or implied. 
+ * + ****************************************************************************/ + +#include "config.h" +#include "cpu.h" + + .section .icode, "ax", %progbits + +/**************************************************************************** +* void lcd_yuv_write_inner_loop(unsigned char const * const ysrc, +* unsigned char const * const usrc, +* unsigned char const * const vsrc, +* int width); +* Converts one row: each loop pass reads one U, one V and two Y samples, stores two byte-swapped RGB565 pixels to LCD2_BLOCK_DATA and counts width down by 2. +* YUV -> RGB565 conversion +* |R| |1.000000 -0.000001 1.402000| |Y'| +* |G| = |1.000000 -0.334136 -0.714136| |Pb| +* |B| |1.000000 1.772000 0.000000| |Pr| +* Scaled, normalized, rounded and tweaked to yield RGB 565: +* |R| |74 0 101| |Y' - 16| >> 9 +* |G| = |74 -24 -51| |Cb - 128| >> 8 +* |B| |74 128 0| |Cr - 128| >> 9 +* +*/ + .align 2 + .global lcd_yuv_write_inner_loop + .type lcd_yuv_write_inner_loop, %function + +lcd_yuv_write_inner_loop: + @ r0 = ysrc + @ r1 = usrc + @ r2 = vsrc + @ r3 = width + stmfd sp!, { r4-r11, lr } @ save regs + mov r4, #0x70000000 @ r4 = LCD2_BLOCK_CTRL - 0x20 + add r4, r4, #0x8a00 @ (r4 = 0x70008a00) + add r5, r4, #0x100 @ r5 = LCD2_BLOCK_DATA +10: @ loop: one chroma pair + two luma samples per pass + + ldrb r7, [r1], #1 @ *usrc++ + ldrb r8, [r2], #1 @ *vsrc++ + + sub r7, r7, #128 @ Cb -= 128 + sub r8, r8, #128 @ Cr -= 128 + + add r10, r8, r8, asl #2 @ Cr*101 + add r10, r10, r8, asl #5 + add r10, r10, r8, asl #6 + + add r11, r8, r8, asl #1 @ Cr*51 + Cb*24 + add r11, r11, r11, asl #4 + add r11, r11, r7, asl #3 + add r11, r11, r7, asl #4 + + add r12, r7, #2 @ r12 = bu = (Cb*128 + 256) >> 9 + mov r12, r12, asr #2 + add r10, r10, #256 @ r10 = rv = (Cr*101 + 256) >> 9 + mov r10, r10, asr #9 + rsb r11, r11, #128 @ r11 = guv = (-r11 + 128) >> 8 + mov r11, r11, asr #8 + +@ pixel_1 + ldrb r7, [r0], #1 @ *ysrc++ + sub r7, r7, #16 @ Y = (Y' - 16) * 37 + add r8, r7, r7, asl #2 + add r7, r8, r7, asl #5 + + add r9, r10, r7, asr #8 @ R = (Y >> 8) + rv + add r8, r11, r7, asr #7 @ G = (Y >> 7) + guv + add r7, r12, r7, asr #8 @ B = (Y >> 8) + bu + + cmp r9, #31 @ clamp R to 0..31 (unsigned compare also catches negatives) + mvnhi r9, r9, asr #31 @ negative -> 0, too large -> 0xffffffff + andhi r9, r9, #31 @ too large -> 31 (0 stays 0) + + 
cmp r8, #63 @ clamp G + mvnhi r8, r8, asr #31 + andhi r8, r8, #63 + + cmp r7, #31 @ clamp B + mvnhi r7, r7, asr #31 + andhi r7, r7, #31 + + orr r6, r7, r8, lsl #5 @ pack pixel + orr r6, r6, r9, lsl #11 + + mov r7, r6, lsl #8 @ swap bytes + and r7, r7, #0xff00 + add r6, r7, r6, lsr #8 + +@ pixel_2 + ldrb r7, [r0], #1 @ *ysrc++ + sub r7, r7, #16 @ Y = (Y' - 16) * 37 + add r8, r7, r7, asl #2 + add r7, r8, r7, asl #5 + + add r9, r10, r7, asr #8 @ R = (Y >> 8) + rv + add r8, r11, r7, asr #7 @ G = (Y >> 7) + guv + add r7, r12, r7, asr #8 @ B = (Y >> 8) + bu + + cmp r9, #31 @ clamp R + mvnhi r9, r9, asr #31 + andhi r9, r9, #31 + + cmp r8, #63 @ clamp G + mvnhi r8, r8, asr #31 + andhi r8, r8, #63 + + cmp r7, #31 @ clamp B + mvnhi r7, r7, asr #31 + andhi r7, r7, #31 + + orr r7, r7, r8, lsl #5 @ pack pixel + orr r7, r7, r9, lsl #11 + + orr r6, r6, r7, lsl #24 @ swap bytes and add pixels simultaneously + mov r7, r7, lsr #8 + orr r6, r6, r7, lsl #16 +#if 1 +11: @ while (!(LCD2_BLOCK_CTRL & LCD2_BLOCK_TXOK)); + ldr r11, [r4, #0x20] @ r11 = LCD2_BLOCK_CTRL (guv is no longer needed; it is recomputed at 10:) + tst r11, #0x1000000 @ LCD2_BLOCK_TXOK bit set? + beq 11b @ not yet -> keep polling +#endif + str r6, [r5] @ send two pixels + + subs r3, r3, #2 @ width -= 2 (two pixels done) + bgt 10b @ loop while width > 0 + + ldmpc regs=r4-r11 @ restore regs + .ltorg @ dump constant pool + .size lcd_yuv_write_inner_loop, .-lcd_yuv_write_inner_loop diff --git a/firmware/target/arm/ipod/lcd-color_nano.c b/firmware/target/arm/ipod/lcd-color_nano.c index 7d004cb0f2..e3b9ea8eb6 100644 --- a/firmware/target/arm/ipod/lcd-color_nano.c +++ b/firmware/target/arm/ipod/lcd-color_nano.c @@ -121,38 +121,14 @@ void lcd_init_device(void) } /*** update functions ***/ +extern void lcd_yuv_write_inner_loop(unsigned char const * const ysrc, + unsigned char const * const usrc, + unsigned char const * const vsrc, + int width); #define CSUB_X 2 #define CSUB_Y 2 -/* YUV- > RGB565 conversion - * |R| |1.000000 -0.000001 1.402000| |Y'| - * |G| = |1.000000 -0.334136 -0.714136| |Pb| - * |B| |1.000000 1.772000 0.000000| |Pr| - * Scaled, normalized, rounded and 
tweaked to yield RGB 565: - * |R| |74 0 101| |Y' - 16| >> 9 - * |G| = |74 -24 -51| |Cb - 128| >> 8 - * |B| |74 128 0| |Cr - 128| >> 9 -*/ - -#define RGBYFAC 74 /* 1.0 */ -#define RVFAC 101 /* 1.402 */ -#define GVFAC (-51) /* -0.714136 */ -#define GUFAC (-24) /* -0.334136 */ -#define BUFAC 128 /* 1.772 */ - -/* ROUNDOFFS contain constant for correct round-offs as well as - constant parts of the conversion matrix (e.g. (Y'-16)*RGBYFAC - -> constant part = -16*RGBYFAC). Through extraction of these - constant parts we save at leat 4 substractions in the conversion - loop */ -#define ROUNDOFFSR (256 - 16*RGBYFAC - 128*RVFAC) -#define ROUNDOFFSG (128 - 16*RGBYFAC - 128*GVFAC - 128*GUFAC) -#define ROUNDOFFSB (256 - 16*RGBYFAC - 128*BUFAC) - -#define MAX_5BIT 0x1f -#define MAX_6BIT 0x3f - /* Performance function to blit a YUV bitmap directly to the LCD */ void lcd_blit_yuv(unsigned char * const src[3], int src_x, int src_y, int stride, @@ -222,7 +198,8 @@ void lcd_blit_yuv(unsigned char * const src[3], const int stride_div_csub_x = stride/CSUB_X; h=0; - while (1) { + while (1) + { /* upsampling, YUV->RGB conversion and reduction to RGB565 in one go */ const unsigned char *ysrc = src[0] + stride * src_y + src_x; @@ -231,17 +208,11 @@ void lcd_blit_yuv(unsigned char * const src[3], const unsigned char *usrc = src[1] + uvoffset; const unsigned char *vsrc = src[2] + uvoffset; - const unsigned char *row_end = ysrc + width; - - int yp, up, vp; - int red1, green1, blue1; - int red2, green2, blue2; - int rc, gc, bc; int pixels_to_write; - fb_data pixel1,pixel2; - if (h==0) { + if (h==0) + { while (!(LCD2_BLOCK_CTRL & LCD2_BLOCK_READY)); LCD2_BLOCK_CONFIG = 0; @@ -251,7 +222,8 @@ void lcd_blit_yuv(unsigned char * const src[3], h = height; /* calculate how much we can do in one go */ - if (pixels_to_write > 0x10000) { + if (pixels_to_write > 0x10000) + { h = (0x10000/2) / width; pixels_to_write = (width * h) * 2; } @@ -262,61 +234,7 @@ void lcd_blit_yuv(unsigned char * const src[3], 
LCD2_BLOCK_CTRL = 0x34000000; } - do - { - up = *usrc++; - vp = *vsrc++; - rc = RVFAC * vp + ROUNDOFFSR; - gc = GVFAC * vp + GUFAC * up + ROUNDOFFSG; - bc = BUFAC * up + ROUNDOFFSB; - - /* Pixel 1 -> RGB565 */ - yp = *ysrc++ * RGBYFAC; - red1 = (yp + rc) >> 9; - green1 = (yp + gc) >> 8; - blue1 = (yp + bc) >> 9; - - /* Pixel 2 -> RGB565 */ - yp = *ysrc++ * RGBYFAC; - red2 = (yp + rc) >> 9; - green2 = (yp + gc) >> 8; - blue2 = (yp + bc) >> 9; - - /* Since out of bounds errors are relatively rare, we check two - pixels at once to see if any components are out of bounds, and - then fix whichever is broken. This works due to high values and - negative values both being !=0 when bitmasking them. - We first check for red and blue components (5bit range). */ - if ((red1 | blue1 | red2 | blue2) & ~MAX_5BIT) - { - if (red1 & ~MAX_5BIT) - red1 = (red1 >> 31) ? 0 : MAX_5BIT; - if (blue1 & ~MAX_5BIT) - blue1 = (blue1 >> 31) ? 0 : MAX_5BIT; - if (red2 & ~MAX_5BIT) - red2 = (red2 >> 31) ? 0 : MAX_5BIT; - if (blue2 & ~MAX_5BIT) - blue2 = (blue2 >> 31) ? 0 : MAX_5BIT; - } - /* We second check for green component (6bit range) */ - if ((green1 | green2) & ~MAX_6BIT) - { - if (green1 & ~MAX_6BIT) - green1 = (green1 >> 31) ? 0 : MAX_6BIT; - if (green2 & ~MAX_6BIT) - green2 = (green2 >> 31) ? 
0 : MAX_6BIT; - } - - pixel1 = swap16((red1 << 11) | (green1 << 5) | blue1); - - pixel2 = swap16((red2 << 11) | (green2 << 5) | blue2); - - while (!(LCD2_BLOCK_CTRL & LCD2_BLOCK_TXOK)); - - /* output 2 pixels */ - LCD2_BLOCK_DATA = (pixel2 << 16) | pixel1; - } - while (ysrc < row_end); + lcd_yuv_write_inner_loop(ysrc,usrc,vsrc,width); src_y++; h--; @@ -415,16 +333,26 @@ void lcd_update_rect(int x, int y, int width, int height) LCD2_BLOCK_CONFIG = 0xc0010000 | (pixels_to_write - 1); LCD2_BLOCK_CTRL = 0x34000000; - /* for each row */ - for (r = 0; r < h; r++) { - /* for each column */ - for (c = 0; c < width; c += 2) { - while (!(LCD2_BLOCK_CTRL & LCD2_BLOCK_TXOK)); - - /* output 2 pixels */ - LCD2_BLOCK_DATA = *addr++; + if (LCD_WIDTH == width) { + /* for each row and column in a single loop */ + for (r = 0; r < h*width; r += 2) { + while (!(LCD2_BLOCK_CTRL & LCD2_BLOCK_TXOK)); + + /* output 2 pixels */ + LCD2_BLOCK_DATA = *addr++; + } + } else { + /* for each row */ + for (r = 0; r < h; r++) { + /* for each column */ + for (c = 0; c < width; c += 2) { + while (!(LCD2_BLOCK_CTRL & LCD2_BLOCK_TXOK)); + + /* output 2 pixels */ + LCD2_BLOCK_DATA = *addr++; + } + addr += (LCD_WIDTH - width)/2; } - addr += (LCD_WIDTH - width)/2; } while (!(LCD2_BLOCK_CTRL & LCD2_BLOCK_READY)); -- 2.11.4.GIT