1 /***************************************************************************
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
10 * Copyright (C) 2007-2008 by Michael Sevakis
11 * Adapted for the Packard Bell Vibe 500 by Szymon Dziok
13 * Packard Bell Vibe 500 LCD assembly routines
15 * This program is free software; you can redistribute it and/or
16 * modify it under the terms of the GNU General Public License
17 * as published by the Free Software Foundation; either version 2
18 * of the License, or (at your option) any later version.
20 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
21 * KIND, either express or implied.
23 ****************************************************************************/
28 /****************************************************************************
29 * void lcd_write_yuv420_lines(unsigned char const * const src[3],
33 * |R| |1.000000 -0.000001 1.402000| |Y'|
34 * |G| = |1.000000 -0.334136 -0.714136| |Pb|
35 * |B| |1.000000 1.772000 0.000000| |Pr|
36 * Scaled, normalized, rounded and tweaked to yield RGB 565:
37 * |R| |74 0 101| |Y' - 16| >> 9
38 * |G| = |74 -24 -51| |Cb - 128| >> 8
39 * |B| |74 128 0| |Cr - 128| >> 9
41 * Write four RGB565 pixels in the following order on each loop:
45 .section .icode, "ax", %progbits
@-----------------------------------------------------------------------
@ lcd_write_yuv420_lines
@
@ Streams two lines of planar YUV420 data to the LCD bridge as RGB565,
@ using the fixed-point coefficient matrix documented in the header
@ comment above.
@
@ In:   r0 = src: pointer to three plane pointers { Y', Cb, Cr }
@       r1 = width in pixels (two consumed per loop pass, see "subs r1")
@       r2 = Y' line stride (decremented by 1 below to compensate for
@            the post-increment on the Y' loads)
@ Out:  nothing; pixel bytes are written to the port at [r0, #0x10]
@ Uses: r3, r7-r12 as scratch; r4-r11 and lr are saved on entry and
@       restored on exit (returns via popping pc)
@
@ NOTE(review): this excerpt has lines elided from the original file:
@ the "10:" loop label targeted by "bgt 10b", the busy-poll load/branch
@ pairs around each "tst r11, #LCD1_BUSY_MASK", some "cmp"/"andhi"
@ clamp and shift instructions implied by neighbouring comments, and
@ the instruction that repoints r0 at the LCD controller base before
@ the first store. Restore them from the complete source before
@ assembling.
@-----------------------------------------------------------------------
47 .global lcd_write_yuv420_lines
48 .type lcd_write_yuv420_lines, %function
49 lcd_write_yuv420_lines:
53 stmfd sp!, { r4-r11, lr } @ save non-scratch
54 ldmia r0, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p
55 @ r5 = yuv_src[1] = Cb_p
56 @ r6 = yuv_src[2] = Cr_p
60 sub r2, r2, #1 @ Adjust stride because of increment
@ ---- per-block loop (label "10:" elided in this excerpt) ----
62 ldrb r7, [r4], #1 @ r7 = *Y'_p++;
63 ldrb r8, [r5], #1 @ r8 = *Cb_p++;
64 ldrb r9, [r6], #1 @ r9 = *Cr_p++;
66 sub r7, r7, #16 @ r7 = Y = (Y' - 16)*74
67 add r12, r7, r7, asl #2 @ actually (Y' - 16)*37 and shift right
68 add r7, r12, r7, asl #5 @ by one less when adding - same for all
70 sub r8, r8, #128 @ Cb -= 128
71 sub r9, r9, #128 @ Cr -= 128
73 add r10, r9, r9, asl #1 @ r10 = Cr*51 + Cb*24
74 add r10, r10, r10, asl #4 @ (Cr*3)*17 = Cr*51
75 add r10, r10, r8, asl #3 @ + Cb*8
76 add r10, r10, r8, asl #4 @ + Cb*16 -> Cr*51 + Cb*24
78 add r11, r9, r9, asl #2 @ r9 = Cr*101
79 add r11, r11, r9, asl #5 @ Cr*5 + Cr*32 = Cr*37
80 add r9, r11, r9, asl #6 @ Cr*37 + Cr*64 = Cr*101
82 add r8, r8, #2 @ r8 = bu = (Cb*128 + 128) >> 8
84 add r9, r9, #256 @ r9 = rv = (r8 + 256) >> 9
86 rsb r10, r10, #128 @ r10 = guv = (-r9 + 128) >> 8
87 mov r10, r10, asr #8 @ apply the >> 8
@ compute R, G, and B for the first pixel
89 add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu
90 add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv
91 add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
93 orr r12, r3, r11 @ check if clamping is needed...
94 orr r12, r12, r7, asr #1 @ ...at all
98 mvnhi r3, r3, asr #31 @ clamp b (its "cmp"/"andhi" elided here)
100 cmp r11, #31 @ clamp r
101 mvnhi r11, r11, asr #31 @
102 andhi r11, r11, #31 @
103 cmp r7, #63 @ clamp g
104 mvnhi r7, r7, asr #31 @ ("andhi r7, r7, #63" elided here)
108 ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
110 orr r3, r3, r11, lsl #11 @ r3 = b | (r << 11)
111 orr r3, r3, r7, lsl #5 @ r3 |= (g << 5)
113 movs r7, r3, lsr #8 @ store pixel: r7 = high byte
@ NOTE(review): each store below is preceded in the full source by a
@ busy-poll loop ("ldr r11, [r0]" / "bne ...b") that is elided here.
116 tst r11, #LCD1_BUSY_MASK @
118 str r7, [r0, #0x10] @ write high byte
121 tst r11, #LCD1_BUSY_MASK @
123 str r3, [r0, #0x10] @ write low byte
125 sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
126 add r12, r7, r7, asl #2 @
127 add r7, r12, r7, asl #5 @
128 @ compute R, G, and B
129 add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu
130 add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv
131 add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
133 orr r12, r3, r11 @ check if clamping is needed...
134 orr r12, r12, r7, asr #1 @ ...at all
137 cmp r3, #31 @ clamp b
138 mvnhi r3, r3, asr #31 @
140 cmp r11, #31 @ clamp r
141 mvnhi r11, r11, asr #31 @
142 andhi r11, r11, #31 @
143 cmp r7, #63 @ clamp g
144 mvnhi r7, r7, asr #31 @
148 ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++)
150 orr r3, r3, r11, lsl #11 @ r3 = b | (r << 11)
151 orr r3, r3, r7, lsl #5 @ r3 |= (g << 5)
153 movs r7, r3, lsr #8 @ store pixel
156 tst r11, #LCD1_BUSY_MASK @
158 str r7, [r0, #0x10] @
161 tst r11, #LCD1_BUSY_MASK @
163 str r3, [r0, #0x10] @
165 sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
166 add r12, r7, r7, asl #2 @
167 add r7, r12, r7, asl #5 @
168 @ compute R, G, and B
169 add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu
170 add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv
171 add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
173 orr r12, r3, r11 @ check if clamping is needed...
174 orr r12, r12, r7, asr #1 @ ...at all
177 cmp r3, #31 @ clamp b
178 mvnhi r3, r3, asr #31 @
180 cmp r11, #31 @ clamp r
181 mvnhi r11, r11, asr #31 @
182 andhi r11, r11, #31 @
183 cmp r7, #63 @ clamp g
184 mvnhi r7, r7, asr #31 @
188 ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
190 orr r3, r3, r7, lsl #5 @ r3 = b | (g << 5)
191 orr r3, r3, r11, lsl #11 @ r3 |= (r << 11)
193 movs r7, r3, lsr #8 @ store pixel
196 tst r11, #LCD1_BUSY_MASK @
198 str r7, [r0, #0x10] @
201 tst r11, #LCD1_BUSY_MASK @
203 str r3, [r0, #0x10] @
205 sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
206 add r12, r7, r7, asl #2 @
207 add r7, r12, r7, asl #5 @
208 @ compute R, G, and B
209 add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu
210 add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv
211 add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
213 orr r12, r3, r11 @ check if clamping is needed...
214 orr r12, r12, r7, asr #1 @ ...at all
217 cmp r3, #31 @ clamp b
218 mvnhi r3, r3, asr #31 @
220 cmp r11, #31 @ clamp r
221 mvnhi r11, r11, asr #31 @
222 andhi r11, r11, #31 @
223 cmp r7, #63 @ clamp g
224 mvnhi r7, r7, asr #31 @
228 orr r3, r3, r11, lsl #11 @ r3 = b | (r << 11)
229 orr r3, r3, r7, lsl #5 @ r3 |= (g << 5)
231 movs r7, r3, lsr #8 @ store pixel
234 tst r11, #LCD1_BUSY_MASK @
236 str r7, [r0, #0x10] @
239 tst r11, #LCD1_BUSY_MASK @
241 str r3, [r0, #0x10] @
243 subs r1, r1, #2 @ subtract block from width
244 bgt 10b @ loop line @
246 ldmfd sp!, { r4-r11, pc } @ restore registers and return
247 .ltorg @ dump constant pool
248 .size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines
251 /****************************************************************************
252 * void lcd_write_yuv420_lines_odither(unsigned char const * const src[3],
258 * |R| |1.000000 -0.000001 1.402000| |Y'|
259 * |G| = |1.000000 -0.334136 -0.714136| |Pb|
260 * |B| |1.000000 1.772000 0.000000| |Pr|
261 * Red scaled at twice g & b but at same precision to place it in correct
262 * bit position after multiply and leave instruction count lower.
263 * |R| |258 0 408| |Y' - 16|
264 * |G| = |149 -49 -104| |Cb - 128|
265 * |B| |149 258 0| |Cr - 128|
267 * Write four RGB565 pixels in the following order on each loop:
271 * Kernel pattern (raw|use order):
272 * 5 3 4 2 row0 row2 > down
273 * 1 7 0 6 | 5 1 3 7 4 0 2 6 col0 left
274 * 4 2 5 3 | 4 0 2 6 5 1 3 7 col2 \/
277 .section .icode, "ax", %progbits
@-----------------------------------------------------------------------
@ lcd_write_yuv420_lines_odither
@
@ As lcd_write_yuv420_lines, but applies the ordered-dither kernel
@ documented in the header comment above to each channel before packing
@ the RGB565 pixel.
@
@ In:   r0 = src: pointer to three plane pointers { Y', Cb, Cr }
@       r1 = width in pixels (two consumed per loop pass)
@       r2 = Y' line stride (decremented by 1 below)
@       [sp, #36] = extra stack argument used to line up the dither
@            pattern and kernel quadrant (see the comment on its load)
@ Uses: r14 holds the dither-quadrant offset (0x00 or 0x80), flipped
@       every loop pass; r3, r7-r12 scratch; r4-r11 and lr saved on
@       entry and restored on exit (returns via popping pc)
@
@ NOTE(review): this excerpt has lines elided from the original file:
@ the "10:" loop label for "bgt 10b", the instructions deriving r14
@ from r0 after the [sp, #36] load, the busy-poll load/branch pairs
@ around each "tst r11, #LCD1_BUSY_MASK", the shift finishing the
@ Cr*408 term, and the instruction loading the LCD controller base
@ into r0. Restore them from the complete source before assembling.
@-----------------------------------------------------------------------
279 .global lcd_write_yuv420_lines_odither
280 .type lcd_write_yuv420_lines_odither, %function
281 lcd_write_yuv420_lines_odither:
287 stmfd sp!, { r4-r11, lr } @ save non-scratch
288 ldmia r0, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p
289 @ r5 = yuv_src[1] = Cb_p
290 @ r6 = yuv_src[2] = Cr_p
292 ldr r0, [sp, #36] @ Line up pattern and kernel quadrant
295 mov r14, r14, lsl #6 @ 0x00 or 0x80
299 sub r2, r2, #1 @ Adjust stride because of increment
@ ---- per-block loop (label "10:" elided in this excerpt) ----
302 ldrb r7, [r4], #1 @ r7 = *Y'_p++;
303 ldrb r8, [r5], #1 @ r8 = *Cb_p++;
304 ldrb r9, [r6], #1 @ r9 = *Cr_p++;
306 eor r14, r14, #0x80 @ flip pattern quadrant
308 sub r7, r7, #16 @ r7 = Y = (Y' - 16)*149
309 add r12, r7, r7, asl #2 @ (Y' - 16)*5
310 add r12, r12, r12, asl #4 @ *17 -> (Y' - 16)*85
311 add r7, r12, r7, asl #6 @ + (Y' - 16)*64 = (Y' - 16)*149
313 sub r8, r8, #128 @ Cb -= 128
314 sub r9, r9, #128 @ Cr -= 128
316 add r10, r8, r8, asl #4 @ r10 = guv = Cr*104 + Cb*49
317 add r10, r10, r8, asl #5 @ Cb*17 + Cb*32 = Cb*49
318 add r10, r10, r9, asl #3 @ + Cr*8
319 add r10, r10, r9, asl #5 @ + Cr*32
320 add r10, r10, r9, asl #6 @ + Cr*64 -> Cr*104 + Cb*49
322 mov r8, r8, asl #1 @ r8 = bu = Cb*258
323 add r8, r8, r8, asl #7 @ (Cb*2)*129 = Cb*258
325 add r9, r9, r9, asl #1 @ r9 = rv = Cr*408
326 add r9, r9, r9, asl #4 @ (Cr*3)*17 = Cr*51 (final shift to *408 elided here)
329 @ compute R, G, and B
330 add r3, r8, r7 @ r3 = b' = Y + bu
331 add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
332 rsb r7, r10, r7 @ r7 = g' = Y + guv
334 @ r8 = bu, r9 = rv, r10 = guv
336 sub r12, r3, r3, lsr #5 @ r3 = 31/32*b + b/256
337 add r3, r12, r3, lsr #8 @
339 sub r12, r11, r11, lsr #5 @ r11 = 31/32*r + r/256
340 add r11, r12, r11, lsr #8 @
342 sub r12, r7, r7, lsr #6 @ r7 = 63/64*g + g/256
343 add r7, r12, r7, lsr #8 @
345 add r12, r14, #0x200 @ r12 = delta = quadrant + kernel element
347 add r3, r3, r12 @ b = r3 + delta
348 add r11, r11, r12, lsl #1 @ r = r11 + delta*2
349 add r7, r7, r12, lsr #1 @ g = r7 + delta/2
351 orr r12, r3, r11, asr #1 @ check if clamping is needed...
352 orr r12, r12, r7 @ ...at all
353 movs r12, r12, asr #15 @
355 movs r12, r3, asr #15 @ clamp b
356 mvnne r3, r12, lsr #15 @
357 andne r3, r3, #0x7c00 @ mask b only if clamped
358 movs r12, r11, asr #16 @ clamp r
359 mvnne r11, r12, lsr #16 @
360 movs r12, r7, asr #15 @ clamp g
361 mvnne r7, r12, lsr #15 @
364 ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
366 and r11, r11, #0xf800 @ pack pixel
367 and r7, r7, #0x7e00 @ r3 = pixel = (r & 0xf800) |
368 orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
369 orr r3, r11, r3, lsr #10 @ (b >> 10)
371 movs r7, r3, lsr #8 @ store pixel: r7 = high byte
@ NOTE(review): each store below is preceded in the full source by a
@ busy-poll loop ("ldr r11, [r0]" / "bne ...b") that is elided here.
374 tst r11, #LCD1_BUSY_MASK @
376 str r7, [r0, #0x10] @ write high byte
379 tst r11, #LCD1_BUSY_MASK @
381 str r3, [r0, #0x10] @ write low byte
383 sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
384 add r12, r7, r7, asl #2 @
385 add r12, r12, r12, asl #4 @
386 add r7, r12, r7, asl #6 @
387 @ compute R, G, and B
388 add r3, r8, r7 @ r3 = b' = Y + bu
389 add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
390 rsb r7, r10, r7 @ r7 = g' = Y + guv
392 sub r12, r3, r3, lsr #5 @ r3 = 31/32*b' + b'/256
393 add r3, r12, r3, lsr #8 @
395 sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256
396 add r11, r12, r11, lsr #8 @
398 sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256
399 add r7, r12, r7, lsr #8 @
401 @ This element is zero - use r14 @
403 add r3, r3, r14 @ b = r3 + delta
404 add r11, r11, r14, lsl #1 @ r = r11 + delta*2
405 add r7, r7, r14, lsr #1 @ g = r7 + delta/2
407 orr r12, r3, r11, asr #1 @ check if clamping is needed...
408 orr r12, r12, r7 @ ...at all
409 movs r12, r12, asr #15 @
411 movs r12, r3, asr #15 @ clamp b
412 mvnne r3, r12, lsr #15 @
413 andne r3, r3, #0x7c00 @ mask b only if clamped
414 movs r12, r11, asr #16 @ clamp r
415 mvnne r11, r12, lsr #16 @
416 movs r12, r7, asr #15 @ clamp g
417 mvnne r7, r12, lsr #15 @
420 ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++)
422 and r11, r11, #0xf800 @ pack pixel
423 and r7, r7, #0x7e00 @ r3 = pixel = (r & 0xf800) |
424 orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
425 orr r3, r11, r3, lsr #10 @ (b >> 10)
427 movs r7, r3, lsr #8 @ store pixel
430 tst r11, #LCD1_BUSY_MASK @
432 str r7, [r0, #0x10] @
435 tst r11, #LCD1_BUSY_MASK @
437 str r3, [r0, #0x10] @
439 sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
440 add r12, r7, r7, asl #2 @
441 add r12, r12, r12, asl #4 @
442 add r7, r12, r7, asl #6 @
443 @ compute R, G, and B
444 add r3, r8, r7 @ r3 = b' = Y + bu
445 add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
446 rsb r7, r10, r7 @ r7 = g' = Y + guv
448 @ r8 = bu, r9 = rv, r10 = guv
450 sub r12, r3, r3, lsr #5 @ r3 = 31/32*b' + b'/256
451 add r3, r12, r3, lsr #8 @
453 sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256
454 add r11, r12, r11, lsr #8 @
456 sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256
457 add r7, r12, r7, lsr #8 @
459 add r12, r14, #0x100 @ r12 = delta = quadrant + kernel element
461 add r3, r3, r12 @ b = r3 + delta
462 add r11, r11, r12, lsl #1 @ r = r11 + delta*2
463 add r7, r7, r12, lsr #1 @ g = r7 + delta/2
465 orr r12, r3, r11, asr #1 @ check if clamping is needed...
466 orr r12, r12, r7 @ ...at all
467 movs r12, r12, asr #15 @
469 movs r12, r3, asr #15 @ clamp b
470 mvnne r3, r12, lsr #15 @
471 andne r3, r3, #0x7c00 @ mask b only if clamped
472 movs r12, r11, asr #16 @ clamp r
473 mvnne r11, r12, lsr #16 @
474 movs r12, r7, asr #15 @ clamp g
475 mvnne r7, r12, lsr #15 @
478 ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
480 and r11, r11, #0xf800 @ pack pixel
481 and r7, r7, #0x7e00 @ r3 = pixel = (r & 0xf800) |
482 orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
483 orr r3, r11, r3, lsr #10 @ (b >> 10)
485 movs r7, r3, lsr #8 @ store pixel
488 tst r11, #LCD1_BUSY_MASK @
490 str r7, [r0, #0x10] @
493 tst r11, #LCD1_BUSY_MASK @
495 str r3, [r0, #0x10] @
497 sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
498 add r12, r7, r7, asl #2 @
499 add r12, r12, r12, asl #4 @
500 add r7, r12, r7, asl #6 @
501 @ compute R, G, and B
502 add r3, r8, r7 @ r3 = b' = Y + bu
503 add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
504 rsb r7, r10, r7 @ r7 = g' = Y + guv
506 sub r12, r3, r3, lsr #5 @ r3 = 31/32*b + b/256
507 add r3, r12, r3, lsr #8 @
509 sub r12, r11, r11, lsr #5 @ r11 = 31/32*r + r/256
510 add r11, r12, r11, lsr #8 @
512 sub r12, r7, r7, lsr #6 @ r7 = 63/64*g + g/256
513 add r7, r12, r7, lsr #8 @
515 add r12, r14, #0x300 @ r12 = delta = quadrant + kernel element
517 add r3, r3, r12 @ b = r3 + delta
518 add r11, r11, r12, lsl #1 @ r = r11 + delta*2
519 add r7, r7, r12, lsr #1 @ g = r7 + delta/2
521 orr r12, r3, r11, asr #1 @ check if clamping is needed...
522 orr r12, r12, r7 @ ...at all
523 movs r12, r12, asr #15 @
525 movs r12, r3, asr #15 @ clamp b
526 mvnne r3, r12, lsr #15 @
527 andne r3, r3, #0x7c00 @ mask b only if clamped
528 movs r12, r11, asr #16 @ clamp r
529 mvnne r11, r12, lsr #16 @
530 movs r12, r7, asr #15 @ clamp g
531 mvnne r7, r12, lsr #15 @
534 and r11, r11, #0xf800 @ pack pixel
535 and r7, r7, #0x7e00 @ r3 = pixel = (r & 0xf800) |
536 orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
537 orr r3, r11, r3, lsr #10 @ (b >> 10)
539 movs r7, r3, lsr #8 @ store pixel
542 tst r11, #LCD1_BUSY_MASK @
544 str r7, [r0, #0x10] @
547 tst r11, #LCD1_BUSY_MASK @
549 str r3, [r0, #0x10] @
551 subs r1, r1, #2 @ subtract block from width
552 bgt 10b @ loop line @
554 ldmfd sp!, { r4-r11, pc } @ restore registers and return
555 .ltorg @ dump constant pool
556 .size lcd_write_yuv420_lines_odither, .-lcd_write_yuv420_lines_odither