1 /***************************************************************************
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
10 * Copyright (C) 2007-2008 by Michael Sevakis
12 * H10 20GB LCD assembly routines
14 * This program is free software; you can redistribute it and/or
15 * modify it under the terms of the GNU General Public License
16 * as published by the Free Software Foundation; either version 2
17 * of the License, or (at your option) any later version.
19 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
20 * KIND, either express or implied.
22 ****************************************************************************/
27 /****************************************************************************
28 * void lcd_write_yuv_420_lines(unsigned char const * const src[3],
32 * |R| |1.000000 -0.000001 1.402000| |Y'|
33 * |G| = |1.000000 -0.334136 -0.714136| |Pb|
34 * |B| |1.000000 1.772000 0.000000| |Pr|
35 * Scaled, normalized, rounded and tweaked to yield RGB 565:
36 * |R| |74 0 101| |Y' - 16| >> 9
37 * |G| = |74 -24 -51| |Cb - 128| >> 8
38 * |B| |74 128 0| |Cr - 128| >> 9
40 * Write four RGB565 pixels in the following order on each loop:
44 .section .icode, "ax", %progbits
46 .global lcd_write_yuv420_lines
47 .type lcd_write_yuv420_lines, %function
48 lcd_write_yuv420_lines:
52 stmfd sp!, { r4-r11, lr } @ save non-scratch
53 ldmia r0, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p
54 @ r5 = yuv_src[1] = Cb_p
55 @ r6 = yuv_src[2] = Cr_p
57 mov r0, #0x7000000c @ r0 = &LCD2_PORT = 0x70008a0c
59 mov r14, #LCD2_DATA_MASK @
61 sub r2, r2, #1 @ Adjust stride because of increment
63 ldrb r7, [r4], #1 @ r7 = *Y'_p++;
64 ldrb r8, [r5], #1 @ r8 = *Cb_p++;
65 ldrb r9, [r6], #1 @ r9 = *Cr_p++;
67 sub r7, r7, #16 @ r7 = Y = (Y' - 16)*74
68 add r12, r7, r7, asl #2 @ actually (Y' - 16)*37 and shift right
69 add r7, r12, r7, asl #5 @ by one less when adding - same for all
71 sub r8, r8, #128 @ Cb -= 128
72 sub r9, r9, #128 @ Cr -= 128
74 add r10, r9, r9, asl #1 @ r10 = Cr*51 + Cb*24
75 add r10, r10, r10, asl #4 @
76 add r10, r10, r8, asl #3 @
77 add r10, r10, r8, asl #4 @
79 add r11, r9, r9, asl #2 @ r9 = Cr*101
80 add r11, r11, r9, asl #5 @
81 add r9, r11, r9, asl #6 @
83 add r8, r8, #2 @ r8 = bu = (Cb*128 + 128) >> 8
85 add r9, r9, #256 @ r9 = rv = (r8 + 256) >> 9
87 rsb r10, r10, #128 @ r10 = guv = (-r9 + 128) >> 8
88 mov r10, r10, asr #8 @
90 add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu
91 add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv
92 add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
94 orr r12, r3, r11 @ check if clamping is needed...
95 orr r12, r12, r7, asr #1 @ ...at all
99 mvnhi r3, r3, asr #31 @
101 cmp r11, #31 @ clamp r
102 mvnhi r11, r11, asr #31 @
103 andhi r11, r11, #31 @
104 cmp r7, #63 @ clamp g
105 mvnhi r7, r7, asr #31 @
109 ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
111 orr r3, r3, r11, lsl #11 @ r3 = b | (r << 11)
112 orr r3, r3, r7, lsl #5 @ r3 |= (g << 5)
114 orr r7, r14, r3, lsr #8 @ store pixel
118 tst r3, #LCD2_BUSY_MASK @
123 sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
124 add r12, r7, r7, asl #2 @
125 add r7, r12, r7, asl #5 @
126 @ compute R, G, and B
127 add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu
128 add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv
129 add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
131 orr r12, r3, r11 @ check if clamping is needed...
132 orr r12, r12, r7, asr #1 @ ...at all
135 cmp r3, #31 @ clamp b
136 mvnhi r3, r3, asr #31 @
138 cmp r11, #31 @ clamp r
139 mvnhi r11, r11, asr #31 @
140 andhi r11, r11, #31 @
141 cmp r7, #63 @ clamp g
142 mvnhi r7, r7, asr #31 @
146 ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++)
148 orr r3, r3, r11, lsl #11 @ r3 = b | (r << 11)
149 orr r3, r3, r7, lsl #5 @ r3 |= (g << 5)
151 orr r7, r14, r3, lsr #8 @ store pixel
155 tst r3, #LCD2_BUSY_MASK @
160 sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
161 add r12, r7, r7, asl #2 @
162 add r7, r12, r7, asl #5 @
163 @ compute R, G, and B
164 add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu
165 add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv
166 add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
168 orr r12, r3, r11 @ check if clamping is needed...
169 orr r12, r12, r7, asr #1 @ ...at all
172 cmp r3, #31 @ clamp b
173 mvnhi r3, r3, asr #31 @
175 cmp r11, #31 @ clamp r
176 mvnhi r11, r11, asr #31 @
177 andhi r11, r11, #31 @
178 cmp r7, #63 @ clamp g
179 mvnhi r7, r7, asr #31 @
183 ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
185 orr r3, r3, r7, lsl #5 @ r3 = b | (g << 5)
186 orr r3, r3, r11, lsl #11 @ r3 |= (r << 11)
188 orr r7, r14, r3, lsr #8 @ store pixel
192 tst r3, #LCD2_BUSY_MASK @
197 sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
198 add r12, r7, r7, asl #2 @
199 add r7, r12, r7, asl #5 @
200 @ compute R, G, and B
201 add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu
202 add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv
203 add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
205 orr r12, r3, r11 @ check if clamping is needed...
206 orr r12, r12, r7, asr #1 @ ...at all
209 cmp r3, #31 @ clamp b
210 mvnhi r3, r3, asr #31 @
212 cmp r11, #31 @ clamp r
213 mvnhi r11, r11, asr #31 @
214 andhi r11, r11, #31 @
215 cmp r7, #63 @ clamp g
216 mvnhi r7, r7, asr #31 @
220 orr r3, r3, r11, lsl #11 @ r3 = b | (r << 11)
221 orr r3, r3, r7, lsl #5 @ r3 |= (g << 5)
223 orr r7, r14, r3, lsr #8 @ store pixel
227 tst r3, #LCD2_BUSY_MASK @
232 subs r1, r1, #2 @ subtract block from width
233 bgt 10b @ loop line @
235 ldmfd sp!, { r4-r11, pc } @ restore registers and return
236 .ltorg @ dump constant pool
237 .size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines
240 /****************************************************************************
241 * void lcd_write_yuv_420_lines_odither(unsigned char const * const src[3],
247 * |R| |1.000000 -0.000001 1.402000| |Y'|
248 * |G| = |1.000000 -0.334136 -0.714136| |Pb|
249 * |B| |1.000000 1.772000 0.000000| |Pr|
250 * Red scaled at twice g & b but at same precision to place it in correct
251 * bit position after multiply and leave instruction count lower.
252 * |R| |258 0 408| |Y' - 16|
253 * |G| = |149 -49 -104| |Cb - 128|
254 * |B| |149 258 0| |Cr - 128|
256 * Write four RGB565 pixels in the following order on each loop:
260 * Kernel pattern (raw|use order):
261 * 5 3 4 2 row0 row2 > down
262 * 1 7 0 6 | 5 1 3 7 4 0 2 6 col0 left
263 * 4 2 5 3 | 4 0 2 6 5 1 3 7 col2 \/
266 .section .icode, "ax", %progbits
268 .global lcd_write_yuv420_lines_odither
269 .type lcd_write_yuv420_lines_odither, %function
270 lcd_write_yuv420_lines_odither:
276 stmfd sp!, { r4-r11, lr } @ save non-scratch
277 ldmia r0, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p
278 @ r5 = yuv_src[1] = Cb_p
279 @ r6 = yuv_src[2] = Cr_p
281 ldr r0, [sp, #36] @ Line up pattern and kernel quadrant
284 mov r14, r14, lsl #6 @ 0x00 or 0x80
286 mov r0, #0x7000000c @ r0 = &LCD2_PORT = 0x70008a0c
287 add r0, r0, #0x8a00 @
289 sub r2, r2, #1 @ Adjust stride because of increment
292 ldrb r7, [r4], #1 @ r7 = *Y'_p++;
293 ldrb r8, [r5], #1 @ r8 = *Cb_p++;
294 ldrb r9, [r6], #1 @ r9 = *Cr_p++;
296 eor r14, r14, #0x80 @ flip pattern quadrant
298 sub r7, r7, #16 @ r7 = Y = (Y' - 16)*149
299 add r12, r7, r7, asl #2 @
300 add r12, r12, r12, asl #4 @
301 add r7, r12, r7, asl #6 @
303 sub r8, r8, #128 @ Cb -= 128
304 sub r9, r9, #128 @ Cr -= 128
306 add r10, r8, r8, asl #4 @ r10 = guv = Cr*104 + Cb*49
307 add r10, r10, r8, asl #5 @
308 add r10, r10, r9, asl #3 @
309 add r10, r10, r9, asl #5 @
310 add r10, r10, r9, asl #6 @
312 mov r8, r8, asl #1 @ r8 = bu = Cb*258
313 add r8, r8, r8, asl #7 @
315 add r9, r9, r9, asl #1 @ r9 = rv = Cr*408
316 add r9, r9, r9, asl #4 @
319 @ compute R, G, and B
320 add r3, r8, r7 @ r3 = b' = Y + bu
321 add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
322 rsb r7, r10, r7 @ r7 = g' = Y + guv
324 @ r8 = bu, r9 = rv, r10 = guv
326 sub r12, r3, r3, lsr #5 @ r3 = 31/32*b + b/256
327 add r3, r12, r3, lsr #8 @
329 sub r12, r11, r11, lsr #5 @ r11 = 31/32*r + r/256
330 add r11, r12, r11, lsr #8 @
332 sub r12, r7, r7, lsr #6 @ r7 = 63/64*g + g/256
333 add r7, r12, r7, lsr #8 @
335 add r12, r14, #0x200 @
337 add r3, r3, r12 @ b = r3 + delta
338 add r11, r11, r12, lsl #1 @ r = r11 + delta*2
339 add r7, r7, r12, lsr #1 @ g = r7 + delta/2
341 orr r12, r3, r11, asr #1 @ check if clamping is needed...
342 orr r12, r12, r7 @ ...at all
343 movs r12, r12, asr #15 @
345 movs r12, r3, asr #15 @ clamp b
346 mvnne r3, r12, lsr #15 @
347 andne r3, r3, #0x7c00 @ mask b only if clamped
348 movs r12, r11, asr #16 @ clamp r
349 mvnne r11, r12, lsr #16 @
350 movs r12, r7, asr #15 @ clamp g
351 mvnne r7, r12, lsr #15 @
354 ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
356 and r11, r11, #0xf800 @ pack pixel
357 and r7, r7, #0x7e00 @ r3 = pixel = (r & 0xf800) |
358 orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
359 orr r3, r11, r3, lsr #10 @ (b >> 10)
361 mov r11, #LCD2_DATA_MASK @ store pixel
362 orr r7, r11, r3, lsr #8 @
366 tst r3, #LCD2_BUSY_MASK @
371 sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
372 add r12, r7, r7, asl #2 @
373 add r12, r12, r12, asl #4 @
374 add r7, r12, r7, asl #6 @
375 @ compute R, G, and B
376 add r3, r8, r7 @ r3 = b' = Y + bu
377 add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
378 rsb r7, r10, r7 @ r7 = g' = Y + guv
380 sub r12, r3, r3, lsr #5 @ r3 = 31/32*b' + b'/256
381 add r3, r12, r3, lsr #8 @
383 sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256
384 add r11, r12, r11, lsr #8 @
386 sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256
387 add r7, r12, r7, lsr #8 @
389 @ This element is zero - use r14 @
391 add r3, r3, r14 @ b = r3 + delta
392 add r11, r11, r14, lsl #1 @ r = r11 + delta*2
393 add r7, r7, r14, lsr #1 @ g = r7 + delta/2
395 orr r12, r3, r11, asr #1 @ check if clamping is needed...
396 orr r12, r12, r7 @ ...at all
397 movs r12, r12, asr #15 @
399 movs r12, r3, asr #15 @ clamp b
400 mvnne r3, r12, lsr #15 @
401 andne r3, r3, #0x7c00 @ mask b only if clamped
402 movs r12, r11, asr #16 @ clamp r
403 mvnne r11, r12, lsr #16 @
404 movs r12, r7, asr #15 @ clamp g
405 mvnne r7, r12, lsr #15 @
408 ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++)
410 and r11, r11, #0xf800 @ pack pixel
411 and r7, r7, #0x7e00 @ r3 = pixel = (r & 0xf800) |
412 orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
413 orr r3, r11, r3, lsr #10 @ (b >> 10)
415 mov r11, #LCD2_DATA_MASK @ store pixel
416 orr r7, r11, r3, lsr #8 @
420 tst r3, #LCD2_BUSY_MASK @
425 sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
426 add r12, r7, r7, asl #2 @
427 add r12, r12, r12, asl #4 @
428 add r7, r12, r7, asl #6 @
429 @ compute R, G, and B
430 add r3, r8, r7 @ r3 = b' = Y + bu
431 add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
432 rsb r7, r10, r7 @ r7 = g' = Y + guv
434 @ r8 = bu, r9 = rv, r10 = guv
436 sub r12, r3, r3, lsr #5 @ r3 = 31/32*b' + b'/256
437 add r3, r12, r3, lsr #8 @
439 sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256
440 add r11, r12, r11, lsr #8 @
442 sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256
443 add r7, r12, r7, lsr #8 @
445 add r12, r14, #0x100 @
447 add r3, r3, r12 @ b = r3 + delta
448 add r11, r11, r12, lsl #1 @ r = r11 + delta*2
449 add r7, r7, r12, lsr #1 @ g = r7 + delta/2
451 orr r12, r3, r11, asr #1 @ check if clamping is needed...
452 orr r12, r12, r7 @ ...at all
453 movs r12, r12, asr #15 @
455 movs r12, r3, asr #15 @ clamp b
456 mvnne r3, r12, lsr #15 @
457 andne r3, r3, #0x7c00 @ mask b only if clamped
458 movs r12, r11, asr #16 @ clamp r
459 mvnne r11, r12, lsr #16 @
460 movs r12, r7, asr #15 @ clamp g
461 mvnne r7, r12, lsr #15 @
464 ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
466 and r11, r11, #0xf800 @ pack pixel
467 and r7, r7, #0x7e00 @ r3 = pixel = (r & 0xf800) |
468 orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
469 orr r3, r11, r3, lsr #10 @ (b >> 10)
471 mov r11, #LCD2_DATA_MASK @ store pixel
472 orr r7, r11, r3, lsr #8 @
476 tst r3, #LCD2_BUSY_MASK @
481 sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
482 add r12, r7, r7, asl #2 @
483 add r12, r12, r12, asl #4 @
484 add r7, r12, r7, asl #6 @
485 @ compute R, G, and B
486 add r3, r8, r7 @ r3 = b' = Y + bu
487 add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
488 rsb r7, r10, r7 @ r7 = g' = Y + guv
490 sub r12, r3, r3, lsr #5 @ r3 = 31/32*b + b/256
491 add r3, r12, r3, lsr #8 @
493 sub r12, r11, r11, lsr #5 @ r11 = 31/32*r + r/256
494 add r11, r12, r11, lsr #8 @
496 sub r12, r7, r7, lsr #6 @ r7 = 63/64*g + g/256
497 add r7, r12, r7, lsr #8 @
499 add r12, r14, #0x300 @
501 add r3, r3, r12 @ b = r3 + delta
502 add r11, r11, r12, lsl #1 @ r = r11 + delta*2
503 add r7, r7, r12, lsr #1 @ g = r7 + delta/2
505 orr r12, r3, r11, asr #1 @ check if clamping is needed...
506 orr r12, r12, r7 @ ...at all
507 movs r12, r12, asr #15 @
509 movs r12, r3, asr #15 @ clamp b
510 mvnne r3, r12, lsr #15 @
511 andne r3, r3, #0x7c00 @ mask b only if clamped
512 movs r12, r11, asr #16 @ clamp r
513 mvnne r11, r12, lsr #16 @
514 movs r12, r7, asr #15 @ clamp g
515 mvnne r7, r12, lsr #15 @
518 and r11, r11, #0xf800 @ pack pixel
519 and r7, r7, #0x7e00 @ r3 = pixel = (r & 0xf800) |
520 orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
521 orr r3, r11, r3, lsr #10 @ (b >> 10)
523 mov r11, #LCD2_DATA_MASK @ store pixel
524 orr r7, r11, r3, lsr #8 @
528 tst r3, #LCD2_BUSY_MASK @
533 subs r1, r1, #2 @ subtract block from width
534 bgt 10b @ loop line @
536 ldmfd sp!, { r4-r11, pc } @ restore registers and return
537 .ltorg @ dump constant pool
538 .size lcd_write_yuv420_lines_odither, .-lcd_write_yuv420_lines_odither