1 /***************************************************************************
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
10 * Copyright (C) 2007 by Jens Arnold
11 * Heavily based on lcd-as-memframe.c by Michael Sevakis
12 * Adapted for Sansa Fuze/e200v2 by Rafaël Carré
14 * This program is free software; you can redistribute it and/or
15 * modify it under the terms of the GNU General Public License
16 * as published by the Free Software Foundation; either version 2
17 * of the License, or (at your option) any later version.
19 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
20 * KIND, either express or implied.
22 ****************************************************************************/
/* Bit 10 of the word read from DBOP base + 0xc (DBOP_STATUS); tested with tst after each pixel loop. */
27 #define DBOP_BUSY (1<<10)
29 /****************************************************************************
30 * void lcd_write_yuv420_lines(unsigned char const * const src[3],
34 * |R| |1.000000 -0.000001 1.402000| |Y'|
35 * |G| = |1.000000 -0.334136 -0.714136| |Pb|
36 * |B| |1.000000 1.772000 0.000000| |Pr|
37 * Scaled, normalized, rounded and tweaked to yield RGB 565:
38 * |R| |74 0 101| |Y' - 16| >> 9
39 * |G| = |74 -24 -51| |Cb - 128| >> 8
40 * |B| |74 128 0| |Cr - 128| >> 9
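42 * Write four RGB565 pixels in the following order on each loop:
 * 1 = *Y'_p, 2 = *(Y'_p + stride), 3 = next *Y'_p, 4 = next *(Y'_p + stride).
 * Pixels 1+2 and 3+4 are each packed into one 32-bit word (1 resp. 3 in the
 * low halfword) and written with a single store.
 */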
46 .section .icode, "ax", %progbits
48 .global lcd_write_yuv420_lines
49 .type lcd_write_yuv420_lines, %function
@ ---------------------------------------------------------------------------
@ lcd_write_yuv420_lines
@
@ Converts Y'CbCr samples to RGB565 and stores them to the DBOP data register
@ (DBOP base + 0x10). Each loop iteration reads one Cb and one Cr sample and
@ four Y' samples (two at Y'_p, two at Y'_p + stride), and issues two 32-bit
@ stores holding two pixels each.
@
@ Register use as visible below:
@   r0      in: pointer to { Y'_p, Cb_p, Cr_p }; afterwards scratch (b, packed word)
@   r1      remaining width, decremented by 2 per iteration
@   r2      stride - 1 (so that [r4, r2] after a post-increment is Y'_p + stride)
@   r3      DBOP base (+8 = DBOP_CTRL, +0xc = DBOP_STATUS, +0x10 = data)
@   r4-r6   Y', Cb, Cr read pointers
@   r7      Y, then g      r8 = bu      r9 = rv      r10 = guv
@   r11     first pixel of the pair being packed
@   r12     scratch        lr = r
@
@ NOTE(review): the embedded listing numbers skip at several places and a few
@ instructions that the comments describe are not visible in this listing
@ (initial load of r3 and r12, the `10:` loop label, the final shifts for
@ bu and rv, the test/branch after the clamp pre-check, the masks after the
@ mvnhi clamps, the branch after the final tst). Confirm against the
@ original file before assembling; the individual spots are marked below.
@ ---------------------------------------------------------------------------
50 lcd_write_yuv420_lines:
54 stmfd sp!, { r4-r11, lr } @ save non-scratch
@ NOTE(review): r3 is only OR-ed here; no instruction that sets its upper
@ bits is visible (the odither routine uses mov r3, #0xC8000000 first).
57 orr r3, r3, #0x120000 @ r3 = DBOP_BASE
59 ldmia r0, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p
60 @ r5 = yuv_src[1] = Cb_p
61 @ r6 = yuv_src[2] = Cr_p
64 sub r2, r2, #1 @ stride -= 1 (compensates the post-increment of r4)
@ NOTE(review): r12 is modified before any visible load; the comment on the
@ store describes a read-modify-write of DBOP_CTRL, so a preceding
@ ldr r12, [r3, #8] is presumably missing from this listing.
65 orr r12, r12, #3<<13 @
66 str r12, [r3, #8] @ DBOP_CTRL |= (1<<13|1<<14) (32bit mode)
@ NOTE(review): `bgt 10b` at the bottom needs a local label `10:`; it is
@ presumably meant to be here (start of the per-block loop) but is not visible.
68 ldrb r7, [r4], #1 @ r7 = *Y'_p++;
69 ldrb r8, [r5], #1 @ r8 = *Cb_p++;
70 ldrb r9, [r6], #1 @ r9 = *Cr_p++;
72 sub r7, r7, #16 @ r7 = Y = (Y' - 16)*74
73 add r12, r7, r7, asl #2 @ actually (Y' - 16)*37 and shift right
74 add r7, r12, r7, asl #5 @ by one less when adding - same for all
76 sub r8, r8, #128 @ Cb -= 128
77 sub r9, r9, #128 @ Cr -= 128
79 add r10, r9, r9, asl #1 @ r10 = Cr*51 + Cb*24
80 add r10, r10, r10, asl #4 @
81 add r10, r10, r8, asl #3 @
82 add r10, r10, r8, asl #4 @
84 add lr, r9, r9, asl #2 @ r9 = Cr*101
85 add lr, lr, r9, asl #5 @
86 add r9, lr, r9, asl #6 @
@ NOTE(review): only the rounding additions for bu and rv are visible; the
@ right shifts named in the two comments below are not in this listing.
88 add r8, r8, #2 @ r8 = bu = (Cb*128 + 128) >> 8
90 add r9, r9, #256 @ r9 = rv = (r9 + 256) >> 9
92 rsb r10, r10, #128 @ r10 = guv = (-r10 + 128) >> 8
93 mov r10, r10, asr #8 @
@ pixel 1: compute R, G, and B (shift counts are one less than the comments
@ say because Y holds (Y' - 16)*37, see above)
95 add r0, r8, r7, asr #8 @ r0 = b = (Y >> 9) + bu
96 add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv
97 add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
@ NOTE(review): r12 is computed as a combined range check but no compare or
@ branch using it is visible; as listed, the clamps below always run.
99 orr r12, r0, lr @ check if clamping is needed...
100 orr r12, r12, r7, asr #1 @ ...at all
@ Unsigned `hi` is true for values above the limit and for negative values;
@ mvn of the sign mask yields 0 for negative input and all ones otherwise.
@ NOTE(review): no mask to 5/6 bits after the mvnhi is visible here.
103 cmp r0, #31 @ clamp b
104 mvnhi r0, r0, asr #31 @
106 cmp lr, #31 @ clamp r
107 mvnhi lr, lr, asr #31 @
109 cmp r7, #63 @ clamp g
110 mvnhi r7, r7, asr #31 @
114 ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
116 orr r0, r0, lr, lsl #11 @ r0 = (r << 11) | b
117 orr r11, r0, r7, lsl #5 @ r11 = (r << 11) | (g << 5) | b (pixel 1)
119 sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
120 add r12, r7, r7, asl #2 @
121 add r7, r12, r7, asl #5 @
122 @ compute R, G, and B
123 add r0, r8, r7, asr #8 @ r0 = b = (Y >> 9) + bu
124 add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv
125 add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
127 orr r12, r0, lr @ check if clamping is needed...
128 orr r12, r12, r7, asr #1 @ ...at all
131 cmp r0, #31 @ clamp b
132 mvnhi r0, r0, asr #31 @
134 cmp lr, #31 @ clamp r
135 mvnhi lr, lr, asr #31 @
137 cmp r7, #63 @ clamp g
138 mvnhi r7, r7, asr #31 @
142 ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++)
144 orr r0, r0, lr, lsl #11 @ r0 = (r << 11) | b
145 orr r0, r0, r7, lsl #5 @ r0 = (r << 11) | (g << 5) | b
146 orr r0, r11, r0, lsl#16 @ r0 = pixel 1 | (pixel 2 << 16)
147 str r0, [r3, #0x10] @ write both pixels as one 32-bit word
149 sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
150 add r12, r7, r7, asl #2 @
151 add r7, r12, r7, asl #5 @
152 @ compute R, G, and B
153 add r0, r8, r7, asr #8 @ r0 = b = (Y >> 9) + bu
154 add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv
155 add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
157 orr r12, r0, lr @ check if clamping is needed...
158 orr r12, r12, r7, asr #1 @ ...at all
161 cmp r0, #31 @ clamp b
162 mvnhi r0, r0, asr #31 @
164 cmp lr, #31 @ clamp r
165 mvnhi lr, lr, asr #31 @
167 cmp r7, #63 @ clamp g
168 mvnhi r7, r7, asr #31 @
172 ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
175 orr r0, r0, lr, lsl #11 @ r0 = (r << 11) | b
176 orr r11, r0, r7, lsl #5 @ r11 = (r << 11) | (g << 5) | b (pixel 3)
178 sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
179 add r12, r7, r7, asl #2 @
180 add r7, r12, r7, asl #5 @
181 @ compute R, G, and B
182 add r0, r8, r7, asr #8 @ r0 = b = (Y >> 9) + bu
183 add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv
184 add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
186 orr r12, r0, lr @ check if clamping is needed...
187 orr r12, r12, r7, asr #1 @ ...at all
190 cmp r0, #31 @ clamp b
191 mvnhi r0, r0, asr #31 @
193 cmp lr, #31 @ clamp r
194 mvnhi lr, lr, asr #31 @
196 cmp r7, #63 @ clamp g
197 mvnhi r7, r7, asr #31 @
201 orr r0, r0, lr, lsl #11 @ r0 = (r << 11) | b
202 orr r0, r0, r7, lsl #5 @ r0 = (r << 11) | (g << 5) | b
203 orr r0, r11, r0, lsl#16 @ r0 = pixel 3 | (pixel 4 << 16)
204 str r0, [r3, #0x10] @ write both pixels as one 32-bit word
206 subs r1, r1, #2 @ subtract block from width
207 bgt 10b @ loop line @
210 @ at most 110 32-bit words (LCD_WIDTH/2) are written, the fifo is bigger,
211 @ so polling for fifo empty only after the loop is safe
212 ldr r7, [r3,#0xc] @ r7 = DBOP_STATUS
@ NOTE(review): the flags set by this tst are not consumed by any visible
@ instruction; a polling branch is presumably missing from this listing.
213 tst r7, #DBOP_BUSY @ fifo not empty?
216 ldmfd sp!, { r4-r11, pc } @ restore registers and return
218 .ltorg @ dump constant pool
219 .size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines
221 /****************************************************************************
222 * void lcd_write_yuv420_lines_odither(unsigned char const * const src[3],
228 * |R| |1.000000 -0.000001 1.402000| |Y'|
229 * |G| = |1.000000 -0.334136 -0.714136| |Pb|
230 * |B| |1.000000 1.772000 0.000000| |Pr|
231 * Red scaled at twice g & b but at same precision to place it in correct
232 * bit position after multiply and leave instruction count lower.
233 * |R| |258 0 408| |Y' - 16|
234 * |G| = |149 -49 -104| |Cb - 128|
235 * |B| |149 258 0| |Cr - 128|
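237 * Write four RGB565 pixels in the following order on each loop:
 * 1 = *Y'_p, 2 = *(Y'_p + stride), 3 = next *Y'_p, 4 = next *(Y'_p + stride).
 * Pixels 1+2 and 3+4 are each packed into one 32-bit word (1 resp. 3 in the
 * low halfword) and written with a single store.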
241 * Kernel pattern (raw|rotated|use order):
242 * 5 3 4 2 2 6 3 7 row0 row2 > down
243 * 1 7 0 6 | 4 0 5 1 | 2 4 6 0 3 5 7 1 col0 left
244 * 4 2 5 3 | 3 7 2 6 | 3 5 7 1 2 4 6 0 col2 \/
 */
247 .section .icode, "ax", %progbits
249 .global lcd_write_yuv420_lines_odither
250 .type lcd_write_yuv420_lines_odither, %function
@ ---------------------------------------------------------------------------
@ lcd_write_yuv420_lines_odither
@
@ Same data flow as the plain routine (one Cb/Cr sample and four Y' samples
@ per loop, two 32-bit stores to DBOP base + 0x10), but the colour
@ components are kept at higher precision and a per-pixel dither offset
@ (r14 plus 0x100, 0x200, 0x300 or 0) is added before they are clamped and
@ truncated to RGB565.
@
@ Register use as visible below:
@   r0      in: pointer to { Y'_p, Cb_p, Cr_p }; then b / second pixel /
@           DBOP base for the store
@   r1      remaining width, decremented by 2 per iteration
@   r2      stride - 1
@   r3      DBOP base during setup, then the first pixel of each pair
@   r4-r6   Y', Cb, Cr read pointers
@   r7      Y, then g      r8 = bu      r9 = rv      r10 = guv      r11 = r
@   r12     scratch / dither delta
@   r14     dither pattern quadrant, toggled by 0x80 on each loop
@
@ NOTE(review): the embedded listing numbers skip at several places and some
@ instructions the comments describe are not visible (masking of r14, load
@ of r12, the `10:` loop label, a final scaling of rv, the branches after
@ the clamp pre-checks and after the final tst). Confirm against the
@ original file before assembling; the individual spots are marked below.
@ ---------------------------------------------------------------------------
251 lcd_write_yuv420_lines_odither:
257 stmfd sp!, { r4-r11, lr } @ save non-scratch
258 ldmia r0, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p
259 @ r5 = yuv_src[1] = Cb_p
260 @ r6 = yuv_src[2] = Cr_p
@ NOTE(review): nine registers (36 bytes) were just pushed, so [sp, #40] is
@ the second word above the entry-time stack pointer - confirm that this is
@ the intended stack argument.
262 ldr r14, [sp, #40] @ Line up pattern and kernel quadrant
263 sub r2, r2, #1 @ stride -= 1
@ NOTE(review): "0x00 or 0x80" only holds if r14 was reduced to 0 or 2
@ beforehand; no such masking is visible in this listing.
266 mov r14, r14, lsl #6 @ 0x00 or 0x80
268 mov r3, #0xC8000000 @
269 orr r3, r3, #0x120000 @ r3 = DBOP_BASE, need to be redone
270 @ due to lack of registers
@ NOTE(review): r12 is modified before any visible load; a preceding
@ ldr r12, [r3, #8] is presumably missing from this listing.
272 orr r12, r12, #3<<13 @ DBOP_CTRL |= (1<<13|1<<14)
273 str r12, [r3, #8] @ (32bit mode)
@ NOTE(review): `bgt 10b` at the bottom needs a local label `10:`; it is
@ presumably meant to be here (start of the per-block loop) but is not visible.
276 ldrb r7, [r4], #1 @ r7 = *Y'_p++;
277 ldrb r8, [r5], #1 @ r8 = *Cb_p++;
278 ldrb r9, [r6], #1 @ r9 = *Cr_p++;
280 eor r14, r14, #0x80 @ flip pattern quadrant
282 sub r7, r7, #16 @ r7 = Y = (Y' - 16)*149
283 add r12, r7, r7, asl #2 @
284 add r12, r12, r12, asl #4 @
285 add r7, r12, r7, asl #6 @
287 sub r8, r8, #128 @ Cb -= 128
288 sub r9, r9, #128 @ Cr -= 128
290 add r10, r8, r8, asl #4 @ r10 = guv = Cr*104 + Cb*49
291 add r10, r10, r8, asl #5 @
292 add r10, r10, r9, asl #3 @
293 add r10, r10, r9, asl #5 @
294 add r10, r10, r9, asl #6 @
296 mov r8, r8, asl #1 @ r8 = bu = Cb*258
297 add r8, r8, r8, asl #7 @
@ NOTE(review): the two instructions below multiply by 3 and then by 17,
@ i.e. Cr*51; a further shift left by 3 would be needed for the Cr*408 the
@ comment states and is not visible in this listing.
299 add r9, r9, r9, asl #1 @ r9 = rv = Cr*408
300 add r9, r9, r9, asl #4 @
303 @ compute R, G, and B
304 add r0, r8, r7 @ r0 = b' = Y + bu
305 add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
306 rsb r7, r10, r7 @ r7 = g' = Y - guv
308 @ r8 = bu, r9 = rv, r10 = guv
310 sub r12, r0, r0, lsr #5 @ r0 = 31/32*b + b/256
311 add r0, r12, r0, lsr #8 @
313 sub r12, r11, r11, lsr #5 @ r11 = 31/32*r + r/256
314 add r11, r12, r11, lsr #8 @
316 sub r12, r7, r7, lsr #6 @ r7 = 63/64*g + g/256
317 add r7, r12, r7, lsr #8 @
319 add r12, r14, #0x100 @ r12 = delta = pattern quadrant + 0x100
321 add r0, r0, r12 @ b = r0 + delta
322 add r11, r11, r12, lsl #1 @ r = r11 + delta*2
323 add r7, r7, r12, lsr #1 @ g = r7 + delta/2
@ NOTE(review): the flags from this pre-check are overwritten by the next
@ movs without being used; a branch that skips the clamps is presumably
@ missing from this listing (same for the three later pixels).
325 orr r12, r0, r11, asr #1 @ check if clamping is needed...
326 orr r12, r12, r7 @ ...at all
327 movs r12, r12, asr #15 @
329 movs r12, r0, asr #15 @ clamp b
330 mvnne r0, r12, lsr #15 @
331 andne r0, r0, #0x7c00 @ mask b only if clamped
332 movs r12, r11, asr #16 @ clamp r
333 mvnne r11, r12, lsr #16 @
334 movs r12, r7, asr #15 @ clamp g
335 mvnne r7, r12, lsr #15 @
338 ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
340 and r11, r11, #0xf800 @ pack pixel
341 and r7, r7, #0x7e00 @ r3 = pixel = (r & 0xf800) |
342 orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
343 orr r3, r11, r0, lsr #10 @ (b >> 10)
345 sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
346 add r12, r7, r7, asl #2 @
347 add r12, r12, r12, asl #4 @
348 add r7, r12, r7, asl #6 @
349 @ compute R, G, and B
350 add r0, r8, r7 @ r0 = b' = Y + bu
351 add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
352 rsb r7, r10, r7 @ r7 = g' = Y - guv
354 sub r12, r0, r0, lsr #5 @ r0 = 31/32*b' + b'/256
355 add r0, r12, r0, lsr #8 @
357 sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256
358 add r11, r12, r11, lsr #8 @
360 sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256
361 add r7, r12, r7, lsr #8 @
363 add r12, r14, #0x200 @ r12 = delta = pattern quadrant + 0x200
365 add r0, r0, r12 @ b = r0 + delta
366 add r11, r11, r12, lsl #1 @ r = r11 + delta*2
367 add r7, r7, r12, lsr #1 @ g = r7 + delta/2
369 orr r12, r0, r11, asr #1 @ check if clamping is needed...
370 orr r12, r12, r7 @ ...at all
371 movs r12, r12, asr #15 @
373 movs r12, r0, asr #15 @ clamp b
374 mvnne r0, r12, lsr #15 @
375 andne r0, r0, #0x7c00 @ mask b only if clamped
376 movs r12, r11, asr #16 @ clamp r
377 mvnne r11, r12, lsr #16 @
378 movs r12, r7, asr #15 @ clamp g
379 mvnne r7, r12, lsr #15 @
382 ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++)
384 and r11, r11, #0xf800 @ pack pixel
385 and r7, r7, #0x7e00 @ r0 = pixel = (r & 0xf800) |
386 orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
387 orr r0, r11, r0, lsr #10 @ (b >> 10)
388 orr r3, r3, r0, lsl#16 @ r3 = pixel 1 | (pixel 2 << 16)
389 mov r0, #0xC8000000 @
390 orr r0, r0, #0x120000 @ r0 = DBOP_BASE (rebuilt, r3 holds the pixels)
392 str r3, [r0, #0x10] @ write both pixels as one 32-bit word
394 sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
395 add r12, r7, r7, asl #2 @
396 add r12, r12, r12, asl #4 @
397 add r7, r12, r7, asl #6 @
398 @ compute R, G, and B
399 add r0, r8, r7 @ r0 = b' = Y + bu
400 add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
401 rsb r7, r10, r7 @ r7 = g' = Y - guv
403 @ r8 = bu, r9 = rv, r10 = guv
405 sub r12, r0, r0, lsr #5 @ r0 = 31/32*b' + b'/256
406 add r0, r12, r0, lsr #8 @
408 sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256
409 add r11, r12, r11, lsr #8 @
411 sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256
412 add r7, r12, r7, lsr #8 @
414 add r12, r14, #0x300 @ r12 = delta = pattern quadrant + 0x300
416 add r0, r0, r12 @ b = r0 + delta
417 add r11, r11, r12, lsl #1 @ r = r11 + delta*2
418 add r7, r7, r12, lsr #1 @ g = r7 + delta/2
420 orr r12, r0, r11, asr #1 @ check if clamping is needed...
421 orr r12, r12, r7 @ ...at all
422 movs r12, r12, asr #15 @
424 movs r12, r0, asr #15 @ clamp b
425 mvnne r0, r12, lsr #15 @
426 andne r0, r0, #0x7c00 @ mask b only if clamped
427 movs r12, r11, asr #16 @ clamp r
428 mvnne r11, r12, lsr #16 @
429 movs r12, r7, asr #15 @ clamp g
430 mvnne r7, r12, lsr #15 @
433 ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
435 and r11, r11, #0xf800 @ pack pixel
436 and r7, r7, #0x7e00 @ r3 = pixel = (r & 0xf800) |
437 orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
438 orr r3, r11, r0, lsr #10 @ (b >> 10)
441 sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
442 add r12, r7, r7, asl #2 @
443 add r12, r12, r12, asl #4 @
444 add r7, r12, r7, asl #6 @
445 @ compute R, G, and B
446 add r0, r8, r7 @ r0 = b' = Y + bu
447 add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
448 rsb r7, r10, r7 @ r7 = g' = Y - guv
450 sub r12, r0, r0, lsr #5 @ r0 = 31/32*b + b/256
451 add r0, r12, r0, lsr #8 @
453 sub r12, r11, r11, lsr #5 @ r11 = 31/32*r + r/256
454 add r11, r12, r11, lsr #8 @
456 sub r12, r7, r7, lsr #6 @ r7 = 63/64*g + g/256
457 add r7, r12, r7, lsr #8 @
459 @ This element is zero - use r14 @
461 add r0, r0, r14 @ b = r0 + delta
462 add r11, r11, r14, lsl #1 @ r = r11 + delta*2
463 add r7, r7, r14, lsr #1 @ g = r7 + delta/2
465 orr r12, r0, r11, asr #1 @ check if clamping is needed...
466 orr r12, r12, r7 @ ...at all
467 movs r12, r12, asr #15 @
469 movs r12, r0, asr #15 @ clamp b
470 mvnne r0, r12, lsr #15 @
471 andne r0, r0, #0x7c00 @ mask b only if clamped
472 movs r12, r11, asr #16 @ clamp r
473 mvnne r11, r12, lsr #16 @
474 movs r12, r7, asr #15 @ clamp g
475 mvnne r7, r12, lsr #15 @
478 and r11, r11, #0xf800 @ pack pixel
479 and r7, r7, #0x7e00 @ r0 = pixel = (r & 0xf800) |
480 orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
481 orr r0, r11, r0, lsr #10 @ (b >> 10)
482 orr r3, r3, r0, lsl#16 @ r3 = pixel 3 | (pixel 4 << 16)
483 mov r0, #0xC8000000 @
484 orr r0, r0, #0x120000 @ r0 = DBOP_BASE (rebuilt, r3 holds the pixels)
486 str r3, [r0, #0x10] @ write both pixels as one 32-bit word
488 subs r1, r1, #2 @ subtract block from width
489 bgt 10b @ loop line @
492 @ at most 110 32-bit words (LCD_WIDTH/2) are written, the fifo is bigger,
493 @ so polling for fifo empty only after the loop is safe
494 ldr r7, [r0,#0xc] @ r7 = DBOP_STATUS (r0 still holds DBOP_BASE)
@ NOTE(review): the flags set by this tst are not consumed by any visible
@ instruction; a polling branch is presumably missing from this listing.
495 tst r7, #DBOP_BUSY @ fifo not empty?
498 ldmfd sp!, { r4-r11, pc } @ restore registers and return
499 .ltorg @ dump constant pool
500 .size lcd_write_yuv420_lines_odither, .-lcd_write_yuv420_lines_odither