1 /***************************************************************************
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
10 * Copyright (C) 2007 by Jens Arnold
11 * Heavily based on lcd-as-memframe.c by Michael Sevakis
12 * Adapted for Sansa Fuzev2 by Rafaël Carré
14 * This program is free software; you can redistribute it and/or
15 * modify it under the terms of the GNU General Public License
16 * as published by the Free Software Foundation; either version 2
17 * of the License, or (at your option) any later version.
19 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
20 * KIND, either express or implied.
22 ****************************************************************************/
27 #define DBOP_BUSY (1<<10)
29 /****************************************************************************
30 * void lcd_write_yuv420_lines(unsigned char const * const src[3],
34 * |R| |1.000000 -0.000001 1.402000| |Y'|
35 * |G| = |1.000000 -0.334136 -0.714136| |Pb|
36 * |B| |1.000000 1.772000 0.000000| |Pr|
37 * Scaled, normalized, rounded and tweaked to yield RGB 565:
38 * |R| |74 0 101| |Y' - 16| >> 9
39 * |G| = |74 -24 -51| |Cb - 128| >> 8
40 * |B| |74 128 0| |Cr - 128| >> 9
42 * Write four RGB565 pixels in the following order on each loop:
46 .section .icode, "ax", %progbits
48 .global lcd_write_yuv420_lines
49 .type lcd_write_yuv420_lines, %function
@ lcd_write_yuv420_lines(src[3], width, stride):
@   Converts two lines of YUV 4:2:0 data to RGB565 (using the scaled
@   matrix in the header comment above) and streams the result, two
@   16-bit byte-swapped pixels packed per 32-bit store, to the DBOP
@   data FIFO. Presumably r0 = src (Y'/Cb/Cr plane pointers),
@   r1 = width, r2 = stride, per the header comment -- confirm caller.
@ NOTE(review): this listing appears to be missing several lines that
@ the surrounding code implies: the `10:` loop label targeted by
@ `bgt 10b`, the `mov r3, #0xC8000000` before the first orr (cf. the
@ odither routine below), the cmp/branch pairs that skip the clamp
@ sequences, the `andhi` masks after each `mvnhi`, the `asr`s implied
@ by the ">> 9" comments on bu/rv, the `mov ..., lsr #8` half of one
@ byte swap, and the fifo-drain branch after the DBOP_BUSY test.
@ Verify against the upstream file before treating this as complete.
50 lcd_write_yuv420_lines:
54 stmfd sp!, { r4-r11, lr } @ save non-scratch registers
@ NOTE(review): `mov r3, #0xC8000000` expected before this orr
57 orr r3, r3, #0x120000 @ r3 = DBOP_BASE
59 ldmia r0, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p
60 @ r5 = yuv_src[1] = Cb_p
61 @ r6 = yuv_src[2] = Cr_p
64 sub r2, r2, #1 @ stride -= 1 (Y'_p advances by 1 inside the loop)
@ per-2x2-pixel-block loop (label `10:` not visible in this chunk)
67 ldrb r7, [r4], #1 @ r7 = *Y'_p++;
68 ldrb r8, [r5], #1 @ r8 = *Cb_p++;
69 ldrb r9, [r6], #1 @ r9 = *Cr_p++;
71 sub r7, r7, #16 @ r7 = Y = (Y' - 16)*74
72 add r12, r7, r7, asl #2 @ actually (Y' - 16)*37 and shift right
73 add r7, r12, r7, asl #5 @ by one less when adding - same for all
75 sub r8, r8, #128 @ Cb -= 128
76 sub r9, r9, #128 @ Cr -= 128
78 add r10, r9, r9, asl #1 @ r10 = Cr*51 + Cb*24: Cr*3 ...
79 add r10, r10, r10, asl #4 @ ... *17 = Cr*51 ...
80 add r10, r10, r8, asl #3 @ ... + Cb*8 ...
81 add r10, r10, r8, asl #4 @ ... + Cb*16
83 add lr, r9, r9, asl #2 @ r9 = Cr*101: Cr*5 ...
84 add lr, lr, r9, asl #5 @ ... + Cr*32 = Cr*37 ...
85 add r9, lr, r9, asl #6 @ ... + Cr*64 = Cr*101
87 add r8, r8, #2 @ r8 = bu = (Cb*128 + 256) >> 9 (B row of matrix above); NOTE(review): completing asr not visible here
89 add r9, r9, #256 @ r9 = rv = (r9 + 256) >> 9; NOTE(review): `asr #9` not visible here
91 rsb r10, r10, #128 @ r10 = guv = (-r10 + 128) >> 8
92 mov r10, r10, asr #8 @
94 add r0, r8, r7, asr #8 @ r0 = b = (Y >> 9) + bu (Y held as *37, so asr #8)
95 add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv
96 add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
98 orr r12, r0, lr @ check if clamping is needed...
99 orr r12, r12, r7, asr #1 @ ...at all (skip branch not visible here)
102 cmp r0, #31 @ clamp b
103 mvnhi r0, r0, asr #31 @ 0 if negative, all-ones if > 31 (`andhi #31` not visible)
105 cmp lr, #31 @ clamp r
106 mvnhi lr, lr, asr #31 @
108 cmp r7, #63 @ clamp g
109 mvnhi r7, r7, asr #31 @
113 ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
115 orr r0, r0, lr, lsl #11 @ r0 = (r << 11) | b
116 orr r11, r0, r7, lsl #5 @ r11 = (r << 11) | (g << 5) | b
118 mov r0, r11, lsr #8 @
119 bic r11, r11, #0xff00 @
120 orr r11, r0, r11, lsl #8 @ swap bytes of first pixel
122 sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
123 add r12, r7, r7, asl #2 @
124 add r7, r12, r7, asl #5 @
125 @ compute R, G, and B
126 add r0, r8, r7, asr #8 @ r0 = b = (Y >> 9) + bu
127 add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv
128 add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
130 orr r12, r0, lr @ check if clamping is needed...
131 orr r12, r12, r7, asr #1 @ ...at all
134 cmp r0, #31 @ clamp b
135 mvnhi r0, r0, asr #31 @
137 cmp lr, #31 @ clamp r
138 mvnhi lr, lr, asr #31 @
140 cmp r7, #63 @ clamp g
141 mvnhi r7, r7, asr #31 @
145 ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++)
147 orr r0, r0, lr, lsl #11 @ r0 = (r << 11) | b
148 orr r0, r0, r7, lsl #5 @ r0 = (r << 11) | (g << 5) | b
@ NOTE(review): a `mov r7, r0, lsr #8` appears missing before this
@ swap (cf. the first pixel's swap sequence above)
151 bic r7, r7, #0xff00 @
152 orr r0, r7, r0, lsl #8 @ swap bytes
154 orr r0, r11, r0, lsl#16 @ pack with 2nd pixel
155 str r0, [r3, #0x10] @ write pixel pair to DBOP data FIFO
157 sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
158 add r12, r7, r7, asl #2 @
159 add r7, r12, r7, asl #5 @
160 @ compute R, G, and B
161 add r0, r8, r7, asr #8 @ r0 = b = (Y >> 9) + bu
162 add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv
163 add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
165 orr r12, r0, lr @ check if clamping is needed...
166 orr r12, r12, r7, asr #1 @ ...at all
169 cmp r0, #31 @ clamp b
170 mvnhi r0, r0, asr #31 @
172 cmp lr, #31 @ clamp r
173 mvnhi lr, lr, asr #31 @
175 cmp r7, #63 @ clamp g
176 mvnhi r7, r7, asr #31 @
180 ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
183 orr r0, r0, lr, lsl #11 @ r0 = (r << 11) | b
184 orr r11, r0, r7, lsl #5 @ r11 = (r << 11) | (g << 5) | b
186 mov r0, r11, lsr #8 @
187 bic r11, r11, #0xff00 @
188 orr r11, r0, r11, lsl #8 @ swap bytes
190 sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
191 add r12, r7, r7, asl #2 @
192 add r7, r12, r7, asl #5 @
193 @ compute R, G, and B
194 add r0, r8, r7, asr #8 @ r0 = b = (Y >> 9) + bu
195 add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv
196 add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
198 orr r12, r0, lr @ check if clamping is needed...
199 orr r12, r12, r7, asr #1 @ ...at all
202 cmp r0, #31 @ clamp b
203 mvnhi r0, r0, asr #31 @
205 cmp lr, #31 @ clamp r
206 mvnhi lr, lr, asr #31 @
208 cmp r7, #63 @ clamp g
209 mvnhi r7, r7, asr #31 @
213 orr r0, r0, lr, lsl #11 @ r0 = (r << 11) | b
214 orr r0, r0, r7, lsl #5 @ r0 = (r << 11) | (g << 5) | b
@ NOTE(review): `mov r7, r0, lsr #8` appears missing here as well
217 bic r7, r7, #0xff00 @
218 orr r0, r7, r0, lsl #8 @ swap bytes
220 orr r0, r11, r0, lsl#16 @ pack with 2nd pixel
221 str r0, [r3, #0x10] @ write pixel pair to DBOP data FIFO
223 subs r1, r1, #2 @ subtract block from width
224 bgt 10b @ loop line (label `10:` not visible in this chunk)
227 @ writing at max 110*32 bits (LCD_WIDTH/2), the fifo is bigger
228 @ so polling fifo empty after the loops is safe
229 ldr r7, [r3,#0xc] @ r7 = DBOP_STATUS
230 tst r7, #DBOP_BUSY @ fifo not empty? (poll-loop branch not visible here)
233 ldmfd sp!, { r4-r11, pc } @ restore registers and return
235 .ltorg @ dump constant pool
236 .size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines
238 /****************************************************************************
239 * void lcd_write_yuv420_lines_odither(unsigned char const * const src[3],
245 * |R| |1.000000 -0.000001 1.402000| |Y'|
246 * |G| = |1.000000 -0.334136 -0.714136| |Pb|
247 * |B| |1.000000 1.772000 0.000000| |Pr|
248 * Red scaled at twice g & b but at same precision to place it in correct
249 * bit position after multiply and leave instruction count lower.
250 * |R| |258 0 408| |Y' - 16|
251 * |G| = |149 -49 -104| |Cb - 128|
252 * |B| |149 258 0| |Cr - 128|
254 * Write four RGB565 pixels in the following order on each loop:
258 * Kernel pattern (raw|rotated|use order):
259 * 5 3 4 2 2 6 3 7 row0 row2 > down
260 * 1 7 0 6 | 4 0 5 1 | 2 4 6 0 3 5 7 1 col0 left
261 * 4 2 5 3 | 3 7 2 6 | 3 5 7 1 2 4 6 0 col2 \/
264 .section .icode, "ax", %progbits
266 .global lcd_write_yuv420_lines_odither
267 .type lcd_write_yuv420_lines_odither, %function
@ lcd_write_yuv420_lines_odither(src[3], width, stride, ...):
@   Same two-line YUV 4:2:0 -> RGB565 conversion as above, but with
@   ordered dithering; red is scaled at twice g/b (see matrix in the
@   header comment) so it lands in its final bit position after the
@   multiply. The DBOP base address must be rebuilt in a scratch
@   register before each store because every register is occupied.
@ NOTE(review): as in the routine above, several lines appear missing
@ from this chunk: the `10:` loop label targeted by `bgt 10b`, the
@ skip branches after each `movs ... asr #15` clamp test, the
@ `mov ..., lsr #8` halves of some byte-swap sequences, an `asl #3`
@ completing the Cr*408 multiply, and the fifo-drain branch at the
@ end. Verify against the upstream file.
268 lcd_write_yuv420_lines_odither:
274 stmfd sp!, { r4-r11, lr } @ save non-scratch
275 ldmia r0, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p
276 @ r5 = yuv_src[1] = Cb_p
277 @ r6 = yuv_src[2] = Cr_p
279 ldr r14, [sp, #40] @ line up pattern and kernel quadrant (stack arg; offset assumes layout not visible here -- confirm)
280 sub r2, r2, #1 @ stride -= 1
283 mov r14, r14, lsl #6 @ 0x00 or 0x80 (selects dither kernel quadrant)
285 mov r3, #0xC8000000 @
286 orr r3, r3, #0x120000 @ r3 = DBOP_BASE, needs to be redone
287 @ due to lack of registers
@ per-2x2-pixel-block loop (label `10:` not visible in this chunk)
290 ldrb r7, [r4], #1 @ r7 = *Y'_p++;
291 ldrb r8, [r5], #1 @ r8 = *Cb_p++;
292 ldrb r9, [r6], #1 @ r9 = *Cr_p++;
294 eor r14, r14, #0x80 @ flip pattern quadrant each block
296 sub r7, r7, #16 @ r7 = Y = (Y' - 16)*149
297 add r12, r7, r7, asl #2 @ Y*5 ...
298 add r12, r12, r12, asl #4 @ ... *17 = Y*85 ...
299 add r7, r12, r7, asl #6 @ ... + Y*64 = Y*149
301 sub r8, r8, #128 @ Cb -= 128
302 sub r9, r9, #128 @ Cr -= 128
304 add r10, r8, r8, asl #4 @ r10 = guv = Cr*104 + Cb*49
305 add r10, r10, r8, asl #5 @
306 add r10, r10, r9, asl #3 @
307 add r10, r10, r9, asl #5 @
308 add r10, r10, r9, asl #6 @
310 mov r8, r8, asl #1 @ r8 = bu = Cb*258 (Cb*2*129)
311 add r8, r8, r8, asl #7 @
313 add r9, r9, r9, asl #1 @ r9 = rv = Cr*408: Cr*3*17 = Cr*51 ...
314 add r9, r9, r9, asl #4 @ ... NOTE(review): `asl #3` (*8 = 408) not visible here
317 @ compute R, G, and B
318 add r0, r8, r7 @ r0 = b' = Y + bu
319 add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
320 rsb r7, r10, r7 @ r7 = g' = Y - guv (guv holds the negated G terms)
322 @ r8 = bu, r9 = rv, r10 = guv
324 sub r12, r0, r0, lsr #5 @ r0 = 31/32*b' + b'/256
325 add r0, r12, r0, lsr #8 @
327 sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256
328 add r11, r12, r11, lsr #8 @
330 sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256
331 add r7, r12, r7, lsr #8 @
333 add r12, r14, #0x100 @ r12 = delta = kernel element for 1st pixel
335 add r0, r0, r12 @ b = r0 + delta
336 add r11, r11, r12, lsl #1 @ r = r11 + delta*2
337 add r7, r7, r12, lsr #1 @ g = r7 + delta/2
339 orr r12, r0, r11, asr #1 @ check if clamping is needed...
340 orr r12, r12, r7 @ ...at all
341 movs r12, r12, asr #15 @ (skip branch after this test not visible here)
343 movs r12, r0, asr #15 @ clamp b
344 mvnne r0, r12, lsr #15 @
345 andne r0, r0, #0x7c00 @ mask b only if clamped
346 movs r12, r11, asr #16 @ clamp r
347 mvnne r11, r12, lsr #16 @
348 movs r12, r7, asr #15 @ clamp g
349 mvnne r7, r12, lsr #15 @
352 ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
354 and r11, r11, #0xf800 @ pack pixel
355 and r7, r7, #0x7e00 @ r3 = pixel = (r & 0xf800) |
356 orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
357 orr r3, r11, r0, lsr #10 @ (b >> 10)
@ NOTE(review): a `mov r7, r3, lsr #8` appears missing before this swap
360 bic r3, r3, #0xff00 @
361 orr r3, r7, r3, lsl #8 @ swap pixel bytes
363 sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
364 add r12, r7, r7, asl #2 @
365 add r12, r12, r12, asl #4 @
366 add r7, r12, r7, asl #6 @
367 @ compute R, G, and B
368 add r0, r8, r7 @ r0 = b' = Y + bu
369 add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
370 rsb r7, r10, r7 @ r7 = g' = Y - guv
372 sub r12, r0, r0, lsr #5 @ r0 = 31/32*b' + b'/256
373 add r0, r12, r0, lsr #8 @
375 sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256
376 add r11, r12, r11, lsr #8 @
378 sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256
379 add r7, r12, r7, lsr #8 @
381 add r12, r14, #0x200 @ r12 = delta = kernel element for 2nd pixel
383 add r0, r0, r12 @ b = r0 + delta
384 add r11, r11, r12, lsl #1 @ r = r11 + delta*2
385 add r7, r7, r12, lsr #1 @ g = r7 + delta/2
387 orr r12, r0, r11, asr #1 @ check if clamping is needed...
388 orr r12, r12, r7 @ ...at all
389 movs r12, r12, asr #15 @
391 movs r12, r0, asr #15 @ clamp b
392 mvnne r0, r12, lsr #15 @
393 andne r0, r0, #0x7c00 @ mask b only if clamped
394 movs r12, r11, asr #16 @ clamp r
395 mvnne r11, r12, lsr #16 @
396 movs r12, r7, asr #15 @ clamp g
397 mvnne r7, r12, lsr #15 @
400 ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++)
402 and r11, r11, #0xf800 @ pack pixel
403 and r7, r7, #0x7e00 @ r0 = pixel = (r & 0xf800) |
404 orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
405 orr r0, r11, r0, lsr #10 @ (b >> 10)
@ NOTE(review): `mov r7, r0, lsr #8` appears missing here as well
408 bic r0, r0, #0xff00 @
409 orr r0, r7, r0, lsl #8 @ swap pixel bytes
410 orr r3, r3, r0, lsl#16 @ pack with 2nd pixel
411 mov r0, #0xC8000000 @ rebuild DBOP base (no spare register)
412 orr r0, r0, #0x120000 @ r0 = DBOP_BASE
414 str r3, [r0, #0x10] @ write pixel pair to DBOP data FIFO
416 sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
417 add r12, r7, r7, asl #2 @
418 add r12, r12, r12, asl #4 @
419 add r7, r12, r7, asl #6 @
420 @ compute R, G, and B
421 add r0, r8, r7 @ r0 = b' = Y + bu
422 add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
423 rsb r7, r10, r7 @ r7 = g' = Y - guv
425 @ r8 = bu, r9 = rv, r10 = guv
427 sub r12, r0, r0, lsr #5 @ r0 = 31/32*b' + b'/256
428 add r0, r12, r0, lsr #8 @
430 sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256
431 add r11, r12, r11, lsr #8 @
433 sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256
434 add r7, r12, r7, lsr #8 @
436 add r12, r14, #0x300 @ r12 = delta = kernel element for 3rd pixel
438 add r0, r0, r12 @ b = r0 + delta
439 add r11, r11, r12, lsl #1 @ r = r11 + delta*2
440 add r7, r7, r12, lsr #1 @ g = r7 + delta/2
442 orr r12, r0, r11, asr #1 @ check if clamping is needed...
443 orr r12, r12, r7 @ ...at all
444 movs r12, r12, asr #15 @
446 movs r12, r0, asr #15 @ clamp b
447 mvnne r0, r12, lsr #15 @
448 andne r0, r0, #0x7c00 @ mask b only if clamped
449 movs r12, r11, asr #16 @ clamp r
450 mvnne r11, r12, lsr #16 @
451 movs r12, r7, asr #15 @ clamp g
452 mvnne r7, r12, lsr #15 @
455 ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
457 and r11, r11, #0xf800 @ pack pixel
458 and r7, r7, #0x7e00 @ r3 = pixel = (r & 0xf800) |
459 orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
460 orr r3, r11, r0, lsr #10 @ (b >> 10)
463 bic r3, r3, #0xff00 @
464 orr r3, r7, r3, lsl #8 @ swap pixel bytes
467 sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
468 add r12, r7, r7, asl #2 @
469 add r12, r12, r12, asl #4 @
470 add r7, r12, r7, asl #6 @
471 @ compute R, G, and B
472 add r0, r8, r7 @ r0 = b' = Y + bu
473 add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
474 rsb r7, r10, r7 @ r7 = g' = Y - guv
476 sub r12, r0, r0, lsr #5 @ r0 = 31/32*b' + b'/256
477 add r0, r12, r0, lsr #8 @
479 sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256
480 add r11, r12, r11, lsr #8 @
482 sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256
483 add r7, r12, r7, lsr #8 @
485 @ This kernel element is zero - use r14 directly as delta @
487 add r0, r0, r14 @ b = r0 + delta
488 add r11, r11, r14, lsl #1 @ r = r11 + delta*2
489 add r7, r7, r14, lsr #1 @ g = r7 + delta/2
491 orr r12, r0, r11, asr #1 @ check if clamping is needed...
492 orr r12, r12, r7 @ ...at all
493 movs r12, r12, asr #15 @
495 movs r12, r0, asr #15 @ clamp b
496 mvnne r0, r12, lsr #15 @
497 andne r0, r0, #0x7c00 @ mask b only if clamped
498 movs r12, r11, asr #16 @ clamp r
499 mvnne r11, r12, lsr #16 @
500 movs r12, r7, asr #15 @ clamp g
501 mvnne r7, r12, lsr #15 @
504 and r11, r11, #0xf800 @ pack pixel
505 and r7, r7, #0x7e00 @ r0 = pixel = (r & 0xf800) |
506 orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
507 orr r0, r11, r0, lsr #10 @ (b >> 10)
510 bic r0, r0, #0xff00 @
511 orr r0, r7, r0, lsl #8 @ swap pixel bytes
512 orr r3, r3, r0, lsl#16 @ pack with 2nd pixel
513 mov r0, #0xC8000000 @ rebuild DBOP base again
514 orr r0, r0, #0x120000 @ r0 = DBOP_BASE
516 str r3, [r0, #0x10] @ write pixel pair to DBOP data FIFO
518 subs r1, r1, #2 @ subtract block from width
519 bgt 10b @ loop line (label `10:` not visible in this chunk)
522 @ writing at max 110*32 bits (LCD_WIDTH/2), the fifo is bigger
523 @ so polling fifo empty after the loops is safe
524 ldr r7, [r0,#0xc] @ r7 = DBOP_STATUS
525 tst r7, #DBOP_BUSY @ fifo not empty? (poll-loop branch not visible here)
528 ldmfd sp!, { r4-r11, pc } @ restore registers and return
529 .ltorg @ dump constant pool
530 .size lcd_write_yuv420_lines_odither, .-lcd_write_yuv420_lines_odither