1 /***************************************************************************
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
10 * Copyright (C) 2007 by Jens Arnold
11 * Heavily based on lcd-as-memframe.c by Michael Sevakis
12 * Adapted for Sansa Fuze/e200v2 by Rafaël Carré
14 * This program is free software; you can redistribute it and/or
15 * modify it under the terms of the GNU General Public License
16 * as published by the Free Software Foundation; either version 2
17 * of the License, or (at your option) any later version.
19 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
20 * KIND, either express or implied.
22 ****************************************************************************/
/* Mask for bit 10 of the word read from DBOP_BASE + 0xc (DBOP_STATUS); the
 * routines in this file test it with tst after every pixel write. */
27 #define DBOP_BUSY (1<<10)
29 /****************************************************************************
30 * void lcd_write_yuv420_lines(unsigned char const * const src[3],
34 * |R| |1.000000 -0.000001 1.402000| |Y'|
35 * |G| = |1.000000 -0.334136 -0.714136| |Pb|
36 * |B| |1.000000 1.772000 0.000000| |Pr|
37 * Scaled, normalized, rounded and tweaked to yield RGB 565:
38 * |R| |74 0 101| |Y' - 16| >> 9
39 * |G| = |74 -24 -51| |Cb - 128| >> 8
40 * |B| |74 128 0| |Cr - 128| >> 9
42 * Write four RGB565 pixels in the following order on each loop:
@ lcd_write_yuv420_lines
@ Each loop pass reads four luma samples (two through the post-incremented
@ Y'_p, two at Y'_p + r2) and one Cb/Cr pair, converts them to four RGB565
@ pixels and stores each pixel as a halfword at DBOP_BASE + 0x10.
@ Register roles established by the visible code:
@   r0       = address of the three plane pointers on entry, then pixel scratch
@   r1       = remaining width, reduced by 2 per loop pass
@   r2       = offset added to Y'_p to reach the other luma row
@   r3       = DBOP_BASE
@   r4/r5/r6 = Y'_p / Cb_p / Cr_p
@   r7       = scaled luma, then g      r8 = bu   r9 = rv   r10 = guv
@   r11      = r                        r12 = scratch
@ NOTE(review): the numbering embedded in this listing skips lines, so some
@ instructions and the numeric labels targeted by the branches are not visible
@ here - confirm every hedged note below against the complete file.
46 .section .icode, "ax", %progbits
48 .global lcd_write_yuv420_lines
49 .type lcd_write_yuv420_lines, %function
50 lcd_write_yuv420_lines:
54 stmfd sp!, { r4-r12 } @ save non-scratch
55 ldmia r0, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p
56 @ r5 = yuv_src[1] = Cb_p
57 @ r6 = yuv_src[2] = Cr_p
@ NOTE(review): no visible instruction seeds r3 before this orr; the odither
@ routine in this file executes mov r3, #0xC8000000 first - confirm.
61 orr r3, r3, #0x120000 @ r3 = DBOP_BASE
@ Per-pass setup: first luma sample plus the chroma pair shared by all four
@ pixels of this pass.
63 ldrb r7, [r4], #1 @ r7 = *Y'_p++;
64 ldrb r8, [r5], #1 @ r8 = *Cb_p++;
65 ldrb r9, [r6], #1 @ r9 = *Cr_p++;
@ Multiply by 37 with shift-adds: r12 = Y*5, then r7 = r12 + Y*32.
67 sub r7, r7, #16 @ r7 = Y = (Y' - 16)*74
68 add r12, r7, r7, asl #2 @ actually (Y' - 16)*37 and shift right
69 add r7, r12, r7, asl #5 @ by one less when adding - same for all
71 sub r8, r8, #128 @ Cb -= 128
72 sub r9, r9, #128 @ Cr -= 128
74 add r10, r9, r9, asl #1 @ r10 = Cr*51 + Cb*24
75 add r10, r10, r10, asl #4 @ Cr*3 * 17 = Cr*51
76 add r10, r10, r8, asl #3 @ + Cb*8
77 add r10, r10, r8, asl #4 @ + Cb*16 -> Cr*51 + Cb*24
79 add r11, r9, r9, asl #2 @ r9 = Cr*101
80 add r11, r11, r9, asl #5 @ Cr*5 + Cr*32 = Cr*37
81 add r9, r11, r9, asl #6 @ Cr*37 + Cr*64 = Cr*101
@ Rounding terms are added before the down-shifts.
@ NOTE(review): only the shift for guv is visible below; the shifts for bu and
@ rv named in the comments are not visible in this listing.
83 add r8, r8, #2 @ r8 = bu = (Cb*128 + 128) >> 8
85 add r9, r9, #256 @ r9 = rv = (r9 + 256) >> 9
87 rsb r10, r10, #128 @ r10 = guv = (-r10 + 128) >> 8
88 mov r10, r10, asr #8 @
@ Pixel 1: r7 holds Y*37, so asr #8 / asr #7 give the (Y*74) >> 9 / >> 8 terms.
90 add r0, r8, r7, asr #8 @ r0 = b = (Y >> 9) + bu
91 add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv
92 add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
@ OR the channels together (g halved to share the 5-bit range) so one value
@ reflects whether any channel is negative or too large.
94 orr r12, r0, r11 @ check if clamping is needed...
95 orr r12, r12, r7, asr #1 @ ...at all
@ NOTE(review): the compare/branch consuming r12, the cmp for b, and the and
@ masks for b and g are not visible in this listing.
@ Clamp idiom: hi (unsigned >) is true for both negative and too-large values;
@ mvn of the sign mask gives 0 for negative and all ones for too large.
99 mvnhi r0, r0, asr #31 @
101 cmp r11, #31 @ clamp r
102 mvnhi r11, r11, asr #31 @
103 andhi r11, r11, #31 @ all ones -> 31, zero stays 0
104 cmp r7, #63 @ clamp g
105 mvnhi r7, r7, asr #31 @
@ Fetch the next luma sample before packing the finished pixel.
109 ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
111 orr r0, r0, r11, lsl #11 @ r0 = (r << 11) | b
112 orr r0, r0, r7, lsl #5 @ r0 = (r << 11) | (g << 5) | b
113 strh r0, [r3, #0x10] @ write pixel
@ NOTE(review): the branch acting on this tst is not visible in this listing
@ (same for the three later status checks).
115 ldr r7, [r3,#0xc] @ r7 = DBOP_STATUS
116 tst r7, #DBOP_BUSY @ fifo not empty?
@ Pixel 2: same conversion with the luma sample loaded above; bu, rv and guv
@ are reused unchanged.
119 sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
120 add r12, r7, r7, asl #2 @ r12 = Y*5
121 add r7, r12, r7, asl #5 @ r7 = Y*37
122 @ compute R, G, and B
123 add r0, r8, r7, asr #8 @ r0 = b = (Y >> 9) + bu
124 add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv
125 add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
127 orr r12, r0, r11 @ check if clamping is needed...
128 orr r12, r12, r7, asr #1 @ ...at all
131 cmp r0, #31 @ clamp b
132 mvnhi r0, r0, asr #31 @ 0 if negative, all ones if too large
134 cmp r11, #31 @ clamp r
135 mvnhi r11, r11, asr #31 @
136 andhi r11, r11, #31 @
137 cmp r7, #63 @ clamp g
138 mvnhi r7, r7, asr #31 @
142 ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++)
144 orr r0, r0, r11, lsl #11 @ r0 = (r << 11) | b
145 orr r0, r0, r7, lsl #5 @ r0 = (r << 11) | (g << 5) | b
146 strh r0, [r3, #0x10] @ write pixel
148 ldr r7, [r3,#0xc] @ r7 = DBOP_STATUS
149 tst r7, #DBOP_BUSY @ fifo not empty?
@ Pixel 3: luma sample taken through the post-incremented Y'_p.
152 sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
153 add r12, r7, r7, asl #2 @ r12 = Y*5
154 add r7, r12, r7, asl #5 @ r7 = Y*37
155 @ compute R, G, and B
156 add r0, r8, r7, asr #8 @ r0 = b = (Y >> 9) + bu
157 add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv
158 add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
160 orr r12, r0, r11 @ check if clamping is needed...
161 orr r12, r12, r7, asr #1 @ ...at all
164 cmp r0, #31 @ clamp b
165 mvnhi r0, r0, asr #31 @
167 cmp r11, #31 @ clamp r
168 mvnhi r11, r11, asr #31 @
169 andhi r11, r11, #31 @
170 cmp r7, #63 @ clamp g
171 mvnhi r7, r7, asr #31 @
175 ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
178 orr r0, r0, r11, lsl #11 @ r0 = (r << 11) | b
179 orr r0, r0, r7, lsl #5 @ r0 = (r << 11) | (g << 5) | b
180 strh r0, [r3, #0x10] @ write pixel
182 ldr r7, [r3,#0xc] @ r7 = DBOP_STATUS
183 tst r7, #DBOP_BUSY @ fifo not empty?
@ Pixel 4: last luma sample of this pass, loaded at Y'_p + r2 above.
186 sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
187 add r12, r7, r7, asl #2 @ r12 = Y*5
188 add r7, r12, r7, asl #5 @ r7 = Y*37
189 @ compute R, G, and B
190 add r0, r8, r7, asr #8 @ r0 = b = (Y >> 9) + bu
191 add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv
192 add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
194 orr r12, r0, r11 @ check if clamping is needed...
195 orr r12, r12, r7, asr #1 @ ...at all
198 cmp r0, #31 @ clamp b
199 mvnhi r0, r0, asr #31 @
201 cmp r11, #31 @ clamp r
202 mvnhi r11, r11, asr #31 @
203 andhi r11, r11, #31 @
204 cmp r7, #63 @ clamp g
205 mvnhi r7, r7, asr #31 @
209 orr r0, r0, r11, lsl #11 @ r0 = (r << 11) | b
210 orr r0, r0, r7, lsl #5 @ r0 = (r << 11) | (g << 5) | b
211 strh r0, [r3, #0x10] @ write pixel
213 ldr r7, [r3,#0xc] @ r7 = DBOP_STATUS
214 tst r7, #DBOP_BUSY @ fifo not empty?
@ Two columns were consumed this pass; repeat while width remains.
@ NOTE(review): label 10 and the return instruction that should follow the
@ ldmfd are not visible in this listing.
217 subs r1, r1, #2 @ subtract block from width
218 bgt 10b @ loop line @
220 ldmfd sp!, { r4-r12 } @ restore registers and return
222 .ltorg @ dump constant pool
223 .size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines
225 /****************************************************************************
226 * void lcd_write_yuv420_lines_odither(unsigned char const * const src[3],
232 * |R| |1.000000 -0.000001 1.402000| |Y'|
233 * |G| = |1.000000 -0.334136 -0.714136| |Pb|
234 * |B| |1.000000 1.772000 0.000000| |Pr|
235 * Red scaled at twice g & b but at same precision to place it in correct
236 * bit position after multiply and leave instruction count lower.
237 * |R| |258 0 408| |Y' - 16|
238 * |G| = |149 -49 -104| |Cb - 128|
239 * |B| |149 258 0| |Cr - 128|
241 * Write four RGB565 pixels in the following order on each loop:
245 * Kernel pattern (raw|rotated|use order):
246 * 5 3 4 2 2 6 3 7 row0 row2 > down
247 * 1 7 0 6 | 4 0 5 1 | 2 4 6 0 3 5 7 1 col0 left
248 * 4 2 5 3 | 3 7 2 6 | 3 5 7 1 2 4 6 0 col2 \/
@ lcd_write_yuv420_lines_odither
@ Same per-pass structure as the plain routine: four luma samples and one
@ Cb/Cr pair become four RGB565 pixels, each stored as a halfword at
@ DBOP_BASE + 0x10. Channels are kept at higher precision (b and g in bits
@ 14..0, r in bits 15..0), a per-pixel delta built from r14 is added, and the
@ results are saturated and packed by masking.
@ Register roles established by the visible code:
@   r1       = remaining width, reduced by 2 per loop pass
@   r2       = offset added to Y'_p to reach the other luma row
@   r3       = DBOP_BASE (0xC8120000)
@   r4/r5/r6 = Y'_p / Cb_p / Cr_p
@   r7       = scaled luma, then g      r8 = bu   r9 = rv   r10 = guv
@   r11      = r    r12 = scratch/delta r14 = pattern quadrant (0x00 or 0x80)
@ NOTE(review): the numbering embedded in this listing skips lines, so some
@ instructions and the numeric labels targeted by the branches are not visible
@ here - confirm every hedged note below against the complete file.
251 .section .icode, "ax", %progbits
253 .global lcd_write_yuv420_lines_odither
254 .type lcd_write_yuv420_lines_odither, %function
255 lcd_write_yuv420_lines_odither:
261 stmfd sp!, { r4-r12, lr } @ save non-scratch
262 ldmia r0, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p
263 @ r5 = yuv_src[1] = Cb_p
264 @ r6 = yuv_src[2] = Cr_p
@ Ten registers (40 bytes) were just pushed, so [sp, #40] is the first word
@ the caller left on the stack - presumably the fifth C argument; confirm.
267 ldr r14, [sp, #40] @ Line up pattern and kernel quadrant
@ NOTE(review): the steps that reduce r14 to a single bit before this shift
@ are not visible in this listing.
270 mov r14, r14, lsl #6 @ 0x00 or 0x80
271 mov r3, #0xC8000000 @ upper part of the DBOP base address
272 orr r3, r3, #0x120000 @ r3 = DBOP_BASE
@ Per-pass setup: first luma sample plus the chroma pair shared by all four
@ pixels of this pass.
275 ldrb r7, [r4], #1 @ r7 = *Y'_p++;
276 ldrb r8, [r5], #1 @ r8 = *Cb_p++;
277 ldrb r9, [r6], #1 @ r9 = *Cr_p++;
279 eor r14, r14, #0x80 @ flip pattern quadrant
@ Multiply by 149 with shift-adds: Y*5, then *17 = Y*85, then + Y*64.
281 sub r7, r7, #16 @ r7 = Y = (Y' - 16)*149
282 add r12, r7, r7, asl #2 @ r12 = Y*5
283 add r12, r12, r12, asl #4 @ r12 = Y*85
284 add r7, r12, r7, asl #6 @ r7 = Y*149
286 sub r8, r8, #128 @ Cb -= 128
287 sub r9, r9, #128 @ Cr -= 128
289 add r10, r8, r8, asl #4 @ r10 = guv = Cr*104 + Cb*49
290 add r10, r10, r8, asl #5 @ Cb*17 + Cb*32 = Cb*49
291 add r10, r10, r9, asl #3 @ + Cr*8
292 add r10, r10, r9, asl #5 @ + Cr*32
293 add r10, r10, r9, asl #6 @ + Cr*64 -> Cb*49 + Cr*104
295 mov r8, r8, asl #1 @ r8 = bu = Cb*258
296 add r8, r8, r8, asl #7 @ Cb*2 * 129 = Cb*258
@ NOTE(review): the two visible steps yield Cr*51; the remaining *8 needed
@ for Cr*408 is not visible in this listing.
298 add r9, r9, r9, asl #1 @ r9 = rv = Cr*408
299 add r9, r9, r9, asl #4 @ Cr*3 * 17 = Cr*51
@ Pixel 1.
302 @ compute R, G, and B
303 add r0, r8, r7 @ r0 = b' = Y + bu
304 add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
305 rsb r7, r10, r7 @ r7 = g' = Y - guv (guv = Cr*104 + Cb*49)
307 @ r8 = bu, r9 = rv, r10 = guv
@ Compress each channel slightly (x - x/32 + x/256, or x - x/64 + x/256 for g).
309 sub r12, r0, r0, lsr #5 @ r0 = 31/32*b + b/256
310 add r0, r12, r0, lsr #8 @
312 sub r12, r11, r11, lsr #5 @ r11 = 31/32*r + r/256
313 add r11, r12, r11, lsr #8 @
315 sub r12, r7, r7, lsr #6 @ r7 = 63/64*g + g/256
316 add r7, r12, r7, lsr #8 @
@ Delta for this pixel = quadrant bit (r14) + 0x100, scaled per channel.
318 add r12, r14, #0x100 @ r12 = delta
320 add r0, r0, r12 @ b = r0 + delta
321 add r11, r11, r12, lsl #1 @ r = r11 + delta*2
322 add r7, r7, r12, lsr #1 @ g = r7 + delta/2
@ r is halved so all three channels share one 15-bit range test.
324 orr r12, r0, r11, asr #1 @ check if clamping is needed...
325 orr r12, r12, r7 @ ...at all
326 movs r12, r12, asr #15 @ nonzero if any channel is out of range
@ NOTE(review): the branch that skips the clamps when r12 is zero is not
@ visible in this listing (same for the three later pixels).
@ Saturate idiom: mvn of the shifted-down overflow bits is all ones for a
@ too-large value and has the channel's bits clear for a negative one; the
@ masks below (0x7c00 here, 0xf800/0x7e00 when packing) reduce that to max or 0.
328 movs r12, r0, asr #15 @ clamp b
329 mvnne r0, r12, lsr #15 @
330 andne r0, r0, #0x7c00 @ mask b only if clamped
331 movs r12, r11, asr #16 @ clamp r
332 mvnne r11, r12, lsr #16 @
333 movs r12, r7, asr #15 @ clamp g
334 mvnne r7, r12, lsr #15 @
@ Fetch the next luma sample before packing the finished pixel.
337 ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
339 and r11, r11, #0xf800 @ pack pixel
340 and r7, r7, #0x7e00 @ r0 = pixel = (r & 0xf800) |
341 orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
342 orr r0, r11, r0, lsr #10 @ (b >> 10)
343 strh r0, [r3, #0x10] @ write pixel
@ NOTE(review): the branch acting on this tst is not visible in this listing
@ (same for the three later status checks).
345 ldr r7, [r3,#0xc] @ r7 = DBOP_STATUS
346 tst r7, #DBOP_BUSY @ fifo not empty?
@ Pixel 2: luma sample loaded at Y'_p + r2 above; delta = r14 + 0x200.
349 sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
350 add r12, r7, r7, asl #2 @ r12 = Y*5
351 add r12, r12, r12, asl #4 @ r12 = Y*85
352 add r7, r12, r7, asl #6 @ r7 = Y*149
353 @ compute R, G, and B
354 add r0, r8, r7 @ r0 = b' = Y + bu
355 add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
356 rsb r7, r10, r7 @ r7 = g' = Y - guv
358 sub r12, r0, r0, lsr #5 @ r0 = 31/32*b' + b'/256
359 add r0, r12, r0, lsr #8 @
361 sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256
362 add r11, r12, r11, lsr #8 @
364 sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256
365 add r7, r12, r7, lsr #8 @
367 add r12, r14, #0x200 @ r12 = delta
369 add r0, r0, r12 @ b = r0 + delta
370 add r11, r11, r12, lsl #1 @ r = r11 + delta*2
371 add r7, r7, r12, lsr #1 @ g = r7 + delta/2
373 orr r12, r0, r11, asr #1 @ check if clamping is needed...
374 orr r12, r12, r7 @ ...at all
375 movs r12, r12, asr #15 @ nonzero if any channel is out of range
377 movs r12, r0, asr #15 @ clamp b
378 mvnne r0, r12, lsr #15 @
379 andne r0, r0, #0x7c00 @ mask b only if clamped
380 movs r12, r11, asr #16 @ clamp r
381 mvnne r11, r12, lsr #16 @
382 movs r12, r7, asr #15 @ clamp g
383 mvnne r7, r12, lsr #15 @
386 ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++)
388 and r11, r11, #0xf800 @ pack pixel
389 and r7, r7, #0x7e00 @ r0 = pixel = (r & 0xf800) |
390 orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
391 orr r0, r11, r0, lsr #10 @ (b >> 10)
392 strh r0, [r3, #0x10] @ write pixel
394 ldr r7, [r3,#0xc] @ r7 = DBOP_STATUS
395 tst r7, #DBOP_BUSY @ fifo not empty?
@ Pixel 3: luma sample taken through the post-incremented Y'_p;
@ delta = r14 + 0x300.
398 sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
399 add r12, r7, r7, asl #2 @ r12 = Y*5
400 add r12, r12, r12, asl #4 @ r12 = Y*85
401 add r7, r12, r7, asl #6 @ r7 = Y*149
402 @ compute R, G, and B
403 add r0, r8, r7 @ r0 = b' = Y + bu
404 add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
405 rsb r7, r10, r7 @ r7 = g' = Y - guv
407 @ r8 = bu, r9 = rv, r10 = guv
409 sub r12, r0, r0, lsr #5 @ r0 = 31/32*b' + b'/256
410 add r0, r12, r0, lsr #8 @
412 sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256
413 add r11, r12, r11, lsr #8 @
415 sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256
416 add r7, r12, r7, lsr #8 @
418 add r12, r14, #0x300 @ r12 = delta
420 add r0, r0, r12 @ b = r0 + delta
421 add r11, r11, r12, lsl #1 @ r = r11 + delta*2
422 add r7, r7, r12, lsr #1 @ g = r7 + delta/2
424 orr r12, r0, r11, asr #1 @ check if clamping is needed...
425 orr r12, r12, r7 @ ...at all
426 movs r12, r12, asr #15 @ nonzero if any channel is out of range
428 movs r12, r0, asr #15 @ clamp b
429 mvnne r0, r12, lsr #15 @
430 andne r0, r0, #0x7c00 @ mask b only if clamped
431 movs r12, r11, asr #16 @ clamp r
432 mvnne r11, r12, lsr #16 @
433 movs r12, r7, asr #15 @ clamp g
434 mvnne r7, r12, lsr #15 @
437 ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
439 and r11, r11, #0xf800 @ pack pixel
440 and r7, r7, #0x7e00 @ r0 = pixel = (r & 0xf800) |
441 orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
442 orr r0, r11, r0, lsr #10 @ (b >> 10)
443 strh r0, [r3, #0x10] @ write pixel
445 ldr r7, [r3,#0xc] @ r7 = DBOP_STATUS
446 tst r7, #DBOP_BUSY @ fifo not empty?
@ Pixel 4: last luma sample of this pass; the delta is r14 itself.
449 sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
450 add r12, r7, r7, asl #2 @ r12 = Y*5
451 add r12, r12, r12, asl #4 @ r12 = Y*85
452 add r7, r12, r7, asl #6 @ r7 = Y*149
453 @ compute R, G, and B
454 add r0, r8, r7 @ r0 = b' = Y + bu
455 add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
456 rsb r7, r10, r7 @ r7 = g' = Y - guv
458 sub r12, r0, r0, lsr #5 @ r0 = 31/32*b + b/256
459 add r0, r12, r0, lsr #8 @
461 sub r12, r11, r11, lsr #5 @ r11 = 31/32*r + r/256
462 add r11, r12, r11, lsr #8 @
464 sub r12, r7, r7, lsr #6 @ r7 = 63/64*g + g/256
465 add r7, r12, r7, lsr #8 @
467 @ This element is zero - use r14 @
469 add r0, r0, r14 @ b = r0 + delta
470 add r11, r11, r14, lsl #1 @ r = r11 + delta*2
471 add r7, r7, r14, lsr #1 @ g = r7 + delta/2
473 orr r12, r0, r11, asr #1 @ check if clamping is needed...
474 orr r12, r12, r7 @ ...at all
475 movs r12, r12, asr #15 @ nonzero if any channel is out of range
477 movs r12, r0, asr #15 @ clamp b
478 mvnne r0, r12, lsr #15 @
479 andne r0, r0, #0x7c00 @ mask b only if clamped
480 movs r12, r11, asr #16 @ clamp r
481 mvnne r11, r12, lsr #16 @
482 movs r12, r7, asr #15 @ clamp g
483 mvnne r7, r12, lsr #15 @
486 and r11, r11, #0xf800 @ pack pixel
487 and r7, r7, #0x7e00 @ r0 = pixel = (r & 0xf800) |
488 orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
489 orr r0, r11, r0, lsr #10 @ (b >> 10)
490 strh r0, [r3, #0x10] @ write pixel
492 ldr r7, [r3,#0xc] @ r7 = DBOP_STATUS
493 tst r7, #DBOP_BUSY @ fifo not empty?
@ Two columns were consumed this pass; repeat while width remains.
@ NOTE(review): label 10 is not visible in this listing.
@ Loading pc from the saved lr slot restores the registers and returns.
496 subs r1, r1, #2 @ subtract block from width
497 bgt 10b @ loop line @
499 ldmfd sp!, { r4-r12, pc } @ restore registers and return
500 .ltorg @ dump constant pool
501 .size lcd_write_yuv420_lines_odither, .-lcd_write_yuv420_lines_odither