1 /***************************************************************************
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
10 * Copyright (C) 2007 by Michael Sevakis
12 * ARM code for memory framebuffer LCDs
14 * All files in this archive are subject to the GNU General Public License.
15 * See the file COPYING in the source tree root for full license agreement.
17 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
18 * KIND, either express or implied.
20 ****************************************************************************/
/****************************************************************************
 * void lcd_copy_buffer_rect(fb_data *dst, fb_data *src, int width,
 *                           int height)
 ****************************************************************************/
29 .section .icode, "ax", %progbits
31 .global lcd_copy_buffer_rect
32 .type lcd_copy_buffer_rect, %function
@ lcd_copy_buffer_rect -- copy a rectangle of 16-bit (halfword) pixels.
@ In (AAPCS): r0 = dst, r1 = src, r2 = width in pixels, r3 = height in lines.
@ Both buffers use a pitch of LCD_WIDTH pixels; r4 = LCD_WIDTH - width is
@ later added (shifted left 1 = halfwords to bytes) to skip line padding.
@ Clobbers r4-r12/lr, all saved/restored on the stack.
@ NOTE(review): this view of the file is missing lines -- the numeric local
@ labels 10:, 20:, 25: and 40: referenced by branches below are absent, as
@ are some paired instructions (the strh/subs after the leading ldrh, and
@ the str/sub entries inside the alignment jump table). Restore them from
@ the original file before assembling.
37 lcd_copy_buffer_rect: @
38 stmfd sp!, { r4-r12, lr } @ save non-scratch regs
39 mov r5, r2 @ r5 = cached width
40 rsb r4, r2, #LCD_WIDTH @ r4 = LCD_WIDTH - width
@ -- 10: copy line (per-line loop head; label line missing in this view) --
42 subs r2, r5, #1 @ r2 = width - 1
43 beq 40f @ finish line @ one halfword? skip to trailing copy
44 tst r0, #2 @ word aligned?
45 beq 20f @ rem copy @ yes? skip to word copy
46 ldrh r6, [r1], #2 @ copy leading halfword
@ NOTE(review): the matching "strh r6, [r0], #2" and width-decrement that
@ set the flags tested by the next branch are not visible here.
49 ble 40f @ finish line @ next line if lt or finish
50 @ trailing halfword if eq
@ -- 20: rem copy (label line missing in this view) --
52 add r14, r2, #1 @ get remaining width mod 16 after word
54 and r14, r14, #0xe @ r14 = 0 (16), 2, 4, 6, 8, 10, 12, 14
@ Computed goto: pc reads as this address + 8, and r14 (0..14, step 2) is
@ scaled by 8 bytes per unit to index the jump table below.
55 add pc, pc, r14, lsl #3 @ branch to 32-byte align
57 b 30f @ rw % 16 = 0 or 1? use octword loop
@ -- jump-table entries: copy 1..7 words to reach 32-byte alignment, then
@ fall through to 25: (copy up done) --
61 ldr r6, [r1], #4 @ rw % 16 = 2 or 3
64 b 25f @ copy up done @
65 ldmia r1!, { r6-r7 } @ rw % 16 = 4 or 5
67 stmia r0!, { r6-r7 } @
68 b 25f @ copy up done @
69 ldmia r1!, { r6-r8 } @ rw % 16 = 6 or 7
71 stmia r0!, { r6-r8 } @
72 b 25f @ copy up done @
73 ldmia r1!, { r6-r9 } @ rw % 16 = 8 or 9
75 stmia r0!, { r6-r9 } @
76 b 25f @ copy up done @
77 ldmia r1!, { r6-r10 } @ rw % 16 = 10 or 11
79 stmia r0!, { r6-r10 } @
80 b 25f @ copy up done @
81 ldmia r1!, { r6-r11 } @ rw % 16 = 12 or 13
83 stmia r0!, { r6-r11 } @
84 b 25f @ copy up done @
85 ldmia r1!, { r6-r12 } @ rw % 16 = 14 or 15
87 stmia r0!, { r6-r12 } @
@ -- 25: copy up done (label line missing in this view) --
89 ble 40f @ finish line @ no 32-byte segments remaining?
90 30: @ octword loop @ copy 16 pixels per loop
91 ldmia r1!, { r6-r12, r14 } @ 8 words (32 bytes) per iteration
93 stmia r0!, { r6-r12, r14 } @
94 bgt 30b @ octword loop @
@ -- 40: finish line (label line missing in this view); the eq condition
@ from the earlier width test selects whether an odd trailing halfword
@ remains to be copied --
96 ldreqh r6, [r1], #2 @ finish last halfword if eq ...
97 add r1, r1, r4, lsl #1 @ skip src line padding (halfwords -> bytes)
98 streqh r6, [r0], #2 @ ...
99 add r0, r0, r4, lsl #1 @ skip dst line padding (halfwords -> bytes)
100 subs r3, r3, #1 @ next line
101 bgt 10b @ copy line @
102 ldmfd sp!, { r4-r12, pc } @ restore regs and return
103 .ltorg @ dump constant pool
104 .size lcd_copy_buffer_rect, .-lcd_copy_buffer_rect
/****************************************************************************
 * void lcd_write_yuv420_lines(fb_data *dst,
 *                             unsigned char const * const src[3],
 *                             int width, int stride)
 *
 * |R|   |1.000000 -0.000001  1.402000| |Y'|
 * |G| = |1.000000 -0.334136 -0.714136| |Pb|
 * |B|   |1.000000  1.772000  0.000000| |Pr|
 * Scaled, normalized, rounded and tweaked to yield RGB 565:
 * |R|   |74   0 101| |Y' -  16| >> 9
 * |G| = |74 -24 -51| |Cb - 128| >> 8
 * |B|   |74 128   0| |Cr - 128| >> 9
 *
 * Write four RGB565 pixels in the following order on each loop:
 * (2x2 block diagram lost in extraction -- see original file)
 ****************************************************************************/
124 .section .icode, "ax", %progbits
126 .global lcd_write_yuv420_lines
127 .type lcd_write_yuv420_lines, %function
@ lcd_write_yuv420_lines -- convert YUV420 to RGB565, one 2x2 pixel block
@ per loop iteration (one Cb/Cr pair shared by four luma samples).
@ In (AAPCS): r0 = dst (RGB565 framebuffer), r1 = array of 3 plane
@ pointers { Y', Cb, Cr }, r2 = width in pixels (stepped by 2),
@ r3 = byte offset to the next luma line (used by "ldrb r12, [r4, r3]").
@ Clobbers r4-r12 (saved/restored); lr is not touched, hence no lr push.
@ NOTE(review): lines were dropped in extraction -- the loop-head label
@ (10:, target of "bgt 10b") and some interleaved instructions (e.g. the
@ shift that finishes bu after "add r8, r8, #2", the asr #9 steps) are
@ missing from this view. Restore from the original before assembling.
128 lcd_write_yuv420_lines:
133 stmfd sp!, { r4-r12 } @ save non-scratch
134 ldmia r1, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p
135 @ r5 = yuv_src[1] = Cb_p
136 @ r6 = yuv_src[2] = Cr_p
@ -- 10: loop line (label line missing in this view) --
140 ldrb r7, [r4], #1 @ r7 = *Y'_p++;
141 ldrb r8, [r5], #1 @ r8 = *Cb_p++;
142 ldrb r9, [r6], #1 @ r9 = *Cr_p++;
@ Luma scaled by 74 via shift-add: x*37 = x + x*4 + x*32, kept double so
@ later asr #8 / asr #7 realize the >>9 / >>8 of the matrix comment.
144 sub r7, r7, #16 @ r7 = Y = (Y' - 16)*74
145 add r12, r7, r7, asl #2 @ actually (Y' - 16)*37 and shift right
146 add r7, r12, r7, asl #5 @ by one less when adding - same for all
148 sub r8, r8, #128 @ Cb -= 128
149 sub r9, r9, #128 @ Cr -= 128
151 add r10, r9, r9, asl #1 @ r10 = Cr*51 + Cb*24
152 add r10, r10, r10, asl #4 @
153 add r10, r10, r8, asl #3 @
154 add r10, r10, r8, asl #4 @
156 add r11, r9, r9, asl #2 @ r9 = Cr*101
157 add r11, r11, r9, asl #5 @
158 add r9, r11, r9, asl #6 @
160 add r8, r8, #2 @ r8 = bu = (Cb*128 + 128) >> 8
162 add r9, r9, #256 @ r9 = rv = (r9 + 256) >> 9
164 rsb r10, r10, #128 @ r10 = guv = (-r10 + 128) >> 8
165 mov r10, r10, asr #8 @
166 @ compute R, G, and B (pixel 1: top-left of the 2x2 block)
167 add r1, r8, r7, asr #8 @ r1 = b = (Y >> 9) + bu
168 add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv
169 add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
@ Fast path test: if no channel left its 5/6-bit range, the conditional
@ clamp block below is skipped (branch line not visible in this view).
171 orr r12, r1, r11 @ check if clamping is needed...
172 orr r12, r12, r7, asr #1 @ ...at all
175 cmp r1, #31 @ clamp b
176 mvnhi r1, r1, asr #31 @ 0 if negative, all-ones if too big ...
178 cmp r11, #31 @ clamp r
179 mvnhi r11, r11, asr #31 @
180 andhi r11, r11, #31 @ ... then mask to field width
181 cmp r7, #63 @ clamp g
182 mvnhi r7, r7, asr #31 @
@ Next luma fetched early to hide the load latency before packing.
186 ldrb r12, [r4, r3] @ r12 = Y' = *(Y'_p + stride)
188 orr r1, r1, r7, lsl #5 @ r1 |= (g << 5)
189 orr r1, r1, r11, lsl #11 @ r1 = b | (r << 11)
191 strh r1, [r0], #LCD_WIDTH @ store pixel
@ -- pixel 2: same chroma (bu/rv/guv reused), luma from the second line --
196 sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
197 add r12, r7, r7, asl #2 @
198 add r7, r12, r7, asl #5 @
199 @ compute R, G, and B
200 add r1, r8, r7, asr #8 @ r1 = b = (Y >> 9) + bu
201 add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv
202 add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
204 orr r12, r1, r11 @ check if clamping is needed...
205 orr r12, r12, r7, asr #1 @ ...at all
208 cmp r1, #31 @ clamp b
209 mvnhi r1, r1, asr #31 @
211 cmp r11, #31 @ clamp r
212 mvnhi r11, r11, asr #31 @
213 andhi r11, r11, #31 @
214 cmp r7, #63 @ clamp g
215 mvnhi r7, r7, asr #31 @
219 ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++)
221 orr r1, r1, r11, lsl #11 @ r1 = b | (r << 11)
222 orr r1, r1, r7, lsl #5 @ r1 |= (g << 5)
224 strh r1, [r0, #-LCD_WIDTH-2] @ store pixel
227 add r0, r0, #LCD_WIDTH @
@ -- pixel 3 --
230 sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
231 add r12, r7, r7, asl #2 @
232 add r7, r12, r7, asl #5 @
233 @ compute R, G, and B
234 add r1, r8, r7, asr #8 @ r1 = b = (Y >> 9) + bu
235 add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv
236 add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
238 orr r12, r1, r11 @ check if clamping is needed...
239 orr r12, r12, r7, asr #1 @ ...at all
242 cmp r1, #31 @ clamp b
243 mvnhi r1, r1, asr #31 @
245 cmp r11, #31 @ clamp r
246 mvnhi r11, r11, asr #31 @
247 andhi r11, r11, #31 @
248 cmp r7, #63 @ clamp g
249 mvnhi r7, r7, asr #31 @
253 ldrb r12, [r4, r3] @ r12 = Y' = *(Y'_p + stride)
255 orr r1, r1, r7, lsl #5 @ r1 = b | (g << 5)
256 orr r1, r1, r11, lsl #11 @ r1 |= (r << 11)
258 strh r1, [r0, #LCD_WIDTH]! @ store pixel
@ -- pixel 4: last of the 2x2 block --
263 sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
264 add r12, r7, r7, asl #2 @
265 add r7, r12, r7, asl #5 @
266 @ compute R, G, and B
267 add r1, r8, r7, asr #8 @ r1 = b = (Y >> 9) + bu
268 add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv
269 add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
271 orr r12, r1, r11 @ check if clamping is needed...
272 orr r12, r12, r7, asr #1 @ ...at all
275 cmp r1, #31 @ clamp b
276 mvnhi r1, r1, asr #31 @
278 cmp r11, #31 @ clamp r
279 mvnhi r11, r11, asr #31 @
280 andhi r11, r11, #31 @
281 cmp r7, #63 @ clamp g
282 mvnhi r7, r7, asr #31 @
286 orr r12, r1, r11, lsl #11 @ r12 = b | (r << 11)
287 orr r12, r12, r7, lsl #5 @ r12 |= (g << 5)
288 strh r12, [r0, #-2] @ store pixel
290 add r0, r0, #2*LCD_WIDTH @ advance dst (branch path; lines dropped)
292 add r0, r0, #LCD_WIDTH @ advance dst (alternate path)
295 subs r2, r2, #2 @ subtract block from width
296 bgt 10b @ loop line @
298 ldmfd sp!, { r4-r12 } @ restore registers and return
@ NOTE(review): no "pc" in the pop and no visible return instruction --
@ the bx/mov pc,lr line appears to be among the dropped lines.
300 .ltorg @ dump constant pool
301 .size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines
/****************************************************************************
 * void lcd_write_yuv420_lines_odither(fb_data *dst,
 *                                     unsigned char const * const src[3],
 *                                     int width, int stride,
 *                                     int x_screen, int y_screen)
 *
 * |R|   |1.000000 -0.000001  1.402000| |Y'|
 * |G| = |1.000000 -0.334136 -0.714136| |Pb|
 * |B|   |1.000000  1.772000  0.000000| |Pr|
 * Red scaled at twice g & b but at same precision to place it in correct
 * bit position after multiply and leave instruction count lower.
 * |R|   |258   0  408| |Y' -  16|
 * |G| = |149 -49 -104| |Cb - 128|
 * |B|   |149 258    0| |Cr - 128|
 *
 * Write four RGB565 pixels in the following order on each loop:
 * (2x2 block diagram lost in extraction -- see original file)
 *
 * Kernel pattern (raw|rotated|use order):
 * 5 3 4 2 | 2 6 3 7 | row0 row2          > down
 * 1 7 0 6 | 4 0 5 1 | 2 4 6 0 3 5 7 1   col0 left
 * 4 2 5 3 | 3 7 2 6 | 3 5 7 1 2 4 6 0   col2 \/
 ****************************************************************************/
331 .section .icode, "ax", %progbits
333 .global lcd_write_yuv420_lines_odither
334 .type lcd_write_yuv420_lines_odither, %function
@ lcd_write_yuv420_lines_odither -- YUV420 -> RGB565 with ordered
@ dithering, one 2x2 pixel block per loop iteration.
@ In (AAPCS): r0 = dst, r1 = array of 3 plane pointers { Y', Cb, Cr },
@ r2 = width in pixels (stepped by 2), r3 = luma line byte offset.
@ Two further stack arguments are read from sp+40 (just above the ten
@ saved registers) to select the dither-kernel quadrant -- presumably
@ x_screen/y_screen; confirm against the C prototype.
@ r14 holds the kernel quadrant bit (0x00/0x80), flipped each iteration;
@ the per-pixel dither deltas are r14 + 0x100, + 0x200, + 0x300, + 0.
@ NOTE(review): lines were dropped in extraction -- the loop-head label
@ (10:, target of "bgt 10b"), the clamp-skip branches after each
@ "movs r12, r12, asr #15", and some conditional paths are missing (the
@ two consecutive "strh" stores mid-function look like alternatives of a
@ dropped conditional). Restore from the original before assembling.
335 lcd_write_yuv420_lines_odither:
342 stmfd sp!, { r4-r12, lr } @ save non-scratch
343 ldmia r1, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p
344 @ r5 = yuv_src[1] = Cb_p
345 @ r6 = yuv_src[2] = Cr_p
348 add r1, sp, #40 @ Line up pattern and kernel quadrant
349 ldmia r1, { r12, r14 } @ fetch the two stack arguments
352 mov r14, r14, lsl #6 @ 0x00 or 0x80
@ -- 10: loop line (label line missing in this view) --
355 ldrb r7, [r4], #1 @ r7 = *Y'_p++;
356 ldrb r8, [r5], #1 @ r8 = *Cb_p++;
357 ldrb r9, [r6], #1 @ r9 = *Cr_p++;
359 eor r14, r14, #0x80 @ flip pattern quadrant
@ Luma scaled by 149 via shift-add: x*149 = (x + x*4)*17 + x*64 - x*16...
@ exact factoring per the three adds below; see header matrix.
361 sub r7, r7, #16 @ r7 = Y = (Y' - 16)*149
362 add r12, r7, r7, asl #2 @
363 add r12, r12, r12, asl #4 @
364 add r7, r12, r7, asl #6 @
366 sub r8, r8, #128 @ Cb -= 128
367 sub r9, r9, #128 @ Cr -= 128
369 add r10, r8, r8, asl #4 @ r10 = guv = Cr*104 + Cb*49
370 add r10, r10, r8, asl #5 @
371 add r10, r10, r9, asl #3 @
372 add r10, r10, r9, asl #5 @
373 add r10, r10, r9, asl #6 @
375 mov r8, r8, asl #1 @ r8 = bu = Cb*258
376 add r8, r8, r8, asl #7 @
378 add r9, r9, r9, asl #1 @ r9 = rv = Cr*408
379 add r9, r9, r9, asl #4 @
382 @ compute R, G, and B (pixel 1 of the 2x2 block)
383 add r1, r8, r7 @ r1 = b' = Y + bu
384 add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
385 rsb r7, r10, r7 @ r7 = g' = Y + guv
387 @ r8 = bu, r9 = rv, r10 = guv
@ Scale each channel slightly below full range so adding the dither
@ delta cannot push an in-range value out of range.
389 sub r12, r1, r1, lsr #5 @ r1 = 31/32*b + b/256
390 add r1, r12, r1, lsr #8 @
392 sub r12, r11, r11, lsr #5 @ r11 = 31/32*r + r/256
393 add r11, r12, r11, lsr #8 @
395 sub r12, r7, r7, lsr #6 @ r7 = 63/64*g + g/256
396 add r7, r12, r7, lsr #8 @
398 add r12, r14, #0x100 @ r12 = dither delta for this pixel
400 add r1, r1, r12 @ b = r1 + delta
401 add r11, r11, r12, lsl #1 @ r = r11 + delta*2
402 add r7, r7, r12, lsr #1 @ g = r7 + delta/2
404 orr r12, r1, r11, asr #1 @ check if clamping is needed...
405 orr r12, r12, r7 @ ...at all
406 movs r12, r12, asr #15 @ (skip branch missing in this view)
@ Saturation: sign/overflow bits select 0 (negative) or all-ones (too
@ large), masked back down to the channel's field.
408 movs r12, r1, asr #15 @ clamp b
409 mvnne r1, r12, lsr #15 @
410 andne r1, r1, #0x7c00 @ mask b only if clamped
411 movs r12, r11, asr #16 @ clamp r
412 mvnne r11, r12, lsr #16 @
413 movs r12, r7, asr #15 @ clamp g
414 mvnne r7, r12, lsr #15 @
@ Next luma fetched early to hide the load latency before packing.
417 ldrb r12, [r4, r3] @ r12 = Y' = *(Y'_p + stride)
419 and r11, r11, #0xf800 @ pack pixel
420 and r7, r7, #0x7e00 @ r1 = pixel = (r & 0xf800) |
421 orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
422 orr r1, r11, r1, lsr #10 @ (b >> 10)
425 strh r1, [r0], #LCD_WIDTH @ store pixel
@ -- pixel 2: chroma terms bu/rv/guv reused, second-line luma --
430 sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
431 add r12, r7, r7, asl #2 @
432 add r12, r12, r12, asl #4 @
433 add r7, r12, r7, asl #6 @
434 @ compute R, G, and B
435 add r1, r8, r7 @ r1 = b' = Y + bu
436 add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
437 rsb r7, r10, r7 @ r7 = g' = Y + guv
439 sub r12, r1, r1, lsr #5 @ r1 = 31/32*b' + b'/256
440 add r1, r12, r1, lsr #8 @
442 sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256
443 add r11, r12, r11, lsr #8 @
445 sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256
446 add r7, r12, r7, lsr #8 @
448 add r12, r14, #0x200 @ r12 = dither delta for this pixel
450 add r1, r1, r12 @ b = r1 + delta
451 add r11, r11, r12, lsl #1 @ r = r11 + delta*2
452 add r7, r7, r12, lsr #1 @ g = r7 + delta/2
454 orr r12, r1, r11, asr #1 @ check if clamping is needed...
455 orr r12, r12, r7 @ ...at all
456 movs r12, r12, asr #15 @ (skip branch missing in this view)
458 movs r12, r1, asr #15 @ clamp b
459 mvnne r1, r12, lsr #15 @
460 andne r1, r1, #0x7c00 @ mask b only if clamped
461 movs r12, r11, asr #16 @ clamp r
462 mvnne r11, r12, lsr #16 @
463 movs r12, r7, asr #15 @ clamp g
464 mvnne r7, r12, lsr #15 @
467 ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++)
469 and r11, r11, #0xf800 @ pack pixel
470 and r7, r7, #0x7e00 @ r1 = pixel = (r & 0xf800) |
471 orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
472 orr r1, r11, r1, lsr #10 @ (b >> 10)
475 strh r1, [r0, #-LCD_WIDTH-2] @ store pixel
@ NOTE(review): two consecutive stores of r1 -- likely two arms of a
@ dropped conditional (end-of-row vs. mid-row addressing); verify.
477 strh r1, [r0, #-2] @ store pixel
478 add r0, r0, #LCD_WIDTH @
@ -- pixel 3 --
481 sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
482 add r12, r7, r7, asl #2 @
483 add r12, r12, r12, asl #4 @
484 add r7, r12, r7, asl #6 @
485 @ compute R, G, and B
486 add r1, r8, r7 @ r1 = b' = Y + bu
487 add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
488 rsb r7, r10, r7 @ r7 = g' = Y + guv
490 @ r8 = bu, r9 = rv, r10 = guv
492 sub r12, r1, r1, lsr #5 @ r1 = 31/32*b' + b'/256
493 add r1, r12, r1, lsr #8 @
495 sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256
496 add r11, r12, r11, lsr #8 @
498 sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256
499 add r7, r12, r7, lsr #8 @
501 add r12, r14, #0x300 @ r12 = dither delta for this pixel
503 add r1, r1, r12 @ b = r1 + delta
504 add r11, r11, r12, lsl #1 @ r = r11 + delta*2
505 add r7, r7, r12, lsr #1 @ g = r7 + delta/2
507 orr r12, r1, r11, asr #1 @ check if clamping is needed...
508 orr r12, r12, r7 @ ...at all
509 movs r12, r12, asr #15 @ (skip branch missing in this view)
511 movs r12, r1, asr #15 @ clamp b
512 mvnne r1, r12, lsr #15 @
513 andne r1, r1, #0x7c00 @ mask b only if clamped
514 movs r12, r11, asr #16 @ clamp r
515 mvnne r11, r12, lsr #16 @
516 movs r12, r7, asr #15 @ clamp g
517 mvnne r7, r12, lsr #15 @
520 ldrb r12, [r4, r3] @ r12 = Y' = *(Y'_p + stride)
522 and r11, r11, #0xf800 @ pack pixel
523 and r7, r7, #0x7e00 @ r1 = pixel = (r & 0xf800) |
524 orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
525 orr r1, r11, r1, lsr #10 @ (b >> 10)
528 strh r1, [r0, #LCD_WIDTH]! @ store pixel
@ -- pixel 4: dither delta is r14 alone (kernel element for this slot) --
533 sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
534 add r12, r7, r7, asl #2 @
535 add r12, r12, r12, asl #4 @
536 add r7, r12, r7, asl #6 @
537 @ compute R, G, and B
538 add r1, r8, r7 @ r1 = b' = Y + bu
539 add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
540 rsb r7, r10, r7 @ r7 = g' = Y + guv
542 sub r12, r1, r1, lsr #5 @ r1 = 31/32*b + b/256
543 add r1, r12, r1, lsr #8 @
545 sub r12, r11, r11, lsr #5 @ r11 = 31/32*r + r/256
546 add r11, r12, r11, lsr #8 @
548 sub r12, r7, r7, lsr #6 @ r7 = 63/64*g + g/256
549 add r7, r12, r7, lsr #8 @
551 @ This element is zero - use r14 @
553 add r1, r1, r14 @ b = r1 + delta
554 add r11, r11, r14, lsl #1 @ r = r11 + delta*2
555 add r7, r7, r14, lsr #1 @ g = r7 + delta/2
557 orr r12, r1, r11, asr #1 @ check if clamping is needed...
558 orr r12, r12, r7 @ ...at all
559 movs r12, r12, asr #15 @ (skip branch missing in this view)
561 movs r12, r1, asr #15 @ clamp b
562 mvnne r1, r12, lsr #15 @
563 andne r1, r1, #0x7c00 @ mask b only if clamped
564 movs r12, r11, asr #16 @ clamp r
565 mvnne r11, r12, lsr #16 @
566 movs r12, r7, asr #15 @ clamp g
567 mvnne r7, r12, lsr #15 @
570 and r11, r11, #0xf800 @ pack pixel
571 and r7, r7, #0x7e00 @ r1 = pixel = (r & 0xf800) |
572 orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
573 orr r1, r11, r1, lsr #10 @ (b >> 10)
575 strh r1, [r0, #-2] @ store pixel
577 add r0, r0, #2*LCD_WIDTH @ advance dst (branch path; lines dropped)
579 add r0, r0, #LCD_WIDTH @ advance dst (alternate path)
582 subs r2, r2, #2 @ subtract block from width
583 bgt 10b @ loop line @
585 ldmfd sp!, { r4-r12, pc } @ restore registers and return
586 .ltorg @ dump constant pool
587 .size lcd_write_yuv420_lines_odither, .-lcd_write_yuv420_lines_odither