fix YUV Dither for the other targets too, like in r26064.
[kugel-rb.git] / firmware / target / arm / philips / hdd1630 / lcd-as-hdd1630.S
blob73ad84ae450299a8ccb52067a315830662ac1cf6
1 /***************************************************************************
2  *             __________               __   ___.
3  *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
4  *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
5  *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
6  *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
7  *                     \/            \/     \/    \/            \/
8  * $Id$
9  *
10  * Copyright (C) 2007-2008 by Michael Sevakis
11  *
12  * Philips HDD1630 LCD assembly routines
13  *
14  * This program is free software; you can redistribute it and/or
15  * modify it under the terms of the GNU General Public License
16  * as published by the Free Software Foundation; either version 2
17  * of the License, or (at your option) any later version.
18  *
19  * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
20  * KIND, either express or implied.
21  *
22  ****************************************************************************/
24 #include "config.h"
25 #include "cpu.h"
27 /****************************************************************************
28  * void lcd_write_yuv420_lines(unsigned char const * const src[3],
29  *                             int width,
30  *                             int stride);
31  *
    * Converts Y'CbCr samples to RGB565 and writes them to the LCD2 port.
    * src[0], src[1] and src[2] point to the Y', Cb and Cr samples. Each loop
    * iteration reads one Cb, one Cr and four Y' samples (two at Y'_p and two
    * at Y'_p + stride), emits four pixels and subtracts 2 from width; the
    * loop body always runs at least once and repeats while width > 0.
    * Every pixel is sent as two port writes, (pixel >> 8) | LCD2_DATA_MASK
    * followed by pixel | LCD2_DATA_MASK, each issued only after polling the
    * port until LCD2_BUSY_MASK reads as clear.
    *
32  *   |R|   |1.000000 -0.000001  1.402000| |Y'|
33  *   |G| = |1.000000 -0.334136 -0.714136| |Pb|
34  *   |B|   |1.000000  1.772000  0.000000| |Pr|
35  *   Scaled, normalized, rounded and tweaked to yield RGB 565:
36  *   |R|   |74   0 101| |Y' -  16| >> 9
37  *   |G| = |74 -24 -51| |Cb - 128| >> 8
38  *   |B|   |74 128   0| |Cr - 128| >> 9
39  *
40  * Write four RGB565 pixels in the following order on each loop:
41  * 1 3 + > down
42  * 2 4 \/ left
43  */
44     .section    .icode, "ax", %progbits
45     .align      2
46     .global     lcd_write_yuv420_lines
47     .type       lcd_write_yuv420_lines, %function
48 lcd_write_yuv420_lines:
49                                         @ r0 = yuv_src
50                                         @ r1 = width
51                                         @ r2 = stride
52     stmfd       sp!, { r4-r11, lr }     @ save non-scratch
53     ldmia       r0, { r4, r5, r6 }      @ r4 = yuv_src[0] = Y'_p
54                                         @ r5 = yuv_src[1] = Cb_p
55                                         @ r6 = yuv_src[2] = Cr_p
56                                         @
57     mov         r0, #0x7000000c         @ r0 = &LCD2_PORT = 0x70008a0c
58     add         r0, r0, #0x8a00         @
59     mov         r14, #LCD2_DATA_MASK    @ r14 = mask ORed into every port write
60                                         @ (lr was saved above, so r14 is free)
61     sub         r2, r2, #1              @ Adjust stride because of increment
62 10: @ loop line                         @
63     ldrb        r7, [r4], #1            @ r7 = *Y'_p++;
64     ldrb        r8, [r5], #1            @ r8 = *Cb_p++;
65     ldrb        r9, [r6], #1            @ r9 = *Cr_p++;
66                                         @
67     sub         r7, r7, #16             @ r7 = Y = (Y' - 16)*74
68     add         r12, r7, r7, asl #2     @ actually (Y' - 16)*37 and shift right
69     add         r7, r12, r7, asl #5     @ by one less when adding - same for all
70                                         @
71     sub         r8, r8, #128            @ Cb -= 128
72     sub         r9, r9, #128            @ Cr -= 128
73                                         @
74     add         r10, r9, r9, asl #1     @ r10 = Cr*51 + Cb*24
75     add         r10, r10, r10, asl #4   @ (Cr*3)*17 = Cr*51
76     add         r10, r10, r8, asl #3    @ + Cb*8
77     add         r10, r10, r8, asl #4    @ + Cb*16
78                                         @
79     add         r11, r9, r9, asl #2     @ r9 = Cr*101 (r11 is scratch here)
80     add         r11, r11, r9, asl #5    @ Cr*5 + Cr*32 = Cr*37
81     add         r9, r11, r9, asl #6     @ + Cr*64
82                                         @
83     add         r8, r8, #2              @ r8 = bu = (Cb*128 + 256) >> 9
84     mov         r8, r8, asr #2          @ computed as (Cb + 2) >> 2
85     add         r9, r9, #256            @ r9 = rv = (r9 + 256) >> 9
86     mov         r9, r9, asr #9          @
87     rsb         r10, r10, #128          @ r10 = guv = (-r10 + 128) >> 8
88     mov         r10, r10, asr #8        @
89                                         @ compute R, G, and B
90     add         r3, r8, r7, asr #8      @ r3 = b = (Y >> 9) + bu
91     add         r11, r9, r7, asr #8     @ r11 = r = (Y >> 9) + rv
92     add         r7, r10, r7, asr #7     @ r7  = g = (Y >> 8) + guv
93                                         @
94     orr         r12, r3, r11            @ check if clamping is needed...
95     orr         r12, r12, r7, asr #1    @ ...at all
96     cmp         r12, #31                @ unsigned: negative values also fail
97     bls         15f @ no clamp          @
98     cmp         r3, #31                 @ clamp b
99     mvnhi       r3, r3, asr #31         @ 0 if negative, else all ones
100     andhi       r3, r3, #31             @ -> 0 or 31
101     cmp         r11, #31                @ clamp r
102     mvnhi       r11, r11, asr #31       @
103     andhi       r11, r11, #31           @
104     cmp         r7, #63                 @ clamp g
105     mvnhi       r7, r7, asr #31         @
106     andhi       r7, r7, #63             @
107 15: @ no clamp                          @
108                                         @
109     ldrb        r12, [r4, r2]           @ r12 = Y' = *(Y'_p + stride)
110                                         @
111     orr         r3, r3, r11, lsl #11    @ r3 = b | (r << 11)
112     orr         r3, r3, r7, lsl #5      @ r3 |= (g << 5)
113                                         @
114     orr         r7, r14, r3, lsr #8     @ store pixel: r7 = mask | (pixel >> 8)
115     orr         r11, r14, r3            @ r11 = mask | pixel
116 20:                                     @ poll until LCD2_BUSY_MASK is clear
117     ldr         r3, [r0]                @
118     tst         r3, #LCD2_BUSY_MASK     @
119     bne         20b                     @
120     str         r7, [r0]                @ first write: pixel >> 8
121 20:                                     @ poll again before the second write
122     ldr         r3, [r0]                @
123     tst         r3, #LCD2_BUSY_MASK     @
124     bne         20b                     @
125     str         r11, [r0]               @ second write: unshifted pixel
126                                         @
127     sub         r7, r12, #16            @ r7 = Y = (Y' - 16)*74
128     add         r12, r7, r7, asl #2     @
129     add         r7, r12, r7, asl #5     @
130                                         @ compute R, G, and B
131     add         r3, r8, r7, asr #8      @ r3  = b = (Y >> 9) + bu
132     add         r11, r9, r7, asr #8     @ r11 = r = (Y >> 9) + rv
133     add         r7, r10, r7, asr #7     @ r7  = g = (Y >> 8) + guv
134                                         @
135     orr         r12, r3, r11            @ check if clamping is needed...
136     orr         r12, r12, r7, asr #1    @ ...at all
137     cmp         r12, #31                @
138     bls         15f @ no clamp          @
139     cmp         r3, #31                 @ clamp b
140     mvnhi       r3, r3, asr #31         @
141     andhi       r3, r3, #31             @
142     cmp         r11, #31                @ clamp r
143     mvnhi       r11, r11, asr #31       @
144     andhi       r11, r11, #31           @
145     cmp         r7, #63                 @ clamp g
146     mvnhi       r7, r7, asr #31         @
147     andhi       r7, r7, #63             @
148 15: @ no clamp                          @
149                                         @
150     ldrb        r12, [r4], #1           @ r12 = Y' = *(Y'_p++)
151                                         @
152     orr         r3, r3, r11, lsl #11    @ r3 = b | (r << 11)
153     orr         r3, r3, r7, lsl #5      @ r3 |= (g << 5)
154                                         @
155     orr         r7, r14, r3, lsr #8     @ store pixel
156     orr         r11, r14, r3            @
157 20:                                     @
158     ldr         r3, [r0]                @
159     tst         r3, #LCD2_BUSY_MASK     @
160     bne         20b                     @
161     str         r7, [r0]                @
162 20:                                     @
163     ldr         r3, [r0]                @
164     tst         r3, #LCD2_BUSY_MASK     @
165     bne         20b                     @
166     str         r11, [r0]               @
167                                         @
168     sub         r7, r12, #16            @ r7 = Y = (Y' - 16)*74
169     add         r12, r7, r7, asl #2     @
170     add         r7, r12, r7, asl #5     @
171                                         @ compute R, G, and B
172     add         r3, r8, r7, asr #8      @ r3  = b = (Y >> 9) + bu
173     add         r11, r9, r7, asr #8     @ r11 = r = (Y >> 9) + rv
174     add         r7, r10, r7, asr #7     @ r7  = g = (Y >> 8) + guv
175                                         @
176     orr         r12, r3, r11            @ check if clamping is needed...
177     orr         r12, r12, r7, asr #1    @ ...at all
178     cmp         r12, #31                @
179     bls         15f @ no clamp          @
180     cmp         r3, #31                 @ clamp b
181     mvnhi       r3, r3, asr #31         @
182     andhi       r3, r3, #31             @
183     cmp         r11, #31                @ clamp r
184     mvnhi       r11, r11, asr #31       @
185     andhi       r11, r11, #31           @
186     cmp         r7, #63                 @ clamp g
187     mvnhi       r7, r7, asr #31         @
188     andhi       r7, r7, #63             @
189 15: @ no clamp                          @
190                                         @
191     ldrb        r12, [r4, r2]           @ r12 = Y' = *(Y'_p + stride)
192                                         @
193     orr         r3, r3, r7, lsl #5      @ r3 = b | (g << 5)
194     orr         r3, r3, r11, lsl #11    @ r3 |= (r << 11)
195                                         @
196     orr         r7, r14, r3, lsr #8     @ store pixel
197     orr         r11, r14, r3            @
198 20:                                     @
199     ldr         r3, [r0]                @
200     tst         r3, #LCD2_BUSY_MASK     @
201     bne         20b                     @
202     str         r7, [r0]                @
203 20:                                     @
204     ldr         r3, [r0]                @
205     tst         r3, #LCD2_BUSY_MASK     @
206     bne         20b                     @
207     str         r11, [r0]               @
208                                         @
209     sub         r7, r12, #16            @ r7 = Y = (Y' - 16)*74
210     add         r12, r7, r7, asl #2     @
211     add         r7, r12, r7, asl #5     @
212                                         @ compute R, G, and B
213     add         r3, r8, r7, asr #8      @ r3  = b = (Y >> 9) + bu
214     add         r11, r9, r7, asr #8     @ r11 = r = (Y >> 9) + rv
215     add         r7, r10, r7, asr #7     @ r7  = g = (Y >> 8) + guv
216                                         @
217     orr         r12, r3, r11            @ check if clamping is needed...
218     orr         r12, r12, r7, asr #1    @ ...at all
219     cmp         r12, #31                @
220     bls         15f @ no clamp          @
221     cmp         r3, #31                 @ clamp b
222     mvnhi       r3, r3, asr #31         @
223     andhi       r3, r3, #31             @
224     cmp         r11, #31                @ clamp r
225     mvnhi       r11, r11, asr #31       @
226     andhi       r11, r11, #31           @
227     cmp         r7, #63                 @ clamp g
228     mvnhi       r7, r7, asr #31         @
229     andhi       r7, r7, #63             @
230 15: @ no clamp                          @
231                                         @
232     orr         r3, r3, r11, lsl #11    @ r3 = b | (r << 11)
233     orr         r3, r3, r7, lsl #5      @ r3 |= (g << 5)
234                                         @
235     orr         r7, r14, r3, lsr #8     @ store pixel
236     orr         r11, r14, r3            @
237 20:                                     @
238     ldr         r3, [r0]                @
239     tst         r3, #LCD2_BUSY_MASK     @
240     bne         20b                     @
241     str         r7, [r0]                @
242 20:                                     @
243     ldr         r3, [r0]                @
244     tst         r3, #LCD2_BUSY_MASK     @
245     bne         20b                     @
246     str         r11, [r0]               @
247                                         @
248     subs        r1, r1, #2              @ subtract block from width
249     bgt         10b @ loop line         @ repeat while width > 0
250                                         @
251     ldmfd       sp!, { r4-r11, pc }     @ restore registers and return
252     .ltorg                              @ dump constant pool
253     .size   lcd_write_yuv420_lines, .-lcd_write_yuv420_lines
256 /****************************************************************************
257  * void lcd_write_yuv420_lines_odither(unsigned char const * const src[3],
258  *                                     int width,
259  *                                     int stride,
260  *                                     int x_screen,
261  *                                     int y_screen);
     *
     * Same conversion and port write sequence as lcd_write_yuv420_lines, but a
     * dither offset (delta) is added to each colour component before it is
     * clamped and truncated to RGB565. The delta is a quadrant value kept in
     * r14 (0x00 or 0x80, seeded from (x_screen ^ y_screen) & 2 and flipped on
     * every loop iteration) plus 0x200, 0, 0x100 and 0x300 for the four
     * pixels of an iteration, in write order. y_screen is read from the stack.
     *
263  *   |R|   |1.000000 -0.000001  1.402000| |Y'|
264  *   |G| = |1.000000 -0.334136 -0.714136| |Pb|
265  *   |B|   |1.000000  1.772000  0.000000| |Pr|
266  *   Red scaled at twice g & b but at same precision to place it in correct
267  *   bit position after multiply and leave instruction count lower.
268  *   |R|   |258   0  408| |Y' -  16|
269  *   |G| = |149 -49 -104| |Cb - 128|
270  *   |B|   |149 258    0| |Cr - 128|
272  * Write four RGB565 pixels in the following order on each loop:
273  * 1 3 + > down
274  * 2 4 \/ left
276  * Kernel pattern (raw|use order):
277  * 5 3 4 2     row0    row2         > down
278  * 1 7 0 6 | 5 1 3 7 4 0 2 6 col0     left
279  * 4 2 5 3 | 4 0 2 6 5 1 3 7 col2  \/
280  * 0 6 1 7
281  */
282     .section    .icode, "ax", %progbits
283     .align      2
284     .global     lcd_write_yuv420_lines_odither
285     .type       lcd_write_yuv420_lines_odither, %function
286 lcd_write_yuv420_lines_odither:
287                                         @ r0   = yuv_src
288                                         @ r1   = width
289                                         @ r2   = stride
290                                         @ r3   = x_screen
291                                         @ [sp] = y_screen
292     stmfd       sp!, { r4-r11, lr }     @ save non-scratch
293     ldmia       r0, { r4, r5, r6 }      @ r4 = yuv_src[0] = Y'_p
294                                         @ r5 = yuv_src[1] = Cb_p
295                                         @ r6 = yuv_src[2] = Cr_p
296                                         @
297     ldr         r0, [sp, #36]           @ r0 = y_screen (above 9 saved words)
298     eor         r14, r3, r0             @ Line up pattern and kernel quadrant
299     and         r14, r14, #0x2          @ keep bit 1 of x_screen ^ y_screen
300     mov         r14, r14, lsl #6        @ 0x00 or 0x80
301                                         @
302     mov         r0, #0x7000000c         @ r0 = &LCD2_PORT = 0x70008a0c
303     add         r0, r0, #0x8a00         @
304                                         @
305     sub         r2, r2, #1              @ Adjust stride because of increment
306 10: @ loop line                         @
307                                         @
308     ldrb        r7, [r4], #1            @ r7 = *Y'_p++;
309     ldrb        r8, [r5], #1            @ r8 = *Cb_p++;
310     ldrb        r9, [r6], #1            @ r9 = *Cr_p++;
311                                         @
312     eor         r14, r14, #0x80         @ flip pattern quadrant
313                                         @
314     sub         r7, r7, #16             @ r7 = Y = (Y' - 16)*149
315     add         r12, r7, r7, asl #2     @ *5
316     add         r12, r12, r12, asl #4   @ *5*17 = *85
317     add         r7, r12, r7, asl #6     @ *85 + *64 = *149
318                                         @
319     sub         r8, r8, #128            @ Cb -= 128
320     sub         r9, r9, #128            @ Cr -= 128
321                                         @
322     add         r10, r8, r8, asl #4     @ r10 = guv = Cr*104 + Cb*49
323     add         r10, r10, r8, asl #5    @ Cb*17 + Cb*32 = Cb*49
324     add         r10, r10, r9, asl #3    @ + Cr*8
325     add         r10, r10, r9, asl #5    @ + Cr*32
326     add         r10, r10, r9, asl #6    @ + Cr*64
327                                         @
328     mov         r8, r8, asl #1          @ r8 = bu = Cb*258
329     add         r8, r8, r8, asl #7      @ (Cb*2)*129
330                                         @
331     add         r9, r9, r9, asl #1      @ r9 = rv = Cr*408
332     add         r9, r9, r9, asl #4      @ (Cr*3)*17 = Cr*51
333     mov         r9, r9, asl #3          @ *8
334                                         @
335                                         @ compute R, G, and B
336     add         r3, r8, r7              @ r3  = b' = Y + bu
337     add         r11, r9, r7, asl #1     @ r11 = r' = Y*2 + rv
338     rsb         r7, r10, r7             @ r7  = g' = Y - guv
339                                         @
340                                         @ r8 = bu, r9 = rv, r10 = guv
341                                         @
342     sub         r12, r3, r3, lsr #5     @ r3 = 31/32*b + b/256
343     add         r3, r12, r3, lsr #8     @
344                                         @
345     sub         r12, r11, r11, lsr #5   @ r11 = 31/32*r + r/256
346     add         r11, r12, r11, lsr #8   @
347                                         @
348     sub         r12, r7, r7, lsr #6     @ r7 = 63/64*g + g/256
349     add         r7, r12, r7, lsr #8     @
350                                         @
351     add         r12, r14, #0x200        @ r12 = delta = quadrant + 0x200
352                                         @
353     add         r3, r3, r12             @ b = r3 + delta
354     add         r11, r11, r12, lsl #1   @ r = r11 + delta*2
355     add         r7, r7, r12, lsr #1     @ g = r7 + delta/2
356                                         @
357     orr         r12, r3, r11, asr #1    @ check if clamping is needed...
358     orr         r12, r12, r7            @ ...at all
359     movs        r12, r12, asr #15       @ nonzero if any value left its range
360     beq         15f @ no clamp          @
361     movs        r12, r3, asr #15        @ clamp b
362     mvnne       r3, r12, lsr #15        @
363     andne       r3, r3, #0x7c00         @ mask b only if clamped
364     movs        r12, r11, asr #16       @ clamp r
365     mvnne       r11, r12, lsr #16       @
366     movs        r12, r7, asr #15        @ clamp g
367     mvnne       r7, r12, lsr #15        @
368 15: @ no clamp                          @
369                                         @
370     ldrb        r12, [r4, r2]           @ r12 = Y' = *(Y'_p + stride)
371                                         @
372     and         r11, r11, #0xf800       @ pack pixel
373     and         r7, r7, #0x7e00         @ r3 = pixel = (r & 0xf800) |
374     orr         r11, r11, r7, lsr #4    @              ((g & 0x7e00) >> 4) |
375     orr         r3, r11, r3, lsr #10    @              (b >> 10)
376                                         @
377     mov         r11, #LCD2_DATA_MASK    @ store pixel
378     orr         r7, r11, r3, lsr #8     @ r7 = mask | (pixel >> 8)
379     orr         r11, r11, r3            @ r11 = mask | pixel
380 20:                                     @ poll until LCD2_BUSY_MASK is clear
381     ldr         r3, [r0]                @
382     tst         r3, #LCD2_BUSY_MASK     @
383     bne         20b                     @
384     str         r7, [r0]                @ first write: pixel >> 8
385 20:                                     @ poll again before the second write
386     ldr         r3, [r0]                @
387     tst         r3, #LCD2_BUSY_MASK     @
388     bne         20b                     @
389     str         r11, [r0]               @ second write: unshifted pixel
390                                         @
391     sub         r7, r12, #16            @ r7 = Y = (Y' - 16)*149
392     add         r12, r7, r7, asl #2     @
393     add         r12, r12, r12, asl #4   @
394     add         r7, r12, r7, asl #6     @
395                                         @ compute R, G, and B
396     add         r3, r8, r7              @ r3  = b' = Y + bu
397     add         r11, r9, r7, asl #1     @ r11 = r' = Y*2 + rv
398     rsb         r7, r10, r7             @ r7  = g' = Y - guv
399                                         @
400     sub         r12, r3, r3, lsr #5     @ r3  = 31/32*b' + b'/256
401     add         r3, r12, r3, lsr #8     @
402                                         @
403     sub         r12, r11, r11, lsr #5   @ r11 = 31/32*r' + r'/256
404     add         r11, r12, r11, lsr #8   @
405                                         @
406     sub         r12, r7, r7, lsr #6     @ r7  = 63/64*g' + g'/256
407     add         r7, r12, r7, lsr #8     @
408                                         @
409     @ This element is zero - use r14    @
410                                         @
411     add         r3, r3, r14             @ b = r3 + delta
412     add         r11, r11, r14, lsl #1   @ r = r11 + delta*2
413     add         r7, r7, r14, lsr #1     @ g = r7 + delta/2
414                                         @
415     orr         r12, r3, r11, asr #1    @ check if clamping is needed...
416     orr         r12, r12, r7            @ ...at all
417     movs        r12, r12, asr #15       @
418     beq         15f @ no clamp          @
419     movs        r12, r3, asr #15        @ clamp b
420     mvnne       r3, r12, lsr #15        @
421     andne       r3, r3, #0x7c00         @ mask b only if clamped
422     movs        r12, r11, asr #16       @ clamp r
423     mvnne       r11, r12, lsr #16       @
424     movs        r12, r7, asr #15        @ clamp g
425     mvnne       r7, r12, lsr #15        @
426 15: @ no clamp                          @
427                                         @
428     ldrb        r12, [r4], #1           @ r12 = Y' = *(Y'_p++)
429                                         @
430     and         r11, r11, #0xf800       @ pack pixel
431     and         r7, r7, #0x7e00         @ r3 = pixel = (r & 0xf800) |
432     orr         r11, r11, r7, lsr #4    @              ((g & 0x7e00) >> 4) |
433     orr         r3, r11, r3, lsr #10    @              (b >> 10)
434                                         @
435     mov         r11, #LCD2_DATA_MASK    @ store pixel
436     orr         r7, r11, r3, lsr #8     @
437     orr         r11, r11, r3            @
438 20:                                     @
439     ldr         r3, [r0]                @
440     tst         r3, #LCD2_BUSY_MASK     @
441     bne         20b                     @
442     str         r7, [r0]                @
443 20:                                     @
444     ldr         r3, [r0]                @
445     tst         r3, #LCD2_BUSY_MASK     @
446     bne         20b                     @
447     str         r11, [r0]               @
448                                         @
449     sub         r7, r12, #16            @ r7 = Y = (Y' - 16)*149
450     add         r12, r7, r7, asl #2     @
451     add         r12, r12, r12, asl #4   @
452     add         r7, r12, r7, asl #6     @
453                                         @ compute R, G, and B
454     add         r3, r8, r7              @ r3  = b' = Y + bu
455     add         r11, r9, r7, asl #1     @ r11 = r' = Y*2 + rv
456     rsb         r7, r10, r7             @ r7  = g' = Y - guv
457                                         @
458                                         @ r8 = bu, r9 = rv, r10 = guv
459                                         @
460     sub         r12, r3, r3, lsr #5     @ r3  = 31/32*b' + b'/256
461     add         r3, r12, r3, lsr #8     @
462                                         @
463     sub         r12, r11, r11, lsr #5   @ r11 = 31/32*r' + r'/256
464     add         r11, r12, r11, lsr #8   @
465                                         @
466     sub         r12, r7, r7, lsr #6     @ r7  = 63/64*g' + g'/256
467     add         r7, r12, r7, lsr #8     @
468                                         @
469     add         r12, r14, #0x100        @ r12 = delta = quadrant + 0x100
470                                         @
471     add         r3, r3, r12             @ b = r3 + delta
472     add         r11, r11, r12, lsl #1   @ r = r11 + delta*2
473     add         r7, r7, r12, lsr #1     @ g = r7 + delta/2
474                                         @
475     orr         r12, r3, r11, asr #1    @ check if clamping is needed...
476     orr         r12, r12, r7            @ ...at all
477     movs        r12, r12, asr #15       @
478     beq         15f @ no clamp          @
479     movs        r12, r3, asr #15        @ clamp b
480     mvnne       r3, r12, lsr #15        @
481     andne       r3, r3, #0x7c00         @ mask b only if clamped
482     movs        r12, r11, asr #16       @ clamp r
483     mvnne       r11, r12, lsr #16       @
484     movs        r12, r7, asr #15        @ clamp g
485     mvnne       r7, r12, lsr #15        @
486 15: @ no clamp                          @
487                                         @
488     ldrb        r12, [r4, r2]           @ r12 = Y' = *(Y'_p + stride)
489                                         @
490     and         r11, r11, #0xf800       @ pack pixel
491     and         r7, r7, #0x7e00         @ r3 = pixel = (r & 0xf800) |
492     orr         r11, r11, r7, lsr #4    @              ((g & 0x7e00) >> 4) |
493     orr         r3, r11, r3, lsr #10    @              (b >> 10)
494                                         @
495     mov         r11, #LCD2_DATA_MASK    @ store pixel
496     orr         r7, r11, r3, lsr #8     @
497     orr         r11, r11, r3            @
498 20:                                     @
499     ldr         r3, [r0]                @
500     tst         r3, #LCD2_BUSY_MASK     @
501     bne         20b                     @
502     str         r7, [r0]                @
503 20:                                     @
504     ldr         r3, [r0]                @
505     tst         r3, #LCD2_BUSY_MASK     @
506     bne         20b                     @
507     str         r11, [r0]               @
508                                         @
509     sub         r7, r12, #16            @ r7 = Y = (Y' - 16)*149
510     add         r12, r7, r7, asl #2     @
511     add         r12, r12, r12, asl #4   @
512     add         r7, r12, r7, asl #6     @
513                                         @ compute R, G, and B
514     add         r3, r8, r7              @ r3  = b' = Y + bu
515     add         r11, r9, r7, asl #1     @ r11 = r' = Y*2 + rv
516     rsb         r7, r10, r7             @ r7  = g' = Y - guv
517                                         @
518     sub         r12, r3, r3, lsr #5     @ r3 = 31/32*b + b/256
519     add         r3, r12, r3, lsr #8     @
520                                         @
521     sub         r12, r11, r11, lsr #5   @ r11 = 31/32*r + r/256
522     add         r11, r12, r11, lsr #8   @
523                                         @
524     sub         r12, r7, r7, lsr #6     @ r7 = 63/64*g + g/256
525     add         r7, r12, r7, lsr #8     @
526                                         @
527     add         r12, r14, #0x300        @ r12 = delta = quadrant + 0x300
528                                         @
529     add         r3, r3, r12             @ b = r3 + delta
530     add         r11, r11, r12, lsl #1   @ r = r11 + delta*2
531     add         r7, r7, r12, lsr #1     @ g = r7 + delta/2
532                                         @
533     orr         r12, r3, r11, asr #1    @ check if clamping is needed...
534     orr         r12, r12, r7            @ ...at all
535     movs        r12, r12, asr #15       @
536     beq         15f @ no clamp          @
537     movs        r12, r3, asr #15        @ clamp b
538     mvnne       r3, r12, lsr #15        @
539     andne       r3, r3, #0x7c00         @ mask b only if clamped
540     movs        r12, r11, asr #16       @ clamp r
541     mvnne       r11, r12, lsr #16       @
542     movs        r12, r7, asr #15        @ clamp g
543     mvnne       r7, r12, lsr #15        @
544 15: @ no clamp                          @
545                                         @
546     and         r11, r11, #0xf800       @ pack pixel
547     and         r7, r7, #0x7e00         @ r3 = pixel = (r & 0xf800) |
548     orr         r11, r11, r7, lsr #4    @              ((g & 0x7e00) >> 4) |
549     orr         r3, r11, r3, lsr #10    @              (b >> 10)
550                                         @
551     mov         r11, #LCD2_DATA_MASK    @ store pixel
552     orr         r7, r11, r3, lsr #8     @
553     orr         r11, r11, r3            @
554 20:                                     @
555     ldr         r3, [r0]                @
556     tst         r3, #LCD2_BUSY_MASK     @
557     bne         20b                     @
558     str         r7, [r0]                @
559 20:                                     @
560     ldr         r3, [r0]                @
561     tst         r3, #LCD2_BUSY_MASK     @
562     bne         20b                     @
563     str         r11, [r0]               @
564                                         @
565     subs        r1, r1, #2              @ subtract block from width
566     bgt         10b @ loop line         @ repeat while width > 0
567                                         @
568     ldmfd       sp!, { r4-r11, pc }     @ restore registers and return
569     .ltorg                              @ dump constant pool
570     .size   lcd_write_yuv420_lines_odither, .-lcd_write_yuv420_lines_odither