YUV Dither: r12 saving was removed but stacked parameter load offset wasn't changed...
[kugel-rb.git] / firmware / target / arm / iriver / h10 / lcd-as-h10.S
blobb3f12e46b664b73c105ca340f5f76107dc1823dc
1 /***************************************************************************
2  *             __________               __   ___.
3  *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
4  *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
5  *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
6  *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
7  *                     \/            \/     \/    \/            \/
8  * $Id$
9  *
10  * Copyright (C) 2007-2008 by Michael Sevakis
11  *
12  * H10 20GB LCD assembly routines
13  *
14  * This program is free software; you can redistribute it and/or
15  * modify it under the terms of the GNU General Public License
16  * as published by the Free Software Foundation; either version 2
17  * of the License, or (at your option) any later version.
18  *
19  * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
20  * KIND, either express or implied.
21  *
22  ****************************************************************************/
24 #include "config.h"
25 #include "cpu.h"
27 /****************************************************************************
28  * void lcd_write_yuv420_lines(unsigned char const * const src[3],
29  *                             int width,
30  *                             int stride);
31  *
32  *   |R|   |1.000000 -0.000001  1.402000| |Y'|
33  *   |G| = |1.000000 -0.334136 -0.714136| |Pb|
34  *   |B|   |1.000000  1.772000  0.000000| |Pr|
35  *   Scaled, normalized, rounded and tweaked to yield RGB 565:
36  *   |R|   |74   0 101| |Y' -  16| >> 9
37  *   |G| = |74 -24 -51| |Cb - 128| >> 8
38  *   |B|   |74 128   0| |Cr - 128| >> 9
39  *
40  * Write four RGB565 pixels in the following order on each loop:
41  * 1 3 + > down
42  * 2 4 \/ left
43  */
44     .section    .icode, "ax", %progbits
45     .align      2
46     .global     lcd_write_yuv420_lines
47     .type       lcd_write_yuv420_lines, %function
48 lcd_write_yuv420_lines:
49                                         @ r0 = yuv_src
50                                         @ r1 = width
51                                         @ r2 = stride
52     stmfd       sp!, { r4-r11, lr }     @ save non-scratch
53     ldmia       r0, { r4, r5, r6 }      @ r4 = yuv_src[0] = Y'_p
54                                         @ r5 = yuv_src[1] = Cb_p
55                                         @ r6 = yuv_src[2] = Cr_p
56                                         @
57     mov         r0, #0x7000000c         @ r0 = &LCD2_PORT = 0x70008a0c
58     add         r0, r0, #0x8a00         @
59     mov         r14, #LCD2_DATA_MASK    @ r14 = mask ORed into each data write
60                                         @
61     sub         r2, r2, #1              @ Adjust stride because of increment
62 10: @ loop line                         @
63     ldrb        r7, [r4], #1            @ r7 = *Y'_p++;
64     ldrb        r8, [r5], #1            @ r8 = *Cb_p++;
65     ldrb        r9, [r6], #1            @ r9 = *Cr_p++;
66                                         @ pixel 1
67     sub         r7, r7, #16             @ r7 = Y = (Y' - 16)*74
68     add         r12, r7, r7, asl #2     @ actually (Y' - 16)*37 and shift right
69     add         r7, r12, r7, asl #5     @ by one less when adding - same for all
70                                         @
71     sub         r8, r8, #128            @ Cb -= 128
72     sub         r9, r9, #128            @ Cr -= 128
73                                         @
74     add         r10, r9, r9, asl #1     @ r10 = Cr*51 + Cb*24
75     add         r10, r10, r10, asl #4   @
76     add         r10, r10, r8, asl #3    @
77     add         r10, r10, r8, asl #4    @
78                                         @
79     add         r11, r9, r9, asl #2     @ r9 = Cr*101
80     add         r11, r11, r9, asl #5    @
81     add         r9, r11, r9, asl #6     @
82                                         @
83     add         r8, r8, #2              @ r8 = bu = (Cb*128 + 256) >> 9
84     mov         r8, r8, asr #2          @    = (Cb + 2) >> 2
85     add         r9, r9, #256            @ r9 = rv = (r9 + 256) >> 9
86     mov         r9, r9, asr #9          @
87     rsb         r10, r10, #128          @ r10 = guv = (-r10 + 128) >> 8
88     mov         r10, r10, asr #8        @
89                                         @ compute R, G, and B
90     add         r3, r8, r7, asr #8      @ r3 = b = (Y >> 9) + bu
91     add         r11, r9, r7, asr #8     @ r11 = r = (Y >> 9) + rv
92     add         r7, r10, r7, asr #7     @ r7  = g = (Y >> 8) + guv
93                                         @
94     orr         r12, r3, r11            @ check if clamping is needed...
95     orr         r12, r12, r7, asr #1    @ ...at all
96     cmp         r12, #31                @
97     bls         15f @ no clamp          @
98     cmp         r3, #31                 @ clamp b
99     mvnhi       r3, r3, asr #31         @ negative -> ~(-1) = 0, else ~0
100     andhi       r3, r3, #31             @ ...so result is 0 or 31
101     cmp         r11, #31                @ clamp r
102     mvnhi       r11, r11, asr #31       @
103     andhi       r11, r11, #31           @
104     cmp         r7, #63                 @ clamp g
105     mvnhi       r7, r7, asr #31         @
106     andhi       r7, r7, #63             @
107 15: @ no clamp                          @
108                                         @
109     ldrb        r12, [r4, r2]           @ r12 = Y' = *(Y'_p + stride)
110                                         @
111     orr         r3, r3, r11, lsl #11    @ r3 = b | (r << 11)
112     orr         r3, r3, r7, lsl #5      @ r3 |= (g << 5)
113                                         @
114     orr         r7, r14, r3, lsr #8     @ store pixel: r7 = mask | (pixel >> 8)
115     orr         r11, r14, r3            @              r11 = mask | pixel
116 20:                                     @ poll port until busy bit is clear
117     ldr         r3, [r0]                @
118     tst         r3, #LCD2_BUSY_MASK     @
119     bne         20b                     @
120     str         r7, [r0]                @ write r7 first, then r11
121     str         r11, [r0]               @
122                                         @ pixel 2 (bu, rv, guv reused)
123     sub         r7, r12, #16            @ r7 = Y = (Y' - 16)*74
124     add         r12, r7, r7, asl #2     @
125     add         r7, r12, r7, asl #5     @
126                                         @ compute R, G, and B
127     add         r3, r8, r7, asr #8      @ r3  = b = (Y >> 9) + bu
128     add         r11, r9, r7, asr #8     @ r11 = r = (Y >> 9) + rv
129     add         r7, r10, r7, asr #7     @ r7  = g = (Y >> 8) + guv
130                                         @
131     orr         r12, r3, r11            @ check if clamping is needed...
132     orr         r12, r12, r7, asr #1    @ ...at all
133     cmp         r12, #31                @
134     bls         15f @ no clamp          @
135     cmp         r3, #31                 @ clamp b
136     mvnhi       r3, r3, asr #31         @
137     andhi       r3, r3, #31             @
138     cmp         r11, #31                @ clamp r
139     mvnhi       r11, r11, asr #31       @
140     andhi       r11, r11, #31           @
141     cmp         r7, #63                 @ clamp g
142     mvnhi       r7, r7, asr #31         @
143     andhi       r7, r7, #63             @
144 15: @ no clamp                          @
145                                         @
146     ldrb        r12, [r4], #1           @ r12 = Y' = *(Y'_p++)
147                                         @
148     orr         r3, r3, r11, lsl #11    @ r3 = b | (r << 11)
149     orr         r3, r3, r7, lsl #5      @ r3 |= (g << 5)
150                                         @
151     orr         r7, r14, r3, lsr #8     @ store pixel
152     orr         r11, r14, r3            @
153 20:                                     @ poll port until busy bit is clear
154     ldr         r3, [r0]                @
155     tst         r3, #LCD2_BUSY_MASK     @
156     bne         20b                     @
157     str         r7, [r0]                @
158     str         r11, [r0]               @
159                                         @ pixel 3 (bu, rv, guv reused)
160     sub         r7, r12, #16            @ r7 = Y = (Y' - 16)*74
161     add         r12, r7, r7, asl #2     @
162     add         r7, r12, r7, asl #5     @
163                                         @ compute R, G, and B
164     add         r3, r8, r7, asr #8      @ r3  = b = (Y >> 9) + bu
165     add         r11, r9, r7, asr #8     @ r11 = r = (Y >> 9) + rv
166     add         r7, r10, r7, asr #7     @ r7  = g = (Y >> 8) + guv
167                                         @
168     orr         r12, r3, r11            @ check if clamping is needed...
169     orr         r12, r12, r7, asr #1    @ ...at all
170     cmp         r12, #31                @
171     bls         15f @ no clamp          @
172     cmp         r3, #31                 @ clamp b
173     mvnhi       r3, r3, asr #31         @
174     andhi       r3, r3, #31             @
175     cmp         r11, #31                @ clamp r
176     mvnhi       r11, r11, asr #31       @
177     andhi       r11, r11, #31           @
178     cmp         r7, #63                 @ clamp g
179     mvnhi       r7, r7, asr #31         @
180     andhi       r7, r7, #63             @
181 15: @ no clamp                          @
182                                         @
183     ldrb        r12, [r4, r2]           @ r12 = Y' = *(Y'_p + stride)
184                                         @
185     orr         r3, r3, r7, lsl #5      @ r3 = b | (g << 5)
186     orr         r3, r3, r11, lsl #11    @ r3 |= (r << 11)
187                                         @
188     orr         r7, r14, r3, lsr #8     @ store pixel
189     orr         r11, r14, r3            @
190 20:                                     @ poll port until busy bit is clear
191     ldr         r3, [r0]                @
192     tst         r3, #LCD2_BUSY_MASK     @
193     bne         20b                     @
194     str         r7, [r0]                @
195     str         r11, [r0]               @
196                                         @ pixel 4 (bu, rv, guv reused)
197     sub         r7, r12, #16            @ r7 = Y = (Y' - 16)*74
198     add         r12, r7, r7, asl #2     @
199     add         r7, r12, r7, asl #5     @
200                                         @ compute R, G, and B
201     add         r3, r8, r7, asr #8      @ r3  = b = (Y >> 9) + bu
202     add         r11, r9, r7, asr #8     @ r11 = r = (Y >> 9) + rv
203     add         r7, r10, r7, asr #7     @ r7  = g = (Y >> 8) + guv
204                                         @
205     orr         r12, r3, r11            @ check if clamping is needed...
206     orr         r12, r12, r7, asr #1    @ ...at all
207     cmp         r12, #31                @
208     bls         15f @ no clamp          @
209     cmp         r3, #31                 @ clamp b
210     mvnhi       r3, r3, asr #31         @
211     andhi       r3, r3, #31             @
212     cmp         r11, #31                @ clamp r
213     mvnhi       r11, r11, asr #31       @
214     andhi       r11, r11, #31           @
215     cmp         r7, #63                 @ clamp g
216     mvnhi       r7, r7, asr #31         @
217     andhi       r7, r7, #63             @
218 15: @ no clamp                          @
219                                         @
220     orr         r3, r3, r11, lsl #11    @ r3 = b | (r << 11)
221     orr         r3, r3, r7, lsl #5      @ r3 |= (g << 5)
222                                         @
223     orr         r7, r14, r3, lsr #8     @ store pixel
224     orr         r11, r14, r3            @
225 20:                                     @ poll port until busy bit is clear
226     ldr         r3, [r0]                @
227     tst         r3, #LCD2_BUSY_MASK     @
228     bne         20b                     @
229     str         r7, [r0]                @
230     str         r11, [r0]               @
231                                         @
232     subs        r1, r1, #2              @ subtract block from width
233     bgt         10b @ loop line         @
234                                         @
235     ldmfd       sp!, { r4-r11, pc }     @ restore registers and return
236     .ltorg                              @ dump constant pool
237     .size   lcd_write_yuv420_lines, .-lcd_write_yuv420_lines
240 /****************************************************************************
241  * void lcd_write_yuv420_lines_odither(unsigned char const * const src[3],
242  *                                     int width,
243  *                                     int stride,
244  *                                     int x_screen,
245  *                                     int y_screen);
247  *   |R|   |1.000000 -0.000001  1.402000| |Y'|
248  *   |G| = |1.000000 -0.334136 -0.714136| |Pb|
249  *   |B|   |1.000000  1.772000  0.000000| |Pr|
250  *   Red scaled at twice g & b but at same precision to place it in correct
251  *   bit position after multiply and leave instruction count lower.
252  *   |R|   |258   0  408| |Y' -  16|
253  *   |G| = |149 -49 -104| |Cb - 128|
254  *   |B|   |149 258    0| |Cr - 128|
256  * Write four RGB565 pixels in the following order on each loop:
257  * 1 3 + > down
258  * 2 4 \/ left
260  * Kernel pattern (raw|use order):
261  * 5 3 4 2     row0    row2         > down
262  * 1 7 0 6 | 5 1 3 7 4 0 2 6 col0     left
263  * 4 2 5 3 | 4 0 2 6 5 1 3 7 col2  \/
264  * 0 6 1 7
265  */
266     .section    .icode, "ax", %progbits
267     .align      2
268     .global     lcd_write_yuv420_lines_odither
269     .type       lcd_write_yuv420_lines_odither, %function
270 lcd_write_yuv420_lines_odither:
271                                         @ r0   = yuv_src
272                                         @ r1   = width
273                                         @ r2   = stride
274                                         @ r3   = x_screen
275                                         @ [sp] = y_screen
276     stmfd       sp!, { r4-r11, lr }     @ save non-scratch (9 regs = 36 bytes)
277     ldmia       r0, { r4, r5, r6 }      @ r4 = yuv_src[0] = Y'_p
278                                         @ r5 = yuv_src[1] = Cb_p
279                                         @ r6 = yuv_src[2] = Cr_p
280                                         @
281     ldr         r0, [sp, #36]           @ r0 = y_screen (above the saved regs)
282     eor         r14, r3, r0             @ Line up pattern and kernel quadrant
283     and         r14, r14, #0x2          @ r14 = (x_screen ^ y_screen) & 2
284     mov         r14, r14, lsl #6        @ 0x00 or 0x80
285                                         @
286     mov         r0, #0x7000000c         @ r0 = &LCD2_PORT = 0x70008a0c
287     add         r0, r0, #0x8a00         @
288                                         @
289     sub         r2, r2, #1              @ Adjust stride because of increment
290 10: @ loop line                         @
291                                         @
292     ldrb        r7, [r4], #1            @ r7 = *Y'_p++;
293     ldrb        r8, [r5], #1            @ r8 = *Cb_p++;
294     ldrb        r9, [r6], #1            @ r9 = *Cr_p++;
295                                         @
296     eor         r14, r14, #0x80         @ flip pattern quadrant
297                                         @ pixel 1
298     sub         r7, r7, #16             @ r7 = Y = (Y' - 16)*149
299     add         r12, r7, r7, asl #2     @
300     add         r12, r12, r12, asl #4   @
301     add         r7, r12, r7, asl #6     @
302                                         @
303     sub         r8, r8, #128            @ Cb -= 128
304     sub         r9, r9, #128            @ Cr -= 128
305                                         @
306     add         r10, r8, r8, asl #4     @ r10 = guv = Cr*104 + Cb*49
307     add         r10, r10, r8, asl #5    @
308     add         r10, r10, r9, asl #3    @
309     add         r10, r10, r9, asl #5    @
310     add         r10, r10, r9, asl #6    @
311                                         @
312     mov         r8, r8, asl #1          @ r8 = bu = Cb*258
313     add         r8, r8, r8, asl #7      @
314                                         @
315     add         r9, r9, r9, asl #1      @ r9 = rv = Cr*408
316     add         r9, r9, r9, asl #4      @
317     mov         r9, r9, asl #3          @
318                                         @
319                                         @ compute R, G, and B
320     add         r3, r8, r7              @ r3  = b' = Y + bu
321     add         r11, r9, r7, asl #1     @ r11 = r' = Y*2 + rv
322     rsb         r7, r10, r7             @ r7  = g' = Y - guv
323                                         @
324                                         @ r8 = bu, r9 = rv, r10 = guv
325                                         @
326     sub         r12, r3, r3, lsr #5     @ r3 = 31/32*b + b/256
327     add         r3, r12, r3, lsr #8     @
328                                         @
329     sub         r12, r11, r11, lsr #5   @ r11 = 31/32*r + r/256
330     add         r11, r12, r11, lsr #8   @
331                                         @
332     sub         r12, r7, r7, lsr #6     @ r7 = 63/64*g + g/256
333     add         r7, r12, r7, lsr #8     @
334                                         @
335     add         r12, r14, #0x200        @ r12 = delta = r14 + 0x200
336                                         @
337     add         r3, r3, r12             @ b = r3 + delta
338     add         r11, r11, r12, lsl #1   @ r = r11 + delta*2
339     add         r7, r7, r12, lsr #1     @ g = r7 + delta/2
340                                         @
341     orr         r12, r3, r11, asr #1    @ check if clamping is needed...
342     orr         r12, r12, r7            @ ...at all
343     movs        r12, r12, asr #15       @
344     beq         15f @ no clamp          @
345     movs        r12, r3, asr #15        @ clamp b
346     mvnne       r3, r12, lsr #15        @
347     andne       r3, r3, #0x7c00         @ mask b only if clamped
348     movs        r12, r11, asr #16       @ clamp r
349     mvnne       r11, r12, lsr #16       @
350     movs        r12, r7, asr #15        @ clamp g
351     mvnne       r7, r12, lsr #15        @
352 15: @ no clamp                          @
353                                         @
354     ldrb        r12, [r4, r2]           @ r12 = Y' = *(Y'_p + stride)
355                                         @
356     and         r11, r11, #0xf800       @ pack pixel
357     and         r7, r7, #0x7e00         @ r3 = pixel = (r & 0xf800) |
358     orr         r11, r11, r7, lsr #4    @              ((g & 0x7e00) >> 4) |
359     orr         r3, r11, r3, lsr #10    @              (b >> 10)
360                                         @
361     mov         r11, #LCD2_DATA_MASK    @ store pixel
362     orr         r7, r11, r3, lsr #8     @ r7  = mask | (pixel >> 8)
363     orr         r11, r11, r3            @ r11 = mask | pixel
364 20:                                     @ poll port until busy bit is clear
365     ldr         r3, [r0]                @
366     tst         r3, #LCD2_BUSY_MASK     @
367     bne         20b                     @
368     str         r7, [r0]                @ write r7 first, then r11
369     str         r11, [r0]               @
370                                         @ pixel 2 (bu, rv, guv reused)
371     sub         r7, r12, #16            @ r7 = Y = (Y' - 16)*149
372     add         r12, r7, r7, asl #2     @
373     add         r12, r12, r12, asl #4   @
374     add         r7, r12, r7, asl #6     @
375                                         @ compute R, G, and B
376     add         r3, r8, r7              @ r3  = b' = Y + bu
377     add         r11, r9, r7, asl #1     @ r11 = r' = Y*2 + rv
378     rsb         r7, r10, r7             @ r7  = g' = Y - guv
379                                         @
380     sub         r12, r3, r3, lsr #5     @ r3  = 31/32*b' + b'/256
381     add         r3, r12, r3, lsr #8     @
382                                         @
383     sub         r12, r11, r11, lsr #5   @ r11 = 31/32*r' + r'/256
384     add         r11, r12, r11, lsr #8   @
385                                         @
386     sub         r12, r7, r7, lsr #6     @ r7  = 63/64*g' + g'/256
387     add         r7, r12, r7, lsr #8     @
388                                         @
389     @ This element is zero - use r14    @
390                                         @
391     add         r3, r3, r14             @ b = r3 + delta
392     add         r11, r11, r14, lsl #1   @ r = r11 + delta*2
393     add         r7, r7, r14, lsr #1     @ g = r7 + delta/2
394                                         @
395     orr         r12, r3, r11, asr #1    @ check if clamping is needed...
396     orr         r12, r12, r7            @ ...at all
397     movs        r12, r12, asr #15       @
398     beq         15f @ no clamp          @
399     movs        r12, r3, asr #15        @ clamp b
400     mvnne       r3, r12, lsr #15        @
401     andne       r3, r3, #0x7c00         @ mask b only if clamped
402     movs        r12, r11, asr #16       @ clamp r
403     mvnne       r11, r12, lsr #16       @
404     movs        r12, r7, asr #15        @ clamp g
405     mvnne       r7, r12, lsr #15        @
406 15: @ no clamp                          @
407                                         @
408     ldrb        r12, [r4], #1           @ r12 = Y' = *(Y'_p++)
409                                         @
410     and         r11, r11, #0xf800       @ pack pixel
411     and         r7, r7, #0x7e00         @ r3 = pixel = (r & 0xf800) |
412     orr         r11, r11, r7, lsr #4    @              ((g & 0x7e00) >> 4) |
413     orr         r3, r11, r3, lsr #10    @              (b >> 10)
414                                         @
415     mov         r11, #LCD2_DATA_MASK    @ store pixel
416     orr         r7, r11, r3, lsr #8     @
417     orr         r11, r11, r3            @
418 20:                                     @ poll port until busy bit is clear
419     ldr         r3, [r0]                @
420     tst         r3, #LCD2_BUSY_MASK     @
421     bne         20b                     @
422     str         r7, [r0]                @
423     str         r11, [r0]               @
424                                         @ pixel 3 (bu, rv, guv reused)
425     sub         r7, r12, #16            @ r7 = Y = (Y' - 16)*149
426     add         r12, r7, r7, asl #2     @
427     add         r12, r12, r12, asl #4   @
428     add         r7, r12, r7, asl #6     @
429                                         @ compute R, G, and B
430     add         r3, r8, r7              @ r3  = b' = Y + bu
431     add         r11, r9, r7, asl #1     @ r11 = r' = Y*2 + rv
432     rsb         r7, r10, r7             @ r7  = g' = Y - guv
433                                         @
434                                         @ r8 = bu, r9 = rv, r10 = guv
435                                         @
436     sub         r12, r3, r3, lsr #5     @ r3  = 31/32*b' + b'/256
437     add         r3, r12, r3, lsr #8     @
438                                         @
439     sub         r12, r11, r11, lsr #5   @ r11 = 31/32*r' + r'/256
440     add         r11, r12, r11, lsr #8   @
441                                         @
442     sub         r12, r7, r7, lsr #6     @ r7  = 63/64*g' + g'/256
443     add         r7, r12, r7, lsr #8     @
444                                         @
445     add         r12, r14, #0x100        @ r12 = delta = r14 + 0x100
446                                         @
447     add         r3, r3, r12             @ b = r3 + delta
448     add         r11, r11, r12, lsl #1   @ r = r11 + delta*2
449     add         r7, r7, r12, lsr #1     @ g = r7 + delta/2
450                                         @
451     orr         r12, r3, r11, asr #1    @ check if clamping is needed...
452     orr         r12, r12, r7            @ ...at all
453     movs        r12, r12, asr #15       @
454     beq         15f @ no clamp          @
455     movs        r12, r3, asr #15        @ clamp b
456     mvnne       r3, r12, lsr #15        @
457     andne       r3, r3, #0x7c00         @ mask b only if clamped
458     movs        r12, r11, asr #16       @ clamp r
459     mvnne       r11, r12, lsr #16       @
460     movs        r12, r7, asr #15        @ clamp g
461     mvnne       r7, r12, lsr #15        @
462 15: @ no clamp                          @
463                                         @
464     ldrb        r12, [r4, r2]           @ r12 = Y' = *(Y'_p + stride)
465                                         @
466     and         r11, r11, #0xf800       @ pack pixel
467     and         r7, r7, #0x7e00         @ r3 = pixel = (r & 0xf800) |
468     orr         r11, r11, r7, lsr #4    @              ((g & 0x7e00) >> 4) |
469     orr         r3, r11, r3, lsr #10    @              (b >> 10)
470                                         @
471     mov         r11, #LCD2_DATA_MASK    @ store pixel
472     orr         r7, r11, r3, lsr #8     @
473     orr         r11, r11, r3            @
474 20:                                     @ poll port until busy bit is clear
475     ldr         r3, [r0]                @
476     tst         r3, #LCD2_BUSY_MASK     @
477     bne         20b                     @
478     str         r7, [r0]                @
479     str         r11, [r0]               @
480                                         @ pixel 4 (bu, rv, guv reused)
481     sub         r7, r12, #16            @ r7 = Y = (Y' - 16)*149
482     add         r12, r7, r7, asl #2     @
483     add         r12, r12, r12, asl #4   @
484     add         r7, r12, r7, asl #6     @
485                                         @ compute R, G, and B
486     add         r3, r8, r7              @ r3  = b' = Y + bu
487     add         r11, r9, r7, asl #1     @ r11 = r' = Y*2 + rv
488     rsb         r7, r10, r7             @ r7  = g' = Y - guv
489                                         @
490     sub         r12, r3, r3, lsr #5     @ r3 = 31/32*b + b/256
491     add         r3, r12, r3, lsr #8     @
492                                         @
493     sub         r12, r11, r11, lsr #5   @ r11 = 31/32*r + r/256
494     add         r11, r12, r11, lsr #8   @
495                                         @
496     sub         r12, r7, r7, lsr #6     @ r7 = 63/64*g + g/256
497     add         r7, r12, r7, lsr #8     @
498                                         @
499     add         r12, r14, #0x300        @ r12 = delta = r14 + 0x300
500                                         @
501     add         r3, r3, r12             @ b = r3 + delta
502     add         r11, r11, r12, lsl #1   @ r = r11 + delta*2
503     add         r7, r7, r12, lsr #1     @ g = r7 + delta/2
504                                         @
505     orr         r12, r3, r11, asr #1    @ check if clamping is needed...
506     orr         r12, r12, r7            @ ...at all
507     movs        r12, r12, asr #15       @
508     beq         15f @ no clamp          @
509     movs        r12, r3, asr #15        @ clamp b
510     mvnne       r3, r12, lsr #15        @
511     andne       r3, r3, #0x7c00         @ mask b only if clamped
512     movs        r12, r11, asr #16       @ clamp r
513     mvnne       r11, r12, lsr #16       @
514     movs        r12, r7, asr #15        @ clamp g
515     mvnne       r7, r12, lsr #15        @
516 15: @ no clamp                          @
517                                         @
518     and         r11, r11, #0xf800       @ pack pixel
519     and         r7, r7, #0x7e00         @ r3 = pixel = (r & 0xf800) |
520     orr         r11, r11, r7, lsr #4    @              ((g & 0x7e00) >> 4) |
521     orr         r3, r11, r3, lsr #10    @              (b >> 10)
522                                         @
523     mov         r11, #LCD2_DATA_MASK    @ store pixel
524     orr         r7, r11, r3, lsr #8     @
525     orr         r11, r11, r3            @
526 20:                                     @ poll port until busy bit is clear
527     ldr         r3, [r0]                @
528     tst         r3, #LCD2_BUSY_MASK     @
529     bne         20b                     @
530     str         r7, [r0]                @
531     str         r11, [r0]               @
532                                         @
533     subs        r1, r1, #2              @ subtract block from width
534     bgt         10b @ loop line         @
535                                         @
536     ldmfd       sp!, { r4-r11, pc }     @ restore registers and return
537     .ltorg                              @ dump constant pool
538     .size   lcd_write_yuv420_lines_odither, .-lcd_write_yuv420_lines_odither