Sansa e200v2/Fuze: Optimize YUV blitting by writing 2 pixel at once to the DBOP and...
[kugel-rb.git] / firmware / target / arm / as3525 / lcd-as-e200v2-fuze.S
blob2725c926a8cfb42647eebd200fdc504865ef16de
1 /***************************************************************************
2  *             __________               __   ___.
3  *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
4  *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
5  *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
6  *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
7  *                     \/            \/     \/    \/            \/
8  * $Id$
9  *
10  * Copyright (C) 2007 by Jens Arnold
11  * Heavily based on lcd-as-memframe.c by Michael Sevakis
12  * Adapted for Sansa Fuze/e200v2 by Rafaël Carré
13  *
14  * This program is free software; you can redistribute it and/or
15  * modify it under the terms of the GNU General Public License
16  * as published by the Free Software Foundation; either version 2
17  * of the License, or (at your option) any later version.
18  *
19  * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
20  * KIND, either express or implied.
21  *
22  ****************************************************************************/
24 #include "config.h"
25 #include "cpu.h"
27 #define DBOP_BUSY (1<<10)
/****************************************************************************
 * void lcd_write_yuv420_lines(unsigned char const * const src[3],
 *                             int width,
 *                             int stride);
 *
 * Converts a two-line strip of planar YUV 4:2:0 data to RGB565 and pushes it
 * to the LCD through the DBOP data register, two pixels per 32-bit write.
 *
 *   |R|   |1.000000 -0.000001  1.402000| |Y'|
 *   |G| = |1.000000 -0.334136 -0.714136| |Pb|
 *   |B|   |1.000000  1.772000  0.000000| |Pr|
 *   Scaled, normalized, rounded and tweaked to yield RGB 565:
 *   |R|   |74   0 101| |Y' -  16| >> 9
 *   |G| = |74 -24 -51| |Cb - 128| >> 8
 *   |B|   |74 128   0| |Cr - 128| >> 9
 *
 * Write four RGB565 pixels in the following order on each loop:
 * 1 3 + > down
 * 2 4 \/ left
 *
 * Each loop iteration consumes one Cb and one Cr sample and four Y' samples
 * (two from the current line, two from the line `stride` bytes further on)
 * and decrements `width` by 2.
 *
 * In:     r0 = src (array of three plane pointers: Y', Cb, Cr)
 *         r1 = width
 *         r2 = stride
 * Out:    nothing
 * Saves:  r4-r11, lr (pushed on entry, popped on return)
 * Clobb:  r0-r3, r12, flags
 *
 * Side effects: sets bits 13 and 14 of the DBOP control register (32 bit
 * mode) and does not clear them again.
 * NOTE(review): presumably the caller restores the DBOP mode - confirm.
 */

/* DBOP register block; the base is built from two ARM immediates. */
#define YUV_DBOP_BASE_HI    0xC8000000
#define YUV_DBOP_BASE_LO    0x00120000
#define YUV_DBOP_CTRL_OFS   0x08        /* control register */
#define YUV_DBOP_STATUS_OFS 0x0c        /* status register */
#define YUV_DBOP_DATA_OFS   0x10        /* data register, pixels go here */
#define YUV_DBOP_MODE_32BIT (3<<13)     /* (1<<13|1<<14) in control register */

    @ r7 = (\yreg - 16)*37. This is half of the *74 luma coefficient; the
    @ missing factor of two is made up by shifting right by one bit less
    @ when the luma term is added to the chroma terms.
    @ Clobbers r12.
    .macro      yuv420_luma yreg
    sub         r7, \yreg, #16          @ r7 = Y' - 16
    add         r12, r7, r7, asl #2     @ r12 = r7*5
    add         r7, r12, r7, asl #5     @ r7  = r7*5 + r7*32 = r7*37
    .endm

    @ Combine luma and chroma, then saturate to the RGB565 field widths.
    @ In:    r7 = scaled luma, r8 = bu, r9 = rv, r10 = guv
    @ Out:   r0 = b (0..31), lr = r (0..31), r7 = g (0..63)
    @ Clobb: r12, flags
    .macro      yuv420_rgb_clamp
    add         r0, r8, r7, asr #8      @ r0 = b = (Y >> 9) + bu
    add         lr, r9, r7, asr #8      @ lr = r = (Y >> 9) + rv
    add         r7, r10, r7, asr #7     @ r7 = g = (Y >> 8) + guv
                                        @
    orr         r12, r0, lr             @ any bit above the field width (or a
    orr         r12, r12, r7, asr #1    @ sign bit) set in b, r or g/2 ?
    cmp         r12, #31                @
    bls         15f                     @ no - skip clamping altogether
    cmp         r0, #31                 @ clamp b: negative -> 0, else 31
    mvnhi       r0, r0, asr #31         @
    andhi       r0, r0, #31             @
    cmp         lr, #31                 @ clamp r: negative -> 0, else 31
    mvnhi       lr, lr, asr #31         @
    andhi       lr, lr, #31             @
    cmp         r7, #63                 @ clamp g: negative -> 0, else 63
    mvnhi       r7, r7, asr #31         @
    andhi       r7, r7, #63             @
15: @ no clamp                          @
    .endm

    @ \dst = (r << 11) | (g << 5) | b, taking r from lr, g from r7, b from r0.
    @ Clobbers r0.
    .macro      yuv420_pack dst
    orr         r0, r0, lr, lsl #11     @ r0   = (r << 11) | b
    orr         \dst, r0, r7, lsl #5    @ \dst = (r << 11) | (g << 5) | b
    .endm

    .section    .icode, "ax", %progbits
    .align      2
    .global     lcd_write_yuv420_lines
    .type       lcd_write_yuv420_lines, %function
lcd_write_yuv420_lines:
                                        @ r0 = yuv_src
                                        @ r1 = width
                                        @ r2 = stride
    stmfd       sp!, { r4-r11, lr }     @ save non-scratch

    mov         r3, #YUV_DBOP_BASE_HI   @
    orr         r3, r3, #YUV_DBOP_BASE_LO @ r3 = DBOP base (kept for the
                                        @ whole function)
    ldmia       r0, { r4, r5, r6 }      @ r4 = yuv_src[0] = Y'_p
                                        @ r5 = yuv_src[1] = Cb_p
                                        @ r6 = yuv_src[2] = Cr_p
                                        @ r0 = scratch from here on
    ldr         r12, [r3, #YUV_DBOP_CTRL_OFS]
    sub         r2, r2, #1              @ stride -= 1: Y'_p is always one past
                                        @ the sample just read, so
                                        @ [Y'_p + stride - 1] is the sample
                                        @ directly below it
    orr         r12, r12, #YUV_DBOP_MODE_32BIT
    str         r12, [r3, #YUV_DBOP_CTRL_OFS] @ DBOP_CTRL |= (1<<13|1<<14)
                                        @ (32bit mode)
10: @ loop line                         @
    ldrb        r7, [r4], #1            @ r7 = *Y'_p++;
    ldrb        r8, [r5], #1            @ r8 = *Cb_p++;
    ldrb        r9, [r6], #1            @ r9 = *Cr_p++;
                                        @
    yuv420_luma r7                      @ r7 = Y = (Y' - 16)*37
                                        @
    sub         r8, r8, #128            @ Cb -= 128
    sub         r9, r9, #128            @ Cr -= 128
                                        @
    add         r10, r9, r9, asl #1     @ r10 = Cr*51 + Cb*24
    add         r10, r10, r10, asl #4   @
    add         r10, r10, r8, asl #3    @
    add         r10, r10, r8, asl #4    @
                                        @
    add         lr, r9, r9, asl #2      @ r9 = Cr*101
    add         lr, lr, r9, asl #5      @
    add         r9, lr, r9, asl #6      @
                                        @
    add         r8, r8, #2              @ r8 = bu = (Cb + 2) >> 2
    mov         r8, r8, asr #2          @    (i.e. (Cb*128 + 256) >> 9)
    add         r9, r9, #256            @ r9 = rv = (Cr*101 + 256) >> 9
    mov         r9, r9, asr #9          @
    rsb         r10, r10, #128          @ r10 = guv = (-r10 + 128) >> 8
    mov         r10, r10, asr #8        @
                                        @ r8, r9, r10 stay live for all four
                                        @ pixels of this 2x2 block
                                        @
                                        @ ---- pixel 1 ----
    yuv420_rgb_clamp                    @ r0 = b, lr = r, r7 = g
    ldrb        r12, [r4, r2]           @ r12 = Y' = *(Y'_p + stride)
    yuv420_pack r11                     @ r11 = pixel 1 (held for the pair)
                                        @
                                        @ ---- pixel 2 ----
    yuv420_luma r12                     @ r7 = Y = (Y' - 16)*37
    yuv420_rgb_clamp                    @ r0 = b, lr = r, r7 = g
    ldrb        r12, [r4], #1           @ r12 = Y' = *(Y'_p++)
    yuv420_pack r0                      @ r0 = pixel 2
    orr         r0, r11, r0, lsl #16    @ r0 = pixel 1 | (pixel 2 << 16)
    str         r0, [r3, #YUV_DBOP_DATA_OFS] @ write both pixels
                                        @
                                        @ ---- pixel 3 ----
    yuv420_luma r12                     @ r7 = Y = (Y' - 16)*37
    yuv420_rgb_clamp                    @ r0 = b, lr = r, r7 = g
    ldrb        r12, [r4, r2]           @ r12 = Y' = *(Y'_p + stride)
    yuv420_pack r11                     @ r11 = pixel 3 (held for the pair)
                                        @
                                        @ ---- pixel 4 ----
    yuv420_luma r12                     @ r7 = Y = (Y' - 16)*37
    yuv420_rgb_clamp                    @ r0 = b, lr = r, r7 = g
    yuv420_pack r0                      @ r0 = pixel 4
    orr         r0, r11, r0, lsl #16    @ r0 = pixel 3 | (pixel 4 << 16)
    str         r0, [r3, #YUV_DBOP_DATA_OFS] @ write both pixels
                                        @
    subs        r1, r1, #2              @ subtract block from width
    bgt         10b @ loop line         @
                                        @
1: @ busy
    @ Per the original author at most LCD_WIDTH/2 (110) 32 bit words are
    @ written per call and the fifo is bigger than that, so polling for an
    @ empty fifo once after the loop is safe.
    @ NOTE(review): the loop spins while the DBOP_BUSY bit is clear, so
    @ despite its name the bit appears to mean "fifo empty" - confirm
    @ against the AS3525 documentation.
    ldr         r7, [r3, #YUV_DBOP_STATUS_OFS] @ r7 = DBOP_STATUS
    tst         r7, #DBOP_BUSY          @ fifo not empty?
    beq         1b                      @ bit still clear - keep waiting

    ldmfd       sp!, { r4-r11, pc }     @ restore registers and return
                                        @ (loading pc returns; nothing after
                                        @ this instruction is reachable)
    .ltorg                              @ dump constant pool
    .size   lcd_write_yuv420_lines, .-lcd_write_yuv420_lines
/****************************************************************************
 * void lcd_write_yuv420_lines_odither(unsigned char const * const src[3],
 *                                     int width,
 *                                     int stride,
 *                                     int x_screen,
 *                                     int y_screen);
 *
 * Same job as the non-dithered variant (two-line strip of planar YUV 4:2:0
 * to RGB565, two pixels per 32-bit DBOP write), with an ordered dither
 * offset added to every pixel before truncation to 5/6/5 bits.
 *
 *   |R|   |1.000000 -0.000001  1.402000| |Y'|
 *   |G| = |1.000000 -0.334136 -0.714136| |Pb|
 *   |B|   |1.000000  1.772000  0.000000| |Pr|
 *   Red scaled at twice g & b but at same precision to place it in correct
 *   bit position after multiply and leave instruction count lower.
 *   |R|   |258   0  408| |Y' -  16|
 *   |G| = |149 -49 -104| |Cb - 128|
 *   |B|   |149 258    0| |Cr - 128|
 *
 * Write four RGB565 pixels in the following order on each loop:
 * 1 3 + > down
 * 2 4 \/ left
 *
 * Kernel pattern (raw|rotated|use order):
 * 5 3 4 2   2 6 3 7     row0   row2          > down
 * 1 7 0 6 | 4 0 5 1 | 2 4 6 0 3 5 7 1 col0     left
 * 4 2 5 3 | 3 7 2 6 | 3 5 7 1 2 4 6 0 col2  \/
 * 0 6 1 7   5 1 4 0
 *
 * In:     r0   = src (array of three plane pointers: Y', Cb, Cr)
 *         r1   = width
 *         r2   = stride
 *         r3   = x_screen
 *         [sp] = y_screen (fifth argument, on the stack at entry)
 * Out:    nothing
 * Saves:  r4-r11, lr (pushed on entry, popped on return)
 * Clobb:  r0-r3, r12, flags
 *
 * Only bit 1 of (x_screen ^ y_screen) is used; it selects the starting
 * kernel quadrant (0x00 or 0x80), which is then flipped on every loop
 * iteration.
 *
 * Side effects: sets bits 13 and 14 of the DBOP control register (32 bit
 * mode) and does not clear them again.
 * NOTE(review): presumably the caller restores the DBOP mode - confirm.
 */

/* DBOP register block; the base is built from two ARM immediates. */
#define YUV_DBOP_BASE_HI    0xC8000000
#define YUV_DBOP_BASE_LO    0x00120000
#define YUV_DBOP_CTRL_OFS   0x08        /* control register */
#define YUV_DBOP_STATUS_OFS 0x0c        /* status register */
#define YUV_DBOP_DATA_OFS   0x10        /* data register, pixels go here */
#define YUV_DBOP_MODE_32BIT (3<<13)     /* (1<<13|1<<14) in control register */

/* Nine registers (r4-r11, lr) are pushed on entry, so a stack argument that
 * was at [sp] when the function was entered is at [sp, #9*4] afterwards. */
#define ODITHER_Y_SCREEN_OFS 36

    @ r7 = (\yreg - 16)*149. Clobbers r12.
    .macro      odither_luma yreg
    sub         r7, \yreg, #16          @ r7  = Y' - 16
    add         r12, r7, r7, asl #2     @ r12 = r7*5
    add         r12, r12, r12, asl #4   @ r12 = r7*85
    add         r7, r12, r7, asl #6     @ r7  = r7*85 + r7*64 = r7*149
    .endm

    @ Combine luma and chroma and pre-scale each channel so that adding the
    @ dither delta cannot carry a full-scale value past its field.
    @ In:    r7 = scaled luma, r8 = bu, r9 = rv, r10 = guv
    @ Out:   r0 = b, r11 = r (at twice the scale of b and g), r7 = g
    @ Clobb: r12
    .macro      odither_rgb
    add         r0, r8, r7              @ r0  = b' = Y + bu
    add         r11, r9, r7, asl #1     @ r11 = r' = Y*2 + rv
    rsb         r7, r10, r7             @ r7  = g' = Y - guv
                                        @
    sub         r12, r0, r0, lsr #5     @ r0  = 31/32*b' + b'/256
    add         r0, r12, r0, lsr #8     @
                                        @
    sub         r12, r11, r11, lsr #5   @ r11 = 31/32*r' + r'/256
    add         r11, r12, r11, lsr #8   @
                                        @
    sub         r12, r7, r7, lsr #6     @ r7  = 63/64*g' + g'/256
    add         r7, r12, r7, lsr #8     @
    .endm

    @ Add the dither delta held in \dreg and saturate the three channels.
    @ \dreg may be r12: it is read by the three adds before r12 is reused as
    @ scratch.
    @ In:    r0 = b, r11 = r, r7 = g, \dreg = delta
    @ Out:   r0, r11, r7 dithered; on overflow b is forced to 0 or 0x7c00,
    @        r and g to all-zeros or all-ones in the bits that get packed
    @ Clobb: r12, flags
    .macro      odither_apply dreg
    add         r0, r0, \dreg           @ b = r0 + delta
    add         r11, r11, \dreg, lsl #1 @ r = r11 + delta*2
    add         r7, r7, \dreg, lsr #1   @ g = r7 + delta/2
                                        @
    orr         r12, r0, r11, asr #1    @ check if clamping is needed...
    orr         r12, r12, r7            @ ...at all
    movs        r12, r12, asr #15       @
    beq         15f                     @ nothing out of range
    movs        r12, r0, asr #15        @ clamp b
    mvnne       r0, r12, lsr #15        @
    andne       r0, r0, #0x7c00         @ mask b only if clamped
    movs        r12, r11, asr #16       @ clamp r
    mvnne       r11, r12, lsr #16       @
    movs        r12, r7, asr #15        @ clamp g
    mvnne       r7, r12, lsr #15        @
15: @ no clamp                          @
    .endm

    @ \dst = (r & 0xf800) | ((g & 0x7e00) >> 4) | (b >> 10)
    @ Clobbers r7, r11.
    .macro      odither_pack dst
    and         r11, r11, #0xf800       @ keep the 5 red bits
    and         r7, r7, #0x7e00         @ keep the 6 green bits
    orr         r11, r11, r7, lsr #4    @ r11 = red | green
    orr         \dst, r11, r0, lsr #10  @ \dst = red | green | blue
    .endm

    .section    .icode, "ax", %progbits
    .align      2
    .global     lcd_write_yuv420_lines_odither
    .type       lcd_write_yuv420_lines_odither, %function
lcd_write_yuv420_lines_odither:
                                        @ r0 = yuv_src
                                        @ r1 = width
                                        @ r2 = stride
                                        @ r3 = x_screen
                                        @ [sp] = y_screen
    stmfd       sp!, { r4-r11, lr }     @ save non-scratch (9 words)
    ldmia       r0, { r4, r5, r6 }      @ r4 = yuv_src[0] = Y'_p
                                        @ r5 = yuv_src[1] = Cb_p
                                        @ r6 = yuv_src[2] = Cr_p
                                        @
    ldr         r14, [sp, #ODITHER_Y_SCREEN_OFS] @ r14 = y_screen
                                        @ Line up pattern and kernel quadrant
    sub         r2, r2, #1              @ stride -= 1: Y'_p is always one past
                                        @ the sample just read, so
                                        @ [Y'_p + stride - 1] is the sample
                                        @ directly below it
    eor         r14, r14, r3            @ r14 = y_screen ^ x_screen
    and         r14, r14, #0x2          @
    mov         r14, r14, lsl #6        @ 0x00 or 0x80

    mov         r3, #YUV_DBOP_BASE_HI   @ x_screen is no longer needed;
    orr         r3, r3, #YUV_DBOP_BASE_LO @ r3 = DBOP base. Inside the loop r3
                                        @ is reused as pixel storage and the
                                        @ base is rebuilt in r0, due to lack
                                        @ of registers.
    ldr         r12, [r3, #YUV_DBOP_CTRL_OFS]
    orr         r12, r12, #YUV_DBOP_MODE_32BIT @ DBOP_CTRL |= (1<<13|1<<14)
    str         r12, [r3, #YUV_DBOP_CTRL_OFS]  @ (32bit mode)
10: @ loop line                         @
                                        @
    ldrb        r7, [r4], #1            @ r7 = *Y'_p++;
    ldrb        r8, [r5], #1            @ r8 = *Cb_p++;
    ldrb        r9, [r6], #1            @ r9 = *Cr_p++;
                                        @
    eor         r14, r14, #0x80         @ flip pattern quadrant
                                        @
    odither_luma r7                     @ r7 = Y = (Y' - 16)*149
                                        @
    sub         r8, r8, #128            @ Cb -= 128
    sub         r9, r9, #128            @ Cr -= 128
                                        @
    add         r10, r8, r8, asl #4     @ r10 = guv = Cr*104 + Cb*49
    add         r10, r10, r8, asl #5    @
    add         r10, r10, r9, asl #3    @
    add         r10, r10, r9, asl #5    @
    add         r10, r10, r9, asl #6    @
                                        @
    mov         r8, r8, asl #1          @ r8 = bu = Cb*258
    add         r8, r8, r8, asl #7      @
                                        @
    add         r9, r9, r9, asl #1      @ r9 = rv = Cr*408
    add         r9, r9, r9, asl #4      @
    mov         r9, r9, asl #3          @
                                        @ r8 = bu, r9 = rv, r10 = guv stay
                                        @ live for all four pixels of this
                                        @ 2x2 block
                                        @
                                        @ ---- pixel 1, delta = r14 + 0x100
    odither_rgb                         @ r0 = b, r11 = r, r7 = g
    add         r12, r14, #0x100        @
    odither_apply r12                   @
    ldrb        r12, [r4, r2]           @ r12 = Y' = *(Y'_p + stride)
    odither_pack r3                     @ r3 = pixel 1 (held for the pair)
                                        @
                                        @ ---- pixel 2, delta = r14 + 0x200
    odither_luma r12                    @ r7 = Y = (Y' - 16)*149
    odither_rgb                         @ r0 = b, r11 = r, r7 = g
    add         r12, r14, #0x200        @
    odither_apply r12                   @
    ldrb        r12, [r4], #1           @ r12 = Y' = *(Y'_p++)
    odither_pack r0                     @ r0 = pixel 2
    orr         r3, r3, r0, lsl #16     @ r3 = pixel 1 | (pixel 2 << 16)
    mov         r0, #YUV_DBOP_BASE_HI   @
    orr         r0, r0, #YUV_DBOP_BASE_LO @ r0 = DBOP base
    str         r3, [r0, #YUV_DBOP_DATA_OFS] @ write both pixels
                                        @
                                        @ ---- pixel 3, delta = r14 + 0x300
    odither_luma r12                    @ r7 = Y = (Y' - 16)*149
    odither_rgb                         @ r0 = b, r11 = r, r7 = g
    add         r12, r14, #0x300        @
    odither_apply r12                   @
    ldrb        r12, [r4, r2]           @ r12 = Y' = *(Y'_p + stride)
    odither_pack r3                     @ r3 = pixel 3 (held for the pair)
                                        @
                                        @ ---- pixel 4, delta = r14 + 0
    odither_luma r12                    @ r7 = Y = (Y' - 16)*149
    odither_rgb                         @ r0 = b, r11 = r, r7 = g
    @ This kernel element is zero - use r14 directly
    odither_apply r14                   @
    odither_pack r0                     @ r0 = pixel 4
    orr         r3, r3, r0, lsl #16     @ r3 = pixel 3 | (pixel 4 << 16)
    mov         r0, #YUV_DBOP_BASE_HI   @
    orr         r0, r0, #YUV_DBOP_BASE_LO @ r0 = DBOP base
    str         r3, [r0, #YUV_DBOP_DATA_OFS] @ write both pixels
                                        @
    subs        r1, r1, #2              @ subtract block from width
    bgt         10b @ loop line         @
                                        @
1: @ busy                               @
    @ Per the original author at most LCD_WIDTH/2 (110) 32 bit words are
    @ written per call and the fifo is bigger than that, so polling for an
    @ empty fifo once after the loop is safe.
    @ r0 still holds the DBOP base here: the loop body runs at least once
    @ and ends by rebuilding the base in r0.
    @ NOTE(review): the loop spins while the DBOP_BUSY bit is clear, so
    @ despite its name the bit appears to mean "fifo empty" - confirm
    @ against the AS3525 documentation.
    ldr         r7, [r0, #YUV_DBOP_STATUS_OFS] @ r7 = DBOP_STATUS
    tst         r7, #DBOP_BUSY          @ fifo not empty?
    beq         1b                      @ bit still clear - keep waiting

    ldmfd       sp!, { r4-r11, pc }     @ restore registers and return
    .ltorg                              @ dump constant pool
    .size   lcd_write_yuv420_lines_odither, .-lcd_write_yuv420_lines_odither