core: Fix rest of the G2_LE and G4_LE WritePixels.
[gfxprim.git] / libs / core / GP_WritePixel.c
blobbc57ea164f98e34533e59bfd3c06344adcbaf287
1 /*****************************************************************************
2 * This file is part of gfxprim library. *
3 * *
4 * Gfxprim is free software; you can redistribute it and/or *
5 * modify it under the terms of the GNU Lesser General Public *
6 * License as published by the Free Software Foundation; either *
7 * version 2.1 of the License, or (at your option) any later version. *
8 * *
9 * Gfxprim is distributed in the hope that it will be useful, *
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *
12 * Lesser General Public License for more details. *
13 * *
14 * You should have received a copy of the GNU Lesser General Public *
15 * License along with gfxprim; if not, write to the Free Software *
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, *
17 * Boston, MA 02110-1301 USA *
18 * *
19 * Copyright (C) 2009-2010 Jiri "BlueBear" Dluhos *
20 * <jiri.bluebear.dluhos@gmail.com> *
21 * *
22 * Copyright (C) 2009-2012 Cyril Hrubis <metan@ucw.cz> *
23 * *
24 *****************************************************************************/
26 #include <string.h>
28 #include "GP_Core.h"
29 #include "GP_WritePixel.h"
/* Whole-byte fill patterns for a 1 bpp pixel value, indexed by (val & 0x01). */
static const uint8_t bytes_1BPP[] = {0x00, 0xff};

/*
 * Fills a horizontal run of 1 bpp pixels, pixel k of a byte living in bit k.
 *
 * start - byte holding the first pixel of the run
 * off   - index (0..7) of the first pixel inside *start; any other value
 *         is treated like 0 (the run is assumed to be byte aligned)
 * cnt   - number of pixels to write; zero writes nothing
 * val   - pixel value, only the lowest bit is used
 *
 * NOTE(review): GP_SET_BITS1_ALIGNED() is presumably
 * (bit offset, bit count, destination pointer, value) -- confirm against
 * its definition, it is not visible in this file.
 */
void GP_WritePixels_1BPP_LE(uint8_t *start, uint8_t off,
                            size_t cnt, uint8_t val)
{
	/* size_t keeps long runs intact; an int would truncate them. */
	size_t len = cnt;

	/*
	 * Without this guard an empty run starting mid-byte would still
	 * write pixels and then underflow the remaining length.
	 */
	if (cnt == 0)
		return;

	/* Same masking as the whole-byte fill below uses. */
	val &= 0x01;

	/* Write start of the line, up to the next byte boundary */
	switch (off) {
	case 0:
	break;
	case 1:
		GP_SET_BITS1_ALIGNED(1, 1, start, val);

		if (--len == 0)
			return;
	/* fallthrough */
	case 2:
		GP_SET_BITS1_ALIGNED(2, 1, start, val);

		if (--len == 0)
			return;
	/* fallthrough */
	case 3:
		GP_SET_BITS1_ALIGNED(3, 1, start, val);

		if (--len == 0)
			return;
	/* fallthrough */
	case 4:
		GP_SET_BITS1_ALIGNED(4, 1, start, val);

		if (--len == 0)
			return;
	/* fallthrough */
	case 5:
		GP_SET_BITS1_ALIGNED(5, 1, start, val);

		if (--len == 0)
			return;
	/* fallthrough */
	case 6:
		GP_SET_BITS1_ALIGNED(6, 1, start, val);

		if (--len == 0)
			return;
	/* fallthrough */
	case 7:
		GP_SET_BITS1_ALIGNED(7, 1, start, val);

		if (--len == 0)
			return;

		/* First byte is complete, continue with the next one. */
		start++;
	break;
	default:
	break;
	}

	/* Write as many bytes as possible */
	memset(start, bytes_1BPP[val], len/8);

	start += len/8;

	/* And the rest */
	switch (len%8) {
	case 7:
		GP_SET_BITS1_ALIGNED(6, 1, start, val);
	/* fallthrough */
	case 6:
		GP_SET_BITS1_ALIGNED(5, 1, start, val);
	/* fallthrough */
	case 5:
		GP_SET_BITS1_ALIGNED(4, 1, start, val);
	/* fallthrough */
	case 4:
		GP_SET_BITS1_ALIGNED(3, 1, start, val);
	/* fallthrough */
	case 3:
		GP_SET_BITS1_ALIGNED(2, 1, start, val);
	/* fallthrough */
	case 2:
		GP_SET_BITS1_ALIGNED(1, 1, start, val);
	/* fallthrough */
	case 1:
		GP_SET_BITS1_ALIGNED(0, 1, start, val);
	break;
	default:
	break;
	}
}
/* Whole-byte fill patterns for a 2 bpp pixel value, indexed by (val & 0x03). */
static const uint8_t bytes_2BPP[] = {0x00, 0x55, 0xaa, 0xff};

/*
 * Fills a horizontal run of 2 bpp pixels, pixel k of a byte living in
 * bits 2k and 2k+1.
 *
 * start - byte holding the first pixel of the run
 * off   - index (0..3) of the first pixel inside *start; any other value
 *         is treated like 0 (the run is assumed to be byte aligned)
 * cnt   - number of pixels to write; zero writes nothing
 * val   - pixel value, only the two lowest bits are used
 *
 * NOTE(review): GP_SET_BITS1_ALIGNED() is presumably
 * (bit offset, bit count, destination pointer, value) -- confirm against
 * its definition, it is not visible in this file.
 */
void GP_WritePixels_2BPP_LE(uint8_t *start, uint8_t off,
                            size_t cnt, uint8_t val)
{
	/* size_t keeps long runs intact; an int would truncate them. */
	size_t len = cnt;

	/*
	 * Without this guard an empty run starting mid-byte would still
	 * write pixels and then underflow the remaining length.
	 */
	if (cnt == 0)
		return;

	/* Same masking as the whole-byte fill below uses. */
	val &= 0x03;

	/* Write start of the line, up to the next byte boundary */
	switch (off) {
	case 0:
	break;
	case 1:
		GP_SET_BITS1_ALIGNED(2, 2, start, val);

		if (--len == 0)
			return;
	/* fallthrough */
	case 2:
		GP_SET_BITS1_ALIGNED(4, 2, start, val);

		if (--len == 0)
			return;
	/* fallthrough */
	case 3:
		GP_SET_BITS1_ALIGNED(6, 2, start, val);

		if (--len == 0)
			return;

		/* First byte is complete, continue with the next one. */
		start++;
	break;
	default:
	break;
	}

	/* Write as many bytes as possible */
	memset(start, bytes_2BPP[val], len/4);

	start += len/4;

	/* And the rest */
	switch (len%4) {
	case 3:
		GP_SET_BITS1_ALIGNED(4, 2, start, val);
	/* fallthrough */
	case 2:
		GP_SET_BITS1_ALIGNED(2, 2, start, val);
	/* fallthrough */
	case 1:
		GP_SET_BITS1_ALIGNED(0, 2, start, val);
	break;
	default:
	break;
	}
}
/* Whole-byte fill patterns for a 4 bpp pixel value, indexed by (val & 0x0f). */
static const uint8_t bytes_4BPP[] = {
	0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
	0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff
};

/*
 * Fills a horizontal run of 4 bpp pixels, pixel 0 of a byte living in the
 * low nibble and pixel 1 in the high nibble.
 *
 * start - byte holding the first pixel of the run
 * off   - index (0 or 1) of the first pixel inside *start; any other value
 *         is treated like 0 (the run is assumed to be byte aligned)
 * cnt   - number of pixels to write; zero writes nothing
 * val   - pixel value, only the four lowest bits are used
 *
 * NOTE(review): GP_SET_BITS1_ALIGNED() is presumably
 * (bit offset, bit count, destination pointer, value) -- confirm against
 * its definition, it is not visible in this file.
 */
void GP_WritePixels_4BPP_LE(uint8_t *start, uint8_t off,
                            size_t cnt, uint8_t val)
{
	/* size_t keeps long runs intact; an int would truncate them. */
	size_t len = cnt;

	/*
	 * Without this guard an empty run starting mid-byte would still
	 * write a pixel and then underflow the remaining length.
	 */
	if (cnt == 0)
		return;

	/* Same masking as the whole-byte fill below uses. */
	val &= 0x0f;

	/* Write start of the line, up to the next byte boundary */
	switch (off) {
	case 0:
	break;
	case 1:
		GP_SET_BITS1_ALIGNED(4, 4, start, val);

		if (--len == 0)
			return;

		/* First byte is complete, continue with the next one. */
		start++;
	break;
	default:
	break;
	}

	/* Write as many bytes as possible */
	memset(start, bytes_4BPP[val], len/2);

	start += len/2;

	/* And the rest */
	switch (len%2) {
	case 1:
		GP_SET_BITS1_ALIGNED(0, 4, start, val);
	break;
	default:
	break;
	}
}
/* chunks_1bpp[n] selects the n most significant bits of a byte (n = 0..7). */
static const uint8_t chunks_1bpp[8] = {
	0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe,
};

/* Sets (val != 0) or clears (val == 0) the bits of *byte selected by mask. */
static void apply_mask_1bpp(uint8_t *byte, uint8_t mask, uint8_t val)
{
	if (val)
		*byte |= mask;
	else
		*byte &= (uint8_t) ~mask;
}

/*
 * Fills a horizontal run of 1 bpp pixels stored most significant bit first
 * (pixel 0 of a byte is bit 7).
 *
 * start - byte holding the first pixel of the run
 * off   - index of the first pixel inside *start, taken modulo 8
 * cnt   - number of pixels to write; zero writes nothing
 * val   - zero clears the pixels, any non-zero value sets them
 *
 * Only bytes that actually contain a pixel of the run are accessed.
 */
void GP_WritePixels1bpp(uint8_t *start, uint8_t off, size_t cnt, uint8_t val)
{
	uint8_t s_off = off % 8;
	uint8_t e_off = (cnt + s_off) % 8;
	uint8_t head, tail;
	size_t last;

	if (cnt == 0)
		return;

	/* Index of the last byte that holds a pixel of the run. */
	last = (cnt + s_off - 1) / 8;

	/* Pixels s_off..7 of the first byte. */
	head = (uint8_t) ~chunks_1bpp[s_off];

	/* Pixels 0..e_off-1 of the last byte; e_off == 0 means all of it. */
	tail = e_off ? chunks_1bpp[e_off] : 0xff;

	/* Run starts and ends in the same byte. */
	if (last == 0) {
		apply_mask_1bpp(start, head & tail, val);
		return;
	}

	apply_mask_1bpp(start, head, val);

	/* Bytes 1 .. last-1 are covered completely. */
	memset(start + 1, val ? 0xff : 0x00, last - 1);

	apply_mask_1bpp(start + last, tail, val);
}
/* Byte with all four 2 bpp pixels set to the same value (index = value). */
static const uint8_t colors_2bpp[4] = {
	0x00, 0x55, 0xaa, 0xff
};

/* Returns old with the bits selected by mask replaced by those of fill. */
static uint8_t merge_2bpp(uint8_t old, uint8_t fill, uint8_t mask)
{
	return (uint8_t) ((old & ~mask) | (fill & mask));
}

/*
 * Fills a horizontal run of 2 bpp pixels stored most significant bits first
 * (pixel 0 of a byte occupies bits 7..6).
 *
 * start - byte holding the first pixel of the run
 * off   - index of the first pixel inside *start, taken modulo 4
 * cnt   - number of pixels to write; zero writes nothing
 * val   - pixel value, taken modulo 4
 *
 * Only bytes that actually contain a pixel of the run are accessed.
 */
void GP_WritePixels2bpp(uint8_t *start, uint8_t off, size_t cnt, uint8_t val)
{
	uint8_t s_off = off % 4;
	uint8_t fill = colors_2bpp[val % 4];
	uint8_t head, tail;
	size_t end, last;

	if (cnt == 0)
		return;

	/* Pixel index, counted from pixel 0 of *start, one past the run. */
	end = cnt + s_off;

	/* Index of the last byte that holds a pixel of the run. */
	last = (end - 1) / 4;

	/* Pixels s_off..3 of the first byte. */
	head = (uint8_t) (0xff >> (2 * s_off));

	/* Pixels 0..(end-1)%4 of the last byte. */
	tail = (uint8_t) (0xff << (2 * (3 - (end - 1) % 4)));

	/* Run starts and ends in the same byte. */
	if (last == 0) {
		start[0] = merge_2bpp(start[0], fill, head & tail);
		return;
	}

	start[0] = merge_2bpp(start[0], fill, head);

	/* Bytes 1 .. last-1 are covered completely. */
	memset(start + 1, fill, last - 1);

	start[last] = merge_2bpp(start[last], fill, tail);
}
/*
 * Fills a horizontal run of 4 bpp pixels.
 *
 * start - byte holding the first pixel of the run
 * off   - index of the first pixel inside *start, taken modulo 2
 * cnt   - number of pixels to write; zero writes nothing
 * val   - pixel value, taken modulo 16
 *
 * NOTE(review): the partial first byte is written at bit offset 4 and the
 * partial last byte at bit offset 0, i.e. the first pixel of a byte is
 * assumed to sit in the low nibble. That matches GP_WritePixels_4BPP_LE()
 * rather than the most-significant-first layout GP_WritePixels1bpp() and
 * GP_WritePixels2bpp() use -- confirm which layout callers expect.
 *
 * NOTE(review): GP_SET_BITS() is presumably
 * (bit offset, bit count, destination lvalue, value) -- confirm against its
 * definition, it is not visible in this file.
 */
void GP_WritePixels4bpp(uint8_t *start, uint8_t off, size_t cnt, uint8_t val)
{
	uint8_t s_off = off % 2;
	uint8_t e_off = (cnt + s_off) % 2;
	uint8_t col;
	size_t len;

	/*
	 * An empty run starting on an odd pixel would otherwise make the
	 * byte count below wrap around.
	 */
	if (cnt == 0)
		return;

	/* Number of bytes covered completely; never negative for cnt >= 1. */
	len = (cnt - s_off - e_off) / 2;

	val %= 16;

	/* Byte with both nibbles set to the pixel value. */
	col = (uint8_t) ((val << 4) | val);

	if (len > 0)
		GP_WritePixels8bpp(start + s_off, len, col);

	/* handle start and end */
	if (s_off)
		GP_SET_BITS(4, 4, start[0], val);

	if (e_off)
		GP_SET_BITS(0, 4, start[len + s_off], val);
}
/*
 * Fills count consecutive 8 bpp pixels, i.e. count bytes beginning at
 * start, with value.
 */
void GP_WritePixels8bpp(void *start, size_t count, uint8_t value)
{
	/* One pixel is exactly one byte, a plain byte fill does the job. */
	(void) memset(start, value, count);
}
/*
 * Fills count consecutive 16 bpp pixels beginning at start with value.
 *
 * start is accessed as an array of uint16_t.
 */
void GP_WritePixels16bpp(void *start, size_t count, uint16_t value)
{
	uint16_t *dst = (uint16_t *) start;
	size_t blocks = count / 4;
	size_t rest = count % 4;

	/* Bulk of the run, four pixels per iteration. */
	while (blocks-- > 0) {
		dst[0] = value;
		dst[1] = value;
		dst[2] = value;
		dst[3] = value;
		dst += 4;
	}

	/* Up to three leftover pixels. */
	switch (rest) {
	case 3:
		dst[2] = value;
	/* fallthrough */
	case 2:
		dst[1] = value;
	/* fallthrough */
	case 1:
		dst[0] = value;
	break;
	default:
	break;
	}
}
/*
 * Placeholder for filling a run of 18 bpp pixels.
 *
 * Not implemented: the body is empty, so a call writes nothing. The
 * compile-time warning below is a reminder of that.
 */
void GP_WritePixels18bpp(void *start, uint8_t off, size_t count, uint32_t value)
{
	#warning TODO

	/* Nothing is done with the arguments yet. */
	(void) start;
	(void) off;
	(void) count;
	(void) value;
}
/*
 * Fills count consecutive 24 bpp pixels beginning at start with value.
 *
 * Every pixel is stored as three bytes in the order
 * (value & 0xff), ((value >> 8) & 0xff), ((value >> 16) & 0xff);
 * the top eight bits of value are ignored. A zero count writes nothing.
 *
 * The run is written in three phases:
 *
 *  1. single bytes until the destination is 4-byte aligned,
 *  2. 12-byte blocks (four whole pixels) copied with a fixed-size memcpy(),
 *     which the compiler is free to turn into wide stores,
 *  3. single bytes for whatever is left.
 *
 * Everything is written byte-wise, so the result does not depend on the
 * host byte order and no wider type is ever aliased onto the buffer.
 */
void GP_WritePixels24bpp(void *start, size_t count, uint32_t value)
{
	uint8_t *bytep = (uint8_t *) start;

	/* The three components in the order they appear in memory. */
	const uint8_t comp[3] = {
		(uint8_t) (value & 0xff),
		(uint8_t) ((value >> 8) & 0xff),
		(uint8_t) ((value >> 16) & 0xff),
	};

	/* Bytes that remain to be written. */
	size_t bytes = 3 * count;

	/* Component the next byte to be written belongs to. */
	unsigned int phase = 0;

	uint8_t block[12];
	size_t k;

	/*
	 * Advance to a 32-bit boundary. The address is inspected as an
	 * unsigned integer, a signed one could produce a negative remainder.
	 */
	while (bytes > 0 && ((uintptr_t) bytep) % 4 != 0) {
		*bytep++ = comp[phase];
		phase = (phase + 1) % 3;
		bytes--;
	}

	/*
	 * Repeating 12-byte pattern beginning at the current component.
	 * 12 is a multiple of 3, so phase is unchanged after each block.
	 */
	for (k = 0; k < sizeof(block); k++)
		block[k] = comp[(phase + k) % 3];

	while (bytes >= sizeof(block)) {
		memcpy(bytep, block, sizeof(block));
		bytep += sizeof(block);
		bytes -= sizeof(block);
	}

	/* Write the rest. */
	while (bytes > 0) {
		*bytep++ = comp[phase];
		phase = (phase + 1) % 3;
		bytes--;
	}
}
/*
 * Fills count consecutive 32 bpp pixels beginning at start with value.
 *
 * start is accessed as an array of uint32_t.
 *
 * Inspired by GNU libc's wmemset() (by Ulrich Drepper, licensed under LGPL):
 * pixels are written in groups of four so that the compiler may use
 * MMX/SSE/similar instructions if available, the last few pixels are
 * written one by one. (The original author measured a speed gain of about
 * 15% over a naive loop on an AMD Phenom CPU.)
 */
void GP_WritePixels32bpp(void *start, size_t count, uint32_t value)
{
	uint32_t *dst = (uint32_t *) start;

	/* End of the part that is written four pixels at a time. */
	uint32_t *const bulk_end = dst + (count - count % 4);

	/* End of the whole run. */
	uint32_t *const end = dst + count;

	for (; dst != bulk_end; dst += 4) {
		dst[0] = value;
		dst[1] = value;
		dst[2] = value;
		dst[3] = value;
	}

	/* Up to three leftover pixels. */
	for (; dst != end; dst++)
		*dst = value;
}