core/GP_WritePixel.c

   1 /*****************************************************************************
   2  * This file is part of gfxprim library.                                     *
   3  *                                                                           *
   4  * Gfxprim is free software; you can redistribute it and/or                  *
   5  * modify it under the terms of the GNU Lesser General Public                *
   6  * License as published by the Free Software Foundation; either              *
   7  * version 2.1 of the License, or (at your option) any later version.        *
   8  *                                                                           *
   9  * Gfxprim is distributed in the hope that it will be useful,                *
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of            *
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU         *
  12  * Lesser General Public License for more details.                           *
  13  *                                                                           *
  14  * You should have received a copy of the GNU Lesser General Public          *
  15  * License along with gfxprim; if not, write to the Free Software            *
  16  * Foundation, Inc., 51 Franklin Street, Fifth Floor,                        *
  17  * Boston, MA  02110-1301  USA                                               *
  18  *                                                                           *
  19  * Copyright (C) 2009-2010 Jiri "BlueBear" Dluhos                            *
  20  *                         <jiri.bluebear.dluhos@gmail.com>                  *
  21  *                                                                           *
  22  * Copyright (C) 2009-2010 Cyril Hrubis <metan@ucw.cz>                       *
  23  *                                                                           *
  24  *****************************************************************************/
  25
  26 #include "GP.h"
  27
  28 void GP_WritePixels8bpp(void *start, size_t count, uint8_t value)
  29 {
  30         uint8_t *p = (uint8_t *) start;
  31         uint8_t *end = p + count;
  32         for (; p <= end; p++)
  33                 *p = value;
  34 }
  35
  36 void GP_WritePixels16bpp(void *start, size_t count, uint16_t value)
  37 {
  38         uint16_t *p = (uint16_t *) start;
  39         size_t i;
  40
  41         /* Write as much pixels as possible in 4-pixel blocks. */
  42         for (i = count; i >= 4; p += 4, i -= 4) {
  43                 p[0] = value;
  44                 p[1] = value;
  45                 p[2] = value;
  46                 p[3] = value;
  47         }
  48
  49         /* Write the rest. */
  50         if (i > 0) {
  51                 p[0] = value;
  52                 if (i > 1) {
  53                         p[1] = value;
  54                         if (i > 2) {
  55                                 p[2] = value;
  56                         }
  57                 }
  58         }
  59 }
  60
  61 void GP_WritePixels24bpp(void *start, size_t count, uint32_t value)
  62 {
  63         uint8_t *bytep = (uint8_t *) start;
  64
  65         /* How much bytes we are offset against the 32-bit boundary. */
  66         int shift = ((intptr_t) bytep) % 4;
  67
  68         /*
  69          * Pixels remaining to draw (one less than pixelcount because
  70          * one incomplete pixel is drawn during the preparation phase.)
  71          */
  72         int i = count - 1;
  73
  74         /*
  75          * Handle each color component separately.
  76          * (Probably they are R, G, B but who knows.)
  77          */
  78         uint8_t a = value & 0xff;
  79         uint8_t b = (value >> 8) & 0xff;
  80         uint8_t c = (value >> 16) & 0xff;
  81
  82         uint32_t *p;
  83         uint32_t block[3];
  84
  85         /*
  86          * The line consists of three repeating 32-bit segments
  87          * (except for the starting and ending segment:
  88          * ABCA, BCAB, CABC.
  89          */
  90 #if __BIG_ENDIAN__
  91         uint32_t abca = a << 24 | b << 16 | c << 8 | a;
  92         uint32_t bcab = b << 24 | c << 16 | a << 8 | b;
  93         uint32_t cabc = c << 24 | a << 16 | b << 8 | c;
  94 #else
  95         uint32_t abca = a << 24 | c << 16 | b << 8 | a;
  96         uint32_t bcab = b << 24 | a << 16 | c << 8 | b;
  97         uint32_t cabc = c << 24 | b << 16 | a << 8 | c;
  98 #endif
  99
 100         /*
 101          * Handle the first few bytes (1 pixel or less) and prepare
 102          * the repeating sequence.
 103          */
 104         switch (shift) {
 105         default: /* shut up gcc */
 106         case 0:
 107                 block[0] = abca;
 108                 block[1] = bcab;
 109                 block[2] = cabc;
 110                 p = (uint32_t *) bytep;
 111                 break;
 112         case 3:
 113                 bytep[0] = a;
 114                 block[0] = bcab;
 115                 block[1] = cabc;
 116                 block[2] = abca;
 117                 p = (uint32_t *)(bytep + 1);
 118                 break;
 119         case 2:
 120                 bytep[0] = a;
 121                 bytep[1] = b;
 122                 block[0] = cabc;
 123                 block[1] = abca;
 124                 block[2] = bcab;
 125                 p = (uint32_t *)(bytep + 2);
 126                 break;
 127         case 1:
 128                 bytep[0] = a;
 129                 bytep[1] = b;
 130                 bytep[2] = c;
 131                 block[0] = abca;
 132                 block[1] = bcab;
 133                 block[2] = cabc;
 134                 p = (uint32_t *)(bytep + 3);
 135                 break;
 136         }
 137
 138         /*
 139          * Write as much of the line as possible as
 140          * triplets of 32-bit numbers; hopefully the compiler can
 141          * put some wide write instructions in.
 142          */
 143         while (i >= 4) {
 144                 p[0] = block[0];
 145                 p[1] = block[1];
 146                 p[2] = block[2];
 147                 p += 3;
 148                 i -= 4;
 149         }
 150
 151         /* Write the rest of the last pixel of the main part */
 152         bytep = (uint8_t *) p;
 153         switch (shift) {
 154         case 0:
 155                 bytep[0] = a;
 156                 bytep[1] = b;
 157                 bytep[2] = c;
 158                 bytep += 3;
 159                 break;
 160         case 1:
 161                 break;
 162         case 2:
 163                 bytep[0] = c;
 164                 bytep++;
 165                 break;
 166         case 3:
 167                 bytep[0] = b;
 168                 bytep[1] = c;
 169                 bytep += 2;
 170                 break;
 171         }
 172
 173         while (i > 0) {
 174                 bytep[0] = a;
 175                 bytep[1] = b;
 176                 bytep[2] = c;
 177                 bytep += 3;
 178                 i--;
 179         }
 180 }
 181
 182 void GP_WritePixels32bpp(void *start, size_t count, uint32_t value)
 183 {
 184         /*
 185          * Inspired by GNU libc's wmemset() (by Ulrich Drepper, licensed under LGPL).
 186          *
 187          * Write the pixels in groups of four, allowing the compiler to use
 188          * MMX/SSE/similar instructions if available. The last few pixels are
 189          * copied normally one-by-one. (Speed gain is about 15% over a naive loop
 190          * on AMD Phenom CPU.)
 191          */
 192
 193         uint32_t *p = (uint32_t *) start;
 194         size_t i = count;
 195         while (i >= 4) {
 196                 p[0] = value;
 197                 p[1] = value;
 198                 p[2] = value;
 199                 p[3] = value;
 200                 p += 4;
 201                 i -= 4;
 202         }
 203         if (i > 0) {
 204                 p[0] = value;
 205                 if (i > 1) {
 206                         p[1] = value;
 207                         if (i > 2) {
 208                                 p[2] = value;
 209                         }
 210                 }
 211         }
 212 }