Revert r200891, "Added MIPS DSPr2 optimization for BGRAConvolve2D routine."
[chromium-blink-merge.git] / skia / ext / convolver.cc
bloba7824aafb10b233be8b93daf3fb8e5de6f57184a
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include <algorithm>
7 #include "base/logging.h"
8 #include "skia/ext/convolver.h"
9 #include "skia/ext/convolver_SSE2.h"
10 #include "third_party/skia/include/core/SkSize.h"
11 #include "third_party/skia/include/core/SkTypes.h"
13 namespace skia {
15 namespace {
// Saturates |a| to the range [0, 255] and returns it as an 8-bit unsigned
// value: negative inputs become 0, inputs above 255 become 255.
inline unsigned char ClampTo8(int a) {
  // Reinterpreting as unsigned lets one comparison reject both negative and
  // too-large values, so the common in-range case needs a single test.
  const bool in_range = static_cast<unsigned>(a) < 256;
  if (in_range)
    return static_cast<unsigned char>(a);
  return a < 0 ? 0 : 255;
}
27 // Takes the value produced by accumulating element-wise product of image with
28 // a kernel and brings it back into range.
29 // All of the filter scaling factors are in fixed point with kShiftBits bits of
30 // fractional part.
31 inline unsigned char BringBackTo8(int a, bool take_absolute) {
32 a >>= ConvolutionFilter1D::kShiftBits;
33 if (take_absolute)
34 a = std::abs(a);
35 return ClampTo8(a);
// A fixed-capacity ring of pixel rows. Rows are written by calling
// AdvanceRow(), which hands out the next slot (overwriting the oldest row once
// the ring is full). The buffer tracks both the slot that will be written next
// and the image y coordinate that slot will correspond to.
class CircularRowBuffer {
 public:
  // |dest_row_pixel_width| is the number of pixels in each row; every pixel
  // occupies 4 bytes. |max_y_filter_size| is the number of rows the ring
  // holds (enough for the biggest vertical filter).
  //
  // |first_input_row| is the y coordinate of the first row that will be
  // returned by AdvanceRow(); coordinates of later rows follow from it.
  CircularRowBuffer(int dest_row_pixel_width, int max_y_filter_size,
                    int first_input_row)
      : row_byte_width_(dest_row_pixel_width * 4),
        num_rows_(max_y_filter_size),
        next_row_(0),
        next_row_coordinate_(first_input_row) {
    buffer_.resize(row_byte_width_ * num_rows_);
    row_addresses_.resize(num_rows_);
  }

  // Claims the next slot in the ring and returns a pointer to its first byte.
  unsigned char* AdvanceRow() {
    unsigned char* const claimed = &buffer_[next_row_ * row_byte_width_];
    ++next_row_coordinate_;

    // Step to the following slot, wrapping back to the start at the end.
    next_row_ = (next_row_ + 1 == num_rows_) ? 0 : next_row_ + 1;
    return claimed;
  }

  // Returns an array of num_rows_ row pointers ordered by increasing y
  // coordinate. The coordinate of the first entry is stored into
  // |*first_row_index|.
  //
  // |*first_row_index| may be negative, which means the ring has not been
  // filled yet and the leading entries do not hold image rows.
  unsigned char* const* GetRowAddresses(int* first_row_index) {
    // Example: a 4-slot ring holding coordinates 6-9.
    //   Slot 0   Coord 8
    //   Slot 1   Coord 9
    //   Slot 2   Coord 6   <- next_row_ = 2, next_row_coordinate_ = 10.
    //   Slot 3   Coord 7
    //
    // The slot about to be overwritten holds the lowest coordinate. The
    // subtraction can go negative; callers index relative to
    // |*first_row_index| and never touch the negative rows.
    *first_row_index = next_row_coordinate_ - num_rows_;

    int slot = next_row_;
    for (int i = 0; i < num_rows_; ++i) {
      row_addresses_[i] = &buffer_[slot * row_byte_width_];

      // Move on to the next slot, wrapping if necessary.
      if (++slot == num_rows_)
        slot = 0;
    }
    return &row_addresses_[0];
  }

 private:
  // Backing storage: num_rows_ rows packed back to back, each
  // row_byte_width_ bytes long.
  std::vector<unsigned char> buffer_;

  // Size in bytes of one row inside |buffer_|.
  int row_byte_width_;

  // Capacity of the ring, in rows.
  int num_rows_;

  // Index of the slot the next AdvanceRow() call will hand out. Wraps around.
  int next_row_;

  // The y coordinate corresponding to |next_row_|. Incremented on every
  // AdvanceRow() call; never wraps.
  int next_row_coordinate_;

  // Scratch array returned by GetRowAddresses().
  std::vector<unsigned char*> row_addresses_;
};
125 // Convolves horizontally along a single row. The row data is given in
126 // |src_data| and continues for the num_values() of the filter.
127 template<bool has_alpha>
128 void ConvolveHorizontally(const unsigned char* src_data,
129 const ConvolutionFilter1D& filter,
130 unsigned char* out_row) {
131 // Loop over each pixel on this row in the output image.
132 int num_values = filter.num_values();
133 for (int out_x = 0; out_x < num_values; out_x++) {
134 // Get the filter that determines the current output pixel.
135 int filter_offset, filter_length;
136 const ConvolutionFilter1D::Fixed* filter_values =
137 filter.FilterForValue(out_x, &filter_offset, &filter_length);
139 // Compute the first pixel in this row that the filter affects. It will
140 // touch |filter_length| pixels (4 bytes each) after this.
141 const unsigned char* row_to_filter = &src_data[filter_offset * 4];
143 // Apply the filter to the row to get the destination pixel in |accum|.
144 int accum[4] = {0};
145 for (int filter_x = 0; filter_x < filter_length; filter_x++) {
146 ConvolutionFilter1D::Fixed cur_filter = filter_values[filter_x];
147 accum[0] += cur_filter * row_to_filter[filter_x * 4 + 0];
148 accum[1] += cur_filter * row_to_filter[filter_x * 4 + 1];
149 accum[2] += cur_filter * row_to_filter[filter_x * 4 + 2];
150 if (has_alpha)
151 accum[3] += cur_filter * row_to_filter[filter_x * 4 + 3];
154 // Bring this value back in range. All of the filter scaling factors
155 // are in fixed point with kShiftBits bits of fractional part.
156 accum[0] >>= ConvolutionFilter1D::kShiftBits;
157 accum[1] >>= ConvolutionFilter1D::kShiftBits;
158 accum[2] >>= ConvolutionFilter1D::kShiftBits;
159 if (has_alpha)
160 accum[3] >>= ConvolutionFilter1D::kShiftBits;
162 // Store the new pixel.
163 out_row[out_x * 4 + 0] = ClampTo8(accum[0]);
164 out_row[out_x * 4 + 1] = ClampTo8(accum[1]);
165 out_row[out_x * 4 + 2] = ClampTo8(accum[2]);
166 if (has_alpha)
167 out_row[out_x * 4 + 3] = ClampTo8(accum[3]);
171 // Does vertical convolution to produce one output row. The filter values and
172 // length are given in the first two parameters. These are applied to each
173 // of the rows pointed to in the |source_data_rows| array, with each row
174 // being |pixel_width| wide.
176 // The output must have room for |pixel_width * 4| bytes.
177 template<bool has_alpha>
178 void ConvolveVertically(const ConvolutionFilter1D::Fixed* filter_values,
179 int filter_length,
180 unsigned char* const* source_data_rows,
181 int pixel_width,
182 unsigned char* out_row) {
183 // We go through each column in the output and do a vertical convolution,
184 // generating one output pixel each time.
185 for (int out_x = 0; out_x < pixel_width; out_x++) {
186 // Compute the number of bytes over in each row that the current column
187 // we're convolving starts at. The pixel will cover the next 4 bytes.
188 int byte_offset = out_x * 4;
190 // Apply the filter to one column of pixels.
191 int accum[4] = {0};
192 for (int filter_y = 0; filter_y < filter_length; filter_y++) {
193 ConvolutionFilter1D::Fixed cur_filter = filter_values[filter_y];
194 accum[0] += cur_filter * source_data_rows[filter_y][byte_offset + 0];
195 accum[1] += cur_filter * source_data_rows[filter_y][byte_offset + 1];
196 accum[2] += cur_filter * source_data_rows[filter_y][byte_offset + 2];
197 if (has_alpha)
198 accum[3] += cur_filter * source_data_rows[filter_y][byte_offset + 3];
201 // Bring this value back in range. All of the filter scaling factors
202 // are in fixed point with kShiftBits bits of precision.
203 accum[0] >>= ConvolutionFilter1D::kShiftBits;
204 accum[1] >>= ConvolutionFilter1D::kShiftBits;
205 accum[2] >>= ConvolutionFilter1D::kShiftBits;
206 if (has_alpha)
207 accum[3] >>= ConvolutionFilter1D::kShiftBits;
209 // Store the new pixel.
210 out_row[byte_offset + 0] = ClampTo8(accum[0]);
211 out_row[byte_offset + 1] = ClampTo8(accum[1]);
212 out_row[byte_offset + 2] = ClampTo8(accum[2]);
213 if (has_alpha) {
214 unsigned char alpha = ClampTo8(accum[3]);
216 // Make sure the alpha channel doesn't come out smaller than any of the
217 // color channels. We use premultipled alpha channels, so this should
218 // never happen, but rounding errors will cause this from time to time.
219 // These "impossible" colors will cause overflows (and hence random pixel
220 // values) when the resulting bitmap is drawn to the screen.
222 // We only need to do this when generating the final output row (here).
223 int max_color_channel = std::max(out_row[byte_offset + 0],
224 std::max(out_row[byte_offset + 1], out_row[byte_offset + 2]));
225 if (alpha < max_color_channel)
226 out_row[byte_offset + 3] = max_color_channel;
227 else
228 out_row[byte_offset + 3] = alpha;
229 } else {
230 // No alpha channel, the image is opaque.
231 out_row[byte_offset + 3] = 0xff;
236 void ConvolveVertically(const ConvolutionFilter1D::Fixed* filter_values,
237 int filter_length,
238 unsigned char* const* source_data_rows,
239 int pixel_width,
240 unsigned char* out_row,
241 bool source_has_alpha) {
242 if (source_has_alpha) {
243 ConvolveVertically<true>(filter_values, filter_length,
244 source_data_rows,
245 pixel_width,
246 out_row);
247 } else {
248 ConvolveVertically<false>(filter_values, filter_length,
249 source_data_rows,
250 pixel_width,
251 out_row);
255 } // namespace
257 // ConvolutionFilter1D ---------------------------------------------------------
259 ConvolutionFilter1D::ConvolutionFilter1D()
260 : max_filter_(0) {
263 ConvolutionFilter1D::~ConvolutionFilter1D() {
266 void ConvolutionFilter1D::AddFilter(int filter_offset,
267 const float* filter_values,
268 int filter_length) {
269 SkASSERT(filter_length > 0);
271 std::vector<Fixed> fixed_values;
272 fixed_values.reserve(filter_length);
274 for (int i = 0; i < filter_length; ++i)
275 fixed_values.push_back(FloatToFixed(filter_values[i]));
277 AddFilter(filter_offset, &fixed_values[0], filter_length);
280 void ConvolutionFilter1D::AddFilter(int filter_offset,
281 const Fixed* filter_values,
282 int filter_length) {
283 // It is common for leading/trailing filter values to be zeros. In such
284 // cases it is beneficial to only store the central factors.
285 // For a scaling to 1/4th in each dimension using a Lanczos-2 filter on
286 // a 1080p image this optimization gives a ~10% speed improvement.
287 int filter_size = filter_length;
288 int first_non_zero = 0;
289 while (first_non_zero < filter_length && filter_values[first_non_zero] == 0)
290 first_non_zero++;
292 if (first_non_zero < filter_length) {
293 // Here we have at least one non-zero factor.
294 int last_non_zero = filter_length - 1;
295 while (last_non_zero >= 0 && filter_values[last_non_zero] == 0)
296 last_non_zero--;
298 filter_offset += first_non_zero;
299 filter_length = last_non_zero + 1 - first_non_zero;
300 SkASSERT(filter_length > 0);
302 for (int i = first_non_zero; i <= last_non_zero; i++)
303 filter_values_.push_back(filter_values[i]);
304 } else {
305 // Here all the factors were zeroes.
306 filter_length = 0;
309 FilterInstance instance;
311 // We pushed filter_length elements onto filter_values_
312 instance.data_location = (static_cast<int>(filter_values_.size()) -
313 filter_length);
314 instance.offset = filter_offset;
315 instance.trimmed_length = filter_length;
316 instance.length = filter_size;
317 filters_.push_back(instance);
319 max_filter_ = std::max(max_filter_, filter_length);
322 const ConvolutionFilter1D::Fixed* ConvolutionFilter1D::GetSingleFilter(
323 int* specified_filter_length,
324 int* filter_offset,
325 int* filter_length) const {
326 const FilterInstance& filter = filters_[0];
327 *filter_offset = filter.offset;
328 *filter_length = filter.trimmed_length;
329 *specified_filter_length = filter.length;
330 if (filter.trimmed_length == 0)
331 return NULL;
333 return &filter_values_[filter.data_location];
336 typedef void (*ConvolveVertically_pointer)(
337 const ConvolutionFilter1D::Fixed* filter_values,
338 int filter_length,
339 unsigned char* const* source_data_rows,
340 int pixel_width,
341 unsigned char* out_row,
342 bool has_alpha);
343 typedef void (*Convolve4RowsHorizontally_pointer)(
344 const unsigned char* src_data[4],
345 const ConvolutionFilter1D& filter,
346 unsigned char* out_row[4]);
347 typedef void (*ConvolveHorizontally_pointer)(
348 const unsigned char* src_data,
349 const ConvolutionFilter1D& filter,
350 unsigned char* out_row);
352 struct ConvolveProcs {
353 // This is how many extra pixels may be read by the
354 // conolve*horizontally functions.
355 int extra_horizontal_reads;
356 ConvolveVertically_pointer convolve_vertically;
357 Convolve4RowsHorizontally_pointer convolve_4rows_horizontally;
358 ConvolveHorizontally_pointer convolve_horizontally;
361 void SetupSIMD(ConvolveProcs *procs) {
362 #ifdef SIMD_SSE2
363 base::CPU cpu;
364 if (cpu.has_sse2()) {
365 procs->extra_horizontal_reads = 3;
366 procs->convolve_vertically = &ConvolveVertically_SSE2;
367 procs->convolve_4rows_horizontally = &Convolve4RowsHorizontally_SSE2;
368 procs->convolve_horizontally = &ConvolveHorizontally_SSE2;
370 #endif
373 void BGRAConvolve2D(const unsigned char* source_data,
374 int source_byte_row_stride,
375 bool source_has_alpha,
376 const ConvolutionFilter1D& filter_x,
377 const ConvolutionFilter1D& filter_y,
378 int output_byte_row_stride,
379 unsigned char* output,
380 bool use_simd_if_possible) {
381 ConvolveProcs simd;
382 simd.extra_horizontal_reads = 0;
383 simd.convolve_vertically = NULL;
384 simd.convolve_4rows_horizontally = NULL;
385 simd.convolve_horizontally = NULL;
386 if (use_simd_if_possible) {
387 SetupSIMD(&simd);
390 int max_y_filter_size = filter_y.max_filter();
392 // The next row in the input that we will generate a horizontally
393 // convolved row for. If the filter doesn't start at the beginning of the
394 // image (this is the case when we are only resizing a subset), then we
395 // don't want to generate any output rows before that. Compute the starting
396 // row for convolution as the first pixel for the first vertical filter.
397 int filter_offset, filter_length;
398 const ConvolutionFilter1D::Fixed* filter_values =
399 filter_y.FilterForValue(0, &filter_offset, &filter_length);
400 int next_x_row = filter_offset;
402 // We loop over each row in the input doing a horizontal convolution. This
403 // will result in a horizontally convolved image. We write the results into
404 // a circular buffer of convolved rows and do vertical convolution as rows
405 // are available. This prevents us from having to store the entire
406 // intermediate image and helps cache coherency.
407 // We will need four extra rows to allow horizontal convolution could be done
408 // simultaneously. We also padding each row in row buffer to be aligned-up to
409 // 16 bytes.
410 // TODO(jiesun): We do not use aligned load from row buffer in vertical
411 // convolution pass yet. Somehow Windows does not like it.
412 int row_buffer_width = (filter_x.num_values() + 15) & ~0xF;
413 int row_buffer_height = max_y_filter_size +
414 (simd.convolve_4rows_horizontally ? 4 : 0);
415 CircularRowBuffer row_buffer(row_buffer_width,
416 row_buffer_height,
417 filter_offset);
419 // Loop over every possible output row, processing just enough horizontal
420 // convolutions to run each subsequent vertical convolution.
421 SkASSERT(output_byte_row_stride >= filter_x.num_values() * 4);
422 int num_output_rows = filter_y.num_values();
424 // We need to check which is the last line to convolve before we advance 4
425 // lines in one iteration.
426 int last_filter_offset, last_filter_length;
428 // SSE2 can access up to 3 extra pixels past the end of the
429 // buffer. At the bottom of the image, we have to be careful
430 // not to access data past the end of the buffer. Normally
431 // we fall back to the C++ implementation for the last row.
432 // If the last row is less than 3 pixels wide, we may have to fall
433 // back to the C++ version for more rows. Compute how many
434 // rows we need to avoid the SSE implementation for here.
435 filter_x.FilterForValue(filter_x.num_values() - 1, &last_filter_offset,
436 &last_filter_length);
437 int avoid_simd_rows = 1 + simd.extra_horizontal_reads /
438 (last_filter_offset + last_filter_length);
440 filter_y.FilterForValue(num_output_rows - 1, &last_filter_offset,
441 &last_filter_length);
443 for (int out_y = 0; out_y < num_output_rows; out_y++) {
444 filter_values = filter_y.FilterForValue(out_y,
445 &filter_offset, &filter_length);
447 // Generate output rows until we have enough to run the current filter.
448 while (next_x_row < filter_offset + filter_length) {
449 if (simd.convolve_4rows_horizontally &&
450 next_x_row + 3 < last_filter_offset + last_filter_length -
451 avoid_simd_rows) {
452 const unsigned char* src[4];
453 unsigned char* out_row[4];
454 for (int i = 0; i < 4; ++i) {
455 src[i] = &source_data[(next_x_row + i) * source_byte_row_stride];
456 out_row[i] = row_buffer.AdvanceRow();
458 simd.convolve_4rows_horizontally(src, filter_x, out_row);
459 next_x_row += 4;
460 } else {
461 // Check if we need to avoid SSE2 for this row.
462 if (simd.convolve_horizontally &&
463 next_x_row < last_filter_offset + last_filter_length -
464 avoid_simd_rows) {
465 simd.convolve_horizontally(
466 &source_data[next_x_row * source_byte_row_stride],
467 filter_x, row_buffer.AdvanceRow());
468 } else {
469 if (source_has_alpha) {
470 ConvolveHorizontally<true>(
471 &source_data[next_x_row * source_byte_row_stride],
472 filter_x, row_buffer.AdvanceRow());
473 } else {
474 ConvolveHorizontally<false>(
475 &source_data[next_x_row * source_byte_row_stride],
476 filter_x, row_buffer.AdvanceRow());
479 next_x_row++;
483 // Compute where in the output image this row of final data will go.
484 unsigned char* cur_output_row = &output[out_y * output_byte_row_stride];
486 // Get the list of rows that the circular buffer has, in order.
487 int first_row_in_circular_buffer;
488 unsigned char* const* rows_to_convolve =
489 row_buffer.GetRowAddresses(&first_row_in_circular_buffer);
491 // Now compute the start of the subset of those rows that the filter
492 // needs.
493 unsigned char* const* first_row_for_filter =
494 &rows_to_convolve[filter_offset - first_row_in_circular_buffer];
496 if (simd.convolve_vertically) {
497 simd.convolve_vertically(filter_values, filter_length,
498 first_row_for_filter,
499 filter_x.num_values(), cur_output_row,
500 source_has_alpha);
501 } else {
502 ConvolveVertically(filter_values, filter_length,
503 first_row_for_filter,
504 filter_x.num_values(), cur_output_row,
505 source_has_alpha);
510 void SingleChannelConvolveX1D(const unsigned char* source_data,
511 int source_byte_row_stride,
512 int input_channel_index,
513 int input_channel_count,
514 const ConvolutionFilter1D& filter,
515 const SkISize& image_size,
516 unsigned char* output,
517 int output_byte_row_stride,
518 int output_channel_index,
519 int output_channel_count,
520 bool absolute_values) {
521 int filter_offset, filter_length, filter_size;
522 // Very much unlike BGRAConvolve2D, here we expect to have the same filter
523 // for all pixels.
524 const ConvolutionFilter1D::Fixed* filter_values =
525 filter.GetSingleFilter(&filter_size, &filter_offset, &filter_length);
527 if (filter_values == NULL || image_size.width() < filter_size) {
528 NOTREACHED();
529 return;
532 int centrepoint = filter_length / 2;
533 if (filter_size - filter_offset != 2 * filter_offset) {
534 // This means the original filter was not symmetrical AND
535 // got clipped from one side more than from the other.
536 centrepoint = filter_size / 2 - filter_offset;
539 const unsigned char* source_data_row = source_data;
540 unsigned char* output_row = output;
542 for (int r = 0; r < image_size.height(); ++r) {
543 unsigned char* target_byte = output_row + output_channel_index;
544 // Process the lead part, padding image to the left with the first pixel.
545 int c = 0;
546 for (; c < centrepoint; ++c, target_byte += output_channel_count) {
547 int accval = 0;
548 int i = 0;
549 int pixel_byte_index = input_channel_index;
550 for (; i < centrepoint - c; ++i) // Padding part.
551 accval += filter_values[i] * source_data_row[pixel_byte_index];
553 for (; i < filter_length; ++i, pixel_byte_index += input_channel_count)
554 accval += filter_values[i] * source_data_row[pixel_byte_index];
556 *target_byte = BringBackTo8(accval, absolute_values);
559 // Now for the main event.
560 for (; c < image_size.width() - centrepoint;
561 ++c, target_byte += output_channel_count) {
562 int accval = 0;
563 int pixel_byte_index = (c - centrepoint) * input_channel_count +
564 input_channel_index;
566 for (int i = 0; i < filter_length;
567 ++i, pixel_byte_index += input_channel_count) {
568 accval += filter_values[i] * source_data_row[pixel_byte_index];
571 *target_byte = BringBackTo8(accval, absolute_values);
574 for (; c < image_size.width(); ++c, target_byte += output_channel_count) {
575 int accval = 0;
576 int overlap_taps = image_size.width() - c + centrepoint;
577 int pixel_byte_index = (c - centrepoint) * input_channel_count +
578 input_channel_index;
579 int i = 0;
580 for (; i < overlap_taps - 1; ++i, pixel_byte_index += input_channel_count)
581 accval += filter_values[i] * source_data_row[pixel_byte_index];
583 for (; i < filter_length; ++i)
584 accval += filter_values[i] * source_data_row[pixel_byte_index];
586 *target_byte = BringBackTo8(accval, absolute_values);
589 source_data_row += source_byte_row_stride;
590 output_row += output_byte_row_stride;
594 void SingleChannelConvolveY1D(const unsigned char* source_data,
595 int source_byte_row_stride,
596 int input_channel_index,
597 int input_channel_count,
598 const ConvolutionFilter1D& filter,
599 const SkISize& image_size,
600 unsigned char* output,
601 int output_byte_row_stride,
602 int output_channel_index,
603 int output_channel_count,
604 bool absolute_values) {
605 int filter_offset, filter_length, filter_size;
606 // Very much unlike BGRAConvolve2D, here we expect to have the same filter
607 // for all pixels.
608 const ConvolutionFilter1D::Fixed* filter_values =
609 filter.GetSingleFilter(&filter_size, &filter_offset, &filter_length);
611 if (filter_values == NULL || image_size.height() < filter_size) {
612 NOTREACHED();
613 return;
616 int centrepoint = filter_length / 2;
617 if (filter_size - filter_offset != 2 * filter_offset) {
618 // This means the original filter was not symmetrical AND
619 // got clipped from one side more than from the other.
620 centrepoint = filter_size / 2 - filter_offset;
623 for (int c = 0; c < image_size.width(); ++c) {
624 unsigned char* target_byte = output + c * output_channel_count +
625 output_channel_index;
626 int r = 0;
628 for (; r < centrepoint; ++r, target_byte += output_byte_row_stride) {
629 int accval = 0;
630 int i = 0;
631 int pixel_byte_index = c * input_channel_count + input_channel_index;
633 for (; i < centrepoint - r; ++i) // Padding part.
634 accval += filter_values[i] * source_data[pixel_byte_index];
636 for (; i < filter_length; ++i, pixel_byte_index += source_byte_row_stride)
637 accval += filter_values[i] * source_data[pixel_byte_index];
639 *target_byte = BringBackTo8(accval, absolute_values);
642 for (; r < image_size.height() - centrepoint;
643 ++r, target_byte += output_byte_row_stride) {
644 int accval = 0;
645 int pixel_byte_index = (r - centrepoint) * source_byte_row_stride +
646 c * input_channel_count + input_channel_index;
647 for (int i = 0; i < filter_length;
648 ++i, pixel_byte_index += source_byte_row_stride) {
649 accval += filter_values[i] * source_data[pixel_byte_index];
652 *target_byte = BringBackTo8(accval, absolute_values);
655 for (; r < image_size.height();
656 ++r, target_byte += output_byte_row_stride) {
657 int accval = 0;
658 int overlap_taps = image_size.height() - r + centrepoint;
659 int pixel_byte_index = (r - centrepoint) * source_byte_row_stride +
660 c * input_channel_count + input_channel_index;
661 int i = 0;
662 for (; i < overlap_taps - 1;
663 ++i, pixel_byte_index += source_byte_row_stride) {
664 accval += filter_values[i] * source_data[pixel_byte_index];
667 for (; i < filter_length; ++i)
668 accval += filter_values[i] * source_data[pixel_byte_index];
670 *target_byte = BringBackTo8(accval, absolute_values);
675 void SetUpGaussianConvolutionKernel(ConvolutionFilter1D* filter,
676 float kernel_sigma,
677 bool derivative) {
678 DCHECK(filter != NULL);
679 DCHECK_GT(kernel_sigma, 0.0);
680 const int tail_length = static_cast<int>(4.0f * kernel_sigma + 0.5f);
681 const int kernel_size = tail_length * 2 + 1;
682 const float sigmasq = kernel_sigma * kernel_sigma;
683 std::vector<float> kernel_weights(kernel_size, 0.0);
684 float kernel_sum = 1.0f;
686 kernel_weights[tail_length] = 1.0f;
688 for (int ii = 1; ii <= tail_length; ++ii) {
689 float v = std::exp(-0.5f * ii * ii / sigmasq);
690 kernel_weights[tail_length + ii] = v;
691 kernel_weights[tail_length - ii] = v;
692 kernel_sum += 2.0f * v;
695 for (int i = 0; i < kernel_size; ++i)
696 kernel_weights[i] /= kernel_sum;
698 if (derivative) {
699 kernel_weights[tail_length] = 0.0;
700 for (int ii = 1; ii <= tail_length; ++ii) {
701 float v = sigmasq * kernel_weights[tail_length + ii] / ii;
702 kernel_weights[tail_length + ii] = v;
703 kernel_weights[tail_length - ii] = -v;
707 filter->AddFilter(0, &kernel_weights[0], kernel_weights.size());
710 } // namespace skia