Bug 1852740: add tests for the `fetchpriority` attribute in Link headers. r=necko...
[gecko.git] / media / libjpeg / simd / mips64 / jcgryext-mmi.c
blob08a83d6699cbdb5dc409b6614514d947e41db1ea
1 /*
2 * Loongson MMI optimizations for libjpeg-turbo
4 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
5 * Copyright (C) 2014-2015, 2019, D. R. Commander. All Rights Reserved.
6 * Copyright (C) 2016-2018, Loongson Technology Corporation Limited, BeiJing.
7 * All Rights Reserved.
8 * Authors: ZhangLixia <zhanglixia-hf@loongson.cn>
10 * Based on the x86 SIMD extension for IJG JPEG library
11 * Copyright (C) 1999-2006, MIYASAKA Masaru.
13 * This software is provided 'as-is', without any express or implied
14 * warranty. In no event will the authors be held liable for any damages
15 * arising from the use of this software.
17 * Permission is granted to anyone to use this software for any purpose,
18 * including commercial applications, and to alter it and redistribute it
19 * freely, subject to the following restrictions:
21 * 1. The origin of this software must not be misrepresented; you must not
22 * claim that you wrote the original software. If you use this software
23 * in a product, an acknowledgment in the product documentation would be
24 * appreciated but is not required.
25 * 2. Altered source versions must be plainly marked as such, and must not be
26 * misrepresented as being the original software.
27 * 3. This notice may not be removed or altered from any source distribution.
30 /* This file is included by jcgray-mmi.c */
/*
 * Positional aliases for the channel-named vector variables used by the
 * conversion routine in this file.  The aliases come in pairs: the first
 * name of each pair maps to an "e" variable and the second to an "o"
 * variable (re/ro, ge/go, be/bo, or xe/xo).  The pair is chosen by comparing
 * RGB_RED, RGB_GREEN and RGB_BLUE against a fixed index:
 *
 *   index 0 -> mmA / mmB
 *   index 1 -> mmC / mmD
 *   index 2 -> mmE / mmF
 *   index 3 -> mmG / mmH
 *
 * When none of the three RGB_* macros equals the index, the pair maps to
 * xe / xo.  RGB_RED, RGB_GREEN and RGB_BLUE are not defined here; they are
 * presumably each channel's byte position within a pixel, supplied by the
 * including file -- TODO confirm.
 */
/* Index 0 */
33 #if RGB_RED == 0
34 #define mmA re
35 #define mmB ro
36 #elif RGB_GREEN == 0
37 #define mmA ge
38 #define mmB go
39 #elif RGB_BLUE == 0
40 #define mmA be
41 #define mmB bo
42 #else
43 #define mmA xe
44 #define mmB xo
45 #endif
/* Index 1 */
47 #if RGB_RED == 1
48 #define mmC re
49 #define mmD ro
50 #elif RGB_GREEN == 1
51 #define mmC ge
52 #define mmD go
53 #elif RGB_BLUE == 1
54 #define mmC be
55 #define mmD bo
56 #else
57 #define mmC xe
58 #define mmD xo
59 #endif
/* Index 2 */
61 #if RGB_RED == 2
62 #define mmE re
63 #define mmF ro
64 #elif RGB_GREEN == 2
65 #define mmE ge
66 #define mmF go
67 #elif RGB_BLUE == 2
68 #define mmE be
69 #define mmF bo
70 #else
71 #define mmE xe
72 #define mmF xo
73 #endif
/* Index 3 */
75 #if RGB_RED == 3
76 #define mmG re
77 #define mmH ro
78 #elif RGB_GREEN == 3
79 #define mmG ge
80 #define mmH go
81 #elif RGB_BLUE == 3
82 #define mmG be
83 #define mmH bo
84 #else
85 #define mmG xe
86 #define mmH xo
87 #endif
/*
 * Convert num_rows rows of packed RGB pixels to grayscale samples, handling
 * up to 8 pixels per inner-loop iteration.
 *
 * image_width  number of pixels per row (the starting value of num_cols)
 * input_buf    array of input row pointers; one pointer is consumed per row
 * output_buf   output_buf[0][output_row] is used as the destination row
 * output_row   index of the first destination row; incremented once per row
 * num_rows     number of rows to convert
 *
 * Outline of one inner-loop iteration:
 *   1. Load the source bytes.  When at least 8 pixels remain, 8 pixels are
 *      read with aligned or unaligned 64-bit loads, depending on the low
 *      three bits of inptr, and inptr advances by RGB_PIXELSIZE * 8.  When
 *      fewer than 8 remain, an inline-asm sequence assembles the remaining
 *      bytes piecewise and inptr is not advanced.
 *   2. Rearrange the loaded vectors into the re/ge/be and ro/go/bo variables
 *      (even-numbered and odd-numbered pixels, per the layout comment near
 *      the end of the loop body).
 *   3. For the odd and the even pixels separately: interleave R with G and
 *      B with G, multiply-accumulate with PW_F0299_F0337 and PW_F0114_F0250
 *      via _mm_madd_pi16, add the two partial sums, add PD_ONEHALF, shift
 *      right by SCALEBITS, and pack the 32-bit results to 16 bits.
 *   4. Shift the odd results left by BYTE_BIT, OR them with the even
 *      results, and store the combined vector at outptr.
 *
 * NOTE(review): the final store is issued on every iteration, including the
 * one that handles fewer than 8 remaining pixels, and outptr advances by 8
 * each time.  This presumably relies on the destination row having room for
 * the width rounded up to a multiple of 8 samples -- confirm with the
 * caller's buffer allocation.
 */
90 void jsimd_rgb_gray_convert_mmi(JDIMENSION image_width, JSAMPARRAY input_buf,
91 JSAMPIMAGE output_buf, JDIMENSION output_row,
92 int num_rows)
94 JSAMPROW inptr, outptr;
95 int num_cols, col;
96 __m64 re, ro, ge, go, be, bo, xe;
97 #if RGB_PIXELSIZE == 4
98 __m64 xo;
99 #endif
100 __m64 rgle, rghe, rglo, rgho, bgle, bghe, bglo, bgho;
101 __m64 yle_rg, yhe_rg, yle_bg, yhe_bg, yle, yhe, ye;
102 __m64 ylo_rg, yho_rg, ylo_bg, yho_bg, ylo, yho, yo, y;
/* One pass of this loop converts one row. */
104 while (--num_rows >= 0) {
105 inptr = *input_buf++;
106 outptr = output_buf[0][output_row];
107 output_row++;
/* num_cols counts the pixels still to be converted in this row. */
109 for (num_cols = image_width; num_cols > 0; num_cols -= 8,
110 outptr += 8) {
112 #if RGB_PIXELSIZE == 3
/*
 * Tail case (fewer than 8 pixels left): col is the number of remaining
 * input bytes at 3 bytes per pixel.  The asm copies col into $9 and tests
 * its 1, 2, 4, 8 and 16 bits in turn, loading a piece of that size for each
 * bit that is set.
 *
 * NOTE(review): the template refers to col as %3 and to inptr as %5;
 * num_rows is listed as input operand %4 but is never referenced by the
 * template.  It looks like it is there only to keep inptr at %5 -- confirm.
 * NOTE(review): $12 is cleared (xor) only on the path taken when bit 0 of
 * col is set, yet it is shifted, ORed and moved to %0 on the other paths as
 * well.  Presumably any stale bits land only in lanes past the valid
 * pixels -- confirm.
 * NOTE(review): the clobber list names $f0, $f2 and $f4, none of which the
 * template references.
 */
114 if (num_cols < 8) {
115 col = num_cols * 3;
116 asm(".set noreorder\r\n"
/* Bit 0 set: subtract 1 from $9 and load the byte at inptr + $9 into $12. */
118 "li $8, 1\r\n"
119 "move $9, %3\r\n"
120 "and $10, $9, $8\r\n"
121 "beqz $10, 1f\r\n"
122 "nop \r\n"
123 "subu $9, $9, 1\r\n"
124 "xor $12, $12, $12\r\n"
125 "move $13, %5\r\n"
126 PTR_ADDU "$13, $13, $9\r\n"
127 "lbu $12, 0($13)\r\n"
/*
 * Bit 1 set: subtract 2 from $9, load the halfword at inptr + $9, and place
 * it below the previous contents of $12 (which are shifted left by 16).
 */
129 "1: \r\n"
130 "li $8, 2\r\n"
131 "and $10, $9, $8\r\n"
132 "beqz $10, 2f\r\n"
133 "nop \r\n"
134 "subu $9, $9, 2\r\n"
135 "xor $11, $11, $11\r\n"
136 "move $13, %5\r\n"
137 PTR_ADDU "$13, $13, $9\r\n"
138 "lhu $11, 0($13)\r\n"
139 "sll $12, $12, 16\r\n"
140 "or $12, $12, $11\r\n"
/*
 * Move $12 into %0.  Bit 2 set: subtract 4 from $9, load the word at
 * inptr + $9 into $14, copy it to %1, and rebuild %0 as ($12 << 32) | $14.
 */
142 "2: \r\n"
143 "dmtc1 $12, %0\r\n"
144 "li $8, 4\r\n"
145 "and $10, $9, $8\r\n"
146 "beqz $10, 3f\r\n"
147 "nop \r\n"
148 "subu $9, $9, 4\r\n"
149 "move $13, %5\r\n"
150 PTR_ADDU "$13, $13, $9\r\n"
151 "lwu $14, 0($13)\r\n"
152 "dmtc1 $14, %1\r\n"
153 "dsll32 $12, $12, 0\r\n"
154 "or $12, $12, $14\r\n"
155 "dmtc1 $12, %0\r\n"
/*
 * Bit 3 set: copy %0 to %1, load 8 bytes from inptr into %0, then jump to
 * label 5, skipping the bit-4 test.
 */
157 "3: \r\n"
158 "li $8, 8\r\n"
159 "and $10, $9, $8\r\n"
160 "beqz $10, 4f\r\n"
161 "nop \r\n"
162 "mov.s %1, %0\r\n"
163 "ldc1 %0, 0(%5)\r\n"
164 "li $9, 8\r\n"
165 "j 5f\r\n"
166 "nop \r\n"
/*
 * Bit 4 set: copy %0 to %2, then load 16 bytes from inptr into %0 (offset 0)
 * and %1 (offset 8).
 */
168 "4: \r\n"
169 "li $8, 16\r\n"
170 "and $10, $9, $8\r\n"
171 "beqz $10, 5f\r\n"
172 "nop \r\n"
173 "mov.s %2, %0\r\n"
174 "ldc1 %0, 0(%5)\r\n"
175 "ldc1 %1, 8(%5)\r\n"
177 "5: \r\n"
178 "nop \r\n"
179 ".set reorder\r\n"
181 : "=f" (mmA), "=f" (mmG), "=f" (mmF)
182 : "r" (col), "r" (num_rows), "r" (inptr)
183 : "$f0", "$f2", "$f4", "$8", "$9", "$10", "$11", "$12", "$13",
184 "$14", "memory"
/*
 * Full group of 8 pixels: read 24 bytes into mmA, mmG and mmF.  The aligned
 * load helper is used when the low three bits of inptr are zero, otherwise
 * the unaligned one.
 */
186 } else {
187 if (!(((long)inptr) & 7)) {
188 mmA = _mm_load_si64((__m64 *)&inptr[0]);
189 mmG = _mm_load_si64((__m64 *)&inptr[8]);
190 mmF = _mm_load_si64((__m64 *)&inptr[16]);
191 } else {
192 mmA = _mm_loadu_si64((__m64 *)&inptr[0]);
193 mmG = _mm_loadu_si64((__m64 *)&inptr[8]);
194 mmF = _mm_loadu_si64((__m64 *)&inptr[16]);
196 inptr += RGB_PIXELSIZE * 8;
/*
 * Shift/unpack sequence that regroups the three loaded vectors, followed by
 * _mm_loadlo_pi8_f / _mm_loadhi_pi8_f on each result.  The statements are
 * order-dependent because mmA, mmD, mmE and mmG are overwritten in place.
 * NOTE(review): presumably this separates the packed bytes by channel and
 * the loadlo/loadhi helpers widen bytes to 16-bit lanes -- confirm against
 * the intrinsics header.
 */
198 mmD = _mm_srli_si64(mmA, 4 * BYTE_BIT);
199 mmA = _mm_slli_si64(mmA, 4 * BYTE_BIT);
201 mmA = _mm_unpackhi_pi8(mmA, mmG);
202 mmG = _mm_slli_si64(mmG, 4 * BYTE_BIT);
204 mmD = _mm_unpacklo_pi8(mmD, mmF);
205 mmG = _mm_unpackhi_pi8(mmG, mmF);
207 mmE = _mm_srli_si64(mmA, 4 * BYTE_BIT);
208 mmA = _mm_slli_si64(mmA, 4 * BYTE_BIT);
210 mmA = _mm_unpackhi_pi8(mmA, mmD);
211 mmD = _mm_slli_si64(mmD, 4 * BYTE_BIT);
213 mmE = _mm_unpacklo_pi8(mmE, mmG);
214 mmD = _mm_unpackhi_pi8(mmD, mmG);
215 mmC = _mm_loadhi_pi8_f(mmA);
216 mmA = _mm_loadlo_pi8_f(mmA);
218 mmB = _mm_loadhi_pi8_f(mmE);
219 mmE = _mm_loadlo_pi8_f(mmE);
221 mmF = _mm_loadhi_pi8_f(mmD);
222 mmD = _mm_loadlo_pi8_f(mmD);
224 #else /* RGB_PIXELSIZE == 4 */
/*
 * Tail case (fewer than 8 pixels left): col is the number of remaining
 * pixels.  The asm copies col into $9 and tests its 1, 2 and 4 bits in
 * turn.  Here the template refers to col as %4 and to inptr as %5.
 * NOTE(review): the clobber list names $f0 and $f2, which the template does
 * not reference.
 */
226 if (num_cols < 8) {
227 col = num_cols;
228 asm(".set noreorder\r\n"
/*
 * Bit 0 set: subtract 1 from $9 and load 4 bytes at inptr + ($9 << 2)
 * into %0.
 */
230 "li $8, 1\r\n"
231 "move $9, %4\r\n"
232 "and $10, $9, $8\r\n"
233 "beqz $10, 1f\r\n"
234 "nop \r\n"
235 "subu $9, $9, 1\r\n"
236 PTR_SLL "$11, $9, 2\r\n"
237 "move $13, %5\r\n"
238 PTR_ADDU "$13, $13, $11\r\n"
239 "lwc1 %0, 0($13)\r\n"
/*
 * Bit 1 set: subtract 2 from $9, copy %0 to %1, and load 8 bytes at
 * inptr + ($9 << 2) into %0.
 */
241 "1: \r\n"
242 "li $8, 2\r\n"
243 "and $10, $9, $8\r\n"
244 "beqz $10, 2f\r\n"
245 "nop \r\n"
246 "subu $9, $9, 2\r\n"
247 PTR_SLL "$11, $9, 2\r\n"
248 "move $13, %5\r\n"
249 PTR_ADDU "$13, $13, $11\r\n"
250 "mov.s %1, %0\r\n"
251 "ldc1 %0, 0($13)\r\n"
/*
 * Bit 2 set: copy %0 to %2 and %1 to %3, then load 16 bytes from inptr into
 * %0 (offset 0) and %1 (offset 8).
 */
253 "2: \r\n"
254 "li $8, 4\r\n"
255 "and $10, $9, $8\r\n"
256 "beqz $10, 3f\r\n"
257 "nop \r\n"
258 "mov.s %2, %0\r\n"
259 "mov.s %3, %1\r\n"
260 "ldc1 %0, 0(%5)\r\n"
261 "ldc1 %1, 8(%5)\r\n"
263 "3: \r\n"
264 "nop \r\n"
265 ".set reorder\r\n"
267 : "=f" (mmA), "=f" (mmF), "=f" (mmD), "=f" (mmC)
268 : "r" (col), "r" (inptr)
269 : "$f0", "$f2", "$8", "$9", "$10", "$11", "$13", "memory"
/*
 * Full group of 8 pixels: read 32 bytes into mmA, mmF, mmD and mmC, using
 * the aligned load helper when the low three bits of inptr are zero and the
 * unaligned one otherwise.
 */
271 } else {
272 if (!(((long)inptr) & 7)) {
273 mmA = _mm_load_si64((__m64 *)&inptr[0]);
274 mmF = _mm_load_si64((__m64 *)&inptr[8]);
275 mmD = _mm_load_si64((__m64 *)&inptr[16]);
276 mmC = _mm_load_si64((__m64 *)&inptr[24]);
277 } else {
278 mmA = _mm_loadu_si64((__m64 *)&inptr[0]);
279 mmF = _mm_loadu_si64((__m64 *)&inptr[8]);
280 mmD = _mm_loadu_si64((__m64 *)&inptr[16]);
281 mmC = _mm_loadu_si64((__m64 *)&inptr[24]);
283 inptr += RGB_PIXELSIZE * 8;
/*
 * Byte-level then 16-bit-level unpack steps that regroup the four loaded
 * vectors, followed by _mm_loadlo_pi8_f / _mm_loadhi_pi8_f on three of the
 * results.  The fourth (mmH) is instead unpacked with itself and shifted
 * right by BYTE_BIT within 16-bit lanes.  The statements are order-dependent
 * because the variables are overwritten in place.
 * NOTE(review): presumably this separates the packed bytes by channel and
 * widens them to 16-bit lanes -- confirm against the intrinsics header.
 */
285 mmB = _mm_unpackhi_pi8(mmA, mmF);
286 mmA = _mm_unpacklo_pi8(mmA, mmF);
288 mmG = _mm_unpackhi_pi8(mmD, mmC);
289 mmD = _mm_unpacklo_pi8(mmD, mmC);
291 mmE = _mm_unpackhi_pi16(mmA, mmD);
292 mmA = _mm_unpacklo_pi16(mmA, mmD);
294 mmH = _mm_unpackhi_pi16(mmB, mmG);
295 mmB = _mm_unpacklo_pi16(mmB, mmG);
297 mmC = _mm_loadhi_pi8_f(mmA);
298 mmA = _mm_loadlo_pi8_f(mmA);
300 mmD = _mm_loadhi_pi8_f(mmB);
301 mmB = _mm_loadlo_pi8_f(mmB);
303 mmG = _mm_loadhi_pi8_f(mmE);
304 mmE = _mm_loadlo_pi8_f(mmE);
306 mmF = _mm_unpacklo_pi8(mmH, mmH);
307 mmH = _mm_unpackhi_pi8(mmH, mmH);
308 mmF = _mm_srli_pi16(mmF, BYTE_BIT);
309 mmH = _mm_srli_pi16(mmH, BYTE_BIT);
311 #endif
313 /* re=(R0 R2 R4 R6), ge=(G0 G2 G4 G6), be=(B0 B2 B4 B6)
314 * ro=(R1 R3 R5 R7), go=(G1 G3 G5 G7), bo=(B1 B3 B5 B7)
316 * (Original)
317 * Y = 0.29900 * R + 0.58700 * G + 0.11400 * B
319 * (This implementation)
320 * Y = 0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G
323 rglo = _mm_unpacklo_pi16(ro, go);
324 rgho = _mm_unpackhi_pi16(ro, go);
325 ylo_rg = _mm_madd_pi16(rglo, PW_F0299_F0337);
326 yho_rg = _mm_madd_pi16(rgho, PW_F0299_F0337);
328 rgle = _mm_unpacklo_pi16(re, ge);
329 rghe = _mm_unpackhi_pi16(re, ge);
330 yle_rg = _mm_madd_pi16(rgle, PW_F0299_F0337);
331 yhe_rg = _mm_madd_pi16(rghe, PW_F0299_F0337);
333 bglo = _mm_unpacklo_pi16(bo, go);
334 bgho = _mm_unpackhi_pi16(bo, go);
335 ylo_bg = _mm_madd_pi16(bglo, PW_F0114_F0250);
336 yho_bg = _mm_madd_pi16(bgho, PW_F0114_F0250);
338 ylo = _mm_add_pi32(ylo_bg, ylo_rg);
339 yho = _mm_add_pi32(yho_bg, yho_rg);
340 ylo = _mm_add_pi32(ylo, PD_ONEHALF);
341 yho = _mm_add_pi32(yho, PD_ONEHALF);
342 ylo = _mm_srli_pi32(ylo, SCALEBITS);
343 yho = _mm_srli_pi32(yho, SCALEBITS);
344 yo = _mm_packs_pi32(ylo, yho);
346 bgle = _mm_unpacklo_pi16(be, ge);
347 bghe = _mm_unpackhi_pi16(be, ge);
348 yle_bg = _mm_madd_pi16(bgle, PW_F0114_F0250);
349 yhe_bg = _mm_madd_pi16(bghe, PW_F0114_F0250);
351 yle = _mm_add_pi32(yle_bg, yle_rg);
352 yhe = _mm_add_pi32(yhe_bg, yhe_rg);
353 yle = _mm_add_pi32(yle, PD_ONEHALF);
354 yhe = _mm_add_pi32(yhe, PD_ONEHALF);
355 yle = _mm_srli_pi32(yle, SCALEBITS);
356 yhe = _mm_srli_pi32(yhe, SCALEBITS);
357 ye = _mm_packs_pi32(yle, yhe);
359 yo = _mm_slli_pi16(yo, BYTE_BIT);
360 y = _mm_or_si64(ye, yo);
362 _mm_store_si64((__m64 *)&outptr[0], y);
/*
 * Remove the positional aliases defined at the top of this file.  Since this
 * file is included by jcgray-mmi.c, this presumably allows it to be included
 * again with different RGB_* settings without macro redefinition -- confirm
 * against the including file.
 */
367 #undef mmA
368 #undef mmB
369 #undef mmC
370 #undef mmD
371 #undef mmE
372 #undef mmF
373 #undef mmG
374 #undef mmH