// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
using System.Runtime.CompilerServices;

namespace System.Runtime.Intrinsics.X86
{
    /// <summary>
    /// This class provides access to Intel SSE hardware instructions via intrinsics.
    /// </summary>
    /// <remarks>
    /// NOTE(review): every member below deliberately calls itself (e.g. Add =&gt; Add(left, right)).
    /// This is the recursive-expansion pattern used by hardware-intrinsic stubs: the JIT replaces
    /// recognized calls with the corresponding machine instruction, so the recursive body is never
    /// executed when the intrinsic is supported. Confirm the class carries the [Intrinsic] attribute
    /// in the original file — without it these stubs would recurse at runtime.
    /// </remarks>
    public abstract class Sse
    {
        /// <summary>Gets whether SSE is supported; expanded to a constant by the JIT.</summary>
        public static bool IsSupported { get => IsSupported; }

        /// <summary>
        /// Provides access to the Intel SSE hardware instructions that are only available
        /// to 64-bit processes.
        /// </summary>
        public abstract class X64
        {
            /// <summary>Gets whether the 64-bit-only SSE instructions are supported.</summary>
            public static bool IsSupported { get => IsSupported; }

            /// <summary>
            /// __int64 _mm_cvtss_si64 (__m128 a)
            ///   CVTSS2SI r64, xmm/m32
            /// This intrinsic is only available on 64-bit processes.
            /// </summary>
            public static long ConvertToInt64(Vector128<float> value) => ConvertToInt64(value);

            /// <summary>
            /// __m128 _mm_cvtsi64_ss (__m128 a, __int64 b)
            ///   CVTSI2SS xmm, reg/m64
            /// This intrinsic is only available on 64-bit processes.
            /// </summary>
            public static Vector128<float> ConvertScalarToVector128Single(Vector128<float> upper, long value) => ConvertScalarToVector128Single(upper, value);

            /// <summary>
            /// __int64 _mm_cvttss_si64 (__m128 a)
            ///   CVTTSS2SI r64, xmm/m32
            /// This intrinsic is only available on 64-bit processes.
            /// </summary>
            public static long ConvertToInt64WithTruncation(Vector128<float> value) => ConvertToInt64WithTruncation(value);
        }

        /// <summary>
        /// __m128 _mm_add_ps (__m128 a, __m128 b)
        ///   ADDPS xmm, xmm/m128
        /// </summary>
        public static Vector128<float> Add(Vector128<float> left, Vector128<float> right) => Add(left, right);

        /// <summary>
        /// __m128 _mm_add_ss (__m128 a, __m128 b)
        ///   ADDSS xmm, xmm/m32
        /// </summary>
        public static Vector128<float> AddScalar(Vector128<float> left, Vector128<float> right) => AddScalar(left, right);

        /// <summary>
        /// __m128 _mm_and_ps (__m128 a, __m128 b)
        ///   ANDPS xmm, xmm/m128
        /// </summary>
        public static Vector128<float> And(Vector128<float> left, Vector128<float> right) => And(left, right);

        /// <summary>
        /// __m128 _mm_andnot_ps (__m128 a, __m128 b)
        ///   ANDNPS xmm, xmm/m128
        /// </summary>
        public static Vector128<float> AndNot(Vector128<float> left, Vector128<float> right) => AndNot(left, right);

        /// <summary>
        /// __m128 _mm_cmpeq_ps (__m128 a, __m128 b)
        ///   CMPPS xmm, xmm/m128, imm8(0)
        /// </summary>
        public static Vector128<float> CompareEqual(Vector128<float> left, Vector128<float> right) => CompareEqual(left, right);

        /// <summary>
        /// int _mm_comieq_ss (__m128 a, __m128 b)
        ///   COMISS xmm, xmm/m32
        /// </summary>
        public static bool CompareScalarOrderedEqual(Vector128<float> left, Vector128<float> right) => CompareScalarOrderedEqual(left, right);

        /// <summary>
        /// int _mm_ucomieq_ss (__m128 a, __m128 b)
        ///   UCOMISS xmm, xmm/m32
        /// </summary>
        public static bool CompareScalarUnorderedEqual(Vector128<float> left, Vector128<float> right) => CompareScalarUnorderedEqual(left, right);

        /// <summary>
        /// __m128 _mm_cmpeq_ss (__m128 a, __m128 b)
        ///   CMPSS xmm, xmm/m32, imm8(0)
        /// </summary>
        public static Vector128<float> CompareScalarEqual(Vector128<float> left, Vector128<float> right) => CompareScalarEqual(left, right);

        /// <summary>
        /// __m128 _mm_cmpgt_ps (__m128 a, __m128 b)
        ///   CMPPS xmm, xmm/m128, imm8(6)
        /// </summary>
        public static Vector128<float> CompareGreaterThan(Vector128<float> left, Vector128<float> right) => CompareGreaterThan(left, right);

        /// <summary>
        /// int _mm_comigt_ss (__m128 a, __m128 b)
        ///   COMISS xmm, xmm/m32
        /// </summary>
        public static bool CompareScalarOrderedGreaterThan(Vector128<float> left, Vector128<float> right) => CompareScalarOrderedGreaterThan(left, right);

        /// <summary>
        /// int _mm_ucomigt_ss (__m128 a, __m128 b)
        ///   UCOMISS xmm, xmm/m32
        /// </summary>
        public static bool CompareScalarUnorderedGreaterThan(Vector128<float> left, Vector128<float> right) => CompareScalarUnorderedGreaterThan(left, right);

        /// <summary>
        /// __m128 _mm_cmpgt_ss (__m128 a, __m128 b)
        ///   CMPSS xmm, xmm/m32, imm8(6)
        /// </summary>
        public static Vector128<float> CompareScalarGreaterThan(Vector128<float> left, Vector128<float> right) => CompareScalarGreaterThan(left, right);

        /// <summary>
        /// __m128 _mm_cmpge_ps (__m128 a, __m128 b)
        ///   CMPPS xmm, xmm/m128, imm8(5)
        /// </summary>
        public static Vector128<float> CompareGreaterThanOrEqual(Vector128<float> left, Vector128<float> right) => CompareGreaterThanOrEqual(left, right);

        /// <summary>
        /// int _mm_comige_ss (__m128 a, __m128 b)
        ///   COMISS xmm, xmm/m32
        /// </summary>
        public static bool CompareScalarOrderedGreaterThanOrEqual(Vector128<float> left, Vector128<float> right) => CompareScalarOrderedGreaterThanOrEqual(left, right);

        /// <summary>
        /// int _mm_ucomige_ss (__m128 a, __m128 b)
        ///   UCOMISS xmm, xmm/m32
        /// </summary>
        public static bool CompareScalarUnorderedGreaterThanOrEqual(Vector128<float> left, Vector128<float> right) => CompareScalarUnorderedGreaterThanOrEqual(left, right);

        /// <summary>
        /// __m128 _mm_cmpge_ss (__m128 a, __m128 b)
        ///   CMPSS xmm, xmm/m32, imm8(5)
        /// </summary>
        public static Vector128<float> CompareScalarGreaterThanOrEqual(Vector128<float> left, Vector128<float> right) => CompareScalarGreaterThanOrEqual(left, right);

        /// <summary>
        /// __m128 _mm_cmplt_ps (__m128 a, __m128 b)
        ///   CMPPS xmm, xmm/m128, imm8(1)
        /// </summary>
        public static Vector128<float> CompareLessThan(Vector128<float> left, Vector128<float> right) => CompareLessThan(left, right);

        /// <summary>
        /// int _mm_comilt_ss (__m128 a, __m128 b)
        ///   COMISS xmm, xmm/m32
        /// </summary>
        public static bool CompareScalarOrderedLessThan(Vector128<float> left, Vector128<float> right) => CompareScalarOrderedLessThan(left, right);

        /// <summary>
        /// int _mm_ucomilt_ss (__m128 a, __m128 b)
        ///   UCOMISS xmm, xmm/m32
        /// </summary>
        public static bool CompareScalarUnorderedLessThan(Vector128<float> left, Vector128<float> right) => CompareScalarUnorderedLessThan(left, right);

        /// <summary>
        /// __m128 _mm_cmplt_ss (__m128 a, __m128 b)
        ///   CMPSS xmm, xmm/m32, imm8(1)
        /// </summary>
        public static Vector128<float> CompareScalarLessThan(Vector128<float> left, Vector128<float> right) => CompareScalarLessThan(left, right);

        /// <summary>
        /// __m128 _mm_cmple_ps (__m128 a, __m128 b)
        ///   CMPPS xmm, xmm/m128, imm8(2)
        /// </summary>
        public static Vector128<float> CompareLessThanOrEqual(Vector128<float> left, Vector128<float> right) => CompareLessThanOrEqual(left, right);

        /// <summary>
        /// int _mm_comile_ss (__m128 a, __m128 b)
        ///   COMISS xmm, xmm/m32
        /// </summary>
        public static bool CompareScalarOrderedLessThanOrEqual(Vector128<float> left, Vector128<float> right) => CompareScalarOrderedLessThanOrEqual(left, right);

        /// <summary>
        /// int _mm_ucomile_ss (__m128 a, __m128 b)
        ///   UCOMISS xmm, xmm/m32
        /// </summary>
        public static bool CompareScalarUnorderedLessThanOrEqual(Vector128<float> left, Vector128<float> right) => CompareScalarUnorderedLessThanOrEqual(left, right);

        /// <summary>
        /// __m128 _mm_cmple_ss (__m128 a, __m128 b)
        ///   CMPSS xmm, xmm/m32, imm8(2)
        /// </summary>
        public static Vector128<float> CompareScalarLessThanOrEqual(Vector128<float> left, Vector128<float> right) => CompareScalarLessThanOrEqual(left, right);

        /// <summary>
        /// __m128 _mm_cmpneq_ps (__m128 a, __m128 b)
        ///   CMPPS xmm, xmm/m128, imm8(4)
        /// </summary>
        public static Vector128<float> CompareNotEqual(Vector128<float> left, Vector128<float> right) => CompareNotEqual(left, right);

        /// <summary>
        /// int _mm_comineq_ss (__m128 a, __m128 b)
        ///   COMISS xmm, xmm/m32
        /// </summary>
        public static bool CompareScalarOrderedNotEqual(Vector128<float> left, Vector128<float> right) => CompareScalarOrderedNotEqual(left, right);

        /// <summary>
        /// int _mm_ucomineq_ss (__m128 a, __m128 b)
        ///   UCOMISS xmm, xmm/m32
        /// </summary>
        public static bool CompareScalarUnorderedNotEqual(Vector128<float> left, Vector128<float> right) => CompareScalarUnorderedNotEqual(left, right);

        /// <summary>
        /// __m128 _mm_cmpneq_ss (__m128 a, __m128 b)
        ///   CMPSS xmm, xmm/m32, imm8(4)
        /// </summary>
        public static Vector128<float> CompareScalarNotEqual(Vector128<float> left, Vector128<float> right) => CompareScalarNotEqual(left, right);

        /// <summary>
        /// __m128 _mm_cmpngt_ps (__m128 a, __m128 b)
        ///   CMPPS xmm, xmm/m128, imm8(2)
        /// </summary>
        public static Vector128<float> CompareNotGreaterThan(Vector128<float> left, Vector128<float> right) => CompareNotGreaterThan(left, right);

        /// <summary>
        /// __m128 _mm_cmpngt_ss (__m128 a, __m128 b)
        ///   CMPSS xmm, xmm/m32, imm8(2)
        /// </summary>
        public static Vector128<float> CompareScalarNotGreaterThan(Vector128<float> left, Vector128<float> right) => CompareScalarNotGreaterThan(left, right);

        /// <summary>
        /// __m128 _mm_cmpnge_ps (__m128 a, __m128 b)
        ///   CMPPS xmm, xmm/m128, imm8(1)
        /// </summary>
        public static Vector128<float> CompareNotGreaterThanOrEqual(Vector128<float> left, Vector128<float> right) => CompareNotGreaterThanOrEqual(left, right);

        /// <summary>
        /// __m128 _mm_cmpnge_ss (__m128 a, __m128 b)
        ///   CMPSS xmm, xmm/m32, imm8(1)
        /// </summary>
        public static Vector128<float> CompareScalarNotGreaterThanOrEqual(Vector128<float> left, Vector128<float> right) => CompareScalarNotGreaterThanOrEqual(left, right);

        /// <summary>
        /// __m128 _mm_cmpnlt_ps (__m128 a, __m128 b)
        ///   CMPPS xmm, xmm/m128, imm8(5)
        /// </summary>
        public static Vector128<float> CompareNotLessThan(Vector128<float> left, Vector128<float> right) => CompareNotLessThan(left, right);

        /// <summary>
        /// __m128 _mm_cmpnlt_ss (__m128 a, __m128 b)
        ///   CMPSS xmm, xmm/m32, imm8(5)
        /// </summary>
        public static Vector128<float> CompareScalarNotLessThan(Vector128<float> left, Vector128<float> right) => CompareScalarNotLessThan(left, right);

        /// <summary>
        /// __m128 _mm_cmpnle_ps (__m128 a, __m128 b)
        ///   CMPPS xmm, xmm/m128, imm8(6)
        /// </summary>
        public static Vector128<float> CompareNotLessThanOrEqual(Vector128<float> left, Vector128<float> right) => CompareNotLessThanOrEqual(left, right);

        /// <summary>
        /// __m128 _mm_cmpnle_ss (__m128 a, __m128 b)
        ///   CMPSS xmm, xmm/m32, imm8(6)
        /// </summary>
        public static Vector128<float> CompareScalarNotLessThanOrEqual(Vector128<float> left, Vector128<float> right) => CompareScalarNotLessThanOrEqual(left, right);

        /// <summary>
        /// __m128 _mm_cmpord_ps (__m128 a, __m128 b)
        ///   CMPPS xmm, xmm/m128, imm8(7)
        /// </summary>
        public static Vector128<float> CompareOrdered(Vector128<float> left, Vector128<float> right) => CompareOrdered(left, right);

        /// <summary>
        /// __m128 _mm_cmpord_ss (__m128 a, __m128 b)
        ///   CMPSS xmm, xmm/m32, imm8(7)
        /// </summary>
        public static Vector128<float> CompareScalarOrdered(Vector128<float> left, Vector128<float> right) => CompareScalarOrdered(left, right);

        /// <summary>
        /// __m128 _mm_cmpunord_ps (__m128 a, __m128 b)
        ///   CMPPS xmm, xmm/m128, imm8(3)
        /// </summary>
        public static Vector128<float> CompareUnordered(Vector128<float> left, Vector128<float> right) => CompareUnordered(left, right);

        /// <summary>
        /// __m128 _mm_cmpunord_ss (__m128 a, __m128 b)
        ///   CMPSS xmm, xmm/m32, imm8(3)
        /// </summary>
        public static Vector128<float> CompareScalarUnordered(Vector128<float> left, Vector128<float> right) => CompareScalarUnordered(left, right);

        /// <summary>
        /// int _mm_cvtss_si32 (__m128 a)
        ///   CVTSS2SI r32, xmm/m32
        /// </summary>
        public static int ConvertToInt32(Vector128<float> value) => ConvertToInt32(value);

        /// <summary>
        /// __m128 _mm_cvtsi32_ss (__m128 a, int b)
        ///   CVTSI2SS xmm, reg/m32
        /// </summary>
        public static Vector128<float> ConvertScalarToVector128Single(Vector128<float> upper, int value) => ConvertScalarToVector128Single(upper, value);

        /// <summary>
        /// int _mm_cvttss_si32 (__m128 a)
        ///   CVTTSS2SI r32, xmm/m32
        /// </summary>
        public static int ConvertToInt32WithTruncation(Vector128<float> value) => ConvertToInt32WithTruncation(value);

        /// <summary>
        /// __m128 _mm_div_ps (__m128 a, __m128 b)
        ///   DIVPS xmm, xmm/m128
        /// </summary>
        public static Vector128<float> Divide(Vector128<float> left, Vector128<float> right) => Divide(left, right);

        /// <summary>
        /// __m128 _mm_div_ss (__m128 a, __m128 b)
        ///   DIVSS xmm, xmm/m32
        /// </summary>
        public static Vector128<float> DivideScalar(Vector128<float> left, Vector128<float> right) => DivideScalar(left, right);

        /// <summary>
        /// __m128 _mm_loadu_ps (float const* mem_address)
        /// </summary>
        public static unsafe Vector128<float> LoadVector128(float* address) => LoadVector128(address);

        /// <summary>
        /// __m128 _mm_load_ss (float const* mem_address)
        /// </summary>
        public static unsafe Vector128<float> LoadScalarVector128(float* address) => LoadScalarVector128(address);

        /// <summary>
        /// __m128 _mm_load_ps (float const* mem_address)
        /// </summary>
        public static unsafe Vector128<float> LoadAlignedVector128(float* address) => LoadAlignedVector128(address);

        /// <summary>
        /// __m128 _mm_loadh_pi (__m128 a, __m64 const* mem_addr)
        /// </summary>
        public static unsafe Vector128<float> LoadHigh(Vector128<float> lower, float* address) => LoadHigh(lower, address);

        /// <summary>
        /// __m128 _mm_loadl_pi (__m128 a, __m64 const* mem_addr)
        /// </summary>
        public static unsafe Vector128<float> LoadLow(Vector128<float> upper, float* address) => LoadLow(upper, address);

        /// <summary>
        /// __m128 _mm_max_ps (__m128 a, __m128 b)
        ///   MAXPS xmm, xmm/m128
        /// </summary>
        public static Vector128<float> Max(Vector128<float> left, Vector128<float> right) => Max(left, right);

        /// <summary>
        /// __m128 _mm_max_ss (__m128 a, __m128 b)
        ///   MAXSS xmm, xmm/m32
        /// </summary>
        public static Vector128<float> MaxScalar(Vector128<float> left, Vector128<float> right) => MaxScalar(left, right);

        /// <summary>
        /// __m128 _mm_min_ps (__m128 a, __m128 b)
        ///   MINPS xmm, xmm/m128
        /// </summary>
        public static Vector128<float> Min(Vector128<float> left, Vector128<float> right) => Min(left, right);

        /// <summary>
        /// __m128 _mm_min_ss (__m128 a, __m128 b)
        ///   MINSS xmm, xmm/m32
        /// </summary>
        public static Vector128<float> MinScalar(Vector128<float> left, Vector128<float> right) => MinScalar(left, right);

        /// <summary>
        /// __m128 _mm_move_ss (__m128 a, __m128 b)
        /// </summary>
        public static Vector128<float> MoveScalar(Vector128<float> upper, Vector128<float> value) => MoveScalar(upper, value);

        /// <summary>
        /// __m128 _mm_movehl_ps (__m128 a, __m128 b)
        /// </summary>
        public static Vector128<float> MoveHighToLow(Vector128<float> left, Vector128<float> right) => MoveHighToLow(left, right);

        /// <summary>
        /// __m128 _mm_movelh_ps (__m128 a, __m128 b)
        /// </summary>
        public static Vector128<float> MoveLowToHigh(Vector128<float> left, Vector128<float> right) => MoveLowToHigh(left, right);

        /// <summary>
        /// int _mm_movemask_ps (__m128 a)
        ///   MOVMSKPS reg, xmm
        /// </summary>
        public static int MoveMask(Vector128<float> value) => MoveMask(value);

        /// <summary>
        /// __m128 _mm_mul_ps (__m128 a, __m128 b)
        ///   MULPS xmm, xmm/m128
        /// </summary>
        public static Vector128<float> Multiply(Vector128<float> left, Vector128<float> right) => Multiply(left, right);

        /// <summary>
        /// __m128 _mm_mul_ss (__m128 a, __m128 b)
        ///   MULSS xmm, xmm/m32
        /// </summary>
        public static Vector128<float> MultiplyScalar(Vector128<float> left, Vector128<float> right) => MultiplyScalar(left, right);

        /// <summary>
        /// __m128 _mm_or_ps (__m128 a, __m128 b)
        ///   ORPS xmm, xmm/m128
        /// </summary>
        public static Vector128<float> Or(Vector128<float> left, Vector128<float> right) => Or(left, right);

        /// <summary>
        /// void _mm_prefetch(char* p, int i)
        /// </summary>
        public static unsafe void Prefetch0(void* address) => Prefetch0(address);

        /// <summary>
        /// void _mm_prefetch(char* p, int i)
        /// </summary>
        public static unsafe void Prefetch1(void* address) => Prefetch1(address);

        /// <summary>
        /// void _mm_prefetch(char* p, int i)
        /// </summary>
        public static unsafe void Prefetch2(void* address) => Prefetch2(address);

        /// <summary>
        /// void _mm_prefetch(char* p, int i)
        /// </summary>
        public static unsafe void PrefetchNonTemporal(void* address) => PrefetchNonTemporal(address);

        /// <summary>
        /// __m128 _mm_rcp_ps (__m128 a)
        ///   RCPPS xmm, xmm/m128
        /// </summary>
        public static Vector128<float> Reciprocal(Vector128<float> value) => Reciprocal(value);

        /// <summary>
        /// __m128 _mm_rcp_ss (__m128 a)
        ///   RCPSS xmm, xmm/m32
        /// </summary>
        public static Vector128<float> ReciprocalScalar(Vector128<float> value) => ReciprocalScalar(value);

        /// <summary>
        /// __m128 _mm_rcp_ss (__m128 a, __m128 b)
        ///   RCPSS xmm, xmm/m32
        /// The above native signature does not exist. We provide this additional overload
        /// for consistency with the other scalar APIs.
        /// </summary>
        public static Vector128<float> ReciprocalScalar(Vector128<float> upper, Vector128<float> value) => ReciprocalScalar(upper, value);

        /// <summary>
        /// __m128 _mm_rsqrt_ps (__m128 a)
        ///   RSQRTPS xmm, xmm/m128
        /// </summary>
        public static Vector128<float> ReciprocalSqrt(Vector128<float> value) => ReciprocalSqrt(value);

        /// <summary>
        /// __m128 _mm_rsqrt_ss (__m128 a)
        ///   RSQRTSS xmm, xmm/m32
        /// </summary>
        public static Vector128<float> ReciprocalSqrtScalar(Vector128<float> value) => ReciprocalSqrtScalar(value);

        /// <summary>
        /// __m128 _mm_rsqrt_ss (__m128 a, __m128 b)
        ///   RSQRTSS xmm, xmm/m32
        /// The above native signature does not exist. We provide this additional overload
        /// for consistency with the other scalar APIs.
        /// </summary>
        public static Vector128<float> ReciprocalSqrtScalar(Vector128<float> upper, Vector128<float> value) => ReciprocalSqrtScalar(upper, value);

        /// <summary>
        /// __m128 _mm_shuffle_ps (__m128 a, __m128 b, unsigned int control)
        ///   SHUFPS xmm, xmm/m128, imm8
        /// </summary>
        public static Vector128<float> Shuffle(Vector128<float> left, Vector128<float> right, byte control) => Shuffle(left, right, control);

        /// <summary>
        /// __m128 _mm_sqrt_ps (__m128 a)
        ///   SQRTPS xmm, xmm/m128
        /// </summary>
        public static Vector128<float> Sqrt(Vector128<float> value) => Sqrt(value);

        /// <summary>
        /// __m128 _mm_sqrt_ss (__m128 a)
        ///   SQRTSS xmm, xmm/m32
        /// </summary>
        public static Vector128<float> SqrtScalar(Vector128<float> value) => SqrtScalar(value);

        /// <summary>
        /// __m128 _mm_sqrt_ss (__m128 a, __m128 b)
        ///   SQRTSS xmm, xmm/m32
        /// The above native signature does not exist. We provide this additional overload
        /// for consistency with the other scalar APIs.
        /// </summary>
        public static Vector128<float> SqrtScalar(Vector128<float> upper, Vector128<float> value) => SqrtScalar(upper, value);

        /// <summary>
        /// void _mm_store_ps (float* mem_addr, __m128 a)
        /// </summary>
        public static unsafe void StoreAligned(float* address, Vector128<float> source) => StoreAligned(address, source);

        /// <summary>
        /// void _mm_stream_ps (float* mem_addr, __m128 a)
        ///   MOVNTPS m128, xmm
        /// </summary>
        public static unsafe void StoreAlignedNonTemporal(float* address, Vector128<float> source) => StoreAlignedNonTemporal(address, source);

        /// <summary>
        /// void _mm_storeu_ps (float* mem_addr, __m128 a)
        /// </summary>
        public static unsafe void Store(float* address, Vector128<float> source) => Store(address, source);

        /// <summary>
        /// void _mm_sfence(void)
        /// </summary>
        public static void StoreFence() => StoreFence();

        /// <summary>
        /// void _mm_store_ss (float* mem_addr, __m128 a)
        /// </summary>
        public static unsafe void StoreScalar(float* address, Vector128<float> source) => StoreScalar(address, source);

        /// <summary>
        /// void _mm_storeh_pi (__m64* mem_addr, __m128 a)
        /// </summary>
        public static unsafe void StoreHigh(float* address, Vector128<float> source) => StoreHigh(address, source);

        /// <summary>
        /// void _mm_storel_pi (__m64* mem_addr, __m128 a)
        /// </summary>
        public static unsafe void StoreLow(float* address, Vector128<float> source) => StoreLow(address, source);

        /// <summary>
        /// __m128 _mm_sub_ps (__m128 a, __m128 b)
        ///   SUBPS xmm, xmm/m128
        /// </summary>
        public static Vector128<float> Subtract(Vector128<float> left, Vector128<float> right) => Subtract(left, right);

        /// <summary>
        /// __m128 _mm_sub_ss (__m128 a, __m128 b)
        ///   SUBSS xmm, xmm/m32
        /// </summary>
        public static Vector128<float> SubtractScalar(Vector128<float> left, Vector128<float> right) => SubtractScalar(left, right);

        /// <summary>
        /// __m128 _mm_unpackhi_ps (__m128 a, __m128 b)
        ///   UNPCKHPS xmm, xmm/m128
        /// </summary>
        public static Vector128<float> UnpackHigh(Vector128<float> left, Vector128<float> right) => UnpackHigh(left, right);

        /// <summary>
        /// __m128 _mm_unpacklo_ps (__m128 a, __m128 b)
        ///   UNPCKLPS xmm, xmm/m128
        /// </summary>
        public static Vector128<float> UnpackLow(Vector128<float> left, Vector128<float> right) => UnpackLow(left, right);

        /// <summary>
        /// __m128 _mm_xor_ps (__m128 a, __m128 b)
        ///   XORPS xmm, xmm/m128
        /// </summary>
        public static Vector128<float> Xor(Vector128<float> left, Vector128<float> right) => Xor(left, right);
    }
}