1 // GNU D Compiler SIMD support functions and intrinsics.
2 // Copyright (C) 2022-2024 Free Software Foundation, Inc.
4 // GCC is free software; you can redistribute it and/or modify it under
5 // the terms of the GNU General Public License as published by the Free
6 // Software Foundation; either version 3, or (at your option) any later
9 // GCC is distributed in the hope that it will be useful, but WITHOUT ANY
10 // WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 // Under Section 7 of GPL version 3, you are granted additional
15 // permissions described in the GCC Runtime Library Exception, version
16 // 3.1, as published by the Free Software Foundation.
18 // You should have received a copy of the GNU General Public License and
19 // a copy of the GCC Runtime Library Exception along with this program;
20 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
21 // <http://www.gnu.org/licenses/>.
/**
 * Issue a prefetch for the memory at `address`.
 *
 * Params:
 *    writeFetch = true for write fetch, false for read fetch
 *    locality = 0..3 (0 meaning least local, 3 meaning most local)
 *    address = address to be prefetched
 */
void prefetch(bool writeFetch, ubyte locality)(const(void)* address)
{
    import gcc.builtins : __builtin_prefetch;

    // Out-of-range locality hints are rejected at compile time.
    static assert(locality < 4, "0..3 expected for locality");

    __builtin_prefetch(address, writeFetch, locality);
}
/**
 * Read a vector of type `V` from an address that need not be aligned.
 * This is a compiler intrinsic, so only the declaration appears here.
 *
 * Params:
 *    p = pointer to vector
 * Returns:
 *    a value of type `V`
 */
V loadUnaligned(V)(const V* p)
    if (isVectorType!V);
/**
 * Write a vector to an address that need not be aligned.
 * This is a compiler intrinsic, so only the declaration appears here.
 *
 * Params:
 *    p = pointer to vector
 *    value = value to store
 * Returns:
 *    a value of type `V`
 */
V storeUnaligned(V)(V* p, V value)
    if (isVectorType!V);
/**
 * Build a permutation of the elements of one or two vectors. The result has
 * the type of the first input vector. `mask` is an integral vector with the
 * same width and element count as the output vector.
 *
 * Params:
 *    op1 = first input vector
 *    op2 = second input vector
 *    mask = integer vector mask
 * Returns:
 *    vector with the same type as `op1` and `op2`
 * Example:
 * ---
 * int4 a = [1, 2, 3, 4];
 * int4 b = [5, 6, 7, 8];
 * int4 mask1 = [0, 1, 1, 3];
 * int4 mask2 = [0, 4, 2, 5];
 * assert(shuffle(a, mask1).array == [1, 2, 2, 4]);
 * assert(shuffle(a, b, mask2).array == [1, 5, 3, 6]);
 * ---
 */
template shuffle(V0, V1, M)
{
    // Both data operands must be vectors sharing one element type.
    static assert(isVectorType!V0, "first argument must be vector");
    static assert(isVectorType!V1, "second argument must be vector");
    static assert(is(BaseType!V0 == BaseType!V1),
                  "first and second argument vectors must have the same element type");

    // The mask must be an integer vector matching the operands in element
    // count and element size.
    static assert(isVectorType!M && is(BaseType!M : long),
                  "last argument must be an integer vector");
    static assert((numElements!V0 == numElements!M) && (numElements!V1 == numElements!M),
                  "argument vectors and mask vector should have the same number of elements");
    static assert(BaseType!V0.sizeof == BaseType!M.sizeof,
                  "argument vectors and mask vector should have the same element type size");

    // Declaration only: no body is provided in this module.
    V0 shuffle(V0 op1, V1 op2, M mask);
}
/// Single-input form of `shuffle`: takes one vector and an integer mask vector.
template shuffle(V, M)
{
    static assert(isVectorType!V, "first argument must be a vector");

    // The mask must be an integer vector matching the operand in element
    // count and element size.
    static assert(isVectorType!M && is(BaseType!M : long),
                  "last argument must be an integer vector");
    static assert(numElements!V == numElements!M,
                  "argument vector and mask vector should have the same number of elements");
    static assert(BaseType!V.sizeof == BaseType!M.sizeof,
                  "argument vector and mask vector should have the same element type size");

    V shuffle(V op1, M mask)
    {
        // Forward to the three-argument form, passing `op1` as both inputs.
        return shuffle(op1, op1, mask);
    }
}
/**
 * Build a permutation of the elements of two vectors. The result has the same
 * element type as the input vectors and as many elements as there are indices.
 *
 * Params:
 *    op1 = first input vector
 *    op2 = second input vector
 *    index = element indices of the vectors that should be extracted and returned
 * Returns:
 *    vector with the same element type as `op1` and `op2`, but with an element
 *    count equal to the number of indices in `index`.
 * Example:
 * ---
 * int8 a = [1, -2, 3, -4, 5, -6, 7, -8];
 * int4 b = shufflevector(a, a, 0, 2, 4, 6);
 * assert(b.array == [1, 3, 5, 7]);
 * int4 c = [-2, -4, -6, -8];
 * int8 d = shufflevector(c, b, 4, 0, 5, 1, 6, 2, 7, 3);
 * assert(d.array == a.array);
 * ---
 */
template shufflevector(V1, V2, M...)
{
    // Both data operands must be vectors sharing one element type.
    static assert(isVectorType!V1, "first argument must be vector");
    static assert(isVectorType!V2, "second argument must be vector");
    static assert(is(BaseType!V1 == BaseType!V2),
                  "first and second argument vectors must have the same element type");

    // The index count fixes the result length and must be a power of two.
    static assert(isPowerOf2!(M.length),
                  "number of index arguments must be a power of 2");

    // Declaration only: no body is provided in this module.
    __vector(BaseType!V1[M.length]) shufflevector(V1 op1, V2 op2, M index);
}
/// Form of `shufflevector` that takes the indices as template arguments.
template shufflevector(V, index...)
{
    // Defined for compatibility with LDC.
    static assert(isVectorType!V, "first argument must be a vector type");
    static assert(numElements!V == index.length,
                  "number of index arguments must be the same number of vector elements");

    // Combines every index with `|`. It is used only to test whether the
    // whole index list can be evaluated at compile time.
    private template ctfeConstants(rest...)
    {
        static if (rest.length != 0)
            enum ctfeConstants = rest[0] | ctfeConstants!(rest[1 .. $]);
        else
            enum ctfeConstants = 1;
    }
    static assert(__traits(compiles, ctfeConstants!index),
                  "all index arguments must be compile time constants");

    // True when no index is negative.
    private template validIndexes(rest...)
    {
        static if (rest.length != 0)
            enum validIndexes = (cast(long)rest[0] >= 0) && validIndexes!(rest[1 .. $]);
        else
            enum validIndexes = true;
    }
    static assert(validIndexes!index,
                  "all index arguments must be greater than or equal to 0");

    V shufflevector(V op1, V op2)
    {
        // Forward to the form that takes the indices as run-time arguments.
        return shufflevector(op1, op2, index);
    }
}
182 * Extracts a single scalar element from a vector at a specified index.
183 * Defined for compatibility with LDC.
185 * val = vector to extract element from
186 * idx = index indicating the position from which to extract the element
188 * scalar of the same type as the element type of val
191 * int4 a = [0, 10, 20, 30];
192 * int k = extractelement!(int4, 2)(a);
// extractelement: template function over a vector type V and a compile-time
// index `idx`. It takes `val` of type V and is declared to return BaseType!V.
// The constraint requires isVectorType!V and idx < numElements!V.
// NOTE(review): the constraint bounds `idx` only from above; a negative `idx`
// is not rejected here.
// NOTE(review): the function body is not visible in this view.
196 BaseType
!V
extractelement(V
, int idx
)(V val
)
197 if (isVectorType
!V
&& idx
< numElements
!V
)
203 * Inserts a scalar element into a vector at a specified index.
204 * Defined for compatibility with LDC.
206 * val = vector to assign element to
207 * elt = scalar whose type is the element type of val
208 * idx = index indicating the position at which to insert the element
210 * vector of the same type as val
213 * int4 a = [0, 10, 20, 30];
214 * int4 b = insertelement!(int4, 2)(a, 50);
215 * assert(b.array == [0, 10, 50, 30]);
// insertelement: template function over a vector type V and a compile-time
// index `idx`. It takes `val` of type V and `elt` of type BaseType!V, and is
// declared to return V.
// The constraint requires isVectorType!V and idx < numElements!V.
// NOTE(review): the constraint bounds `idx` only from above; a negative `idx`
// is not rejected here.
// NOTE(review): the function body is not visible in this view.
218 V
insertelement(V
, int idx
)(V val
, BaseType
!V elt
)
219 if (isVectorType
!V
&& idx
< numElements
!V
)
226 * Convert a vector from one integral or floating vector type to another.
227 * The result is an integral or floating vector that has had every element
228 * cast to the element type of the return type.
230 * from = input vector
235 * int4 a = [1, -2, 3, -4];
236 * float4 b = [1.5, -2.5, 3, 7];
237 * assert(convertvector!float4(a).array == [1, -2, 3, -4]);
238 * assert(convertvector!double4(a).array == [1, -2, 3, -4]);
239 * assert(convertvector!double4(b).array == [1.5, -2.5, 3, 7]);
240 * assert(convertvector!int4(b).array == [1, -2, 3, 7]);
// convertvector: template over two vector types, V and T. The static asserts
// below validate both types.
// NOTE(review): the eponymous member declaration is not visible in this view.
// The usage `convertvector!float4(a)` in the documentation suggests V is the
// result type and T the argument type — confirm against the full file.
244 template convertvector(V
, T
)
// V must be a vector whose element type implicitly converts to long or real.
246 static assert(isVectorType
!V
&& (is(BaseType
!V
: long) ||
is(BaseType
!V
: real)),
247 "first argument must be an integer or floating vector type");
// T is held to the same requirement as V.
248 static assert(isVectorType
!T
&& (is(BaseType
!T
: long) ||
is(BaseType
!T
: real)),
249 "second argument must be an integer or floating vector");
// Both vectors must have the same element count.
250 static assert(numElements
!V
== numElements
!T
,
251 "first and second argument vectors should have the same number of elements");
/**
 * Build a conditional merge of the elements of two vectors. The result has
 * the type of the first input vector. `mask` is an integral vector with the
 * same width and element count as the output vector.
 *
 * NOTE(review): which operand a given mask element selects is not visible in
 * this declaration; confirm against the compiler's intrinsic documentation.
 *
 * Params:
 *    op1 = first input vector
 *    op2 = second input vector
 *    mask = integer vector mask
 * Returns:
 *    vector with the same type as `op1` and `op2`
 */
template blendvector(V0, V1, M)
{
    // Both data operands must be vectors sharing one element type.
    static assert(isVectorType!V0, "first argument must be vector");
    static assert(isVectorType!V1, "second argument must be vector");
    static assert(is(BaseType!V0 == BaseType!V1),
                  "first and second argument vectors must have the same element type");

    // The mask must be an integer vector matching the operands in element
    // count and element size.
    static assert(isVectorType!M && is(BaseType!M : long),
                  "last argument must be an integer vector");
    static assert((numElements!V0 == numElements!M) && (numElements!V1 == numElements!M),
                  "argument vectors and mask vector should have the same number of elements");
    static assert(BaseType!V0.sizeof == BaseType!M.sizeof,
                  "argument vectors and mask vector should have the same element type size");

    // Declaration only: no body is provided in this module.
    V0 blendvector(V0 op1, V1 op2, M mask);
}
293 * Perform an element-wise comparison between two vectors, producing `0` when
294 * the comparison is false and `-1` (all bits are set to 1) otherwise.
299 * vector of the same width and number of elements as the comparison
300 * operands with a signed integral element type
303 * float4 a = [1, 3, 5, 7];
304 * float4 b = [2, 3, 4, 5];
305 * int4 c = greaterMask!float4(a, b);
306 * assert(c.array == [0, 0, -1, -1]);
// equalMask: generic over a vector type V (constrained by isVectorType!V).
// It takes two operands of type V and is declared to return V.
// NOTE(review): the body is not visible in this view. From the name and the
// comparison documentation preceding it, it presumably yields the
// element-wise `==` mask — confirm against the full file.
309 V
equalMask(V
)(V op1
, V op2
) if (isVectorType
!V
)
// notEqualMask: generic over a vector type V (constrained by isVectorType!V).
// It takes two operands of type V and is declared to return V.
// NOTE(review): the body is not visible in this view. From the name, it
// presumably yields the element-wise `!=` mask — confirm against the full file.
314 V
notEqualMask(V
)(V op1
, V op2
) if (isVectorType
!V
)
// greaterMask: generic over a vector type V (constrained by isVectorType!V).
// It takes two operands of type V and is declared to return V.
// NOTE(review): the body is not visible in this view. The documentation
// example (`greaterMask!float4(a, b)` giving [0, 0, -1, -1]) suggests an
// element-wise `op1 > op2` mask — confirm against the full file.
319 V
greaterMask(V
)(V op1
, V op2
) if (isVectorType
!V
)
// greaterOrEqualMask: generic over a vector type V (constrained by
// isVectorType!V). It takes two operands of type V and is declared to return V.
// NOTE(review): the body is not visible in this view. From the name, it
// presumably yields the element-wise `>=` mask — confirm against the full file.
324 V
greaterOrEqualMask(V
)(V op1
, V op2
) if (isVectorType
!V
)
330 * Perform an element-wise logical comparison between two vectors, producing
331 * `0` when the comparison is false and `-1` (all bits are set to 1) otherwise.
336 * vector of the same width and number of elements as the comparison
337 * operands with a signed integral element type
// notMask: generic over a vector type V (constrained by isVectorType!V).
// It takes a single operand of type V and is declared to return V.
// NOTE(review): the body is not visible in this view. From the name and the
// logical-comparison documentation preceding it, it presumably yields the
// element-wise logical NOT mask — confirm against the full file.
339 V
notMask(V
)(V op1
) if (isVectorType
!V
)
/// Element-wise logical AND: combines the `!= 0` comparison results of both
/// operands with `&`.
V andAndMask(V)(V op1, V op2)
    if (isVectorType!V)
{
    auto lhsNonZero = op1 != 0;
    auto rhsNonZero = op2 != 0;
    return lhsNonZero & rhsNonZero;
}
/// Element-wise logical OR: combines the `!= 0` comparison results of both
/// operands with `|`.
V orOrMask(V)(V op1, V op2)
    if (isVectorType!V)
{
    auto lhsNonZero = op1 != 0;
    auto rhsNonZero = op2 != 0;
    return lhsNonZero | rhsNonZero;
}
356 // Private helper templates.
/// True when `T` implicitly converts to `__vector(V[N])` for some element
/// type `V` and length `N`.
template isVectorType(T)
{
    enum bool isVectorType = is(T : __vector(V[N]), V, size_t N);
}
// Element type of vector type `V`, taken from the first element of its
// `.array` view. The enclosing template declaration is not visible in this view.
alias BaseType = typeof(V.array[0]);
/// Number of elements in vector type `V`: the size of the whole vector
/// divided by the size of one element.
enum numElements(V) = V.sizeof / BaseType!V.sizeof;
/// True when `Y` is a positive power of two (1, 2, 4, ...).
///
/// The `Y > 0` guard rejects zero and every negative value. That includes
/// `int.min`, which the test `(Y & -Y) == Y` wrongly accepts because
/// `-int.min` wraps back to `int.min`. Clearing the lowest set bit with
/// `Y & (Y - 1)` leaves zero exactly when a single bit is set.
enum bool isPowerOf2(int Y) = Y > 0 && (Y & (Y - 1)) == 0;