1 /* brig-basic-inst-handler.cc -- brig basic instruction handling
2 Copyright (C) 2016-2018 Free Software Foundation, Inc.
3 Contributed by Pekka Jaaskelainen <pekka.jaaskelainen@parmance.com>
4 for General Processor Tech.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "brig-code-entry-handler.h"
25 #include "brig-util.h"
28 #include "gimple-expr.h"
30 #include "print-tree.h"
31 #include "tree-pretty-print.h"
32 #include "langhooks.h"
33 #include "stor-layout.h"
34 #include "diagnostic-core.h"
35 #include "brig-builtins.h"
36 #include "fold-const.h"
/* Constructor: simply forwards the brig_to_generic conversion context
   PARENT to the brig_code_entry_handler base class; no other state is
   initialized here.
   NOTE(review): this chunk looks like a lossy extraction -- the embedded
   original line numbers jump from 39 to 43, so the (presumably empty)
   constructor body braces appear to have been dropped.  Confirm against
   the upstream file before editing.  */
38 brig_basic_inst_handler::brig_basic_inst_handler (brig_to_generic
&parent
)
39 : brig_code_entry_handler (parent
)
/* Helper visitor that implements HSAIL saturating arithmetic by
   scalarizing the packed operation: each pair of vector elements is
   handed to visit_element, which emits a call to a scalar built-in.
   The built-in to call is selected in the constructor by matching the
   instruction's opcode and base element type against the
   DEF_HSAIL_SAT_BUILTIN entries of brig-builtins.def.
   NOTE(review): extraction appears lossy here (embedded line numbers
   skip, e.g. 44-45, 48, 63-66) -- braces, access specifiers and the
   m_builtin member declaration are not visible; verify upstream.  */
43 class scalarized_sat_arithmetics
: public tree_element_binary_visitor
/* Constructor: record the instruction and resolve the saturating
   built-in for its (opcode, element type) pair.  */
46 scalarized_sat_arithmetics (const BrigInstBase
&brig_inst
)
47 : m_brig_inst (brig_inst
)
/* Only the base element type matters for built-in selection; mask off
   the packing bits.  */
49 BrigType16_t element_type
= brig_inst
.type
& BRIG_TYPE_BASE_MASK
;
/* Undefine all brig-builtins.def iterator macros so that only
   DEF_HSAIL_SAT_BUILTIN expands below; the others become no-ops.  */
51 #undef DEF_HSAIL_SAT_BUILTIN
52 #undef DEF_HSAIL_BUILTIN
53 #undef DEF_HSAIL_ATOMIC_BUILTIN
54 #undef DEF_HSAIL_INTR_BUILTIN
55 #undef DEF_HSAIL_CVT_ZEROI_SAT_BUILTIN
57 #define DEF_HSAIL_SAT_BUILTIN(ENUM, BRIG_OPCODE, HSAIL_TYPE, \
59 if (brig_inst.opcode == BRIG_OPCODE && element_type == HSAIL_TYPE) \
60 m_builtin = builtin_decl_explicit (ENUM); \
62 #include "brig-builtins.def"
/* Emit the saturating operation for one element pair; called once per
   vector lane by the base-class visitor machinery.  */
67 visit_element (brig_code_entry_handler
&, tree operand0
, tree operand1
)
69 /* Implement saturating arithmetics with scalar built-ins for now.
70 TODO: emit GENERIC nodes for the simplest cases or at least
71 emit vector built-ins. */
72 return call_builtin (m_builtin
, 2, TREE_TYPE (operand0
),
73 TREE_TYPE (operand0
), operand0
,
74 TREE_TYPE (operand1
), operand1
);
/* The BRIG instruction whose saturating semantics we are emulating.  */
76 const BrigInstBase
&m_brig_inst
;
80 /* Implements a vector shuffle. ARITH_TYPE is the type of the vector,
81 OPERANDS[0] is the first vector, OPERAND[1] the second vector and
82 OPERANDS[2] the shuffle mask in HSAIL format. The output is a VEC_PERM_EXPR
83 that implements the shuffle as a GENERIC expression. */
/* Builds a GENERIC VEC_PERM_EXPR for an HSAIL 'shuffle': the HSAIL mask
   in OPERANDS[2] is a tightly packed bit field (log2(element_count)
   bits per lane), which is unpacked lane by lane with BIT_FIELD_REFs
   and rebased into VEC_PERM_EXPR's 0..2N-1 index space by adding a
   per-half offset vector.
   NOTE(review): this chunk is a lossy extraction -- embedded source
   numbers skip (e.g. 88-89, 114-115, 139-141), so declarations such as
   the mask_element variable and the function's return statement are
   missing from view.  Verify against upstream before editing.  */
86 brig_basic_inst_handler::build_shuffle (tree arith_type
,
87 tree_stl_vec
&operands
)
/* Use an unsigned integer type of the vector's element width for all
   mask arithmetic.  */
90 = get_unsigned_int_type (TREE_TYPE (TREE_TYPE (operands
[0])));
92 /* Offsets to add to the mask values to convert from the
93 HSAIL mask to VEC_PERM_EXPR masks. VEC_PERM_EXPR mask
94 assumes an index spanning from 0 to 2 times the vec
95 width while HSAIL refers separately to two different
96 input vectors, thus is not a "full shuffle" where all
97 output elements can originate from any input element. */
98 vec
<constructor_elt
, va_gc
> *mask_offset_vals
= NULL
;
100 unsigned int element_count
= gccbrig_type_vector_subparts (arith_type
);
102 vec
<constructor_elt
, va_gc
> *input_mask_vals
= NULL
;
/* Each packed HSAIL mask field is log2(element_count) bits wide.  */
103 size_t input_mask_element_size
= exact_log2 (element_count
);
105 /* Unpack the tightly packed mask elements to BIT_FIELD_REFs
106 from which to construct the mask vector as understood by
108 tree mask_operand
= add_temp_var ("shuffle_mask", operands
[2]);
110 tree mask_element_type
111 = build_nonstandard_integer_type (input_mask_element_size
, true);
113 for (size_t i
= 0; i
< element_count
; ++i
)
/* Extract the i-th packed mask field from the raw mask operand.  */
116 = build3 (BIT_FIELD_REF
, mask_element_type
, mask_operand
,
117 bitsize_int (input_mask_element_size
),
118 bitsize_int (i
* input_mask_element_size
));
120 mask_element
= convert (element_type
, mask_element
);
/* HSAIL: the low half of the output selects from the first input
   vector, the high half from the second, hence offset 0 vs N.  */
123 if (i
< element_count
/ 2)
124 offset
= build_int_cst (element_type
, 0);
126 offset
= build_int_cst (element_type
, element_count
);
128 CONSTRUCTOR_APPEND_ELT (mask_offset_vals
, NULL_TREE
, offset
);
129 CONSTRUCTOR_APPEND_ELT (input_mask_vals
, NULL_TREE
, mask_element
);
131 tree mask_vec_type
= build_vector_type (element_type
, element_count
);
133 tree mask_vec
= build_constructor (mask_vec_type
, input_mask_vals
);
134 tree offset_vec
= build_constructor (mask_vec_type
, mask_offset_vals
);
/* Rebase the HSAIL mask into VEC_PERM_EXPR's combined index space.  */
136 tree mask
= build2 (PLUS_EXPR
, mask_vec_type
, mask_vec
, offset_vec
);
138 tree perm
= build3 (VEC_PERM_EXPR
, TREE_TYPE (operands
[0]), operands
[0],
143 /* Unpacks (extracts) a scalar element with an index in OPERANDS[1]
144 from the vector expression in OPERANDS[0]. */
/* Extracts the scalar element selected by OPERANDS[1] from the vector
   in OPERANDS[0]: a VEC_PERM_EXPR moves the wanted element to lane 0,
   a constant AND mask clears all other lanes, the result is viewed as
   a raw integer, and sub-32-bit integral results are widened.
   NOTE(review): lossy extraction -- embedded line numbers skip (e.g.
   159-160, 165-167, 199, 202 onward), so the element_count declaration,
   the mask_element declaration/else branches, the perm's mask operand
   and the tail of the function are missing from view.  */
147 brig_basic_inst_handler::build_unpack (tree_stl_vec
&operands
)
149 /* Implement the unpack with a shuffle that stores the unpacked
150 element to the lowest bit positions in the dest. After that
151 a bitwise AND is used to clear the uppermost bits. */
152 tree src_element_type
= TREE_TYPE (TREE_TYPE (operands
[0]));
154 /* Perform the operations with a raw (unsigned int type) type. */
155 tree element_type
= get_unsigned_int_type (src_element_type
);
157 vec
<constructor_elt
, va_gc
> *input_mask_vals
= NULL
;
158 vec
<constructor_elt
, va_gc
> *and_mask_vals
= NULL
;
161 = gccbrig_type_vector_subparts (TREE_TYPE (operands
[0]));
162 tree vec_type
= build_vector_type (element_type
, element_count
);
164 for (size_t i
= 0; i
< element_count
; ++i
)
/* Lane 0 of the permutation selects the requested element; other
   lanes get index 0 (their content is cleared by the AND below).  */
168 mask_element
= convert (element_type
, operands
[1]);
170 mask_element
= build_int_cst (element_type
, 0);
172 CONSTRUCTOR_APPEND_ELT (input_mask_vals
, NULL_TREE
, mask_element
);
174 tree and_mask_element
;
/* AND mask: all-ones for lane 0, zero elsewhere.  */
176 and_mask_element
= build_int_cst (element_type
, -1);
178 and_mask_element
= build_int_cst (element_type
, 0);
179 CONSTRUCTOR_APPEND_ELT (and_mask_vals
, NULL_TREE
, and_mask_element
);
182 tree mask_vec
= build_constructor (vec_type
, input_mask_vals
);
184 tree and_mask_vec
= build_constructor (vec_type
, and_mask_vals
);
/* Shuffle the source with itself; only the mask lane 0 matters.  */
186 tree perm
= build3 (VEC_PERM_EXPR
, vec_type
,
187 build_resize_convert_view (vec_type
, operands
[0]),
188 build_resize_convert_view (vec_type
, operands
[0]),
191 tree cleared
= build2 (BIT_AND_EXPR
, vec_type
, perm
, and_mask_vec
);
/* Reinterpret the cleared vector as one raw unsigned integer.  */
193 size_t s
= int_size_in_bytes (TREE_TYPE (cleared
)) * BITS_PER_UNIT
;
194 tree raw_type
= build_nonstandard_integer_type (s
, true);
196 tree as_int
= build_resize_convert_view (raw_type
, cleared
);
/* Sub-32-bit integral elements are widened to 32 bits.  */
198 if (int_size_in_bytes (src_element_type
) < 4)
200 if (INTEGRAL_TYPE_P (src_element_type
))
201 return extend_int (as_int
, uint32_type_node
, src_element_type
);
206 /* Packs (inserts) a scalar element in OPERANDS[1]
207 to the vector in OPERANDS[0] at element position defined by
/* Inserts the scalar OPERANDS[1] into the vector OPERANDS[0] at the
   element position OPERANDS[2], implemented purely with bit-level
   integer arithmetic on a wide integer view of the vector: clear the
   target element with an inverted shifted mask, then OR in the masked,
   shifted scalar.
   NOTE(review): lossy extraction -- embedded line numbers skip (e.g.
   242, 249, 253, 262-263, 265 onward), so the mask_int, clearing_mask
   and zeroed_element declarations and the function's return are missing
   from view.  */
211 brig_basic_inst_handler::build_pack (tree_stl_vec
&operands
)
213 /* Implement using a bit level insertion.
214 TODO: Reuse this for implementing 'bitinsert'
215 without a builtin call. */
217 size_t ecount
= gccbrig_type_vector_subparts (TREE_TYPE (operands
[0]));
218 size_t vecsize
= int_size_in_bytes (TREE_TYPE (operands
[0])) * BITS_PER_UNIT
;
/* A single unsigned integer as wide as the whole vector.  */
219 tree wide_type
= build_nonstandard_integer_type (vecsize
, 1);
221 tree src_vect
= build_resize_convert_view (wide_type
, operands
[0]);
222 src_vect
= add_temp_var ("src_vect", src_vect
);
224 tree scalar
= operands
[1];
225 scalar
= add_temp_var ("scalar", convert_to_integer (wide_type
, scalar
));
227 tree pos
= operands
[2];
229 /* The upper bits of the position can contain garbage.
230 Zero them for well-defined semantics. */
231 tree t
= build2 (BIT_AND_EXPR
, TREE_TYPE (pos
), operands
[2],
232 build_int_cstu (TREE_TYPE (pos
), ecount
- 1));
233 pos
= add_temp_var ("pos", convert (wide_type
, t
));
235 tree element_type
= TREE_TYPE (TREE_TYPE (operands
[0]));
236 size_t element_width
= int_size_in_bytes (element_type
) * BITS_PER_UNIT
;
237 tree ewidth
= build_int_cstu (wide_type
, element_width
);
/* Bit offset of the target element = element width * position.  */
239 tree bitoffset
= build2 (MULT_EXPR
, wide_type
, ewidth
, pos
);
240 bitoffset
= add_temp_var ("offset", bitoffset
);
/* Element-wide all-ones mask; the 64-bit case is special-cased to
   avoid the undefined 1 << 64 shift.  */
243 = element_width
== 64 ? (uint64_t) -1 : ((uint64_t) 1 << element_width
) - 1;
245 tree mask
= build_int_cstu (wide_type
, mask_int
);
247 mask
= add_temp_var ("mask", convert_to_integer (wide_type
, mask
));
/* ~(mask << bitoffset): zero out the element being replaced.  */
250 = build1 (BIT_NOT_EXPR
, wide_type
,
251 build2 (LSHIFT_EXPR
, wide_type
, mask
, bitoffset
));
254 = build2 (BIT_AND_EXPR
, wide_type
, src_vect
, clearing_mask
);
256 /* TODO: Is the AND necessary: does HSA define what
257 happens if the upper bits in the inserted element are not
259 tree element_in_position
260 = build2 (LSHIFT_EXPR
, wide_type
,
261 build2 (BIT_AND_EXPR
, wide_type
, scalar
, mask
), bitoffset
);
/* Merge the shifted element into the cleared slot.  */
264 = build2 (BIT_IOR_EXPR
, wide_type
, zeroed_element
, element_in_position
);
268 /* Implement the unpack{lo,hi}. BRIG_OPCODE should tell which one and
269 ARITH_TYPE describe the type of the vector arithmetics.
270 OPERANDS[0] and OPERANDS[1] are the input vectors. */
/* Implements HSAIL unpacklo/unpackhi as a VEC_PERM_EXPR interleave:
   for each output pair, one lane comes from OPERANDS[0] and the
   corresponding lane from OPERANDS[1], starting at the low half
   (offset 0) for UNPACKLO or the high half (offset N/2) otherwise.
   NOTE(review): lossy extraction -- embedded line numbers skip (274,
   278, 294-295, 300 onward), so the arith_type parameter line, the
   mask_vec_type declaration and the return statement are missing from
   view.  */
273 brig_basic_inst_handler::build_unpack_lo_or_hi (BrigOpcode16_t brig_opcode
,
275 tree_stl_vec
&operands
)
277 tree element_type
= get_unsigned_int_type (TREE_TYPE (arith_type
));
279 = build_vector_type (element_type
,
280 gccbrig_type_vector_subparts (arith_type
));
282 size_t element_count
= gccbrig_type_vector_subparts (arith_type
);
283 vec
<constructor_elt
, va_gc
> *input_mask_vals
= NULL
;
/* UNPACKLO interleaves the low halves, UNPACKHI the high halves.  */
285 size_t offset
= (brig_opcode
== BRIG_OPCODE_UNPACKLO
) ? 0 : element_count
/ 2;
/* Per output pair: element i from input 0 (index offset+i) and the
   matching element from input 1 (index offset+i+N in the combined
   VEC_PERM_EXPR index space).  */
287 for (size_t i
= 0; i
< element_count
/ 2; ++i
)
289 CONSTRUCTOR_APPEND_ELT (input_mask_vals
, NULL_TREE
,
290 build_int_cst (element_type
, offset
+ i
));
291 CONSTRUCTOR_APPEND_ELT (input_mask_vals
, NULL_TREE
,
292 build_int_cst (element_type
,
293 offset
+ i
+ element_count
));
296 tree mask_vec
= build_constructor (mask_vec_type
, input_mask_vals
);
298 tree perm
= build3 (VEC_PERM_EXPR
, TREE_TYPE (operands
[0]), operands
[0],
299 operands
[1], mask_vec
);
303 /* Builds a basic instruction expression from a BRIG instruction. BRIG_OPCODE
304 is the opcode, BRIG_TYPE the brig type of the instruction, ARITH_TYPE the
305 desired tree type for the instruction, and OPERANDS the instruction's
306 input operands already converted to tree nodes. */
/* Builds the GENERIC expression for one BRIG instruction: maps the
   opcode to a tree code, fixes up shift-amount semantics, and either
   emits a direct build1/build2/build3, delegates to the specialized
   builders (shuffle/unpack/pack), emulates with a node chain
   (TREE_LIST result) or falls back to a built-in call (CALL_EXPR).
   NOTE(review): lossy extraction -- embedded line numbers skip in many
   places (311, 326, 332-333, 335, 343-344, 399-400, 408-411, 414,
   418, 420-421, 428 onward), so the arith_type parameter, the
   scalar_mask declaration, several else/return fragments and the
   function tail are missing from view.  */
309 brig_basic_inst_handler::build_inst_expr (BrigOpcode16_t brig_opcode
,
310 BrigType16_t brig_type
,
312 tree_stl_vec
&operands
)
314 tree_code opcode
= get_tree_code_for_hsa_opcode (brig_opcode
, brig_type
);
/* Base element type with the packing bits masked off.  */
316 BrigType16_t inner_type
= brig_type
& BRIG_TYPE_BASE_MASK
;
318 tree instr_inner_type
319 = VECTOR_TYPE_P (arith_type
) ? TREE_TYPE (arith_type
) : arith_type
;
321 if (opcode
== RSHIFT_EXPR
|| opcode
== LSHIFT_EXPR
)
323 /* HSA defines modulo/clipping behavior for shift amounts larger
324 than the bit width, while tree.def leaves it undefined.
325 We need to mask the upper bits to ensure the defined behavior. */
327 = build_int_cst (instr_inner_type
,
328 gccbrig_hsa_type_bit_size (inner_type
) - 1);
330 tree mask
= VECTOR_TYPE_P (arith_type
)
331 ? build_vector_from_val (arith_type
, scalar_mask
)
334 /* The shift amount is a scalar, broadcast it to produce
336 if (VECTOR_TYPE_P (arith_type
))
337 operands
[1] = build_vector_from_val (arith_type
, operands
[1]);
338 operands
[1] = build2 (BIT_AND_EXPR
, arith_type
, operands
[1], mask
);
341 size_t input_count
= operands
.size ();
342 size_t output_count
= gccbrig_hsa_opcode_op_output_p (brig_opcode
, 0) ?
/* TREE_LIST is the sentinel for "no direct GENERIC opcode".  */
345 if (opcode
== TREE_LIST
)
347 /* There was no direct GENERIC opcode for the instruction;
348 try to emulate it with a chain of GENERIC nodes. */
349 if (brig_opcode
== BRIG_OPCODE_MAD
|| brig_opcode
== BRIG_OPCODE_MAD24
)
351 /* There doesn't seem to be a "standard" MAD built-in in gcc so let's
352 use a chain of multiply + add for now (double rounding method).
353 It should be easier for optimizers than a custom built-in call
354 WIDEN_MULT_EXPR is close, but requires a double size result
357 = build2 (MULT_EXPR
, arith_type
, operands
[0], operands
[1]);
358 return build2 (PLUS_EXPR
, arith_type
, mult_res
, operands
[2]);
360 else if (brig_opcode
== BRIG_OPCODE_MAD24HI
)
363 = build2 (MULT_HIGHPART_EXPR
, arith_type
, operands
[0], operands
[1]);
364 return build2 (PLUS_EXPR
, arith_type
, mult_res
, operands
[2]);
366 else if (brig_opcode
== BRIG_OPCODE_SHUFFLE
)
368 return build_shuffle (arith_type
, operands
);
370 else if (brig_opcode
== BRIG_OPCODE_UNPACKLO
371 || brig_opcode
== BRIG_OPCODE_UNPACKHI
)
373 return build_unpack_lo_or_hi (brig_opcode
, arith_type
, operands
);
375 else if (brig_opcode
== BRIG_OPCODE_UNPACK
)
377 return build_unpack (operands
);
379 else if (brig_opcode
== BRIG_OPCODE_PACK
)
381 return build_pack (operands
);
383 else if (brig_opcode
== BRIG_OPCODE_NRSQRT
)
385 /* Implement as 1.0/sqrt (x) and assume gcc instruction selects to
386 native ISA other than a division, if available.
387 TODO: this will happen only with unsafe math optimizations
388 on which cannot be used in general to remain HSAIL compliant.
389 Perhaps a builtin call would be better option here. */
390 return build2 (RDIV_EXPR
, arith_type
, build_one_cst (arith_type
),
391 expand_or_call_builtin (BRIG_OPCODE_SQRT
, brig_type
,
392 arith_type
, operands
))
;
394 else if (brig_opcode
== BRIG_OPCODE_NRCP
)
396 /* Implement as 1.0/x and assume gcc instruction selects to
397 native ISA other than a division, if available. */
398 return build2 (RDIV_EXPR
, arith_type
, build_one_cst (arith_type
),
401 else if (brig_opcode
== BRIG_OPCODE_LANEID
402 || brig_opcode
== BRIG_OPCODE_MAXWAVEID
403 || brig_opcode
== BRIG_OPCODE_WAVEID
)
405 /* Assuming WAVESIZE 1 (for now), therefore LANEID, WAVEID and
406 MAXWAVEID always return 0. */
407 return build_zero_cst (arith_type
);
/* CALL_EXPR sentinel: implement via a built-in call.  */
412 else if (opcode
== CALL_EXPR
)
413 return expand_or_call_builtin (brig_opcode
, brig_type
, arith_type
,
/* Direct GENERIC mapping: pick buildN by input arity.  */
415 else if (output_count
== 1)
417 if (input_count
== 1)
419 if (opcode
== MODIFY_EXPR
)
422 return build1 (opcode
, arith_type
, operands
[0]);
424 else if (input_count
== 2)
425 return build2 (opcode
, arith_type
, operands
[0], operands
[1]);
426 else if (input_count
== 3)
427 return build3 (opcode
, arith_type
, operands
[0], operands
[1],
438 /* Handles the basic instructions, including packed instructions. Deals
439 with the different packing modes by unpacking/packing the wanted
440 elements. Delegates most of the instruction cases to build_inst_expr(). */
/* Entry point for one basic BRIG instruction: builds the operand trees,
   splits them into output/input, normalizes the instruction type,
   handles the packing modes (broadcasts for _ps/_sp, scalarized
   built-ins for saturating packs, lowest-element merge for _s/_ss),
   emulates MULHI on narrow vectors by scalarized promotion, delegates
   the rest to build_inst_expr, assigns the result to the output, and
   returns base->byteCount so the caller can advance to the next entry.
   NOTE(review): lossy extraction -- embedded line numbers skip in many
   places (444, 446, 448-449, 451, 453, 455, 458-459, 462, 464, 473,
   475, 477-480, 482-483, 485, 488-490, 495-497, 499-501, 508, 510, 516,
   521-522, 524, 526, 530, 532, 534-535, 539-541, 544-545, 548, 562,
   565-566, 568, 570, 572, 574, 576, 581-582, 584-585, 587-588, 590-591,
   593-594, 596-597, 599-603, 607, 610-611, 613-614, 617, 619, 621-623,
   626, 631-632, 634-635, 637-638, 641, 647, 652, 656-657, 659, 661,
   667-670, 672, 675, 678, 680, 683-684, 686, 692-693, 696), so braces,
   several declarations (e.g. output_count, input_count, opcode,
   old_value, cst, scalar_expr, assign, goto_stmt) and some control-flow
   fragments are missing from view.  Verify against upstream.  */
443 brig_basic_inst_handler::operator () (const BrigBase
*base
)
445 const BrigInstBase
*brig_inst
= (const BrigInstBase
*) base
;
447 tree_stl_vec operands
= build_operands (*brig_inst
);
/* Operand 0 is the output iff the opcode defines one.  */
450 = gccbrig_hsa_opcode_op_output_p (brig_inst
->opcode
, 0) ? 1 : 0;
452 = operands
.size () == 0 ? 0 : (operands
.size () - output_count
);
454 gcc_assert (output_count
== 0 || output_count
== 1);
/* Collect the input operands, skipping the output (if any).  */
456 tree_stl_vec::iterator first_input_i
= operands
.begin ();
457 if (output_count
> 0 && operands
.size () > 0)
460 tree_stl_vec in_operands
;
461 in_operands
.assign (first_input_i
, operands
.end ());
463 BrigType16_t brig_inst_type
= brig_inst
->type
;
465 if (brig_inst
->opcode
== BRIG_OPCODE_NOP
)
466 return base
->byteCount
;
467 else if (brig_inst
->opcode
== BRIG_OPCODE_FIRSTBIT
468 || brig_inst
->opcode
== BRIG_OPCODE_LASTBIT
469 || brig_inst
->opcode
== BRIG_OPCODE_SAD
)
470 /* These instructions are reported to be always 32b in HSAIL, but we want
471 to treat them according to their input argument's type to select the
472 correct instruction/builtin. */
474 = gccbrig_tree_type_to_hsa_type (TREE_TYPE (in_operands
[0]));
476 tree instr_type
= gccbrig_tree_type_for_hsa_type (brig_inst_type
);
481 return base
->byteCount
;
484 bool is_vec_instr
= hsa_type_packed_p (brig_inst_type
);
486 size_t element_size_bits
;
487 size_t element_count
;
/* Packed instruction: derive element width/count from the type.  */
491 BrigType16_t brig_element_type
= brig_inst_type
& BRIG_TYPE_BASE_MASK
;
492 element_size_bits
= gccbrig_hsa_type_bit_size (brig_element_type
);
493 element_count
= gccbrig_hsa_type_bit_size (brig_inst_type
)
494 / gccbrig_hsa_type_bit_size (brig_element_type
);
498 element_size_bits
= gccbrig_hsa_type_bit_size (brig_inst_type
);
502 /* The actual arithmetics type that should be performed with the
503 operation. This is not always the same as the original BRIG
504 opcode's type due to implicit conversions of storage-only f16. */
505 tree arith_type
= gccbrig_is_bit_operation (brig_inst
->opcode
)
506 ? gccbrig_tree_type_for_hsa_type (brig_inst_type
)
507 : get_tree_expr_type_for_hsa_type (brig_inst_type
);
509 tree instr_expr
= NULL_TREE
;
/* Fetch the packing control; it lives in different structs for
   modifier vs. compare instructions.  */
511 BrigPack8_t p
= BRIG_PACK_NONE
;
512 if (brig_inst
->base
.kind
== BRIG_KIND_INST_MOD
)
513 p
= ((const BrigInstMod
*) brig_inst
)->pack
;
514 else if (brig_inst
->base
.kind
== BRIG_KIND_INST_CMP
)
515 p
= ((const BrigInstCmp
*) brig_inst
)->pack
;
/* For _ps_/_sp_ modes, broadcast the scalar side's lowest element so
   both inputs become full vectors.  */
517 if (p
== BRIG_PACK_PS
|| p
== BRIG_PACK_PSSAT
)
518 in_operands
[1] = build_lower_element_broadcast (in_operands
[1]);
519 else if (p
== BRIG_PACK_SP
|| p
== BRIG_PACK_SPSAT
)
520 in_operands
[0] = build_lower_element_broadcast (in_operands
[0]);
523 = get_tree_code_for_hsa_opcode (brig_inst
->opcode
, brig_inst_type
);
/* Saturating packed modes: scalarize via the sat built-in visitor.  */
525 if (p
>= BRIG_PACK_PPSAT
&& p
<= BRIG_PACK_PSAT
)
527 scalarized_sat_arithmetics
sat_arith (*brig_inst
);
528 gcc_assert (input_count
== 2);
529 instr_expr
= sat_arith (*this, in_operands
[0], in_operands
[1]);
531 else if (opcode
== RETURN_EXPR
)
/* Kernel functions branch to the shared exit label instead of
   emitting a real return.  */
533 if (m_parent
.m_cf
->m_is_kernel
)
536 = build1 (GOTO_EXPR
, void_type_node
, m_parent
.m_cf
->m_exit_label
);
537 m_parent
.m_cf
->append_statement (goto_stmt
);
538 return base
->byteCount
;
542 m_parent
.m_cf
->append_return_stmt ();
543 return base
->byteCount
;
546 else if (opcode
== MULT_HIGHPART_EXPR
&&
547 is_vec_instr
&& element_size_bits
< 64)
549 /* MULT_HIGHPART_EXPR works only on target dependent vector sizes and
550 even the scalars do not seem to work at least for char elements.
552 Let's fall back to scalarization and promotion of the vector elements
553 to larger types with the MULHI computed as a regular MUL.
554 MULHI for 2x64b seems to work with the Intel CPUs I've tested so
555 that is passed on for vector processing so there is no need for
556 128b scalar arithmetics.
558 This is not modular as these type of things do not belong to the
559 frontend, there should be a legalization phase before the backend
560 that figures out the best way to compute the MULHI for any
561 integer vector datatype.
563 TODO: promote to larger vector types instead. For example
564 MULT_HIGHPART_EXPR with s8x8 doesn't work, but s16x8 seems to at least
567 tree_stl_vec operand0_elements
;
569 unpack (in_operands
[0], operand0_elements
);
571 tree_stl_vec operand1_elements
;
573 unpack (in_operands
[1], operand1_elements
);
575 tree_stl_vec result_elements
;
577 tree scalar_type
= TREE_TYPE (arith_type
);
578 BrigType16_t element_type
= brig_inst_type
& BRIG_TYPE_BASE_MASK
;
/* Pick a promoted type twice the element width so a plain MUL
   followed by a right shift yields the high part.  */
579 tree promoted_type
= short_integer_type_node
;
580 switch (element_type
)
583 promoted_type
= gccbrig_tree_type_for_hsa_type (BRIG_TYPE_S16
);
586 promoted_type
= gccbrig_tree_type_for_hsa_type (BRIG_TYPE_U16
);
589 promoted_type
= gccbrig_tree_type_for_hsa_type (BRIG_TYPE_S32
);
592 promoted_type
= gccbrig_tree_type_for_hsa_type (BRIG_TYPE_U32
);
595 promoted_type
= gccbrig_tree_type_for_hsa_type (BRIG_TYPE_S64
);
598 promoted_type
= gccbrig_tree_type_for_hsa_type (BRIG_TYPE_U64
);
604 size_t promoted_type_size
= int_size_in_bytes (promoted_type
) * 8;
605 size_t element_count
= gccbrig_type_vector_subparts (arith_type
);
606 for (size_t i
= 0; i
< element_count
; ++i
)
608 tree operand0
= convert (promoted_type
, operand0_elements
.at (i
));
609 tree operand1
= convert (promoted_type
, operand1_elements
.at (i
));
612 = build2 (MULT_EXPR
, promoted_type
, operand0
, operand1
);
/* High half of the product = shift right by half the promoted
   width.  */
615 = build2 (RSHIFT_EXPR
, promoted_type
, scalar_expr
,
616 build_int_cstu (promoted_type
, promoted_type_size
/ 2));
618 result_elements
.push_back (convert (scalar_type
, scalar_expr
));
620 instr_expr
= pack (result_elements
);
624 /* 'class' is always of b1 type, let's consider it by its
625 float type when building the instruction to find the
627 if (brig_inst
->opcode
== BRIG_OPCODE_CLASS
)
628 brig_inst_type
= ((const BrigInstSourceType
*) base
)->sourceType
;
629 instr_expr
= build_inst_expr (brig_inst
->opcode
, brig_inst_type
,
630 arith_type
, in_operands
);
633 if (instr_expr
== NULL_TREE
)
636 return base
->byteCount
;
639 if (p
== BRIG_PACK_SS
|| p
== BRIG_PACK_S
|| p
== BRIG_PACK_SSSAT
640 || p
== BRIG_PACK_SSAT
)
642 /* In case of _s_ or _ss_, select only the lowest element
643 from the new input to the output. We could extract
644 the element and use a scalar operation, but try
645 to keep data in vector registers as much as possible
646 to avoid copies between scalar and vector datapaths. */
648 tree half_storage_type
= gccbrig_tree_type_for_hsa_type (brig_inst_type
);
649 bool is_fp16_operation
650 = (brig_inst_type
& BRIG_TYPE_BASE_MASK
) == BRIG_TYPE_F16
651 && !gccbrig_is_bit_operation (brig_inst
->opcode
);
/* Storage-only f16 values are widened to f32 before merging.  */
653 if (is_fp16_operation
)
654 old_value
= build_h2f_conversion
655 (build_resize_convert_view (half_storage_type
, operands
[0]));
658 = build_resize_convert_view (TREE_TYPE (instr_expr
), operands
[0]);
660 size_t esize
= is_fp16_operation
? 32 : element_size_bits
;
662 /* Construct a permutation mask where other elements than the lowest one
663 is picked from the old_value. */
664 tree mask_inner_type
= build_nonstandard_integer_type (esize
, 1);
665 vec
<constructor_elt
, va_gc
> *constructor_vals
= NULL
;
666 for (size_t i
= 0; i
< element_count
; ++i
)
/* Lane 0 takes index N (the new value's lowest element in the
   combined index space); other lanes keep old_value's lane i.  */
671 cst
= build_int_cstu (mask_inner_type
, element_count
);
673 cst
= build_int_cstu (mask_inner_type
, i
);
674 CONSTRUCTOR_APPEND_ELT (constructor_vals
, NULL_TREE
, cst
);
676 tree mask_vec_type
= build_vector_type (mask_inner_type
, element_count
);
677 tree mask
= build_vector_from_ctor (mask_vec_type
, constructor_vals
);
/* Materialize the new result so it can feed the VEC_PERM_EXPR.  */
679 tree new_value
= create_tmp_var (TREE_TYPE (instr_expr
), "new_output");
681 = build2 (MODIFY_EXPR
, TREE_TYPE (instr_expr
), new_value
, instr_expr
);
682 m_parent
.m_cf
->append_statement (assign
);
685 = build3 (VEC_PERM_EXPR
, arith_type
, old_value
, new_value
, mask
);
687 tree lower_output
= create_tmp_var (TREE_TYPE (instr_expr
), "s_output");
688 tree assign_lower
= build2 (MODIFY_EXPR
, TREE_TYPE (instr_expr
),
689 lower_output
, instr_expr
);
690 m_parent
.m_cf
->append_statement (assign_lower
);
691 instr_expr
= lower_output
;
/* Store to the output operand, or emit the bare expression (e.g. for
   side-effect-only statements) when there is no output.  */
694 if (output_count
== 1)
695 build_output_assignment (*brig_inst
, operands
[0], instr_expr
);
697 m_parent
.m_cf
->append_statement (instr_expr
);
698 return base
->byteCount
;
701 /* Create an expression that broadcasts the lowest element of the
702 vector in VEC_OPERAND to all elements of the returned vector. */
/* Returns an expression that broadcasts element 0 of VEC_OPERAND to all
   lanes, implemented as a VEC_PERM_EXPR of the operand with itself whose
   mask selects index element_count in every lane (i.e. lane 0 of the
   second input in VEC_PERM_EXPR's combined 0..2N-1 index space).
   NOTE(review): lossy extraction -- embedded line numbers skip (706,
   712-713, 717, 720, 723, 726, 728 onward), so braces, the
   element_count declaration and the final VEC_PERM_EXPR arguments are
   missing from view.  */
705 brig_basic_inst_handler::build_lower_element_broadcast (tree vec_operand
)
707 /* Build the broadcast using shuffle because there's no
708 direct broadcast in GENERIC and this way there's no need for
709 a separate extract of the lowest element. */
710 tree element_type
= TREE_TYPE (TREE_TYPE (vec_operand
));
711 size_t esize
= 8 * int_size_in_bytes (element_type
);
714 = gccbrig_type_vector_subparts (TREE_TYPE (vec_operand
));
715 tree mask_inner_type
= build_nonstandard_integer_type (esize
, 1);
716 vec
<constructor_elt
, va_gc
> *constructor_vals
= NULL
;
718 /* Construct the mask. */
/* Every lane selects index N == lane 0 of the second (identical)
   input vector.  */
719 for (size_t i
= 0; i
< element_count
; ++i
)
721 tree cst
= build_int_cstu (mask_inner_type
, element_count
);
722 CONSTRUCTOR_APPEND_ELT (constructor_vals
, NULL_TREE
, cst
);
724 tree mask_vec_type
= build_vector_type (mask_inner_type
, element_count
);
725 tree mask
= build_vector_from_ctor (mask_vec_type
, constructor_vals
);
727 return build3 (VEC_PERM_EXPR
, TREE_TYPE (vec_operand
), vec_operand
,
731 /* Returns the tree code that should be used to implement the given
732 HSA instruction opcode (BRIG_OPCODE) for the given type of instruction
733 (BRIG_TYPE). In case the opcode cannot be mapped to a TREE node directly,
734 returns TREE_LIST (if it can be emulated with a simple chain of tree
735 nodes) or CALL_EXPR if the opcode should be implemented using a builtin
739 brig_basic_inst_handler::get_tree_code_for_hsa_opcode
740 (BrigOpcode16_t brig_opcode
, BrigType16_t brig_type
) const
742 BrigType16_t brig_inner_type
= brig_type
& BRIG_TYPE_BASE_MASK
;
745 case BRIG_OPCODE_NOP
:
747 case BRIG_OPCODE_ADD
:
749 case BRIG_OPCODE_CMOV
:
750 if (brig_inner_type
== brig_type
)
753 return VEC_COND_EXPR
;
754 case BRIG_OPCODE_SUB
:
756 case BRIG_OPCODE_MUL
:
757 case BRIG_OPCODE_MUL24
:
759 case BRIG_OPCODE_MULHI
:
760 case BRIG_OPCODE_MUL24HI
:
761 return MULT_HIGHPART_EXPR
;
762 case BRIG_OPCODE_DIV
:
763 if (gccbrig_is_float_type (brig_inner_type
))
766 return TRUNC_DIV_EXPR
;
767 case BRIG_OPCODE_NEG
:
769 case BRIG_OPCODE_MIN
:
770 if (gccbrig_is_float_type (brig_inner_type
))
774 case BRIG_OPCODE_MAX
:
775 if (gccbrig_is_float_type (brig_inner_type
))
779 case BRIG_OPCODE_FMA
:
781 case BRIG_OPCODE_ABS
:
783 case BRIG_OPCODE_SHL
:
785 case BRIG_OPCODE_SHR
:
789 case BRIG_OPCODE_XOR
:
791 case BRIG_OPCODE_AND
:
793 case BRIG_OPCODE_NOT
:
795 case BRIG_OPCODE_RET
:
797 case BRIG_OPCODE_MOV
:
798 case BRIG_OPCODE_LDF
:
805 case BRIG_OPCODE_REM
:
806 if (brig_type
== BRIG_TYPE_U64
|| brig_type
== BRIG_TYPE_U32
)
807 return TRUNC_MOD_EXPR
;
810 case BRIG_OPCODE_NRCP
:
811 case BRIG_OPCODE_NRSQRT
:
812 /* Implement as 1/f (x). gcc should pattern detect that and
813 use a native instruction, if available, for it. */
815 case BRIG_OPCODE_FLOOR
:
816 case BRIG_OPCODE_CEIL
:
817 case BRIG_OPCODE_SQRT
:
818 case BRIG_OPCODE_NSQRT
:
819 case BRIG_OPCODE_RINT
:
820 case BRIG_OPCODE_TRUNC
:
821 case BRIG_OPCODE_POPCOUNT
:
822 case BRIG_OPCODE_COPYSIGN
:
823 case BRIG_OPCODE_NCOS
:
824 case BRIG_OPCODE_NSIN
:
825 case BRIG_OPCODE_NLOG2
:
826 case BRIG_OPCODE_NEXP2
:
827 case BRIG_OPCODE_NFMA
:
828 /* Class has type B1 regardless of the float type, thus
829 the below builtin map search cannot find it. */
830 case BRIG_OPCODE_CLASS
:
831 case BRIG_OPCODE_WORKITEMABSID
:
835 /* Some BRIG opcodes can use the same builtins for unsigned and
836 signed types. Force these cases to unsigned types.
839 if (brig_opcode
== BRIG_OPCODE_BORROW
840 || brig_opcode
== BRIG_OPCODE_CARRY
841 || brig_opcode
== BRIG_OPCODE_LASTBIT
842 || brig_opcode
== BRIG_OPCODE_BITINSERT
)
844 if (brig_type
== BRIG_TYPE_S32
)
845 brig_type
= BRIG_TYPE_U32
;
846 else if (brig_type
== BRIG_TYPE_S64
)
847 brig_type
= BRIG_TYPE_U64
;
851 builtin_map::const_iterator i
852 = s_custom_builtins
.find (std::make_pair (brig_opcode
, brig_type
));
853 if (i
!= s_custom_builtins
.end ())
855 else if (s_custom_builtins
.find
856 (std::make_pair (brig_opcode
, brig_inner_type
))
857 != s_custom_builtins
.end ())
859 if (brig_inner_type
== BRIG_TYPE_F16
860 && s_custom_builtins
.find
861 (std::make_pair (brig_opcode
, BRIG_TYPE_F32
))
862 != s_custom_builtins
.end ())
866 return TREE_LIST
; /* Emulate using a chain of nodes. */