/* Source: gcc/brig/brigfrontend/brig-basic-inst-handler.cc
   (git blob 283da7ac80ea93291e11fb9aa63dcb6e1c079b58).  */
/* brig-basic-inst-handler.cc -- brig basic instruction handling
   Copyright (C) 2016-2018 Free Software Foundation, Inc.
   Contributed by Pekka Jaaskelainen <pekka.jaaskelainen@parmance.com>
   for General Processor Tech.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it under
   the terms of the GNU General Public License as published by the Free
   Software Foundation; either version 3, or (at your option) any later
   version.

   GCC is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
22 #include <sstream>
24 #include "brig-code-entry-handler.h"
25 #include "brig-util.h"
27 #include "errors.h"
28 #include "gimple-expr.h"
29 #include "convert.h"
30 #include "print-tree.h"
31 #include "tree-pretty-print.h"
32 #include "langhooks.h"
33 #include "stor-layout.h"
34 #include "diagnostic-core.h"
35 #include "brig-builtins.h"
36 #include "fold-const.h"
38 brig_basic_inst_handler::brig_basic_inst_handler (brig_to_generic &parent)
39 : brig_code_entry_handler (parent)
43 class scalarized_sat_arithmetics : public tree_element_binary_visitor
45 public:
46 scalarized_sat_arithmetics (const BrigInstBase &brig_inst)
47 : m_brig_inst (brig_inst)
49 BrigType16_t element_type = brig_inst.type & BRIG_TYPE_BASE_MASK;
51 #undef DEF_HSAIL_SAT_BUILTIN
52 #undef DEF_HSAIL_BUILTIN
53 #undef DEF_HSAIL_ATOMIC_BUILTIN
54 #undef DEF_HSAIL_INTR_BUILTIN
55 #undef DEF_HSAIL_CVT_ZEROI_SAT_BUILTIN
57 #define DEF_HSAIL_SAT_BUILTIN(ENUM, BRIG_OPCODE, HSAIL_TYPE, \
58 NAME, TYPE, ATTRS) \
59 if (brig_inst.opcode == BRIG_OPCODE && element_type == HSAIL_TYPE) \
60 m_builtin = builtin_decl_explicit (ENUM); \
61 else
62 #include "brig-builtins.def"
63 gcc_unreachable ();
66 virtual tree
67 visit_element (brig_code_entry_handler &, tree operand0, tree operand1)
69 /* Implement saturating arithmetics with scalar built-ins for now.
70 TODO: emit GENERIC nodes for the simplest cases or at least
71 emit vector built-ins. */
72 return call_builtin (m_builtin, 2, TREE_TYPE (operand0),
73 TREE_TYPE (operand0), operand0,
74 TREE_TYPE (operand1), operand1);
76 const BrigInstBase &m_brig_inst;
77 tree m_builtin;
80 /* Implements a vector shuffle. ARITH_TYPE is the type of the vector,
81 OPERANDS[0] is the first vector, OPERAND[1] the second vector and
82 OPERANDS[2] the shuffle mask in HSAIL format. The output is a VEC_PERM_EXPR
83 that implements the shuffle as a GENERIC expression. */
85 tree
86 brig_basic_inst_handler::build_shuffle (tree arith_type,
87 tree_stl_vec &operands)
89 tree element_type
90 = get_unsigned_int_type (TREE_TYPE (TREE_TYPE (operands[0])));
92 /* Offsets to add to the mask values to convert from the
93 HSAIL mask to VEC_PERM_EXPR masks. VEC_PERM_EXPR mask
94 assumes an index spanning from 0 to 2 times the vec
95 width while HSAIL refers separately to two different
96 input vectors, thus is not a "full shuffle" where all
97 output elements can originate from any input element. */
98 vec<constructor_elt, va_gc> *mask_offset_vals = NULL;
100 unsigned int element_count = gccbrig_type_vector_subparts (arith_type);
102 vec<constructor_elt, va_gc> *input_mask_vals = NULL;
103 size_t input_mask_element_size = exact_log2 (element_count);
105 /* Unpack the tightly packed mask elements to BIT_FIELD_REFs
106 from which to construct the mask vector as understood by
107 VEC_PERM_EXPR. */
108 tree mask_operand = add_temp_var ("shuffle_mask", operands[2]);
110 tree mask_element_type
111 = build_nonstandard_integer_type (input_mask_element_size, true);
113 for (size_t i = 0; i < element_count; ++i)
115 tree mask_element
116 = build3 (BIT_FIELD_REF, mask_element_type, mask_operand,
117 bitsize_int (input_mask_element_size),
118 bitsize_int (i * input_mask_element_size));
120 mask_element = convert (element_type, mask_element);
122 tree offset;
123 if (i < element_count / 2)
124 offset = build_int_cst (element_type, 0);
125 else
126 offset = build_int_cst (element_type, element_count);
128 CONSTRUCTOR_APPEND_ELT (mask_offset_vals, NULL_TREE, offset);
129 CONSTRUCTOR_APPEND_ELT (input_mask_vals, NULL_TREE, mask_element);
131 tree mask_vec_type = build_vector_type (element_type, element_count);
133 tree mask_vec = build_constructor (mask_vec_type, input_mask_vals);
134 tree offset_vec = build_constructor (mask_vec_type, mask_offset_vals);
136 tree mask = build2 (PLUS_EXPR, mask_vec_type, mask_vec, offset_vec);
138 tree perm = build3 (VEC_PERM_EXPR, TREE_TYPE (operands[0]), operands[0],
139 operands[1], mask);
140 return perm;
143 /* Unpacks (extracts) a scalar element with an index in OPERANDS[1]
144 from the vector expression in OPERANDS[0]. */
146 tree
147 brig_basic_inst_handler::build_unpack (tree_stl_vec &operands)
149 /* Implement the unpack with a shuffle that stores the unpacked
150 element to the lowest bit positions in the dest. After that
151 a bitwise AND is used to clear the uppermost bits. */
152 tree src_element_type = TREE_TYPE (TREE_TYPE (operands[0]));
154 /* Perform the operations with a raw (unsigned int type) type. */
155 tree element_type = get_unsigned_int_type (src_element_type);
157 vec<constructor_elt, va_gc> *input_mask_vals = NULL;
158 vec<constructor_elt, va_gc> *and_mask_vals = NULL;
160 size_t element_count
161 = gccbrig_type_vector_subparts (TREE_TYPE (operands[0]));
162 tree vec_type = build_vector_type (element_type, element_count);
164 for (size_t i = 0; i < element_count; ++i)
166 tree mask_element;
167 if (i == 0)
168 mask_element = convert (element_type, operands[1]);
169 else
170 mask_element = build_int_cst (element_type, 0);
172 CONSTRUCTOR_APPEND_ELT (input_mask_vals, NULL_TREE, mask_element);
174 tree and_mask_element;
175 if (i == 0)
176 and_mask_element = build_int_cst (element_type, -1);
177 else
178 and_mask_element = build_int_cst (element_type, 0);
179 CONSTRUCTOR_APPEND_ELT (and_mask_vals, NULL_TREE, and_mask_element);
182 tree mask_vec = build_constructor (vec_type, input_mask_vals);
184 tree and_mask_vec = build_constructor (vec_type, and_mask_vals);
186 tree perm = build3 (VEC_PERM_EXPR, vec_type,
187 build_resize_convert_view (vec_type, operands[0]),
188 build_resize_convert_view (vec_type, operands[0]),
189 mask_vec);
191 tree cleared = build2 (BIT_AND_EXPR, vec_type, perm, and_mask_vec);
193 size_t s = int_size_in_bytes (TREE_TYPE (cleared)) * BITS_PER_UNIT;
194 tree raw_type = build_nonstandard_integer_type (s, true);
196 tree as_int = build_resize_convert_view (raw_type, cleared);
198 if (int_size_in_bytes (src_element_type) < 4)
200 if (INTEGRAL_TYPE_P (src_element_type))
201 return extend_int (as_int, uint32_type_node, src_element_type);
203 return as_int;
206 /* Packs (inserts) a scalar element in OPERANDS[1]
207 to the vector in OPERANDS[0] at element position defined by
208 OPERANDS[2]. */
210 tree
211 brig_basic_inst_handler::build_pack (tree_stl_vec &operands)
213 /* Implement using a bit level insertion.
214 TODO: Reuse this for implementing 'bitinsert'
215 without a builtin call. */
217 size_t ecount = gccbrig_type_vector_subparts (TREE_TYPE (operands[0]));
218 size_t vecsize = int_size_in_bytes (TREE_TYPE (operands[0])) * BITS_PER_UNIT;
219 tree wide_type = build_nonstandard_integer_type (vecsize, 1);
221 tree src_vect = build_resize_convert_view (wide_type, operands[0]);
222 src_vect = add_temp_var ("src_vect", src_vect);
224 tree scalar = operands[1];
225 scalar = add_temp_var ("scalar", convert_to_integer (wide_type, scalar));
227 tree pos = operands[2];
229 /* The upper bits of the position can contain garbage.
230 Zero them for well-defined semantics. */
231 tree t = build2 (BIT_AND_EXPR, TREE_TYPE (pos), operands[2],
232 build_int_cstu (TREE_TYPE (pos), ecount - 1));
233 pos = add_temp_var ("pos", convert (wide_type, t));
235 tree element_type = TREE_TYPE (TREE_TYPE (operands[0]));
236 size_t element_width = int_size_in_bytes (element_type) * BITS_PER_UNIT;
237 tree ewidth = build_int_cstu (wide_type, element_width);
239 tree bitoffset = build2 (MULT_EXPR, wide_type, ewidth, pos);
240 bitoffset = add_temp_var ("offset", bitoffset);
242 uint64_t mask_int
243 = element_width == 64 ? (uint64_t) -1 : ((uint64_t) 1 << element_width) - 1;
245 tree mask = build_int_cstu (wide_type, mask_int);
247 mask = add_temp_var ("mask", convert_to_integer (wide_type, mask));
249 tree clearing_mask
250 = build1 (BIT_NOT_EXPR, wide_type,
251 build2 (LSHIFT_EXPR, wide_type, mask, bitoffset));
253 tree zeroed_element
254 = build2 (BIT_AND_EXPR, wide_type, src_vect, clearing_mask);
256 /* TODO: Is the AND necessary: does HSA define what
257 happens if the upper bits in the inserted element are not
258 zero? */
259 tree element_in_position
260 = build2 (LSHIFT_EXPR, wide_type,
261 build2 (BIT_AND_EXPR, wide_type, scalar, mask), bitoffset);
263 tree inserted
264 = build2 (BIT_IOR_EXPR, wide_type, zeroed_element, element_in_position);
265 return inserted;
268 /* Implement the unpack{lo,hi}. BRIG_OPCODE should tell which one and
269 ARITH_TYPE describe the type of the vector arithmetics.
270 OPERANDS[0] and OPERANDS[1] are the input vectors. */
272 tree
273 brig_basic_inst_handler::build_unpack_lo_or_hi (BrigOpcode16_t brig_opcode,
274 tree arith_type,
275 tree_stl_vec &operands)
277 tree element_type = get_unsigned_int_type (TREE_TYPE (arith_type));
278 tree mask_vec_type
279 = build_vector_type (element_type,
280 gccbrig_type_vector_subparts (arith_type));
282 size_t element_count = gccbrig_type_vector_subparts (arith_type);
283 vec<constructor_elt, va_gc> *input_mask_vals = NULL;
285 size_t offset = (brig_opcode == BRIG_OPCODE_UNPACKLO) ? 0 : element_count / 2;
287 for (size_t i = 0; i < element_count / 2; ++i)
289 CONSTRUCTOR_APPEND_ELT (input_mask_vals, NULL_TREE,
290 build_int_cst (element_type, offset + i));
291 CONSTRUCTOR_APPEND_ELT (input_mask_vals, NULL_TREE,
292 build_int_cst (element_type,
293 offset + i + element_count));
296 tree mask_vec = build_constructor (mask_vec_type, input_mask_vals);
298 tree perm = build3 (VEC_PERM_EXPR, TREE_TYPE (operands[0]), operands[0],
299 operands[1], mask_vec);
300 return perm;
303 /* Builds a basic instruction expression from a BRIG instruction. BRIG_OPCODE
304 is the opcode, BRIG_TYPE the brig type of the instruction, ARITH_TYPE the
305 desired tree type for the instruction, and OPERANDS the instruction's
306 input operands already converted to tree nodes. */
308 tree
309 brig_basic_inst_handler::build_inst_expr (BrigOpcode16_t brig_opcode,
310 BrigType16_t brig_type,
311 tree arith_type,
312 tree_stl_vec &operands)
314 tree_code opcode = get_tree_code_for_hsa_opcode (brig_opcode, brig_type);
316 BrigType16_t inner_type = brig_type & BRIG_TYPE_BASE_MASK;
318 tree instr_inner_type
319 = VECTOR_TYPE_P (arith_type) ? TREE_TYPE (arith_type) : arith_type;
321 if (opcode == RSHIFT_EXPR || opcode == LSHIFT_EXPR)
323 /* HSA defines modulo/clipping behavior for shift amounts larger
324 than the bit width, while tree.def leaves it undefined.
325 We need to mask the upper bits to ensure the defined behavior. */
326 tree scalar_mask
327 = build_int_cst (instr_inner_type,
328 gccbrig_hsa_type_bit_size (inner_type) - 1);
330 tree mask = VECTOR_TYPE_P (arith_type)
331 ? build_vector_from_val (arith_type, scalar_mask)
332 : scalar_mask;
334 /* The shift amount is a scalar, broadcast it to produce
335 a vector shift. */
336 if (VECTOR_TYPE_P (arith_type))
337 operands[1] = build_vector_from_val (arith_type, operands[1]);
338 operands[1] = build2 (BIT_AND_EXPR, arith_type, operands[1], mask);
341 size_t input_count = operands.size ();
342 size_t output_count = gccbrig_hsa_opcode_op_output_p (brig_opcode, 0) ?
343 1 : 0;
345 if (opcode == TREE_LIST)
347 /* There was no direct GENERIC opcode for the instruction;
348 try to emulate it with a chain of GENERIC nodes. */
349 if (brig_opcode == BRIG_OPCODE_MAD || brig_opcode == BRIG_OPCODE_MAD24)
351 /* There doesn't seem to be a "standard" MAD built-in in gcc so let's
352 use a chain of multiply + add for now (double rounding method).
353 It should be easier for optimizers than a custom built-in call
354 WIDEN_MULT_EXPR is close, but requires a double size result
355 type. */
356 tree mult_res
357 = build2 (MULT_EXPR, arith_type, operands[0], operands[1]);
358 return build2 (PLUS_EXPR, arith_type, mult_res, operands[2]);
360 else if (brig_opcode == BRIG_OPCODE_MAD24HI)
362 tree mult_res
363 = build2 (MULT_HIGHPART_EXPR, arith_type, operands[0], operands[1]);
364 return build2 (PLUS_EXPR, arith_type, mult_res, operands[2]);
366 else if (brig_opcode == BRIG_OPCODE_SHUFFLE)
368 return build_shuffle (arith_type, operands);
370 else if (brig_opcode == BRIG_OPCODE_UNPACKLO
371 || brig_opcode == BRIG_OPCODE_UNPACKHI)
373 return build_unpack_lo_or_hi (brig_opcode, arith_type, operands);
375 else if (brig_opcode == BRIG_OPCODE_UNPACK)
377 return build_unpack (operands);
379 else if (brig_opcode == BRIG_OPCODE_PACK)
381 return build_pack (operands);
383 else if (brig_opcode == BRIG_OPCODE_NRSQRT)
385 /* Implement as 1.0/sqrt (x) and assume gcc instruction selects to
386 native ISA other than a division, if available.
387 TODO: this will happen only with unsafe math optimizations
388 on which cannot be used in general to remain HSAIL compliant.
389 Perhaps a builtin call would be better option here. */
390 return build2 (RDIV_EXPR, arith_type, build_one_cst (arith_type),
391 expand_or_call_builtin (BRIG_OPCODE_SQRT, brig_type,
392 arith_type, operands));
394 else if (brig_opcode == BRIG_OPCODE_NRCP)
396 /* Implement as 1.0/x and assume gcc instruction selects to
397 native ISA other than a division, if available. */
398 return build2 (RDIV_EXPR, arith_type, build_one_cst (arith_type),
399 operands[0]);
401 else if (brig_opcode == BRIG_OPCODE_LANEID
402 || brig_opcode == BRIG_OPCODE_MAXWAVEID
403 || brig_opcode == BRIG_OPCODE_WAVEID)
405 /* Assuming WAVESIZE 1 (for now), therefore LANEID, WAVEID and
406 MAXWAVEID always return 0. */
407 return build_zero_cst (arith_type);
409 else
410 gcc_unreachable ();
412 else if (opcode == CALL_EXPR)
413 return expand_or_call_builtin (brig_opcode, brig_type, arith_type,
414 operands);
415 else if (output_count == 1)
417 if (input_count == 1)
419 if (opcode == MODIFY_EXPR)
420 return operands[0];
421 else
422 return build1 (opcode, arith_type, operands[0]);
424 else if (input_count == 2)
425 return build2 (opcode, arith_type, operands[0], operands[1]);
426 else if (input_count == 3)
427 return build3 (opcode, arith_type, operands[0], operands[1],
428 operands[2]);
429 else
430 gcc_unreachable ();
432 else
433 gcc_unreachable ();
435 return NULL_TREE;
438 /* Handles the basic instructions, including packed instructions. Deals
439 with the different packing modes by unpacking/packing the wanted
440 elements. Delegates most of the instruction cases to build_inst_expr(). */
442 size_t
443 brig_basic_inst_handler::operator () (const BrigBase *base)
445 const BrigInstBase *brig_inst = (const BrigInstBase *) base;
447 tree_stl_vec operands = build_operands (*brig_inst);
449 size_t output_count
450 = gccbrig_hsa_opcode_op_output_p (brig_inst->opcode, 0) ? 1 : 0;
451 size_t input_count
452 = operands.size () == 0 ? 0 : (operands.size () - output_count);
454 gcc_assert (output_count == 0 || output_count == 1);
456 tree_stl_vec::iterator first_input_i = operands.begin ();
457 if (output_count > 0 && operands.size () > 0)
458 ++first_input_i;
460 tree_stl_vec in_operands;
461 in_operands.assign (first_input_i, operands.end ());
463 BrigType16_t brig_inst_type = brig_inst->type;
465 if (brig_inst->opcode == BRIG_OPCODE_NOP)
466 return base->byteCount;
467 else if (brig_inst->opcode == BRIG_OPCODE_FIRSTBIT
468 || brig_inst->opcode == BRIG_OPCODE_LASTBIT
469 || brig_inst->opcode == BRIG_OPCODE_SAD)
470 /* These instructions are reported to be always 32b in HSAIL, but we want
471 to treat them according to their input argument's type to select the
472 correct instruction/builtin. */
473 brig_inst_type
474 = gccbrig_tree_type_to_hsa_type (TREE_TYPE (in_operands[0]));
476 tree instr_type = gccbrig_tree_type_for_hsa_type (brig_inst_type);
478 if (!instr_type)
480 gcc_unreachable ();
481 return base->byteCount;
484 bool is_vec_instr = hsa_type_packed_p (brig_inst_type);
486 size_t element_size_bits;
487 size_t element_count;
489 if (is_vec_instr)
491 BrigType16_t brig_element_type = brig_inst_type & BRIG_TYPE_BASE_MASK;
492 element_size_bits = gccbrig_hsa_type_bit_size (brig_element_type);
493 element_count = gccbrig_hsa_type_bit_size (brig_inst_type)
494 / gccbrig_hsa_type_bit_size (brig_element_type);
496 else
498 element_size_bits = gccbrig_hsa_type_bit_size (brig_inst_type);
499 element_count = 1;
502 /* The actual arithmetics type that should be performed with the
503 operation. This is not always the same as the original BRIG
504 opcode's type due to implicit conversions of storage-only f16. */
505 tree arith_type = gccbrig_is_bit_operation (brig_inst->opcode)
506 ? gccbrig_tree_type_for_hsa_type (brig_inst_type)
507 : get_tree_expr_type_for_hsa_type (brig_inst_type);
509 tree instr_expr = NULL_TREE;
511 BrigPack8_t p = BRIG_PACK_NONE;
512 if (brig_inst->base.kind == BRIG_KIND_INST_MOD)
513 p = ((const BrigInstMod *) brig_inst)->pack;
514 else if (brig_inst->base.kind == BRIG_KIND_INST_CMP)
515 p = ((const BrigInstCmp *) brig_inst)->pack;
517 if (p == BRIG_PACK_PS || p == BRIG_PACK_PSSAT)
518 in_operands[1] = build_lower_element_broadcast (in_operands[1]);
519 else if (p == BRIG_PACK_SP || p == BRIG_PACK_SPSAT)
520 in_operands[0] = build_lower_element_broadcast (in_operands[0]);
522 tree_code opcode
523 = get_tree_code_for_hsa_opcode (brig_inst->opcode, brig_inst_type);
525 if (p >= BRIG_PACK_PPSAT && p <= BRIG_PACK_PSAT)
527 scalarized_sat_arithmetics sat_arith (*brig_inst);
528 gcc_assert (input_count == 2);
529 instr_expr = sat_arith (*this, in_operands[0], in_operands[1]);
531 else if (opcode == RETURN_EXPR)
533 if (m_parent.m_cf->m_is_kernel)
535 tree goto_stmt
536 = build1 (GOTO_EXPR, void_type_node, m_parent.m_cf->m_exit_label);
537 m_parent.m_cf->append_statement (goto_stmt);
538 return base->byteCount;
540 else
542 m_parent.m_cf->append_return_stmt ();
543 return base->byteCount;
546 else if (opcode == MULT_HIGHPART_EXPR &&
547 is_vec_instr && element_size_bits < 64)
549 /* MULT_HIGHPART_EXPR works only on target dependent vector sizes and
550 even the scalars do not seem to work at least for char elements.
552 Let's fall back to scalarization and promotion of the vector elements
553 to larger types with the MULHI computed as a regular MUL.
554 MULHI for 2x64b seems to work with the Intel CPUs I've tested so
555 that is passed on for vector processing so there is no need for
556 128b scalar arithmetics.
558 This is not modular as these type of things do not belong to the
559 frontend, there should be a legalization phase before the backend
560 that figures out the best way to compute the MULHI for any
561 integer vector datatype.
563 TODO: promote to larger vector types instead. For example
564 MULT_HIGHPART_EXPR with s8x8 doesn't work, but s16x8 seems to at least
565 with my x86-64.
567 tree_stl_vec operand0_elements;
568 if (input_count > 0)
569 unpack (in_operands[0], operand0_elements);
571 tree_stl_vec operand1_elements;
572 if (input_count > 1)
573 unpack (in_operands[1], operand1_elements);
575 tree_stl_vec result_elements;
577 tree scalar_type = TREE_TYPE (arith_type);
578 BrigType16_t element_type = brig_inst_type & BRIG_TYPE_BASE_MASK;
579 tree promoted_type = short_integer_type_node;
580 switch (element_type)
582 case BRIG_TYPE_S8:
583 promoted_type = gccbrig_tree_type_for_hsa_type (BRIG_TYPE_S16);
584 break;
585 case BRIG_TYPE_U8:
586 promoted_type = gccbrig_tree_type_for_hsa_type (BRIG_TYPE_U16);
587 break;
588 case BRIG_TYPE_S16:
589 promoted_type = gccbrig_tree_type_for_hsa_type (BRIG_TYPE_S32);
590 break;
591 case BRIG_TYPE_U16:
592 promoted_type = gccbrig_tree_type_for_hsa_type (BRIG_TYPE_U32);
593 break;
594 case BRIG_TYPE_S32:
595 promoted_type = gccbrig_tree_type_for_hsa_type (BRIG_TYPE_S64);
596 break;
597 case BRIG_TYPE_U32:
598 promoted_type = gccbrig_tree_type_for_hsa_type (BRIG_TYPE_U64);
599 break;
600 default:
601 gcc_unreachable ();
604 size_t promoted_type_size = int_size_in_bytes (promoted_type) * 8;
605 size_t element_count = gccbrig_type_vector_subparts (arith_type);
606 for (size_t i = 0; i < element_count; ++i)
608 tree operand0 = convert (promoted_type, operand0_elements.at (i));
609 tree operand1 = convert (promoted_type, operand1_elements.at (i));
611 tree scalar_expr
612 = build2 (MULT_EXPR, promoted_type, operand0, operand1);
614 scalar_expr
615 = build2 (RSHIFT_EXPR, promoted_type, scalar_expr,
616 build_int_cstu (promoted_type, promoted_type_size / 2));
618 result_elements.push_back (convert (scalar_type, scalar_expr));
620 instr_expr = pack (result_elements);
622 else
624 /* 'class' is always of b1 type, let's consider it by its
625 float type when building the instruction to find the
626 correct builtin. */
627 if (brig_inst->opcode == BRIG_OPCODE_CLASS)
628 brig_inst_type = ((const BrigInstSourceType *) base)->sourceType;
629 instr_expr = build_inst_expr (brig_inst->opcode, brig_inst_type,
630 arith_type, in_operands);
633 if (instr_expr == NULL_TREE)
635 gcc_unreachable ();
636 return base->byteCount;
639 if (p == BRIG_PACK_SS || p == BRIG_PACK_S || p == BRIG_PACK_SSSAT
640 || p == BRIG_PACK_SSAT)
642 /* In case of _s_ or _ss_, select only the lowest element
643 from the new input to the output. We could extract
644 the element and use a scalar operation, but try
645 to keep data in vector registers as much as possible
646 to avoid copies between scalar and vector datapaths. */
647 tree old_value;
648 tree half_storage_type = gccbrig_tree_type_for_hsa_type (brig_inst_type);
649 bool is_fp16_operation
650 = (brig_inst_type & BRIG_TYPE_BASE_MASK) == BRIG_TYPE_F16
651 && !gccbrig_is_bit_operation (brig_inst->opcode);
653 if (is_fp16_operation)
654 old_value = build_h2f_conversion
655 (build_resize_convert_view (half_storage_type, operands[0]));
656 else
657 old_value
658 = build_resize_convert_view (TREE_TYPE (instr_expr), operands[0]);
660 size_t esize = is_fp16_operation ? 32 : element_size_bits;
662 /* Construct a permutation mask where other elements than the lowest one
663 is picked from the old_value. */
664 tree mask_inner_type = build_nonstandard_integer_type (esize, 1);
665 vec<constructor_elt, va_gc> *constructor_vals = NULL;
666 for (size_t i = 0; i < element_count; ++i)
668 tree cst;
670 if (i == 0)
671 cst = build_int_cstu (mask_inner_type, element_count);
672 else
673 cst = build_int_cstu (mask_inner_type, i);
674 CONSTRUCTOR_APPEND_ELT (constructor_vals, NULL_TREE, cst);
676 tree mask_vec_type = build_vector_type (mask_inner_type, element_count);
677 tree mask = build_vector_from_ctor (mask_vec_type, constructor_vals);
679 tree new_value = create_tmp_var (TREE_TYPE (instr_expr), "new_output");
680 tree assign
681 = build2 (MODIFY_EXPR, TREE_TYPE (instr_expr), new_value, instr_expr);
682 m_parent.m_cf->append_statement (assign);
684 instr_expr
685 = build3 (VEC_PERM_EXPR, arith_type, old_value, new_value, mask);
687 tree lower_output = create_tmp_var (TREE_TYPE (instr_expr), "s_output");
688 tree assign_lower = build2 (MODIFY_EXPR, TREE_TYPE (instr_expr),
689 lower_output, instr_expr);
690 m_parent.m_cf->append_statement (assign_lower);
691 instr_expr = lower_output;
694 if (output_count == 1)
695 build_output_assignment (*brig_inst, operands[0], instr_expr);
696 else
697 m_parent.m_cf->append_statement (instr_expr);
698 return base->byteCount;
701 /* Create an expression that broadcasts the lowest element of the
702 vector in VEC_OPERAND to all elements of the returned vector. */
704 tree
705 brig_basic_inst_handler::build_lower_element_broadcast (tree vec_operand)
707 /* Build the broadcast using shuffle because there's no
708 direct broadcast in GENERIC and this way there's no need for
709 a separate extract of the lowest element. */
710 tree element_type = TREE_TYPE (TREE_TYPE (vec_operand));
711 size_t esize = 8 * int_size_in_bytes (element_type);
713 size_t element_count
714 = gccbrig_type_vector_subparts (TREE_TYPE (vec_operand));
715 tree mask_inner_type = build_nonstandard_integer_type (esize, 1);
716 vec<constructor_elt, va_gc> *constructor_vals = NULL;
718 /* Construct the mask. */
719 for (size_t i = 0; i < element_count; ++i)
721 tree cst = build_int_cstu (mask_inner_type, element_count);
722 CONSTRUCTOR_APPEND_ELT (constructor_vals, NULL_TREE, cst);
724 tree mask_vec_type = build_vector_type (mask_inner_type, element_count);
725 tree mask = build_vector_from_ctor (mask_vec_type, constructor_vals);
727 return build3 (VEC_PERM_EXPR, TREE_TYPE (vec_operand), vec_operand,
728 vec_operand, mask);
731 /* Returns the tree code that should be used to implement the given
732 HSA instruction opcode (BRIG_OPCODE) for the given type of instruction
733 (BRIG_TYPE). In case the opcode cannot be mapped to a TREE node directly,
734 returns TREE_LIST (if it can be emulated with a simple chain of tree
735 nodes) or CALL_EXPR if the opcode should be implemented using a builtin
736 call. */
738 tree_code
739 brig_basic_inst_handler::get_tree_code_for_hsa_opcode
740 (BrigOpcode16_t brig_opcode, BrigType16_t brig_type) const
742 BrigType16_t brig_inner_type = brig_type & BRIG_TYPE_BASE_MASK;
743 switch (brig_opcode)
745 case BRIG_OPCODE_NOP:
746 return NOP_EXPR;
747 case BRIG_OPCODE_ADD:
748 return PLUS_EXPR;
749 case BRIG_OPCODE_CMOV:
750 if (brig_inner_type == brig_type)
751 return COND_EXPR;
752 else
753 return VEC_COND_EXPR;
754 case BRIG_OPCODE_SUB:
755 return MINUS_EXPR;
756 case BRIG_OPCODE_MUL:
757 case BRIG_OPCODE_MUL24:
758 return MULT_EXPR;
759 case BRIG_OPCODE_MULHI:
760 case BRIG_OPCODE_MUL24HI:
761 return MULT_HIGHPART_EXPR;
762 case BRIG_OPCODE_DIV:
763 if (gccbrig_is_float_type (brig_inner_type))
764 return RDIV_EXPR;
765 else
766 return TRUNC_DIV_EXPR;
767 case BRIG_OPCODE_NEG:
768 return NEGATE_EXPR;
769 case BRIG_OPCODE_MIN:
770 if (gccbrig_is_float_type (brig_inner_type))
771 return CALL_EXPR;
772 else
773 return MIN_EXPR;
774 case BRIG_OPCODE_MAX:
775 if (gccbrig_is_float_type (brig_inner_type))
776 return CALL_EXPR;
777 else
778 return MAX_EXPR;
779 case BRIG_OPCODE_FMA:
780 return FMA_EXPR;
781 case BRIG_OPCODE_ABS:
782 return ABS_EXPR;
783 case BRIG_OPCODE_SHL:
784 return LSHIFT_EXPR;
785 case BRIG_OPCODE_SHR:
786 return RSHIFT_EXPR;
787 case BRIG_OPCODE_OR:
788 return BIT_IOR_EXPR;
789 case BRIG_OPCODE_XOR:
790 return BIT_XOR_EXPR;
791 case BRIG_OPCODE_AND:
792 return BIT_AND_EXPR;
793 case BRIG_OPCODE_NOT:
794 return BIT_NOT_EXPR;
795 case BRIG_OPCODE_RET:
796 return RETURN_EXPR;
797 case BRIG_OPCODE_MOV:
798 case BRIG_OPCODE_LDF:
799 return MODIFY_EXPR;
800 case BRIG_OPCODE_LD:
801 case BRIG_OPCODE_ST:
802 return MEM_REF;
803 case BRIG_OPCODE_BR:
804 return GOTO_EXPR;
805 case BRIG_OPCODE_REM:
806 if (brig_type == BRIG_TYPE_U64 || brig_type == BRIG_TYPE_U32)
807 return TRUNC_MOD_EXPR;
808 else
809 return CALL_EXPR;
810 case BRIG_OPCODE_NRCP:
811 case BRIG_OPCODE_NRSQRT:
812 /* Implement as 1/f (x). gcc should pattern detect that and
813 use a native instruction, if available, for it. */
814 return TREE_LIST;
815 case BRIG_OPCODE_FLOOR:
816 case BRIG_OPCODE_CEIL:
817 case BRIG_OPCODE_SQRT:
818 case BRIG_OPCODE_NSQRT:
819 case BRIG_OPCODE_RINT:
820 case BRIG_OPCODE_TRUNC:
821 case BRIG_OPCODE_POPCOUNT:
822 case BRIG_OPCODE_COPYSIGN:
823 case BRIG_OPCODE_NCOS:
824 case BRIG_OPCODE_NSIN:
825 case BRIG_OPCODE_NLOG2:
826 case BRIG_OPCODE_NEXP2:
827 case BRIG_OPCODE_NFMA:
828 /* Class has type B1 regardless of the float type, thus
829 the below builtin map search cannot find it. */
830 case BRIG_OPCODE_CLASS:
831 case BRIG_OPCODE_WORKITEMABSID:
832 return CALL_EXPR;
833 default:
835 /* Some BRIG opcodes can use the same builtins for unsigned and
836 signed types. Force these cases to unsigned types.
839 if (brig_opcode == BRIG_OPCODE_BORROW
840 || brig_opcode == BRIG_OPCODE_CARRY
841 || brig_opcode == BRIG_OPCODE_LASTBIT
842 || brig_opcode == BRIG_OPCODE_BITINSERT)
844 if (brig_type == BRIG_TYPE_S32)
845 brig_type = BRIG_TYPE_U32;
846 else if (brig_type == BRIG_TYPE_S64)
847 brig_type = BRIG_TYPE_U64;
851 builtin_map::const_iterator i
852 = s_custom_builtins.find (std::make_pair (brig_opcode, brig_type));
853 if (i != s_custom_builtins.end ())
854 return CALL_EXPR;
855 else if (s_custom_builtins.find
856 (std::make_pair (brig_opcode, brig_inner_type))
857 != s_custom_builtins.end ())
858 return CALL_EXPR;
859 if (brig_inner_type == BRIG_TYPE_F16
860 && s_custom_builtins.find
861 (std::make_pair (brig_opcode, BRIG_TYPE_F32))
862 != s_custom_builtins.end ())
863 return CALL_EXPR;
864 break;
866 return TREE_LIST; /* Emulate using a chain of nodes. */