Add support for ARMv8-R architecture
[official-gcc.git] / gcc / expmed.c
blob 9026472724c4086bd3771538fcbe07f0afb848e1
1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987-2017 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "predict.h"
30 #include "memmodel.h"
31 #include "tm_p.h"
32 #include "expmed.h"
33 #include "optabs.h"
34 #include "emit-rtl.h"
35 #include "diagnostic-core.h"
36 #include "fold-const.h"
37 #include "stor-layout.h"
38 #include "dojump.h"
39 #include "explow.h"
40 #include "expr.h"
41 #include "langhooks.h"
43 struct target_expmed default_target_expmed;
44 #if SWITCHABLE_TARGET
45 struct target_expmed *this_target_expmed = &default_target_expmed;
46 #endif
48 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
49 unsigned HOST_WIDE_INT,
50 unsigned HOST_WIDE_INT,
51 unsigned HOST_WIDE_INT,
52 rtx, bool);
53 static void store_fixed_bit_field_1 (rtx, unsigned HOST_WIDE_INT,
54 unsigned HOST_WIDE_INT,
55 rtx, bool);
56 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
57 unsigned HOST_WIDE_INT,
58 unsigned HOST_WIDE_INT,
59 unsigned HOST_WIDE_INT,
60 rtx, bool);
61 static rtx extract_fixed_bit_field (machine_mode, rtx,
62 unsigned HOST_WIDE_INT,
63 unsigned HOST_WIDE_INT, rtx, int, bool);
64 static rtx extract_fixed_bit_field_1 (machine_mode, rtx,
65 unsigned HOST_WIDE_INT,
66 unsigned HOST_WIDE_INT, rtx, int, bool);
67 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
68 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
69 unsigned HOST_WIDE_INT, int, bool);
70 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
71 static rtx expand_smod_pow2 (machine_mode, rtx, HOST_WIDE_INT);
72 static rtx expand_sdiv_pow2 (machine_mode, rtx, HOST_WIDE_INT);
74 /* Return a constant integer mask value of mode MODE with BITSIZE ones
75 followed by BITPOS zeros, or the complement of that if COMPLEMENT.
76 The mask is truncated if necessary to the width of mode MODE. The
77 mask is zero-extended if BITSIZE+BITPOS is too small for MODE. */
79 static inline rtx
80 mask_rtx (machine_mode mode, int bitpos, int bitsize, bool complement)
82 return immed_wide_int_const
83 (wi::shifted_mask (bitpos, bitsize, complement,
84 GET_MODE_PRECISION (mode)), mode);
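/* For example, mask_rtx (SImode, 4, 8, false) is (const_int 0xff0), i.e.
   bits 4 through 11 set; with COMPLEMENT it is the 32-bit inverse of that
   mask, 0xfffff00f.  */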
87 /* Test whether a value is zero or a power of two.  */
88 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
89 (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
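/* For example, 0, 1, 2 and 8 satisfy EXACT_POWER_OF_2_OR_ZERO_P, while
   6 does not (6 & 5 == 4).  */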
91 struct init_expmed_rtl
93 rtx reg;
94 rtx plus;
95 rtx neg;
96 rtx mult;
97 rtx sdiv;
98 rtx udiv;
99 rtx sdiv_32;
100 rtx smod_32;
101 rtx wide_mult;
102 rtx wide_lshr;
103 rtx wide_trunc;
104 rtx shift;
105 rtx shift_mult;
106 rtx shift_add;
107 rtx shift_sub0;
108 rtx shift_sub1;
109 rtx zext;
110 rtx trunc;
112 rtx pow2[MAX_BITS_PER_WORD];
113 rtx cint[MAX_BITS_PER_WORD];
116 static void
117 init_expmed_one_conv (struct init_expmed_rtl *all, machine_mode to_mode,
118 machine_mode from_mode, bool speed)
120 int to_size, from_size;
121 rtx which;
123 to_size = GET_MODE_PRECISION (to_mode);
124 from_size = GET_MODE_PRECISION (from_mode);
126 /* Most partial integers have a precision less than the "full"
127 integer they require for storage.  If one does not, reduce its
128 bit size by one for the comparison done here.  */
130 if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
131 && pow2p_hwi (to_size))
132 to_size --;
133 if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
134 && pow2p_hwi (from_size))
135 from_size --;
137 /* Assume cost of zero-extend and sign-extend is the same. */
138 which = (to_size < from_size ? all->trunc : all->zext);
140 PUT_MODE (all->reg, from_mode);
141 set_convert_cost (to_mode, from_mode, speed,
142 set_src_cost (which, to_mode, speed));
145 static void
146 init_expmed_one_mode (struct init_expmed_rtl *all,
147 machine_mode mode, int speed)
149 int m, n, mode_bitsize;
150 machine_mode mode_from;
152 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
154 PUT_MODE (all->reg, mode);
155 PUT_MODE (all->plus, mode);
156 PUT_MODE (all->neg, mode);
157 PUT_MODE (all->mult, mode);
158 PUT_MODE (all->sdiv, mode);
159 PUT_MODE (all->udiv, mode);
160 PUT_MODE (all->sdiv_32, mode);
161 PUT_MODE (all->smod_32, mode);
162 PUT_MODE (all->wide_trunc, mode);
163 PUT_MODE (all->shift, mode);
164 PUT_MODE (all->shift_mult, mode);
165 PUT_MODE (all->shift_add, mode);
166 PUT_MODE (all->shift_sub0, mode);
167 PUT_MODE (all->shift_sub1, mode);
168 PUT_MODE (all->zext, mode);
169 PUT_MODE (all->trunc, mode);
171 set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
172 set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
173 set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
174 set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
175 set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
177 set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
178 <= 2 * add_cost (speed, mode)));
179 set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
180 <= 4 * add_cost (speed, mode)));
182 set_shift_cost (speed, mode, 0, 0);
184 int cost = add_cost (speed, mode);
185 set_shiftadd_cost (speed, mode, 0, cost);
186 set_shiftsub0_cost (speed, mode, 0, cost);
187 set_shiftsub1_cost (speed, mode, 0, cost);
190 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
191 for (m = 1; m < n; m++)
193 XEXP (all->shift, 1) = all->cint[m];
194 XEXP (all->shift_mult, 1) = all->pow2[m];
196 set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
197 set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
198 speed));
199 set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
200 speed));
201 set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
202 speed));
205 if (SCALAR_INT_MODE_P (mode))
207 for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
208 mode_from = (machine_mode)(mode_from + 1))
209 init_expmed_one_conv (all, mode, mode_from, speed);
211 if (GET_MODE_CLASS (mode) == MODE_INT)
213 machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
214 if (wider_mode != VOIDmode)
216 PUT_MODE (all->zext, wider_mode);
217 PUT_MODE (all->wide_mult, wider_mode);
218 PUT_MODE (all->wide_lshr, wider_mode);
219 XEXP (all->wide_lshr, 1) = GEN_INT (mode_bitsize);
221 set_mul_widen_cost (speed, wider_mode,
222 set_src_cost (all->wide_mult, wider_mode, speed));
223 set_mul_highpart_cost (speed, mode,
224 set_src_cost (all->wide_trunc, mode, speed));
229 void
230 init_expmed (void)
232 struct init_expmed_rtl all;
233 machine_mode mode = QImode;
234 int m, speed;
236 memset (&all, 0, sizeof all);
237 for (m = 1; m < MAX_BITS_PER_WORD; m++)
239 all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
240 all.cint[m] = GEN_INT (m);
243 /* Avoid using hard regs in ways which may be unsupported. */
244 all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
245 all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
246 all.neg = gen_rtx_NEG (mode, all.reg);
247 all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
248 all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
249 all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
250 all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
251 all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
252 all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
253 all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
254 all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
255 all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
256 all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
257 all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
258 all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
259 all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
260 all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
261 all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
263 for (speed = 0; speed < 2; speed++)
265 crtl->maybe_hot_insn_p = speed;
266 set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
268 for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
269 mode = (machine_mode)(mode + 1))
270 init_expmed_one_mode (&all, mode, speed);
272 if (MIN_MODE_PARTIAL_INT != VOIDmode)
273 for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
274 mode = (machine_mode)(mode + 1))
275 init_expmed_one_mode (&all, mode, speed);
277 if (MIN_MODE_VECTOR_INT != VOIDmode)
278 for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
279 mode = (machine_mode)(mode + 1))
280 init_expmed_one_mode (&all, mode, speed);
283 if (alg_hash_used_p ())
285 struct alg_hash_entry *p = alg_hash_entry_ptr (0);
286 memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
288 else
289 set_alg_hash_used_p (true);
290 default_rtl_profile ();
292 ggc_free (all.trunc);
293 ggc_free (all.shift_sub1);
294 ggc_free (all.shift_sub0);
295 ggc_free (all.shift_add);
296 ggc_free (all.shift_mult);
297 ggc_free (all.shift);
298 ggc_free (all.wide_trunc);
299 ggc_free (all.wide_lshr);
300 ggc_free (all.wide_mult);
301 ggc_free (all.zext);
302 ggc_free (all.smod_32);
303 ggc_free (all.sdiv_32);
304 ggc_free (all.udiv);
305 ggc_free (all.sdiv);
306 ggc_free (all.mult);
307 ggc_free (all.neg);
308 ggc_free (all.plus);
309 ggc_free (all.reg);
312 /* Return an rtx representing minus the value of X.
313 MODE is the intended mode of the result,
314 useful if X is a CONST_INT. */
316 rtx
317 negate_rtx (machine_mode mode, rtx x)
319 rtx result = simplify_unary_operation (NEG, mode, x, mode);
321 if (result == 0)
322 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
324 return result;
327 /* Whether reverse storage order is supported on the target. */
328 static int reverse_storage_order_supported = -1;
330 /* Check whether reverse storage order is supported on the target. */
332 static void
333 check_reverse_storage_order_support (void)
335 if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
337 reverse_storage_order_supported = 0;
338 sorry ("reverse scalar storage order");
340 else
341 reverse_storage_order_supported = 1;
344 /* Whether reverse FP storage order is supported on the target. */
345 static int reverse_float_storage_order_supported = -1;
347 /* Check whether reverse FP storage order is supported on the target. */
349 static void
350 check_reverse_float_storage_order_support (void)
352 if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
354 reverse_float_storage_order_supported = 0;
355 sorry ("reverse floating-point scalar storage order");
357 else
358 reverse_float_storage_order_supported = 1;
361 /* Return an rtx representing value of X with reverse storage order.
362 MODE is the intended mode of the result,
363 useful if X is a CONST_INT. */
365 rtx
366 flip_storage_order (machine_mode mode, rtx x)
368 machine_mode int_mode;
369 rtx result;
371 if (mode == QImode)
372 return x;
374 if (COMPLEX_MODE_P (mode))
376 rtx real = read_complex_part (x, false);
377 rtx imag = read_complex_part (x, true);
379 real = flip_storage_order (GET_MODE_INNER (mode), real);
380 imag = flip_storage_order (GET_MODE_INNER (mode), imag);
382 return gen_rtx_CONCAT (mode, real, imag);
385 if (__builtin_expect (reverse_storage_order_supported < 0, 0))
386 check_reverse_storage_order_support ();
388 if (SCALAR_INT_MODE_P (mode))
389 int_mode = mode;
390 else
392 if (FLOAT_MODE_P (mode)
393 && __builtin_expect (reverse_float_storage_order_supported < 0, 0))
394 check_reverse_float_storage_order_support ();
396 int_mode = mode_for_size (GET_MODE_PRECISION (mode), MODE_INT, 0);
397 if (int_mode == BLKmode)
399 sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
400 return x;
402 x = gen_lowpart (int_mode, x);
405 result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
406 if (result == 0)
407 result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
409 if (int_mode != mode)
410 result = gen_lowpart (mode, result);
412 return result;
415 /* Adjust bitfield memory MEM so that it points to the first unit of mode
416 MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
417 If MODE is BLKmode, return a reference to every byte in the bitfield.
418 Set *NEW_BITNUM to the bit position of the field within the new memory. */
420 static rtx
421 narrow_bit_field_mem (rtx mem, machine_mode mode,
422 unsigned HOST_WIDE_INT bitsize,
423 unsigned HOST_WIDE_INT bitnum,
424 unsigned HOST_WIDE_INT *new_bitnum)
426 if (mode == BLKmode)
428 *new_bitnum = bitnum % BITS_PER_UNIT;
429 HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
430 HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
431 / BITS_PER_UNIT);
432 return adjust_bitfield_address_size (mem, mode, offset, size);
434 else
436 unsigned int unit = GET_MODE_BITSIZE (mode);
437 *new_bitnum = bitnum % unit;
438 HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
439 return adjust_bitfield_address (mem, mode, offset);
443 /* The caller wants to perform insertion or extraction PATTERN on a
444 bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
445 BITREGION_START and BITREGION_END are as for store_bit_field
446 and FIELDMODE is the natural mode of the field.
448 Search for a mode that is compatible with the memory access
449 restrictions and (where applicable) with a register insertion or
450 extraction. Return the new memory on success, storing the adjusted
451 bit position in *NEW_BITNUM. Return null otherwise. */
453 static rtx
454 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
455 rtx op0, HOST_WIDE_INT bitsize,
456 HOST_WIDE_INT bitnum,
457 unsigned HOST_WIDE_INT bitregion_start,
458 unsigned HOST_WIDE_INT bitregion_end,
459 machine_mode fieldmode,
460 unsigned HOST_WIDE_INT *new_bitnum)
462 bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
463 bitregion_end, MEM_ALIGN (op0),
464 MEM_VOLATILE_P (op0));
465 machine_mode best_mode;
466 if (iter.next_mode (&best_mode))
468 /* We can use a memory in BEST_MODE. See whether this is true for
469 any wider modes. All other things being equal, we prefer to
470 use the widest mode possible because it tends to expose more
471 CSE opportunities. */
472 if (!iter.prefer_smaller_modes ())
474 /* Limit the search to the mode required by the corresponding
475 register insertion or extraction instruction, if any. */
476 machine_mode limit_mode = word_mode;
477 extraction_insn insn;
478 if (get_best_reg_extraction_insn (&insn, pattern,
479 GET_MODE_BITSIZE (best_mode),
480 fieldmode))
481 limit_mode = insn.field_mode;
483 machine_mode wider_mode;
484 while (iter.next_mode (&wider_mode)
485 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
486 best_mode = wider_mode;
488 return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
489 new_bitnum);
491 return NULL_RTX;
494 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
495 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
496 offset is then BITNUM / BITS_PER_UNIT. */
498 static bool
499 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
500 unsigned HOST_WIDE_INT bitsize,
501 machine_mode struct_mode)
503 if (BYTES_BIG_ENDIAN)
504 return (bitnum % BITS_PER_UNIT == 0
505 && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
506 || (bitnum + bitsize) % BITS_PER_WORD == 0));
507 else
508 return bitnum % BITS_PER_WORD == 0;
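/* For example, on a little-endian target with 32-bit words,
   lowpart_bit_field_p is true for a field starting at bit 32 of a DImode
   structure (subreg byte offset 4) but false for one starting at bit 8.  */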
511 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
512 containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
513 Return false if the access would touch memory outside the range
514 BITREGION_START to BITREGION_END for conformance to the C++ memory
515 model. */
517 static bool
518 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
519 unsigned HOST_WIDE_INT bitnum,
520 machine_mode fieldmode,
521 unsigned HOST_WIDE_INT bitregion_start,
522 unsigned HOST_WIDE_INT bitregion_end)
524 unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
526 /* -fstrict-volatile-bitfields must be enabled and we must have a
527 volatile MEM. */
528 if (!MEM_P (op0)
529 || !MEM_VOLATILE_P (op0)
530 || flag_strict_volatile_bitfields <= 0)
531 return false;
533 /* Non-integral modes likely only happen with packed structures.
534 Punt. */
535 if (!SCALAR_INT_MODE_P (fieldmode))
536 return false;
538 /* The bit size must not be larger than the field mode, and
539 the field mode must not be larger than a word. */
540 if (bitsize > modesize || modesize > BITS_PER_WORD)
541 return false;
543 /* Check for cases of unaligned fields that must be split. */
544 if (bitnum % modesize + bitsize > modesize)
545 return false;
547 /* The memory must be sufficiently aligned for a MODESIZE access.
548 This condition guarantees that the memory access will not
549 touch anything after the end of the structure. */
550 if (MEM_ALIGN (op0) < modesize)
551 return false;
553 /* Check for cases where the C++ memory model applies. */
554 if (bitregion_end != 0
555 && (bitnum - bitnum % modesize < bitregion_start
556 || bitnum - bitnum % modesize + modesize - 1 > bitregion_end))
557 return false;
559 return true;
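/* For example, with -fstrict-volatile-bitfields enabled, an 8-bit field
   declared with SImode field mode inside a volatile structure aligned to
   at least 32 bits passes this test (provided it stays within the bit
   region), and the callers below then access it with a full SImode load
   or store.  */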
562 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
563 bit number BITNUM can be treated as a simple value of mode MODE. */
565 static bool
566 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
567 unsigned HOST_WIDE_INT bitnum, machine_mode mode)
569 return (MEM_P (op0)
570 && bitnum % BITS_PER_UNIT == 0
571 && bitsize == GET_MODE_BITSIZE (mode)
572 && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
573 || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
574 && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
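/* For example, a 32-bit field starting on a byte boundary and accessed
   in SImode is "simple" whenever it is 32-bit aligned within sufficiently
   aligned memory, or unconditionally on targets where unaligned SImode
   accesses are not slow.  */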
577 /* Try to use instruction INSV to store VALUE into a field of OP0.
578 BITSIZE and BITNUM are as for store_bit_field. */
580 static bool
581 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
582 unsigned HOST_WIDE_INT bitsize,
583 unsigned HOST_WIDE_INT bitnum,
584 rtx value)
586 struct expand_operand ops[4];
587 rtx value1;
588 rtx xop0 = op0;
589 rtx_insn *last = get_last_insn ();
590 bool copy_back = false;
592 machine_mode op_mode = insv->field_mode;
593 unsigned int unit = GET_MODE_BITSIZE (op_mode);
594 if (bitsize == 0 || bitsize > unit)
595 return false;
597 if (MEM_P (xop0))
598 /* Get a reference to the first byte of the field. */
599 xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
600 &bitnum);
601 else
603 /* Convert from counting within OP0 to counting in OP_MODE. */
604 if (BYTES_BIG_ENDIAN)
605 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
607 /* If xop0 is a register, we need it in OP_MODE
608 to make it acceptable to the format of insv. */
609 if (GET_CODE (xop0) == SUBREG)
610 /* We can't just change the mode, because this might clobber op0,
611 and we will need the original value of op0 if insv fails. */
612 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
613 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
614 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
617 /* If the destination is a paradoxical subreg such that we need a
618 truncate to the inner mode, perform the insertion on a temporary and
619 truncate the result to the original destination. Note that we can't
620 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
621 X) 0)) is (reg:N X). */
622 if (GET_CODE (xop0) == SUBREG
623 && REG_P (SUBREG_REG (xop0))
624 && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
625 op_mode))
627 rtx tem = gen_reg_rtx (op_mode);
628 emit_move_insn (tem, xop0);
629 xop0 = tem;
630 copy_back = true;
633 /* There is a similar overflow check at the start of store_bit_field_1,
634 but it only handles the case where the field lies completely outside
635 the register.  Here the field may lie only partly in the register, so
636 we need to adjust BITSIZE for that partial overflow.  Without this
637 fix, pr48335-2.c would break on big-endian targets that have a bit
638 insert instruction, such as arm and aarch64.  */
640 if (bitsize + bitnum > unit && bitnum < unit)
642 warning (OPT_Wextra, "write of %wu-bit data outside the bounds of "
643 "the destination object; data truncated to %wu bits",
644 bitsize, unit - bitnum);
645 bitsize = unit - bitnum;
648 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
649 "backwards" from the size of the unit we are inserting into.
650 Otherwise, we count bits from the most significant on a
651 BYTES/BITS_BIG_ENDIAN machine. */
653 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
654 bitnum = unit - bitsize - bitnum;
656 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
657 value1 = value;
658 if (GET_MODE (value) != op_mode)
660 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
662 rtx tmp;
663 /* Optimization: Don't bother really extending VALUE
664 if it has all the bits we will actually use. However,
665 if we must narrow it, be sure we do it correctly. */
667 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
669 tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
670 if (! tmp)
671 tmp = simplify_gen_subreg (op_mode,
672 force_reg (GET_MODE (value),
673 value1),
674 GET_MODE (value), 0);
676 else
678 tmp = gen_lowpart_if_possible (op_mode, value1);
679 if (! tmp)
680 tmp = gen_lowpart (op_mode, force_reg (GET_MODE (value),
681 value1));
683 value1 = tmp;
685 else if (CONST_INT_P (value))
686 value1 = gen_int_mode (INTVAL (value), op_mode);
687 else
688 /* Parse phase is supposed to make VALUE's data type
689 match that of the component reference, which is a type
690 at least as wide as the field; so VALUE should have
691 a mode that corresponds to that type. */
692 gcc_assert (CONSTANT_P (value));
695 create_fixed_operand (&ops[0], xop0);
696 create_integer_operand (&ops[1], bitsize);
697 create_integer_operand (&ops[2], bitnum);
698 create_input_operand (&ops[3], value1, op_mode);
699 if (maybe_expand_insn (insv->icode, 4, ops))
701 if (copy_back)
702 convert_move (op0, xop0, true);
703 return true;
705 delete_insns_since (last);
706 return false;
709 /* A subroutine of store_bit_field, with the same arguments. Return true
710 if the operation could be implemented.
712 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
713 no other way of implementing the operation. If FALLBACK_P is false,
714 return false instead. */
716 static bool
717 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
718 unsigned HOST_WIDE_INT bitnum,
719 unsigned HOST_WIDE_INT bitregion_start,
720 unsigned HOST_WIDE_INT bitregion_end,
721 machine_mode fieldmode,
722 rtx value, bool reverse, bool fallback_p)
724 rtx op0 = str_rtx;
725 rtx orig_value;
727 while (GET_CODE (op0) == SUBREG)
729 /* The following line once was done only if WORDS_BIG_ENDIAN,
730 but I think that is a mistake. WORDS_BIG_ENDIAN is
731 meaningful at a much higher level; when structures are copied
732 between memory and regs, the higher-numbered regs
733 always get higher addresses. */
734 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
735 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
736 int byte_offset = 0;
738 /* Paradoxical subregs need special handling on big-endian machines. */
739 if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
741 int difference = inner_mode_size - outer_mode_size;
743 if (WORDS_BIG_ENDIAN)
744 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
745 if (BYTES_BIG_ENDIAN)
746 byte_offset += difference % UNITS_PER_WORD;
748 else
749 byte_offset = SUBREG_BYTE (op0);
751 bitnum += byte_offset * BITS_PER_UNIT;
752 op0 = SUBREG_REG (op0);
755 /* No action is needed if the target is a register and if the field
756 lies completely outside that register. This can occur if the source
757 code contains an out-of-bounds access to a small array. */
758 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
759 return true;
761 /* Use vec_set patterns for inserting parts of vectors whenever
762 available. */
763 if (VECTOR_MODE_P (GET_MODE (op0))
764 && !MEM_P (op0)
765 && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
766 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
767 && bitsize == GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
768 && !(bitnum % GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
770 struct expand_operand ops[3];
771 machine_mode outermode = GET_MODE (op0);
772 machine_mode innermode = GET_MODE_INNER (outermode);
773 enum insn_code icode = optab_handler (vec_set_optab, outermode);
774 int pos = bitnum / GET_MODE_BITSIZE (innermode);
776 create_fixed_operand (&ops[0], op0);
777 create_input_operand (&ops[1], value, innermode);
778 create_integer_operand (&ops[2], pos);
779 if (maybe_expand_insn (icode, 3, ops))
780 return true;
783 /* If the target is a register, overwriting the entire object, or storing
784 a full-word or multi-word field can be done with just a SUBREG. */
785 if (!MEM_P (op0)
786 && bitsize == GET_MODE_BITSIZE (fieldmode)
787 && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
788 || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
790 /* Use the subreg machinery either to narrow OP0 to the required
791 words or to cope with mode punning between equal-sized modes.
792 In the latter case, use subreg on the rhs side, not lhs. */
793 rtx sub;
795 if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
797 sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
798 if (sub)
800 if (reverse)
801 sub = flip_storage_order (GET_MODE (op0), sub);
802 emit_move_insn (op0, sub);
803 return true;
806 else
808 sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
809 bitnum / BITS_PER_UNIT);
810 if (sub)
812 if (reverse)
813 value = flip_storage_order (fieldmode, value);
814 emit_move_insn (sub, value);
815 return true;
820 /* If the target is memory, storing any naturally aligned field can be
821 done with a simple store. For targets that support fast unaligned
822 memory, any naturally sized, unit aligned field can be done directly. */
823 if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
825 op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
826 if (reverse)
827 value = flip_storage_order (fieldmode, value);
828 emit_move_insn (op0, value);
829 return true;
832 /* Make sure we are playing with integral modes. Pun with subregs
833 if we aren't. This must come after the entire register case above,
834 since that case is valid for any mode. The following cases are only
835 valid for integral modes. */
837 machine_mode imode = int_mode_for_mode (GET_MODE (op0));
838 if (imode != GET_MODE (op0))
840 if (MEM_P (op0))
841 op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
842 else
844 gcc_assert (imode != BLKmode);
845 op0 = gen_lowpart (imode, op0);
850 /* Storing an lsb-aligned field in a register
851 can be done with a movstrict instruction. */
853 if (!MEM_P (op0)
854 && !reverse
855 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
856 && bitsize == GET_MODE_BITSIZE (fieldmode)
857 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
859 struct expand_operand ops[2];
860 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
861 rtx arg0 = op0;
862 unsigned HOST_WIDE_INT subreg_off;
864 if (GET_CODE (arg0) == SUBREG)
866 /* Else we've got some float mode source being extracted into
867 a different float mode destination -- this combination of
868 subregs results in Severe Tire Damage. */
869 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
870 || GET_MODE_CLASS (fieldmode) == MODE_INT
871 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
872 arg0 = SUBREG_REG (arg0);
875 subreg_off = bitnum / BITS_PER_UNIT;
876 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
878 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
880 create_fixed_operand (&ops[0], arg0);
881 /* Shrink the source operand to FIELDMODE. */
882 create_convert_operand_to (&ops[1], value, fieldmode, false);
883 if (maybe_expand_insn (icode, 2, ops))
884 return true;
888 /* Handle fields bigger than a word. */
890 if (bitsize > BITS_PER_WORD)
892 /* Here we transfer the words of the field
893 in the order least significant first.
894 This is because the most significant word is the one which may
895 be less than full.
896 However, only do that if the value is not BLKmode. */
898 const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
899 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
900 unsigned int i;
901 rtx_insn *last;
903 /* This is the mode we must force value to, so that there will be enough
904 subwords to extract. Note that fieldmode will often (always?) be
905 VOIDmode, because that is what store_field uses to indicate that this
906 is a bit field, but passing VOIDmode to operand_subword_force
907 is not allowed. */
908 fieldmode = GET_MODE (value);
909 if (fieldmode == VOIDmode)
910 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
912 last = get_last_insn ();
913 for (i = 0; i < nwords; i++)
915 /* If I is 0, use the low-order word in both field and target;
916 if I is 1, use the next to lowest word; and so on. */
917 unsigned int wordnum = (backwards
918 ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
919 - i - 1
920 : i);
921 unsigned int bit_offset = (backwards ^ reverse
922 ? MAX ((int) bitsize - ((int) i + 1)
923 * BITS_PER_WORD,
924 0)
925 : (int) i * BITS_PER_WORD);
926 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
927 unsigned HOST_WIDE_INT new_bitsize =
928 MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
930 /* If the remaining chunk doesn't have full wordsize we have
931 to make sure that for big-endian machines the higher order
932 bits are used. */
933 if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
934 value_word = simplify_expand_binop (word_mode, lshr_optab,
935 value_word,
936 GEN_INT (BITS_PER_WORD
937 - new_bitsize),
938 NULL_RTX, true,
939 OPTAB_LIB_WIDEN);
941 if (!store_bit_field_1 (op0, new_bitsize,
942 bitnum + bit_offset,
943 bitregion_start, bitregion_end,
944 word_mode,
945 value_word, reverse, fallback_p))
947 delete_insns_since (last);
948 return false;
951 return true;
954 /* If VALUE has a floating-point or complex mode, access it as an
955 integer of the corresponding size. This can occur on a machine
956 with 64 bit registers that uses SFmode for float. It can also
957 occur for unaligned float or complex fields. */
958 orig_value = value;
959 if (GET_MODE (value) != VOIDmode
960 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
961 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
963 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
964 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
967 /* If OP0 is a multi-word register, narrow it to the affected word.
968 If the region spans two words, defer to store_split_bit_field.
969 Don't do this if op0 is a single hard register wider than word
970 such as a float or vector register. */
971 if (!MEM_P (op0)
972 && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD
973 && (!REG_P (op0)
974 || !HARD_REGISTER_P (op0)
975 || HARD_REGNO_NREGS (REGNO (op0), GET_MODE (op0)) != 1))
977 if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
979 if (!fallback_p)
980 return false;
982 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
983 bitregion_end, value, reverse);
984 return true;
986 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
987 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
988 gcc_assert (op0);
989 bitnum %= BITS_PER_WORD;
992 /* From here on we can assume that the field to be stored in fits
993 within a word. If the destination is a register, it too fits
994 in a word. */
996 extraction_insn insv;
997 if (!MEM_P (op0)
998 && !reverse
999 && get_best_reg_extraction_insn (&insv, EP_insv,
1000 GET_MODE_BITSIZE (GET_MODE (op0)),
1001 fieldmode)
1002 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
1003 return true;
1005 /* If OP0 is a memory, try copying it to a register and seeing if a
1006 cheap register alternative is available. */
1007 if (MEM_P (op0) && !reverse)
1009 if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
1010 fieldmode)
1011 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
1012 return true;
1014 rtx_insn *last = get_last_insn ();
1016 /* Try loading part of OP0 into a register, inserting the bitfield
1017 into that, and then copying the result back to OP0. */
1018 unsigned HOST_WIDE_INT bitpos;
1019 rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1020 bitregion_start, bitregion_end,
1021 fieldmode, &bitpos);
1022 if (xop0)
1024 rtx tempreg = copy_to_reg (xop0);
1025 if (store_bit_field_1 (tempreg, bitsize, bitpos,
1026 bitregion_start, bitregion_end,
1027 fieldmode, orig_value, reverse, false))
1029 emit_move_insn (xop0, tempreg);
1030 return true;
1032 delete_insns_since (last);
1036 if (!fallback_p)
1037 return false;
1039 store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
1040 bitregion_end, value, reverse);
1041 return true;
1044 /* Generate code to store value from rtx VALUE
1045 into a bit-field within structure STR_RTX
1046 containing BITSIZE bits starting at bit BITNUM.
1048 BITREGION_START is bitpos of the first bitfield in this region.
1049 BITREGION_END is the bitpos of the ending bitfield in this region.
1050 These two fields are 0, if the C++ memory model does not apply,
1051 or we are not interested in keeping track of bitfield regions.
1053 FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1055 If REVERSE is true, the store is to be done in reverse order. */
1057 void
1058 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1059 unsigned HOST_WIDE_INT bitnum,
1060 unsigned HOST_WIDE_INT bitregion_start,
1061 unsigned HOST_WIDE_INT bitregion_end,
1062 machine_mode fieldmode,
1063 rtx value, bool reverse)
1065 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1066 if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode,
1067 bitregion_start, bitregion_end))
1069 /* Storing of a full word can be done with a simple store.
1070 We know here that the field can be accessed with one single
1071 instruction. For targets that support unaligned memory,
1072 an unaligned access may be necessary. */
1073 if (bitsize == GET_MODE_BITSIZE (fieldmode))
1075 str_rtx = adjust_bitfield_address (str_rtx, fieldmode,
1076 bitnum / BITS_PER_UNIT);
1077 if (reverse)
1078 value = flip_storage_order (fieldmode, value);
1079 gcc_assert (bitnum % BITS_PER_UNIT == 0);
1080 emit_move_insn (str_rtx, value);
1082 else
1084 rtx temp;
1086 str_rtx = narrow_bit_field_mem (str_rtx, fieldmode, bitsize, bitnum,
1087 &bitnum);
1088 gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (fieldmode));
1089 temp = copy_to_reg (str_rtx);
1090 if (!store_bit_field_1 (temp, bitsize, bitnum, 0, 0,
1091 fieldmode, value, reverse, true))
1092 gcc_unreachable ();
1094 emit_move_insn (str_rtx, temp);
1097 return;
1100 /* Under the C++11 memory model, we must not touch bits outside the
1101 bit region. Adjust the address to start at the beginning of the
1102 bit region. */
1103 if (MEM_P (str_rtx) && bitregion_start > 0)
1105 machine_mode bestmode;
1106 HOST_WIDE_INT offset, size;
1108 gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
1110 offset = bitregion_start / BITS_PER_UNIT;
1111 bitnum -= bitregion_start;
1112 size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
1113 bitregion_end -= bitregion_start;
1114 bitregion_start = 0;
1115 bestmode = get_best_mode (bitsize, bitnum,
1116 bitregion_start, bitregion_end,
1117 MEM_ALIGN (str_rtx), VOIDmode,
1118 MEM_VOLATILE_P (str_rtx));
1119 str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
1122 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1123 bitregion_start, bitregion_end,
1124 fieldmode, value, reverse, true))
1125 gcc_unreachable ();
1128 /* Use shifts and boolean operations to store VALUE into a bit field of
1129 width BITSIZE in OP0, starting at bit BITNUM.
1131 If REVERSE is true, the store is to be done in reverse order. */
1133 static void
1134 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1135 unsigned HOST_WIDE_INT bitnum,
1136 unsigned HOST_WIDE_INT bitregion_start,
1137 unsigned HOST_WIDE_INT bitregion_end,
1138 rtx value, bool reverse)
1140 /* There is a case not handled here:
1141 a structure with a known alignment of just a halfword
1142 and a field split across two aligned halfwords within the structure.
1143 Or likewise a structure with a known alignment of just a byte
1144 and a field split across two bytes.
1145 Such cases are not supposed to be able to occur. */
1147 if (MEM_P (op0))
1149 machine_mode mode = GET_MODE (op0);
1150 if (GET_MODE_BITSIZE (mode) == 0
1151 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
1152 mode = word_mode;
1153 mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1154 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
1156 if (mode == VOIDmode)
1158 /* The only way this should occur is if the field spans word
1159 boundaries. */
1160 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
1161 bitregion_end, value, reverse);
1162 return;
1165 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1168 store_fixed_bit_field_1 (op0, bitsize, bitnum, value, reverse);
1171 /* Helper function for store_fixed_bit_field, stores
1172 the bit field always using the MODE of OP0. */
1174 static void
1175 store_fixed_bit_field_1 (rtx op0, unsigned HOST_WIDE_INT bitsize,
1176 unsigned HOST_WIDE_INT bitnum,
1177 rtx value, bool reverse)
1179 machine_mode mode;
1180 rtx temp;
1181 int all_zero = 0;
1182 int all_one = 0;
1184 mode = GET_MODE (op0);
1185 gcc_assert (SCALAR_INT_MODE_P (mode));
1187 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1188 for invalid input, such as f5 from gcc.dg/pr48335-2.c. */
1190 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1191 /* BITNUM is the distance between our msb
1192 and that of the containing datum.
1193 Convert it to the distance from the lsb. */
1194 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1196 /* Now BITNUM is always the distance between our lsb
1197 and that of OP0. */
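/* For example, in a 32-bit MODE, an 8-bit field whose msb-relative BITNUM
   was 4 is now at lsb-relative bit 20 (32 - 8 - 4).  */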
1199 /* Shift VALUE left by BITNUM bits. If VALUE is not constant,
1200 we must first convert its mode to MODE. */
1202 if (CONST_INT_P (value))
1204 unsigned HOST_WIDE_INT v = UINTVAL (value);
1206 if (bitsize < HOST_BITS_PER_WIDE_INT)
1207 v &= (HOST_WIDE_INT_1U << bitsize) - 1;
1209 if (v == 0)
1210 all_zero = 1;
1211 else if ((bitsize < HOST_BITS_PER_WIDE_INT
1212 && v == (HOST_WIDE_INT_1U << bitsize) - 1)
1213 || (bitsize == HOST_BITS_PER_WIDE_INT
1214 && v == HOST_WIDE_INT_M1U))
1215 all_one = 1;
1217 value = lshift_value (mode, v, bitnum);
1219 else
1221 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1222 && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1224 if (GET_MODE (value) != mode)
1225 value = convert_to_mode (mode, value, 1);
1227 if (must_and)
1228 value = expand_binop (mode, and_optab, value,
1229 mask_rtx (mode, 0, bitsize, 0),
1230 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1231 if (bitnum > 0)
1232 value = expand_shift (LSHIFT_EXPR, mode, value,
1233 bitnum, NULL_RTX, 1);
1236 if (reverse)
1237 value = flip_storage_order (mode, value);
1239 /* Now clear the chosen bits in OP0,
1240 except that if VALUE is -1 we need not bother. */
1241 /* We keep the intermediates in registers to allow CSE to combine
1242 consecutive bitfield assignments. */
1244 temp = force_reg (mode, op0);
1246 if (! all_one)
1248 rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
1249 if (reverse)
1250 mask = flip_storage_order (mode, mask);
1251 temp = expand_binop (mode, and_optab, temp, mask,
1252 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1253 temp = force_reg (mode, temp);
1256 /* Now logical-or VALUE into OP0, unless it is zero. */
1258 if (! all_zero)
1260 temp = expand_binop (mode, ior_optab, temp, value,
1261 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1262 temp = force_reg (mode, temp);
1265 if (op0 != temp)
1267 op0 = copy_rtx (op0);
1268 emit_move_insn (op0, temp);
1272 /* Store a bit field that is split across multiple accessible memory objects.
1274 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1275 BITSIZE is the field width; BITPOS the position of its first bit
1276 (within the word).
1277 VALUE is the value to store.
1279 If REVERSE is true, the store is to be done in reverse order.
1281 This does not yet handle fields wider than BITS_PER_WORD. */
1283 static void
1284 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1285 unsigned HOST_WIDE_INT bitpos,
1286 unsigned HOST_WIDE_INT bitregion_start,
1287 unsigned HOST_WIDE_INT bitregion_end,
1288 rtx value, bool reverse)
1290 unsigned int unit, total_bits, bitsdone = 0;
1292 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1293 much at a time. */
1294 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1295 unit = BITS_PER_WORD;
1296 else
1297 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1299 /* If OP0 is a memory with a mode, then UNIT must not be larger than
1300 OP0's mode as well. Otherwise, store_fixed_bit_field will call us
1301 again, and we will mutually recurse forever. */
1302 if (MEM_P (op0) && GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1303 unit = MIN (unit, GET_MODE_BITSIZE (GET_MODE (op0)));
1305 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1306 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1307 that VALUE might be a floating-point constant. */
1308 if (CONSTANT_P (value) && !CONST_INT_P (value))
1310 rtx word = gen_lowpart_common (word_mode, value);
1312 if (word && (value != word))
1313 value = word;
1314 else
1315 value = gen_lowpart_common (word_mode,
1316 force_reg (GET_MODE (value) != VOIDmode
1317 ? GET_MODE (value)
1318 : word_mode, value));
1321 total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1323 while (bitsdone < bitsize)
1325 unsigned HOST_WIDE_INT thissize;
1326 unsigned HOST_WIDE_INT thispos;
1327 unsigned HOST_WIDE_INT offset;
1328 rtx part, word;
1330 offset = (bitpos + bitsdone) / unit;
1331 thispos = (bitpos + bitsdone) % unit;
1333 /* When region of bytes we can touch is restricted, decrease
1334 UNIT close to the end of the region as needed. If op0 is a REG
1335 or SUBREG of REG, don't do this, as there can't be data races
1336 on a register and we can expand shorter code in some cases. */
1337 if (bitregion_end
1338 && unit > BITS_PER_UNIT
1339 && bitpos + bitsdone - thispos + unit > bitregion_end + 1
1340 && !REG_P (op0)
1341 && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1343 unit = unit / 2;
1344 continue;
1347 /* THISSIZE must not overrun a word boundary. Otherwise,
1348 store_fixed_bit_field will call us again, and we will mutually
1349 recurse forever. */
1350 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1351 thissize = MIN (thissize, unit - thispos);
1353 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1355 /* Fetch successively less significant portions. */
1356 if (CONST_INT_P (value))
1357 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1358 >> (bitsize - bitsdone - thissize))
1359 & ((HOST_WIDE_INT_1 << thissize) - 1));
1360 /* Likewise, but the source is little-endian. */
1361 else if (reverse)
1362 part = extract_fixed_bit_field (word_mode, value, thissize,
1363 bitsize - bitsdone - thissize,
1364 NULL_RTX, 1, false);
1365 else
1367 int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1368 /* The args are chosen so that the last part includes the
1369 lsb. Give extract_bit_field the value it needs (with
1370 endianness compensation) to fetch the piece we want. */
1371 part = extract_fixed_bit_field (word_mode, value, thissize,
1372 total_bits - bitsize + bitsdone,
1373 NULL_RTX, 1, false);
1376 else
1378 /* Fetch successively more significant portions. */
1379 if (CONST_INT_P (value))
1380 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1381 >> bitsdone)
1382 & ((HOST_WIDE_INT_1 << thissize) - 1));
1383 /* Likewise, but the source is big-endian. */
1384 else if (reverse)
1385 part = extract_fixed_bit_field (word_mode, value, thissize,
1386 total_bits - bitsdone - thissize,
1387 NULL_RTX, 1, false);
1388 else
1389 part = extract_fixed_bit_field (word_mode, value, thissize,
1390 bitsdone, NULL_RTX, 1, false);
1393 /* If OP0 is a register, then handle OFFSET here. */
1394 if (SUBREG_P (op0) || REG_P (op0))
1396 machine_mode op0_mode = GET_MODE (op0);
1397 if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1398 word = offset ? const0_rtx : op0;
1399 else
1400 word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
1401 GET_MODE (op0));
1402 offset &= BITS_PER_WORD / unit - 1;
1404 else
1405 word = op0;
1407 /* OFFSET is in UNITs, and UNIT is in bits. If WORD is const0_rtx,
1408 it is just an out-of-bounds access. Ignore it. */
1409 if (word != const0_rtx)
1410 store_fixed_bit_field (word, thissize, offset * unit + thispos,
1411 bitregion_start, bitregion_end, part,
1412 reverse);
1413 bitsdone += thissize;
1417 /* A subroutine of extract_bit_field_1 that converts return value X
1418 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1419 to extract_bit_field. */
1421 static rtx
1422 convert_extracted_bit_field (rtx x, machine_mode mode,
1423 machine_mode tmode, bool unsignedp)
1425 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1426 return x;
1428 /* If TMODE is not a scalar integral mode, first convert to the
1429 integer mode of the same size and then access the result as a
1430 value of mode TMODE via a SUBREG.  */
1431 if (!SCALAR_INT_MODE_P (tmode))
1433 machine_mode smode;
1435 smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1436 x = convert_to_mode (smode, x, unsignedp);
1437 x = force_reg (smode, x);
1438 return gen_lowpart (tmode, x);
1441 return convert_to_mode (tmode, x, unsignedp);
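/* For example, a value extracted for an SFmode TMODE is first converted
   to the integer mode of the same size (typically SImode), forced into a
   register, and then punned to SFmode with gen_lowpart.  */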
1444 /* Try to use an ext(z)v pattern to extract a field from OP0.
1445 Return the extracted value on success, otherwise return null.
1446 EXT_MODE is the mode of the extraction and the other arguments
1447 are as for extract_bit_field. */
1449 static rtx
1450 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1451 unsigned HOST_WIDE_INT bitsize,
1452 unsigned HOST_WIDE_INT bitnum,
1453 int unsignedp, rtx target,
1454 machine_mode mode, machine_mode tmode)
1456 struct expand_operand ops[4];
1457 rtx spec_target = target;
1458 rtx spec_target_subreg = 0;
1459 machine_mode ext_mode = extv->field_mode;
1460 unsigned unit = GET_MODE_BITSIZE (ext_mode);
1462 if (bitsize == 0 || unit < bitsize)
1463 return NULL_RTX;
1465 if (MEM_P (op0))
1466 /* Get a reference to the first byte of the field. */
1467 op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1468 &bitnum);
1469 else
1471 /* Convert from counting within OP0 to counting in EXT_MODE. */
1472 if (BYTES_BIG_ENDIAN)
1473 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1475 /* If op0 is a register, we need it in EXT_MODE to make it
1476 acceptable to the format of ext(z)v. */
1477 if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1478 return NULL_RTX;
1479 if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1480 op0 = gen_lowpart_SUBREG (ext_mode, op0);
1483 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1484 "backwards" from the size of the unit we are extracting from.
1485 Otherwise, we count bits from the most significant on a
1486 BYTES/BITS_BIG_ENDIAN machine. */
1488 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1489 bitnum = unit - bitsize - bitnum;
1491 if (target == 0)
1492 target = spec_target = gen_reg_rtx (tmode);
1494 if (GET_MODE (target) != ext_mode)
1496 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1497 between the mode of the extraction (word_mode) and the target
1498 mode. Instead, create a temporary and use convert_move to set
1499 the target. */
1500 if (REG_P (target)
1501 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1503 target = gen_lowpart (ext_mode, target);
1504 if (GET_MODE_PRECISION (ext_mode)
1505 > GET_MODE_PRECISION (GET_MODE (spec_target)))
1506 spec_target_subreg = target;
1508 else
1509 target = gen_reg_rtx (ext_mode);
1512 create_output_operand (&ops[0], target, ext_mode);
1513 create_fixed_operand (&ops[1], op0);
1514 create_integer_operand (&ops[2], bitsize);
1515 create_integer_operand (&ops[3], bitnum);
1516 if (maybe_expand_insn (extv->icode, 4, ops))
1518 target = ops[0].value;
1519 if (target == spec_target)
1520 return target;
1521 if (target == spec_target_subreg)
1522 return spec_target;
1523 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1525 return NULL_RTX;
1528 /* A subroutine of extract_bit_field, with the same arguments.
1529 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1530 if we can find no other means of implementing the operation.
1531 If FALLBACK_P is false, return NULL instead.  */
1533 static rtx
1534 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1535 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1536 machine_mode mode, machine_mode tmode,
1537 bool reverse, bool fallback_p, rtx *alt_rtl)
1539 rtx op0 = str_rtx;
1540 machine_mode int_mode;
1541 machine_mode mode1;
1543 if (tmode == VOIDmode)
1544 tmode = mode;
1546 while (GET_CODE (op0) == SUBREG)
1548 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1549 op0 = SUBREG_REG (op0);
1552 /* If we have an out-of-bounds access to a register, just return an
1553 uninitialized register of the required mode. This can occur if the
1554 source code contains an out-of-bounds access to a small array. */
1555 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1556 return gen_reg_rtx (tmode);
1558 if (REG_P (op0)
1559 && mode == GET_MODE (op0)
1560 && bitnum == 0
1561 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1563 if (reverse)
1564 op0 = flip_storage_order (mode, op0);
1565 /* We're trying to extract a full register from itself. */
1566 return op0;
1569 /* See if we can get a better vector mode before extracting. */
1570 if (VECTOR_MODE_P (GET_MODE (op0))
1571 && !MEM_P (op0)
1572 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1574 machine_mode new_mode;
1576 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1577 new_mode = MIN_MODE_VECTOR_FLOAT;
1578 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1579 new_mode = MIN_MODE_VECTOR_FRACT;
1580 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1581 new_mode = MIN_MODE_VECTOR_UFRACT;
1582 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1583 new_mode = MIN_MODE_VECTOR_ACCUM;
1584 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1585 new_mode = MIN_MODE_VECTOR_UACCUM;
1586 else
1587 new_mode = MIN_MODE_VECTOR_INT;
1589 for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1590 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1591 && GET_MODE_UNIT_SIZE (new_mode) == GET_MODE_SIZE (tmode)
1592 && targetm.vector_mode_supported_p (new_mode))
1593 break;
1594 if (new_mode != VOIDmode)
1595 op0 = gen_lowpart (new_mode, op0);
1598 /* Use vec_extract patterns for extracting parts of vectors whenever
1599 available. */
1600 if (VECTOR_MODE_P (GET_MODE (op0))
1601 && !MEM_P (op0)
1602 && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1603 && ((bitnum + bitsize - 1) / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
1604 == bitnum / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
1606 struct expand_operand ops[3];
1607 machine_mode outermode = GET_MODE (op0);
1608 machine_mode innermode = GET_MODE_INNER (outermode);
1609 enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1610 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1612 create_output_operand (&ops[0], target, innermode);
1613 ops[0].target = 1;
1614 create_input_operand (&ops[1], op0, outermode);
1615 create_integer_operand (&ops[2], pos);
1616 if (maybe_expand_insn (icode, 3, ops))
1618 if (alt_rtl && ops[0].target)
1619 *alt_rtl = target;
1620 target = ops[0].value;
1621 if (GET_MODE (target) != mode)
1622 return gen_lowpart (tmode, target);
1623 return target;
1627 /* Make sure we are playing with integral modes. Pun with subregs
1628 if we aren't. */
1630 machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1631 if (imode != GET_MODE (op0))
1633 if (MEM_P (op0))
1634 op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
1635 else if (imode != BLKmode)
1637 op0 = gen_lowpart (imode, op0);
1639 /* If we got a SUBREG, force it into a register since we
1640 aren't going to be able to do another SUBREG on it. */
1641 if (GET_CODE (op0) == SUBREG)
1642 op0 = force_reg (imode, op0);
1644 else
1646 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1647 rtx mem = assign_stack_temp (GET_MODE (op0), size);
1648 emit_move_insn (mem, op0);
1649 op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1654 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1655 If that's wrong, the solution is to test for it and set TARGET to 0
1656 if needed. */
1658 /* Get the mode of the field to use for atomic access or subreg
1659 conversion. */
1660 mode1 = mode;
1661 if (SCALAR_INT_MODE_P (tmode))
1663 machine_mode try_mode = mode_for_size (bitsize,
1664 GET_MODE_CLASS (tmode), 0);
1665 if (try_mode != BLKmode)
1666 mode1 = try_mode;
1668 gcc_assert (mode1 != BLKmode);
1670 /* Extraction of a full MODE1 value can be done with a subreg as long
1671 as the least significant bit of the value is the least significant
1672 bit of either OP0 or a word of OP0. */
1673 if (!MEM_P (op0)
1674 && !reverse
1675 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1676 && bitsize == GET_MODE_BITSIZE (mode1)
1677 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1679 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1680 bitnum / BITS_PER_UNIT);
1681 if (sub)
1682 return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1685 /* Extraction of a full MODE1 value can be done with a load as long as
1686 the field is on a byte boundary and is sufficiently aligned. */
1687 if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1689 op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1690 if (reverse)
1691 op0 = flip_storage_order (mode1, op0);
1692 return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1695 /* Handle fields bigger than a word. */
1697 if (bitsize > BITS_PER_WORD)
1699 /* Here we transfer the words of the field
1700 in the order least significant first.
1701 This is because the most significant word is the one which may
1702 be less than full. */
1704 const bool backwards = WORDS_BIG_ENDIAN;
1705 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1706 unsigned int i;
1707 rtx_insn *last;
1709 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1710 target = gen_reg_rtx (mode);
1712 /* In case we're about to clobber a base register or something
1713 (see gcc.c-torture/execute/20040625-1.c). */
1714 if (reg_mentioned_p (target, str_rtx))
1715 target = gen_reg_rtx (mode);
1717 /* Indicate for flow that the entire target reg is being set. */
1718 emit_clobber (target);
1720 last = get_last_insn ();
1721 for (i = 0; i < nwords; i++)
1723 /* If I is 0, use the low-order word in both field and target;
1724 if I is 1, use the next to lowest word; and so on. */
1725 /* Word number in TARGET to use. */
1726 unsigned int wordnum
1727 = (backwards
1728 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1729 : i);
1730 /* Offset from start of field in OP0. */
1731 unsigned int bit_offset = (backwards ^ reverse
1732 ? MAX ((int) bitsize - ((int) i + 1)
1733 * BITS_PER_WORD,
1734 0)
1735 : (int) i * BITS_PER_WORD);
1736 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1737 rtx result_part
1738 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1739 bitsize - i * BITS_PER_WORD),
1740 bitnum + bit_offset, 1, target_part,
1741 mode, word_mode, reverse, fallback_p, NULL);
1743 gcc_assert (target_part);
1744 if (!result_part)
1746 delete_insns_since (last);
1747 return NULL;
1750 if (result_part != target_part)
1751 emit_move_insn (target_part, result_part);
1754 if (unsignedp)
1756 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1757 need to be zero'd out. */
1758 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1760 unsigned int i, total_words;
1762 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1763 for (i = nwords; i < total_words; i++)
1764 emit_move_insn
1765 (operand_subword (target,
1766 backwards ? total_words - i - 1 : i,
1767 1, VOIDmode),
1768 const0_rtx);
1770 return target;
1773 /* Signed bit field: sign-extend with two arithmetic shifts. */
1774 target = expand_shift (LSHIFT_EXPR, mode, target,
1775 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1776 return expand_shift (RSHIFT_EXPR, mode, target,
1777 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1780 /* If OP0 is a multi-word register, narrow it to the affected word.
1781 If the region spans two words, defer to extract_split_bit_field. */
1782 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1784 if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1786 if (!fallback_p)
1787 return NULL_RTX;
1788 target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp,
1789 reverse);
1790 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1792 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1793 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1794 bitnum %= BITS_PER_WORD;
1797 /* From here on we know the desired field is smaller than a word.
1798 If OP0 is a register, it too fits within a word. */
1799 enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1800 extraction_insn extv;
1801 if (!MEM_P (op0)
1802 && !reverse
1803 /* ??? We could limit the structure size to the part of OP0 that
1804 contains the field, with appropriate checks for endianness
1805 and TRULY_NOOP_TRUNCATION. */
1806 && get_best_reg_extraction_insn (&extv, pattern,
1807 GET_MODE_BITSIZE (GET_MODE (op0)),
1808 tmode))
1810 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1811 unsignedp, target, mode,
1812 tmode);
1813 if (result)
1814 return result;
1817 /* If OP0 is a memory, try copying it to a register and seeing if a
1818 cheap register alternative is available. */
1819 if (MEM_P (op0) && !reverse)
1821 if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1822 tmode))
1824 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1825 bitnum, unsignedp,
1826 target, mode,
1827 tmode);
1828 if (result)
1829 return result;
1832 rtx_insn *last = get_last_insn ();
1834 /* Try loading part of OP0 into a register and extracting the
1835 bitfield from that. */
1836 unsigned HOST_WIDE_INT bitpos;
1837 rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1838 0, 0, tmode, &bitpos);
1839 if (xop0)
1841 xop0 = copy_to_reg (xop0);
1842 rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1843 unsignedp, target,
1844 mode, tmode, reverse, false, NULL);
1845 if (result)
1846 return result;
1847 delete_insns_since (last);
1851 if (!fallback_p)
1852 return NULL;
1854 /* Find a correspondingly-sized integer field, so we can apply
1855 shifts and masks to it. */
1856 int_mode = int_mode_for_mode (tmode);
1857 if (int_mode == BLKmode)
1858 int_mode = int_mode_for_mode (mode);
1859 /* Should probably push op0 out to memory and then do a load. */
1860 gcc_assert (int_mode != BLKmode);
1862 target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum, target,
1863 unsignedp, reverse);
1865 /* Complex values must be reversed piecewise, so we need to undo the global
1866 reversal, convert to the complex mode and reverse again. */
1867 if (reverse && COMPLEX_MODE_P (tmode))
1869 target = flip_storage_order (int_mode, target);
1870 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1871 target = flip_storage_order (tmode, target);
1873 else
1874 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1876 return target;
1879 /* Generate code to extract a byte-field from STR_RTX
1880 containing BITSIZE bits, starting at BITNUM,
1881 and put it in TARGET if possible (if TARGET is nonzero).
1882 Regardless of TARGET, we return the rtx for where the value is placed.
1884 STR_RTX is the structure containing the byte (a REG or MEM).
1885 UNSIGNEDP is nonzero if this is an unsigned bit field.
1886 MODE is the natural mode of the field value once extracted.
1887 TMODE is the mode the caller would like the value to have;
1888 but the value may be returned with type MODE instead.
1890 If REVERSE is true, the extraction is to be done in reverse order.
1892 If a TARGET is specified and we can store in it at no extra cost,
1893 we do so, and return TARGET.
1894 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1895 if they are equally easy. */
1897 rtx
1898 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1899 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1900 machine_mode mode, machine_mode tmode, bool reverse,
1901 rtx *alt_rtl)
1903 machine_mode mode1;
1905 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1906 if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1907 mode1 = GET_MODE (str_rtx);
1908 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1909 mode1 = GET_MODE (target);
1910 else
1911 mode1 = tmode;
1913 if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, mode1, 0, 0))
1915 /* Extraction of a full MODE1 value can be done with a simple load.
1916 We know here that the field can be accessed with one single
1917 instruction. For targets that support unaligned memory,
1918 an unaligned access may be necessary. */
1919 if (bitsize == GET_MODE_BITSIZE (mode1))
1921 rtx result = adjust_bitfield_address (str_rtx, mode1,
1922 bitnum / BITS_PER_UNIT);
1923 if (reverse)
1924 result = flip_storage_order (mode1, result);
1925 gcc_assert (bitnum % BITS_PER_UNIT == 0);
1926 return convert_extracted_bit_field (result, mode, tmode, unsignedp);
1929 str_rtx = narrow_bit_field_mem (str_rtx, mode1, bitsize, bitnum,
1930 &bitnum);
1931 gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (mode1));
1932 str_rtx = copy_to_reg (str_rtx);
1935 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1936 target, mode, tmode, reverse, true, alt_rtl);
1939 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1940 from bit BITNUM of OP0.
1942 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1943 If REVERSE is true, the extraction is to be done in reverse order.
1945 If TARGET is nonzero, attempts to store the value there
1946 and return TARGET, but this is not guaranteed.
1947 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
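/* As a rough illustration of the fixed-bit-field approach (endianness
   and memory-narrowing details are simplified here): extracting an
   unsigned 5-bit field whose lsb is bit 3 of a 32-bit value X amounts to
   (X >> 3) & 0x1f, i.e. a shift that moves the field to the lsb followed
   by a mask of BITSIZE ones.  */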
1949 static rtx
1950 extract_fixed_bit_field (machine_mode tmode, rtx op0,
1951 unsigned HOST_WIDE_INT bitsize,
1952 unsigned HOST_WIDE_INT bitnum, rtx target,
1953 int unsignedp, bool reverse)
1955 if (MEM_P (op0))
1957 machine_mode mode
1958 = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), word_mode,
1959 MEM_VOLATILE_P (op0));
1961 if (mode == VOIDmode)
1962 /* The only way this should occur is if the field spans word
1963 boundaries. */
1964 return extract_split_bit_field (op0, bitsize, bitnum, unsignedp,
1965 reverse);
1967 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1970 return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
1971 target, unsignedp, reverse);
1974 /* Helper function for extract_fixed_bit_field, extracts
1975 the bit field always using the MODE of OP0. */
1977 static rtx
1978 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0,
1979 unsigned HOST_WIDE_INT bitsize,
1980 unsigned HOST_WIDE_INT bitnum, rtx target,
1981 int unsignedp, bool reverse)
1983 machine_mode mode = GET_MODE (op0);
1984 gcc_assert (SCALAR_INT_MODE_P (mode));
1986 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1987 for invalid input, such as extract equivalent of f5 from
1988 gcc.dg/pr48335-2.c. */
1990 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1991 /* BITNUM is the distance between our msb and that of OP0.
1992 Convert it to the distance from the lsb. */
1993 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1995 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
1996 We have reduced the big-endian case to the little-endian case. */
1997 if (reverse)
1998 op0 = flip_storage_order (mode, op0);
2000 if (unsignedp)
2002 if (bitnum)
2004 /* If the field does not already start at the lsb,
2005 shift it so it does. */
2006 /* Maybe propagate the target for the shift. */
2007 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2008 if (tmode != mode)
2009 subtarget = 0;
2010 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
2012 /* Convert the value to the desired mode. */
2013 if (mode != tmode)
2014 op0 = convert_to_mode (tmode, op0, 1);
2016 /* Unless the msb of the field used to be the msb when we shifted,
2017 mask out the upper bits. */
2019 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
2020 return expand_binop (GET_MODE (op0), and_optab, op0,
2021 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
2022 target, 1, OPTAB_LIB_WIDEN);
2023 return op0;
2026 /* To extract a signed bit-field, first shift its msb to the msb of the word,
2027 then arithmetic-shift its lsb to the lsb of the word. */
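/* For instance, with a 32-bit word a signed 5-bit field whose lsb is
   bit 3 of X can be obtained (ignoring endianness details) as
   (int) (X << 24) >> 27: the left shift puts the field's msb in the
   word's msb, and the arithmetic right shift replicates it as the
   sign bit.  */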
2028 op0 = force_reg (mode, op0);
2030 /* Find the narrowest integer mode that contains the field. */
2032 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
2033 mode = GET_MODE_WIDER_MODE (mode))
2034 if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
2036 op0 = convert_to_mode (mode, op0, 0);
2037 break;
2040 if (mode != tmode)
2041 target = 0;
2043 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2045 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2046 /* Maybe propagate the target for the shift. */
2047 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2048 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2051 return expand_shift (RSHIFT_EXPR, mode, op0,
2052 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
2055 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2056 VALUE << BITPOS. */
2058 static rtx
2059 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2060 int bitpos)
2062 return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2065 /* Extract a bit field that is split across two words
2066 and return an RTX for the result.
2068 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2069 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2070 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2072 If REVERSE is true, the extraction is to be done in reverse order. */
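/* A small example of the split case: on a 32-bit word target, a 20-bit
   field starting at bit 24 of the structure covers bits 24..31 of word 0
   and bits 0..11 of word 1.  The loop below extracts each piece as an
   unsigned field, shifts it into position and ORs it into the result,
   sign-extending at the end when UNSIGNEDP is zero.  */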
2074 static rtx
2075 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
2076 unsigned HOST_WIDE_INT bitpos, int unsignedp,
2077 bool reverse)
2079 unsigned int unit;
2080 unsigned int bitsdone = 0;
2081 rtx result = NULL_RTX;
2082 int first = 1;
2084 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2085 much at a time. */
2086 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2087 unit = BITS_PER_WORD;
2088 else
2089 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2091 while (bitsdone < bitsize)
2093 unsigned HOST_WIDE_INT thissize;
2094 rtx part, word;
2095 unsigned HOST_WIDE_INT thispos;
2096 unsigned HOST_WIDE_INT offset;
2098 offset = (bitpos + bitsdone) / unit;
2099 thispos = (bitpos + bitsdone) % unit;
2101 /* THISSIZE must not overrun a word boundary. Otherwise,
2102 extract_fixed_bit_field will call us again, and we will mutually
2103 recurse forever. */
2104 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2105 thissize = MIN (thissize, unit - thispos);
2107 /* If OP0 is a register, then handle OFFSET here. */
2108 if (SUBREG_P (op0) || REG_P (op0))
2110 word = operand_subword_force (op0, offset, GET_MODE (op0));
2111 offset = 0;
2113 else
2114 word = op0;
2116 /* Extract the parts in bit-counting order,
2117 whose meaning is determined by BYTES_PER_UNIT.
2118 OFFSET is in UNITs, and UNIT is in bits. */
2119 part = extract_fixed_bit_field (word_mode, word, thissize,
2120 offset * unit + thispos, 0, 1, reverse);
2121 bitsdone += thissize;
2123 /* Shift this part into place for the result. */
2124 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2126 if (bitsize != bitsdone)
2127 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2128 bitsize - bitsdone, 0, 1);
2130 else
2132 if (bitsdone != thissize)
2133 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2134 bitsdone - thissize, 0, 1);
2137 if (first)
2138 result = part;
2139 else
2140 /* Combine the parts with bitwise or. This works
2141 because we extracted each part as an unsigned bit field. */
2142 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2143 OPTAB_LIB_WIDEN);
2145 first = 0;
2148 /* Unsigned bit field: we are done. */
2149 if (unsignedp)
2150 return result;
2151 /* Signed bit field: sign-extend with two arithmetic shifts. */
2152 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2153 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2154 return expand_shift (RSHIFT_EXPR, word_mode, result,
2155 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2158 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2159 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2160 MODE, fill the upper bits with zeros. Fail if the layout of either
2161 mode is unknown (as for CC modes) or if the extraction would involve
2162 unprofitable mode punning. Return the value on success, otherwise
2163 return null.
2165 This is different from gen_lowpart* in these respects:
2167 - the returned value must always be considered an rvalue
2169 - when MODE is wider than SRC_MODE, the extraction involves
2170 a zero extension
2172 - when MODE is smaller than SRC_MODE, the extraction involves
2173 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2175 In other words, this routine performs a computation, whereas the
2176 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2177 operations. */
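/* For example, assuming a target where the modes involved are tieable,
   extract_low_bits (SImode, DImode, x) behaves as a truncation to the
   low 32 bits of X, while extract_low_bits (DImode, SImode, x)
   zero-extends X to 64 bits; in both cases the result is a fresh rvalue
   computation rather than a subreg lvalue.  */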
2179 rtx
2180 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2182 machine_mode int_mode, src_int_mode;
2184 if (mode == src_mode)
2185 return src;
2187 if (CONSTANT_P (src))
2189 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2190 fails, it will happily create (subreg (symbol_ref)) or similar
2191 invalid SUBREGs. */
2192 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2193 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2194 if (ret)
2195 return ret;
2197 if (GET_MODE (src) == VOIDmode
2198 || !validate_subreg (mode, src_mode, src, byte))
2199 return NULL_RTX;
2201 src = force_reg (GET_MODE (src), src);
2202 return gen_rtx_SUBREG (mode, src, byte);
2205 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2206 return NULL_RTX;
2208 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2209 && MODES_TIEABLE_P (mode, src_mode))
2211 rtx x = gen_lowpart_common (mode, src);
2212 if (x)
2213 return x;
2216 src_int_mode = int_mode_for_mode (src_mode);
2217 int_mode = int_mode_for_mode (mode);
2218 if (src_int_mode == BLKmode || int_mode == BLKmode)
2219 return NULL_RTX;
2221 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2222 return NULL_RTX;
2223 if (!MODES_TIEABLE_P (int_mode, mode))
2224 return NULL_RTX;
2226 src = gen_lowpart (src_int_mode, src);
2227 src = convert_modes (int_mode, src_int_mode, src, true);
2228 src = gen_lowpart (mode, src);
2229 return src;
2232 /* Add INC into TARGET. */
2234 void
2235 expand_inc (rtx target, rtx inc)
2237 rtx value = expand_binop (GET_MODE (target), add_optab,
2238 target, inc,
2239 target, 0, OPTAB_LIB_WIDEN);
2240 if (value != target)
2241 emit_move_insn (target, value);
2244 /* Subtract DEC from TARGET. */
2246 void
2247 expand_dec (rtx target, rtx dec)
2249 rtx value = expand_binop (GET_MODE (target), sub_optab,
2250 target, dec,
2251 target, 0, OPTAB_LIB_WIDEN);
2252 if (value != target)
2253 emit_move_insn (target, value);
2256 /* Output a shift instruction for expression code CODE,
2257 with SHIFTED being the rtx for the value to shift,
2258 and AMOUNT the rtx for the amount to shift by.
2259 Store the result in the rtx TARGET, if that is convenient.
2260 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2261 Return the rtx for where the value is.
2262 If that cannot be done, abort the compilation unless MAY_FAIL is true,
2263 in which case 0 is returned. */
2265 static rtx
2266 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2267 rtx amount, rtx target, int unsignedp, bool may_fail = false)
2269 rtx op1, temp = 0;
2270 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2271 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2272 optab lshift_optab = ashl_optab;
2273 optab rshift_arith_optab = ashr_optab;
2274 optab rshift_uns_optab = lshr_optab;
2275 optab lrotate_optab = rotl_optab;
2276 optab rrotate_optab = rotr_optab;
2277 machine_mode op1_mode;
2278 machine_mode scalar_mode = mode;
2279 int attempt;
2280 bool speed = optimize_insn_for_speed_p ();
2282 if (VECTOR_MODE_P (mode))
2283 scalar_mode = GET_MODE_INNER (mode);
2284 op1 = amount;
2285 op1_mode = GET_MODE (op1);
2287 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2288 shift amount is a vector, use the vector/vector shift patterns. */
2289 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2291 lshift_optab = vashl_optab;
2292 rshift_arith_optab = vashr_optab;
2293 rshift_uns_optab = vlshr_optab;
2294 lrotate_optab = vrotl_optab;
2295 rrotate_optab = vrotr_optab;
2298 /* Previously detected shift-counts computed by NEGATE_EXPR
2299 and shifted in the other direction; but that does not work
2300 on all machines. */
2302 if (SHIFT_COUNT_TRUNCATED)
2304 if (CONST_INT_P (op1)
2305 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2306 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2307 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2308 % GET_MODE_BITSIZE (scalar_mode));
2309 else if (GET_CODE (op1) == SUBREG
2310 && subreg_lowpart_p (op1)
2311 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2312 && SCALAR_INT_MODE_P (GET_MODE (op1)))
2313 op1 = SUBREG_REG (op1);
2316 /* Canonicalize rotates by constant amount. If op1 is bitsize / 2,
2317 prefer left rotation, if op1 is from bitsize / 2 + 1 to
2318 bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2319 amount instead. */
2320 if (rotate
2321 && CONST_INT_P (op1)
2322 && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2323 GET_MODE_BITSIZE (scalar_mode) - 1))
2325 op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2326 left = !left;
2327 code = left ? LROTATE_EXPR : RROTATE_EXPR;
2330 /* Rotation of 16bit values by 8 bits is effectively equivalent to a bswaphi.
2331 Note that this is not the case for bigger values. For instance a rotation
2332 of 0x01020304 by 16 bits gives 0x03040102 which is different from
2333 0x04030201 (bswapsi). */
2334 if (rotate
2335 && CONST_INT_P (op1)
2336 && INTVAL (op1) == BITS_PER_UNIT
2337 && GET_MODE_SIZE (scalar_mode) == 2
2338 && optab_handler (bswap_optab, HImode) != CODE_FOR_nothing)
2339 return expand_unop (HImode, bswap_optab, shifted, NULL_RTX,
2340 unsignedp);
2342 if (op1 == const0_rtx)
2343 return shifted;
2345 /* Check whether it's cheaper to implement a left shift by a constant
2346 bit count by a sequence of additions. */
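/* E.g. X << 2 can be computed with two additions as T = X + X; T + T.
   A shift by K costs K additions this way, so this only wins when the
   target's shift-by-K is unusually expensive.  */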
2347 if (code == LSHIFT_EXPR
2348 && CONST_INT_P (op1)
2349 && INTVAL (op1) > 0
2350 && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2351 && INTVAL (op1) < MAX_BITS_PER_WORD
2352 && (shift_cost (speed, mode, INTVAL (op1))
2353 > INTVAL (op1) * add_cost (speed, mode))
2354 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2356 int i;
2357 for (i = 0; i < INTVAL (op1); i++)
2359 temp = force_reg (mode, shifted);
2360 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2361 unsignedp, OPTAB_LIB_WIDEN);
2363 return shifted;
2366 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2368 enum optab_methods methods;
2370 if (attempt == 0)
2371 methods = OPTAB_DIRECT;
2372 else if (attempt == 1)
2373 methods = OPTAB_WIDEN;
2374 else
2375 methods = OPTAB_LIB_WIDEN;
2377 if (rotate)
2379 /* Widening does not work for rotation. */
2380 if (methods == OPTAB_WIDEN)
2381 continue;
2382 else if (methods == OPTAB_LIB_WIDEN)
2384 /* If we have been unable to open-code this by a rotation,
2385 do it as the IOR of two shifts. I.e., to rotate A
2386 by N bits, compute
2387 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2388 where C is the bitsize of A.
2390 It is theoretically possible that the target machine might
2391 not be able to perform either shift and hence we would
2392 be making two libcalls rather than just the one for the
2393 shift (similarly if IOR could not be done). We will allow
2394 this extremely unlikely lossage to avoid complicating the
2395 code below. */
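/* Concretely, for a 32-bit mode this computes
   (A << N) | ((unsigned) A >> ((-N) & 31)),
   e.g. a rotate left by 3 becomes (A << 3) | ((unsigned) A >> 29);
   masking the negated count keeps the right-shift amount in the range
   0..31 even when N is not a compile-time constant.  */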
2397 rtx subtarget = target == shifted ? 0 : target;
2398 rtx new_amount, other_amount;
2399 rtx temp1;
2401 new_amount = op1;
2402 if (op1 == const0_rtx)
2403 return shifted;
2404 else if (CONST_INT_P (op1))
2405 other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode)
2406 - INTVAL (op1));
2407 else
2409 other_amount
2410 = simplify_gen_unary (NEG, GET_MODE (op1),
2411 op1, GET_MODE (op1));
2412 HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2413 other_amount
2414 = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2415 gen_int_mode (mask, GET_MODE (op1)));
2418 shifted = force_reg (mode, shifted);
2420 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2421 mode, shifted, new_amount, 0, 1);
2422 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2423 mode, shifted, other_amount,
2424 subtarget, 1);
2425 return expand_binop (mode, ior_optab, temp, temp1, target,
2426 unsignedp, methods);
2429 temp = expand_binop (mode,
2430 left ? lrotate_optab : rrotate_optab,
2431 shifted, op1, target, unsignedp, methods);
2433 else if (unsignedp)
2434 temp = expand_binop (mode,
2435 left ? lshift_optab : rshift_uns_optab,
2436 shifted, op1, target, unsignedp, methods);
2438 /* Do arithmetic shifts.
2439 Also, if we are going to widen the operand, we can just as well
2440 use an arithmetic right-shift instead of a logical one. */
2441 if (temp == 0 && ! rotate
2442 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2444 enum optab_methods methods1 = methods;
2446 /* If trying to widen a log shift to an arithmetic shift,
2447 don't accept an arithmetic shift of the same size. */
2448 if (unsignedp)
2449 methods1 = OPTAB_MUST_WIDEN;
2451 /* Arithmetic shift */
2453 temp = expand_binop (mode,
2454 left ? lshift_optab : rshift_arith_optab,
2455 shifted, op1, target, unsignedp, methods1);
2458 /* We used to try extzv here for logical right shifts, but that was
2459 only useful for one machine, the VAX, and caused poor code
2460 generation there for lshrdi3, so the code was deleted and a
2461 define_expand for lshrsi3 was added to vax.md. */
2464 gcc_assert (temp != NULL_RTX || may_fail);
2465 return temp;
2468 /* Output a shift instruction for expression code CODE,
2469 with SHIFTED being the rtx for the value to shift,
2470 and AMOUNT the amount to shift by.
2471 Store the result in the rtx TARGET, if that is convenient.
2472 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2473 Return the rtx for where the value is. */
2475 rtx
2476 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2477 int amount, rtx target, int unsignedp)
2479 return expand_shift_1 (code, mode,
2480 shifted, GEN_INT (amount), target, unsignedp);
2483 /* Likewise, but return 0 if that cannot be done. */
2485 static rtx
2486 maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2487 int amount, rtx target, int unsignedp)
2489 return expand_shift_1 (code, mode,
2490 shifted, GEN_INT (amount), target, unsignedp, true);
2493 /* Output a shift instruction for expression code CODE,
2494 with SHIFTED being the rtx for the value to shift,
2495 and AMOUNT the tree for the amount to shift by.
2496 Store the result in the rtx TARGET, if that is convenient.
2497 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2498 Return the rtx for where the value is. */
2500 rtx
2501 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2502 tree amount, rtx target, int unsignedp)
2504 return expand_shift_1 (code, mode,
2505 shifted, expand_normal (amount), target, unsignedp);
2509 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2510 const struct mult_cost *, machine_mode mode);
2511 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2512 const struct algorithm *, enum mult_variant);
2513 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2514 static rtx extract_high_half (machine_mode, rtx);
2515 static rtx expmed_mult_highpart (machine_mode, rtx, rtx, rtx, int, int);
2516 static rtx expmed_mult_highpart_optab (machine_mode, rtx, rtx, rtx,
2517 int, int);
2518 /* Compute and return the best algorithm for multiplying by T.
2519 The algorithm must cost less than cost_limit.
2520 If retval.cost >= COST_LIMIT, no algorithm was found and all
2521 other fields of the returned struct are undefined.
2522 MODE is the machine mode of the multiplication. */
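/* For example, a multiplication by 10 can be synthesized as
   ((X << 2) + X) << 1, two cheap operations instead of a general
   multiply; synth_mult searches recursively for such decompositions,
   pruning with COST_LIMIT.  */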
2524 static void
2525 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2526 const struct mult_cost *cost_limit, machine_mode mode)
2528 int m;
2529 struct algorithm *alg_in, *best_alg;
2530 struct mult_cost best_cost;
2531 struct mult_cost new_limit;
2532 int op_cost, op_latency;
2533 unsigned HOST_WIDE_INT orig_t = t;
2534 unsigned HOST_WIDE_INT q;
2535 int maxm, hash_index;
2536 bool cache_hit = false;
2537 enum alg_code cache_alg = alg_zero;
2538 bool speed = optimize_insn_for_speed_p ();
2539 machine_mode imode;
2540 struct alg_hash_entry *entry_ptr;
2542 /* Indicate that no algorithm is yet found. If no algorithm
2543 is found, this value will be returned and indicate failure. */
2544 alg_out->cost.cost = cost_limit->cost + 1;
2545 alg_out->cost.latency = cost_limit->latency + 1;
2547 if (cost_limit->cost < 0
2548 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2549 return;
2551 /* Be prepared for vector modes. */
2552 imode = GET_MODE_INNER (mode);
2554 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2556 /* Restrict the bits of "t" to the multiplication's mode. */
2557 t &= GET_MODE_MASK (imode);
2559 /* t == 1 can be done in zero cost. */
2560 if (t == 1)
2562 alg_out->ops = 1;
2563 alg_out->cost.cost = 0;
2564 alg_out->cost.latency = 0;
2565 alg_out->op[0] = alg_m;
2566 return;
2569 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2570 fail now. */
2571 if (t == 0)
2573 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2574 return;
2575 else
2577 alg_out->ops = 1;
2578 alg_out->cost.cost = zero_cost (speed);
2579 alg_out->cost.latency = zero_cost (speed);
2580 alg_out->op[0] = alg_zero;
2581 return;
2585 /* We'll be needing a couple extra algorithm structures now. */
2587 alg_in = XALLOCA (struct algorithm);
2588 best_alg = XALLOCA (struct algorithm);
2589 best_cost = *cost_limit;
2591 /* Compute the hash index. */
2592 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2594 /* See if we already know what to do for T. */
2595 entry_ptr = alg_hash_entry_ptr (hash_index);
2596 if (entry_ptr->t == t
2597 && entry_ptr->mode == mode
2598 && entry_ptr->speed == speed
2599 && entry_ptr->alg != alg_unknown)
2601 cache_alg = entry_ptr->alg;
2603 if (cache_alg == alg_impossible)
2605 /* The cache tells us that it's impossible to synthesize
2606 multiplication by T within entry_ptr->cost. */
2607 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2608 /* COST_LIMIT is at least as restrictive as the one
2609 recorded in the hash table, in which case we have no
2610 hope of synthesizing a multiplication. Just
2611 return. */
2612 return;
2614 /* If we get here, COST_LIMIT is less restrictive than the
2615 one recorded in the hash table, so we may be able to
2616 synthesize a multiplication. Proceed as if we didn't
2617 have the cache entry. */
2619 else
2621 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2622 /* The cached algorithm shows that this multiplication
2623 requires more cost than COST_LIMIT. Just return. This
2624 way, we don't clobber this cache entry with
2625 alg_impossible but retain useful information. */
2626 return;
2628 cache_hit = true;
2630 switch (cache_alg)
2632 case alg_shift:
2633 goto do_alg_shift;
2635 case alg_add_t_m2:
2636 case alg_sub_t_m2:
2637 goto do_alg_addsub_t_m2;
2639 case alg_add_factor:
2640 case alg_sub_factor:
2641 goto do_alg_addsub_factor;
2643 case alg_add_t2_m:
2644 goto do_alg_add_t2_m;
2646 case alg_sub_t2_m:
2647 goto do_alg_sub_t2_m;
2649 default:
2650 gcc_unreachable ();
2655 /* If we have a group of zero bits at the low-order part of T, try
2656 multiplying by the remaining bits and then doing a shift. */
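/* E.g. T = 20 = 5 << 2: synthesize the multiplication by 5 and follow
   it with a left shift by 2.  */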
2658 if ((t & 1) == 0)
2660 do_alg_shift:
2661 m = ctz_or_zero (t); /* m = number of low zero bits */
2662 if (m < maxm)
2664 q = t >> m;
2665 /* The function expand_shift will choose between a shift and
2666 a sequence of additions, so the observed cost is given as
2667 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2668 op_cost = m * add_cost (speed, mode);
2669 if (shift_cost (speed, mode, m) < op_cost)
2670 op_cost = shift_cost (speed, mode, m);
2671 new_limit.cost = best_cost.cost - op_cost;
2672 new_limit.latency = best_cost.latency - op_cost;
2673 synth_mult (alg_in, q, &new_limit, mode);
2675 alg_in->cost.cost += op_cost;
2676 alg_in->cost.latency += op_cost;
2677 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2679 best_cost = alg_in->cost;
2680 std::swap (alg_in, best_alg);
2681 best_alg->log[best_alg->ops] = m;
2682 best_alg->op[best_alg->ops] = alg_shift;
2685 /* See if treating ORIG_T as a signed number yields a better
2686 sequence. Try this sequence only for a negative ORIG_T
2687 as it would be useless for a non-negative ORIG_T. */
2688 if ((HOST_WIDE_INT) orig_t < 0)
2690 /* Shift ORIG_T as follows because a right shift of a
2691 negative-valued signed type is implementation
2692 defined. */
2693 q = ~(~orig_t >> m);
2694 /* The function expand_shift will choose between a shift
2695 and a sequence of additions, so the observed cost is
2696 given as MIN (m * add_cost(speed, mode),
2697 shift_cost(speed, mode, m)). */
2698 op_cost = m * add_cost (speed, mode);
2699 if (shift_cost (speed, mode, m) < op_cost)
2700 op_cost = shift_cost (speed, mode, m);
2701 new_limit.cost = best_cost.cost - op_cost;
2702 new_limit.latency = best_cost.latency - op_cost;
2703 synth_mult (alg_in, q, &new_limit, mode);
2705 alg_in->cost.cost += op_cost;
2706 alg_in->cost.latency += op_cost;
2707 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2709 best_cost = alg_in->cost;
2710 std::swap (alg_in, best_alg);
2711 best_alg->log[best_alg->ops] = m;
2712 best_alg->op[best_alg->ops] = alg_shift;
2716 if (cache_hit)
2717 goto done;
2720 /* If we have an odd number, add or subtract one. */
2721 if ((t & 1) != 0)
2723 unsigned HOST_WIDE_INT w;
2725 do_alg_addsub_t_m2:
2726 for (w = 1; (w & t) != 0; w <<= 1)
2728 /* If T was -1, then W will be zero after the loop. This is another
2729 case where T ends with ...111. Handling this with (T + 1) and
2730 subtract 1 produces slightly better code and results in algorithm
2731 selection much faster than treating it like the ...0111 case
2732 below. */
2733 if (w == 0
2734 || (w > 2
2735 /* Reject the case where t is 3.
2736 Thus we prefer addition in that case. */
2737 && t != 3))
2739 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2741 op_cost = add_cost (speed, mode);
2742 new_limit.cost = best_cost.cost - op_cost;
2743 new_limit.latency = best_cost.latency - op_cost;
2744 synth_mult (alg_in, t + 1, &new_limit, mode);
2746 alg_in->cost.cost += op_cost;
2747 alg_in->cost.latency += op_cost;
2748 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2750 best_cost = alg_in->cost;
2751 std::swap (alg_in, best_alg);
2752 best_alg->log[best_alg->ops] = 0;
2753 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2756 else
2758 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2760 op_cost = add_cost (speed, mode);
2761 new_limit.cost = best_cost.cost - op_cost;
2762 new_limit.latency = best_cost.latency - op_cost;
2763 synth_mult (alg_in, t - 1, &new_limit, mode);
2765 alg_in->cost.cost += op_cost;
2766 alg_in->cost.latency += op_cost;
2767 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2769 best_cost = alg_in->cost;
2770 std::swap (alg_in, best_alg);
2771 best_alg->log[best_alg->ops] = 0;
2772 best_alg->op[best_alg->ops] = alg_add_t_m2;
2776 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2777 quickly with a - a * n for some appropriate constant n. */
2778 m = exact_log2 (-orig_t + 1);
2779 if (m >= 0 && m < maxm)
2781 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2782 /* If the target has a cheap shift-and-subtract insn use
2783 that in preference to a shift insn followed by a sub insn.
2784 Assume that the shift-and-sub is "atomic" with a latency
2785 equal to its cost, otherwise assume that on superscalar
2786 hardware the shift may be executed concurrently with the
2787 earlier steps in the algorithm. */
2788 if (shiftsub1_cost (speed, mode, m) <= op_cost)
2790 op_cost = shiftsub1_cost (speed, mode, m);
2791 op_latency = op_cost;
2793 else
2794 op_latency = add_cost (speed, mode);
2796 new_limit.cost = best_cost.cost - op_cost;
2797 new_limit.latency = best_cost.latency - op_latency;
2798 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2799 &new_limit, mode);
2801 alg_in->cost.cost += op_cost;
2802 alg_in->cost.latency += op_latency;
2803 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2805 best_cost = alg_in->cost;
2806 std::swap (alg_in, best_alg);
2807 best_alg->log[best_alg->ops] = m;
2808 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2812 if (cache_hit)
2813 goto done;
2816 /* Look for factors of t of the form
2817 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2818 If we find such a factor, we can multiply by t using an algorithm that
2819 multiplies by q, shift the result by m and add/subtract it to itself.
2821 We search for large factors first and loop down, even if large factors
2822 are less probable than small; if we find a large factor we will find a
2823 good sequence quickly, and therefore be able to prune (by decreasing
2824 COST_LIMIT) the search. */
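/* E.g. T = 45 = 5 * (2^3 + 1): synthesize Q = X * 5 and then compute
   (Q << 3) + Q.  */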
2826 do_alg_addsub_factor:
2827 for (m = floor_log2 (t - 1); m >= 2; m--)
2829 unsigned HOST_WIDE_INT d;
2831 d = (HOST_WIDE_INT_1U << m) + 1;
2832 if (t % d == 0 && t > d && m < maxm
2833 && (!cache_hit || cache_alg == alg_add_factor))
2835 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2836 if (shiftadd_cost (speed, mode, m) <= op_cost)
2837 op_cost = shiftadd_cost (speed, mode, m);
2839 op_latency = op_cost;
2842 new_limit.cost = best_cost.cost - op_cost;
2843 new_limit.latency = best_cost.latency - op_latency;
2844 synth_mult (alg_in, t / d, &new_limit, mode);
2846 alg_in->cost.cost += op_cost;
2847 alg_in->cost.latency += op_latency;
2848 if (alg_in->cost.latency < op_cost)
2849 alg_in->cost.latency = op_cost;
2850 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2852 best_cost = alg_in->cost;
2853 std::swap (alg_in, best_alg);
2854 best_alg->log[best_alg->ops] = m;
2855 best_alg->op[best_alg->ops] = alg_add_factor;
2857 /* Other factors will have been taken care of in the recursion. */
2858 break;
2861 d = (HOST_WIDE_INT_1U << m) - 1;
2862 if (t % d == 0 && t > d && m < maxm
2863 && (!cache_hit || cache_alg == alg_sub_factor))
2865 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2866 if (shiftsub0_cost (speed, mode, m) <= op_cost)
2867 op_cost = shiftsub0_cost (speed, mode, m);
2869 op_latency = op_cost;
2871 new_limit.cost = best_cost.cost - op_cost;
2872 new_limit.latency = best_cost.latency - op_latency;
2873 synth_mult (alg_in, t / d, &new_limit, mode);
2875 alg_in->cost.cost += op_cost;
2876 alg_in->cost.latency += op_latency;
2877 if (alg_in->cost.latency < op_cost)
2878 alg_in->cost.latency = op_cost;
2879 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2881 best_cost = alg_in->cost;
2882 std::swap (alg_in, best_alg);
2883 best_alg->log[best_alg->ops] = m;
2884 best_alg->op[best_alg->ops] = alg_sub_factor;
2886 break;
2889 if (cache_hit)
2890 goto done;
2892 /* Try shift-and-add (load effective address) instructions,
2893 i.e. do a*3, a*5, a*9. */
2894 if ((t & 1) != 0)
2896 do_alg_add_t2_m:
2897 q = t - 1;
2898 m = ctz_hwi (q);
2899 if (q && m < maxm)
2901 op_cost = shiftadd_cost (speed, mode, m);
2902 new_limit.cost = best_cost.cost - op_cost;
2903 new_limit.latency = best_cost.latency - op_cost;
2904 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2906 alg_in->cost.cost += op_cost;
2907 alg_in->cost.latency += op_cost;
2908 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2910 best_cost = alg_in->cost;
2911 std::swap (alg_in, best_alg);
2912 best_alg->log[best_alg->ops] = m;
2913 best_alg->op[best_alg->ops] = alg_add_t2_m;
2916 if (cache_hit)
2917 goto done;
2919 do_alg_sub_t2_m:
2920 q = t + 1;
2921 m = ctz_hwi (q);
2922 if (q && m < maxm)
2924 op_cost = shiftsub0_cost (speed, mode, m);
2925 new_limit.cost = best_cost.cost - op_cost;
2926 new_limit.latency = best_cost.latency - op_cost;
2927 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2929 alg_in->cost.cost += op_cost;
2930 alg_in->cost.latency += op_cost;
2931 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2933 best_cost = alg_in->cost;
2934 std::swap (alg_in, best_alg);
2935 best_alg->log[best_alg->ops] = m;
2936 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2939 if (cache_hit)
2940 goto done;
2943 done:
2944 /* If best_cost has not decreased, we have not found any algorithm. */
2945 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2947 /* We failed to find an algorithm. Record alg_impossible for
2948 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2949 we are asked to find an algorithm for T within the same or
2950 lower COST_LIMIT, we can immediately return to the
2951 caller. */
2952 entry_ptr->t = t;
2953 entry_ptr->mode = mode;
2954 entry_ptr->speed = speed;
2955 entry_ptr->alg = alg_impossible;
2956 entry_ptr->cost = *cost_limit;
2957 return;
2960 /* Cache the result. */
2961 if (!cache_hit)
2963 entry_ptr->t = t;
2964 entry_ptr->mode = mode;
2965 entry_ptr->speed = speed;
2966 entry_ptr->alg = best_alg->op[best_alg->ops];
2967 entry_ptr->cost.cost = best_cost.cost;
2968 entry_ptr->cost.latency = best_cost.latency;
2971 /* If we are getting a too long sequence for `struct algorithm'
2972 to record, make this search fail. */
2973 if (best_alg->ops == MAX_BITS_PER_WORD)
2974 return;
2976 /* Copy the algorithm from temporary space to the space at alg_out.
2977 We avoid using structure assignment because the majority of
2978 best_alg is normally undefined, and this is a critical function. */
2979 alg_out->ops = best_alg->ops + 1;
2980 alg_out->cost = best_cost;
2981 memcpy (alg_out->op, best_alg->op,
2982 alg_out->ops * sizeof *alg_out->op);
2983 memcpy (alg_out->log, best_alg->log,
2984 alg_out->ops * sizeof *alg_out->log);
2987 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2988 Try three variations:
2990 - a shift/add sequence based on VAL itself
2991 - a shift/add sequence based on -VAL, followed by a negation
2992 - a shift/add sequence based on VAL - 1, followed by an addition.
2994 Return true if the cheapest of these cost less than MULT_COST,
2995 describing the algorithm in *ALG and final fixup in *VARIANT. */
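/* For example, a multiplication by -5 is typically cheapest as the
   shift/add sequence for 5, i.e. (X << 2) + X, followed by a negation;
   that corresponds to negate_variant below.  */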
2997 bool
2998 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
2999 struct algorithm *alg, enum mult_variant *variant,
3000 int mult_cost)
3002 struct algorithm alg2;
3003 struct mult_cost limit;
3004 int op_cost;
3005 bool speed = optimize_insn_for_speed_p ();
3007 /* Fail quickly for impossible bounds. */
3008 if (mult_cost < 0)
3009 return false;
3011 /* Ensure that mult_cost provides a reasonable upper bound.
3012 Any constant multiplication can be performed with less
3013 than 2 * bits additions. */
3014 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
3015 if (mult_cost > op_cost)
3016 mult_cost = op_cost;
3018 *variant = basic_variant;
3019 limit.cost = mult_cost;
3020 limit.latency = mult_cost;
3021 synth_mult (alg, val, &limit, mode);
3023 /* This works only if the inverted value actually fits in an
3024 `unsigned int' */
3025 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
3027 op_cost = neg_cost (speed, mode);
3028 if (MULT_COST_LESS (&alg->cost, mult_cost))
3030 limit.cost = alg->cost.cost - op_cost;
3031 limit.latency = alg->cost.latency - op_cost;
3033 else
3035 limit.cost = mult_cost - op_cost;
3036 limit.latency = mult_cost - op_cost;
3039 synth_mult (&alg2, -val, &limit, mode);
3040 alg2.cost.cost += op_cost;
3041 alg2.cost.latency += op_cost;
3042 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3043 *alg = alg2, *variant = negate_variant;
3046 /* This proves very useful for division-by-constant. */
3047 op_cost = add_cost (speed, mode);
3048 if (MULT_COST_LESS (&alg->cost, mult_cost))
3050 limit.cost = alg->cost.cost - op_cost;
3051 limit.latency = alg->cost.latency - op_cost;
3053 else
3055 limit.cost = mult_cost - op_cost;
3056 limit.latency = mult_cost - op_cost;
3059 synth_mult (&alg2, val - 1, &limit, mode);
3060 alg2.cost.cost += op_cost;
3061 alg2.cost.latency += op_cost;
3062 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3063 *alg = alg2, *variant = add_variant;
3065 return MULT_COST_LESS (&alg->cost, mult_cost);
3068 /* A subroutine of expand_mult, used for constant multiplications.
3069 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3070 convenient. Use the shift/add sequence described by ALG and apply
3071 the final fixup specified by VARIANT. */
3073 static rtx
3074 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
3075 rtx target, const struct algorithm *alg,
3076 enum mult_variant variant)
3078 unsigned HOST_WIDE_INT val_so_far;
3079 rtx_insn *insn;
3080 rtx accum, tem;
3081 int opno;
3082 machine_mode nmode;
3084 /* Avoid referencing memory over and over and invalid sharing
3085 on SUBREGs. */
3086 op0 = force_reg (mode, op0);
3088 /* ACCUM starts out either as OP0 or as a zero, depending on
3089 the first operation. */
3091 if (alg->op[0] == alg_zero)
3093 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3094 val_so_far = 0;
3096 else if (alg->op[0] == alg_m)
3098 accum = copy_to_mode_reg (mode, op0);
3099 val_so_far = 1;
3101 else
3102 gcc_unreachable ();
3104 for (opno = 1; opno < alg->ops; opno++)
3106 int log = alg->log[opno];
3107 rtx shift_subtarget = optimize ? 0 : accum;
3108 rtx add_target
3109 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3110 && !optimize)
3111 ? target : 0;
3112 rtx accum_target = optimize ? 0 : accum;
3113 rtx accum_inner;
3115 switch (alg->op[opno])
3117 case alg_shift:
3118 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3119 /* REG_EQUAL note will be attached to the following insn. */
3120 emit_move_insn (accum, tem);
3121 val_so_far <<= log;
3122 break;
3124 case alg_add_t_m2:
3125 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3126 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3127 add_target ? add_target : accum_target);
3128 val_so_far += HOST_WIDE_INT_1U << log;
3129 break;
3131 case alg_sub_t_m2:
3132 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3133 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3134 add_target ? add_target : accum_target);
3135 val_so_far -= HOST_WIDE_INT_1U << log;
3136 break;
3138 case alg_add_t2_m:
3139 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3140 log, shift_subtarget, 0);
3141 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3142 add_target ? add_target : accum_target);
3143 val_so_far = (val_so_far << log) + 1;
3144 break;
3146 case alg_sub_t2_m:
3147 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3148 log, shift_subtarget, 0);
3149 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3150 add_target ? add_target : accum_target);
3151 val_so_far = (val_so_far << log) - 1;
3152 break;
3154 case alg_add_factor:
3155 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3156 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3157 add_target ? add_target : accum_target);
3158 val_so_far += val_so_far << log;
3159 break;
3161 case alg_sub_factor:
3162 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3163 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3164 (add_target
3165 ? add_target : (optimize ? 0 : tem)));
3166 val_so_far = (val_so_far << log) - val_so_far;
3167 break;
3169 default:
3170 gcc_unreachable ();
3173 if (SCALAR_INT_MODE_P (mode))
3175 /* Write a REG_EQUAL note on the last insn so that we can cse
3176 multiplication sequences. Note that if ACCUM is a SUBREG,
3177 we've set the inner register and must properly indicate that. */
3178 tem = op0, nmode = mode;
3179 accum_inner = accum;
3180 if (GET_CODE (accum) == SUBREG)
3182 accum_inner = SUBREG_REG (accum);
3183 nmode = GET_MODE (accum_inner);
3184 tem = gen_lowpart (nmode, op0);
3187 insn = get_last_insn ();
3188 set_dst_reg_note (insn, REG_EQUAL,
3189 gen_rtx_MULT (nmode, tem,
3190 gen_int_mode (val_so_far, nmode)),
3191 accum_inner);
3195 if (variant == negate_variant)
3197 val_so_far = -val_so_far;
3198 accum = expand_unop (mode, neg_optab, accum, target, 0);
3200 else if (variant == add_variant)
3202 val_so_far = val_so_far + 1;
3203 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3206 /* Compare only the bits of val and val_so_far that are significant
3207 in the result mode, to avoid sign-/zero-extension confusion. */
3208 nmode = GET_MODE_INNER (mode);
3209 val &= GET_MODE_MASK (nmode);
3210 val_so_far &= GET_MODE_MASK (nmode);
3211 gcc_assert (val == (HOST_WIDE_INT) val_so_far);
3213 return accum;
3216 /* Perform a multiplication and return an rtx for the result.
3217 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3218 TARGET is a suggestion for where to store the result (an rtx).
3220 We check specially for a constant integer as OP1.
3221 If you want this check for OP0 as well, then before calling
3222 you should swap the two operands if OP0 would be constant. */
3224 rtx
3225 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3226 int unsignedp)
3228 enum mult_variant variant;
3229 struct algorithm algorithm;
3230 rtx scalar_op1;
3231 int max_cost;
3232 bool speed = optimize_insn_for_speed_p ();
3233 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3235 if (CONSTANT_P (op0))
3236 std::swap (op0, op1);
3238 /* For vectors, there are several simplifications that can be made if
3239 all elements of the vector constant are identical. */
3240 scalar_op1 = unwrap_const_vec_duplicate (op1);
3242 if (INTEGRAL_MODE_P (mode))
3244 rtx fake_reg;
3245 HOST_WIDE_INT coeff;
3246 bool is_neg;
3247 int mode_bitsize;
3249 if (op1 == CONST0_RTX (mode))
3250 return op1;
3251 if (op1 == CONST1_RTX (mode))
3252 return op0;
3253 if (op1 == CONSTM1_RTX (mode))
3254 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3255 op0, target, 0);
3257 if (do_trapv)
3258 goto skip_synth;
3260 /* If mode is integer vector mode, check if the backend supports
3261 vector lshift (by scalar or vector) at all. If not, we can't use
3262 synthesized multiply. */
3263 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3264 && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3265 && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3266 goto skip_synth;
3268 /* These are the operations that are potentially turned into
3269 a sequence of shifts and additions. */
3270 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3272 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3273 less than or equal in size to `unsigned int' this doesn't matter.
3274 If the mode is larger than `unsigned int', then synth_mult works
3275 only if the constant value exactly fits in an `unsigned int' without
3276 any truncation. This means that multiplying by negative values does
3277 not work; results are off by 2^32 on a 32 bit machine. */
3278 if (CONST_INT_P (scalar_op1))
3280 coeff = INTVAL (scalar_op1);
3281 is_neg = coeff < 0;
3283 #if TARGET_SUPPORTS_WIDE_INT
3284 else if (CONST_WIDE_INT_P (scalar_op1))
3285 #else
3286 else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3287 #endif
3289 int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode));
3290 /* Perfect power of 2 (other than 1, which is handled above). */
3291 if (shift > 0)
3292 return expand_shift (LSHIFT_EXPR, mode, op0,
3293 shift, target, unsignedp);
3294 else
3295 goto skip_synth;
3297 else
3298 goto skip_synth;
3300 /* We used to test optimize here, on the grounds that it's better to
3301 produce a smaller program when -O is not used. But this causes
3302 such a terrible slowdown sometimes that it seems better to always
3303 use synth_mult. */
3305 /* Special case powers of two. */
3306 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3307 && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3308 return expand_shift (LSHIFT_EXPR, mode, op0,
3309 floor_log2 (coeff), target, unsignedp);
3311 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3313 /* Attempt to handle multiplication of DImode values by negative
3314 coefficients, by performing the multiplication by a positive
3315 multiplier and then inverting the result. */
3316 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3318 /* It's safe to use -coeff even for INT_MIN, as the
3319 result is interpreted as an unsigned coefficient.
3320 Exclude cost of op0 from max_cost to match the cost
3321 calculation of the synth_mult. */
3322 coeff = -(unsigned HOST_WIDE_INT) coeff;
3323 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3324 mode, speed)
3325 - neg_cost (speed, mode));
3326 if (max_cost <= 0)
3327 goto skip_synth;
3329 /* Special case powers of two. */
3330 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3332 rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3333 floor_log2 (coeff), target, unsignedp);
3334 return expand_unop (mode, neg_optab, temp, target, 0);
3337 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3338 max_cost))
3340 rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3341 &algorithm, variant);
3342 return expand_unop (mode, neg_optab, temp, target, 0);
3344 goto skip_synth;
3347 /* Exclude cost of op0 from max_cost to match the cost
3348 calculation of the synth_mult. */
3349 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3350 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3351 return expand_mult_const (mode, op0, coeff, target,
3352 &algorithm, variant);
3354 skip_synth:
3356 /* Expand x*2.0 as x+x. */
3357 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3358 && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
3360 op0 = force_reg (GET_MODE (op0), op0);
3361 return expand_binop (mode, add_optab, op0, op0,
3362 target, unsignedp, OPTAB_LIB_WIDEN);
3365 /* This used to use umul_optab if unsigned, but for non-widening multiply
3366 there is no difference between signed and unsigned. */
3367 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3368 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3369 gcc_assert (op0);
3370 return op0;
3373 /* Return a cost estimate for multiplying a register by the given
3374 COEFFicient in the given MODE and SPEED. */
3376 int
3377 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3379 int max_cost;
3380 struct algorithm algorithm;
3381 enum mult_variant variant;
3383 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3384 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3385 mode, speed);
3386 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3387 return algorithm.cost.cost;
3388 else
3389 return max_cost;
3392 /* Perform a widening multiplication and return an rtx for the result.
3393 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3394 TARGET is a suggestion for where to store the result (an rtx).
3395 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3396 or smul_widen_optab.
3398 We check specially for a constant integer as OP1, comparing the
3399 cost of a widening multiply against the cost of a sequence of shifts
3400 and adds. */
3402 rtx
3403 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3404 int unsignedp, optab this_optab)
3406 bool speed = optimize_insn_for_speed_p ();
3407 rtx cop1;
3409 if (CONST_INT_P (op1)
3410 && GET_MODE (op0) != VOIDmode
3411 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3412 this_optab == umul_widen_optab))
3413 && CONST_INT_P (cop1)
3414 && (INTVAL (cop1) >= 0
3415 || HWI_COMPUTABLE_MODE_P (mode)))
3417 HOST_WIDE_INT coeff = INTVAL (cop1);
3418 int max_cost;
3419 enum mult_variant variant;
3420 struct algorithm algorithm;
3422 if (coeff == 0)
3423 return CONST0_RTX (mode);
3425 /* Special case powers of two. */
3426 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3428 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3429 return expand_shift (LSHIFT_EXPR, mode, op0,
3430 floor_log2 (coeff), target, unsignedp);
3433 /* Exclude cost of op0 from max_cost to match the cost
3434 calculation of the synth_mult. */
3435 max_cost = mul_widen_cost (speed, mode);
3436 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3437 max_cost))
3439 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3440 return expand_mult_const (mode, op0, coeff, target,
3441 &algorithm, variant);
3444 return expand_binop (mode, this_optab, op0, op1, target,
3445 unsignedp, OPTAB_LIB_WIDEN);
3448 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3449 replace division by D, and put the least significant N bits of the result
3450 in *MULTIPLIER_PTR and return the most significant bit.
3452 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3453 needed precision is in PRECISION (should be <= N).
3455 PRECISION should be as small as possible so this function can choose
3456 multiplier more freely.
3458 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3459 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3461 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3462 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
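/* A classic instance, with N = PRECISION = 32 and D = 3: the formulas
   above yield the multiplier 0xAAAAAAAB with *POST_SHIFT_PTR = 1 and a
   return value of 0, so an unsigned X / 3 becomes the high 32 bits of
   the 64-bit product X * 0xAAAAAAAB, shifted right by 1.  */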
3464 unsigned HOST_WIDE_INT
3465 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3466 unsigned HOST_WIDE_INT *multiplier_ptr,
3467 int *post_shift_ptr, int *lgup_ptr)
3469 int lgup, post_shift;
3470 int pow, pow2;
3472 /* lgup = ceil(log2(divisor)); */
3473 lgup = ceil_log2 (d);
3475 gcc_assert (lgup <= n);
3477 pow = n + lgup;
3478 pow2 = n + lgup - precision;
3480 /* mlow = 2^(N + lgup)/d */
3481 wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3482 wide_int mlow = wi::udiv_trunc (val, d);
3484 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3485 val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3486 wide_int mhigh = wi::udiv_trunc (val, d);
3488 /* If precision == N, then mlow, mhigh exceed 2^N
3489 (but they do not exceed 2^(N+1)). */
3491 /* Reduce to lowest terms. */
3492 for (post_shift = lgup; post_shift > 0; post_shift--)
3494 unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3495 HOST_BITS_PER_WIDE_INT);
3496 unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3497 HOST_BITS_PER_WIDE_INT);
3498 if (ml_lo >= mh_lo)
3499 break;
3501 mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3502 mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3505 *post_shift_ptr = post_shift;
3506 *lgup_ptr = lgup;
3507 if (n < HOST_BITS_PER_WIDE_INT)
3509 unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;
3510 *multiplier_ptr = mhigh.to_uhwi () & mask;
3511 return mhigh.to_uhwi () >= mask;
3513 else
3515 *multiplier_ptr = mhigh.to_uhwi ();
3516 return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
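/* Worked example (illustrative, not part of the original source): for
   N == 32, PRECISION == 32 and D == 5, lgup == 3, mhigh reduces to
   0xCCCCCCCD with *POST_SHIFT_PTR == 2, and the function returns 0
   because the multiplier fits in N bits.  An unsigned 32-bit division
   by 5 can therefore be expanded as a multiply-high plus a shift:

     #include <stdint.h>

     uint32_t udiv5 (uint32_t x)
     {
       // high half of the 32x32->64 product, then the post shift
       return (uint32_t) (((uint64_t) x * 0xCCCCCCCDu) >> 32) >> 2;
     }

   When the returned most significant bit is nonzero (e.g. D == 7, where
   the multiplier is 0x24924925), the caller instead uses the
   add-and-shift fix-up sequence emitted in expand_divmod.  */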
3520 /* Compute the inverse of X mod 2**N, i.e., find Y such that X * Y is
3521 congruent to 1 (mod 2**N). */
3523 static unsigned HOST_WIDE_INT
3524 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3526 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3528 /* The algorithm notes that the choice y = x satisfies
3529 x*y == 1 mod 2^3, since x is assumed odd.
3530 Each iteration doubles the number of bits of significance in y. */
3532 unsigned HOST_WIDE_INT mask;
3533 unsigned HOST_WIDE_INT y = x;
3534 int nbit = 3;
3536 mask = (n == HOST_BITS_PER_WIDE_INT
3537 ? HOST_WIDE_INT_M1U
3538 : (HOST_WIDE_INT_1U << n) - 1);
3540 while (nbit < n)
3542 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3543 nbit *= 2;
3545 return y;
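/* Illustrative sketch (not part of the original source): the same
   Newton-Raphson iteration written out for a fixed 32-bit word.  If
   x*y == 1 (mod 2^k) then x*(y*(2 - x*y)) == 1 (mod 2^(2k)), so the
   number of correct low-order bits doubles each step and four steps
   reach 32 bits from the initial 3:

     #include <stdint.h>

     uint32_t invert_mod2_32 (uint32_t x)   // x must be odd
     {
       uint32_t y = x;          // correct mod 2^3 for any odd x
       y = y * (2 - x * y);     // correct mod 2^6
       y = y * (2 - x * y);     // correct mod 2^12
       y = y * (2 - x * y);     // correct mod 2^24
       y = y * (2 - x * y);     // correct mod 2^48, hence mod 2^32
       return y;
     }

   e.g. invert_mod2_32 (7) == 0xB6DB6DB7, and 7 * 0xB6DB6DB7 == 1 (mod 2^32).  */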
3548 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3549 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3550 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3551 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3552 become signed.
3554 The result is put in TARGET if that is convenient.
3556 MODE is the mode of operation. */
3559 expand_mult_highpart_adjust (machine_mode mode, rtx adj_operand, rtx op0,
3560 rtx op1, rtx target, int unsignedp)
3562 rtx tem;
3563 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3565 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3566 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3567 tem = expand_and (mode, tem, op1, NULL_RTX);
3568 adj_operand
3569 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3570 adj_operand);
3572 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3573 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3574 tem = expand_and (mode, tem, op0, NULL_RTX);
3575 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3576 target);
3578 return target;
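/* Illustrative identity (not part of the original source): writing
   UMULH and SMULH for the unsigned and signed high halves of an N-bit
   product, the adjustment above implements, modulo 2^N,

     UMULH (a, b) = SMULH (a, b) + (a < 0 ? b : 0) + (b < 0 ? a : 0)

   and the reverse direction with the additions replaced by
   subtractions.  A minimal 32-bit sketch of the unsigned-from-signed
   direction:

     #include <stdint.h>

     uint32_t umulh_from_smulh (int32_t a, int32_t b, uint32_t smulh)
     {
       smulh += (a < 0 ? (uint32_t) b : 0);
       smulh += (b < 0 ? (uint32_t) a : 0);
       return smulh;
     }
*/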
3581 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3583 static rtx
3584 extract_high_half (machine_mode mode, rtx op)
3586 machine_mode wider_mode;
3588 if (mode == word_mode)
3589 return gen_highpart (mode, op);
3591 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3593 wider_mode = GET_MODE_WIDER_MODE (mode);
3594 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3595 GET_MODE_BITSIZE (mode), 0, 1);
3596 return convert_modes (mode, wider_mode, op, 0);
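/* Illustrative sketch (not part of the original source): the
   widening-multiply-then-extract-high-half strategy used by the callers
   below corresponds, for a 32-bit mode with a 64-bit wider mode, to:

     #include <stdint.h>

     uint32_t mul_highpart_u32 (uint32_t a, uint32_t b)
     {
       return (uint32_t) (((uint64_t) a * b) >> 32);
     }
*/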
3599 /* Like expmed_mult_highpart, but only consider using a multiplication
3600 optab. OP1 is an rtx for the constant operand. */
3602 static rtx
3603 expmed_mult_highpart_optab (machine_mode mode, rtx op0, rtx op1,
3604 rtx target, int unsignedp, int max_cost)
3606 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3607 machine_mode wider_mode;
3608 optab moptab;
3609 rtx tem;
3610 int size;
3611 bool speed = optimize_insn_for_speed_p ();
3613 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3615 wider_mode = GET_MODE_WIDER_MODE (mode);
3616 size = GET_MODE_BITSIZE (mode);
3618 /* Firstly, try using a multiplication insn that only generates the needed
3619 high part of the product, and in the sign flavor of unsignedp. */
3620 if (mul_highpart_cost (speed, mode) < max_cost)
3622 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3623 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3624 unsignedp, OPTAB_DIRECT);
3625 if (tem)
3626 return tem;
3629 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3630 Need to adjust the result after the multiplication. */
3631 if (size - 1 < BITS_PER_WORD
3632 && (mul_highpart_cost (speed, mode)
3633 + 2 * shift_cost (speed, mode, size-1)
3634 + 4 * add_cost (speed, mode) < max_cost))
3636 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3637 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3638 unsignedp, OPTAB_DIRECT);
3639 if (tem)
3640 /* We used the wrong signedness. Adjust the result. */
3641 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3642 tem, unsignedp);
3645 /* Try widening multiplication. */
3646 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3647 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3648 && mul_widen_cost (speed, wider_mode) < max_cost)
3650 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3651 unsignedp, OPTAB_WIDEN);
3652 if (tem)
3653 return extract_high_half (mode, tem);
3656 /* Try widening the mode and performing a non-widening multiplication. */
3657 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3658 && size - 1 < BITS_PER_WORD
3659 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3660 < max_cost))
3662 rtx_insn *insns;
3663 rtx wop0, wop1;
3665 /* We need to widen the operands, for example to ensure the
3666 constant multiplier is correctly sign or zero extended.
3667 Use a sequence to clean up any instructions emitted by
3668 the conversions if things don't work out. */
3669 start_sequence ();
3670 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3671 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3672 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3673 unsignedp, OPTAB_WIDEN);
3674 insns = get_insns ();
3675 end_sequence ();
3677 if (tem)
3679 emit_insn (insns);
3680 return extract_high_half (mode, tem);
3684 /* Try widening multiplication of opposite signedness, and adjust. */
3685 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3686 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3687 && size - 1 < BITS_PER_WORD
3688 && (mul_widen_cost (speed, wider_mode)
3689 + 2 * shift_cost (speed, mode, size-1)
3690 + 4 * add_cost (speed, mode) < max_cost))
3692 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3693 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3694 if (tem != 0)
3696 tem = extract_high_half (mode, tem);
3697 /* We used the wrong signedness. Adjust the result. */
3698 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3699 target, unsignedp);
3703 return 0;
3706 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3707 putting the high half of the result in TARGET if that is convenient,
3708 and return where the result is. If the operation cannot be performed,
3709 0 is returned.
3711 MODE is the mode of operation and result.
3713 UNSIGNEDP nonzero means unsigned multiply.
3715 MAX_COST is the total allowed cost for the expanded RTL. */
3717 static rtx
3718 expmed_mult_highpart (machine_mode mode, rtx op0, rtx op1,
3719 rtx target, int unsignedp, int max_cost)
3721 machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3722 unsigned HOST_WIDE_INT cnst1;
3723 int extra_cost;
3724 bool sign_adjust = false;
3725 enum mult_variant variant;
3726 struct algorithm alg;
3727 rtx tem;
3728 bool speed = optimize_insn_for_speed_p ();
3730 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3731 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3732 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3734 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3736 /* We can't optimize modes wider than BITS_PER_WORD.
3737 ??? We might be able to perform double-word arithmetic if
3738 mode == word_mode, however all the cost calculations in
3739 synth_mult etc. assume single-word operations. */
3740 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3741 return expmed_mult_highpart_optab (mode, op0, op1, target,
3742 unsignedp, max_cost);
3744 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3746 /* Check whether we try to multiply by a negative constant. */
3747 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3749 sign_adjust = true;
3750 extra_cost += add_cost (speed, mode);
3753 /* See whether shift/add multiplication is cheap enough. */
3754 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3755 max_cost - extra_cost))
3757 /* See whether the specialized multiplication optabs are
3758 cheaper than the shift/add version. */
3759 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3760 alg.cost.cost + extra_cost);
3761 if (tem)
3762 return tem;
3764 tem = convert_to_mode (wider_mode, op0, unsignedp);
3765 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3766 tem = extract_high_half (mode, tem);
3768 /* Adjust result for signedness. */
3769 if (sign_adjust)
3770 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3772 return tem;
3774 return expmed_mult_highpart_optab (mode, op0, op1, target,
3775 unsignedp, max_cost);
3779 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3781 static rtx
3782 expand_smod_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3784 rtx result, temp, shift;
3785 rtx_code_label *label;
3786 int logd;
3787 int prec = GET_MODE_PRECISION (mode);
3789 logd = floor_log2 (d);
3790 result = gen_reg_rtx (mode);
3792 /* Avoid conditional branches when they're expensive. */
3793 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3794 && optimize_insn_for_speed_p ())
3796 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3797 mode, 0, -1);
3798 if (signmask)
3800 HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
3801 signmask = force_reg (mode, signmask);
3802 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3804 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3805 which instruction sequence to use. If logical right shifts
3806 are expensive then use 2 XORs, 2 SUBs and an AND; otherwise
3807 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
3809 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3810 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3811 || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
3812 > COSTS_N_INSNS (2)))
3814 temp = expand_binop (mode, xor_optab, op0, signmask,
3815 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3816 temp = expand_binop (mode, sub_optab, temp, signmask,
3817 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3818 temp = expand_binop (mode, and_optab, temp,
3819 gen_int_mode (masklow, mode),
3820 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3821 temp = expand_binop (mode, xor_optab, temp, signmask,
3822 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3823 temp = expand_binop (mode, sub_optab, temp, signmask,
3824 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3826 else
3828 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3829 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3830 signmask = force_reg (mode, signmask);
3832 temp = expand_binop (mode, add_optab, op0, signmask,
3833 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3834 temp = expand_binop (mode, and_optab, temp,
3835 gen_int_mode (masklow, mode),
3836 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3837 temp = expand_binop (mode, sub_optab, temp, signmask,
3838 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3840 return temp;
3844 /* Mask contains the mode's signbit and the significant bits of the
3845 modulus. By including the signbit in the operation, many targets
3846 can avoid an explicit compare operation in the following comparison
3847 against zero. */
3848 wide_int mask = wi::mask (logd, false, prec);
3849 mask = wi::set_bit (mask, prec - 1);
3851 temp = expand_binop (mode, and_optab, op0,
3852 immed_wide_int_const (mask, mode),
3853 result, 1, OPTAB_LIB_WIDEN);
3854 if (temp != result)
3855 emit_move_insn (result, temp);
3857 label = gen_label_rtx ();
3858 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3860 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3861 0, OPTAB_LIB_WIDEN);
3863 mask = wi::mask (logd, true, prec);
3864 temp = expand_binop (mode, ior_optab, temp,
3865 immed_wide_int_const (mask, mode),
3866 result, 1, OPTAB_LIB_WIDEN);
3867 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3868 0, OPTAB_LIB_WIDEN);
3869 if (temp != result)
3870 emit_move_insn (result, temp);
3871 emit_label (label);
3872 return result;
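/* Illustrative sketch (not part of the original source): the branch-free
   path above, specialised to a 32-bit operand and D == 8 (logd == 3) and
   using the cheap-LSHIFTRT variant:

     #include <stdint.h>

     int32_t smod8 (int32_t x)
     {
       int32_t signmask = x < 0 ? -1 : 0;                // emit_store_flag result
       uint32_t bias = (uint32_t) signmask >> (32 - 3);  // 0 or 7
       return (int32_t) ((((uint32_t) x + bias) & 7u) - bias);
     }

   e.g. smod8 (-9) == -1 and smod8 (5) == 5, matching C's truncating
   remainder.  */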
3875 /* Expand signed division of OP0 by a power of two D in mode MODE.
3876 This routine is only called for positive values of D. */
3878 static rtx
3879 expand_sdiv_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3881 rtx temp;
3882 rtx_code_label *label;
3883 int logd;
3885 logd = floor_log2 (d);
3887 if (d == 2
3888 && BRANCH_COST (optimize_insn_for_speed_p (),
3889 false) >= 1)
3891 temp = gen_reg_rtx (mode);
3892 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3893 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3894 0, OPTAB_LIB_WIDEN);
3895 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3898 if (HAVE_conditional_move
3899 && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
3901 rtx temp2;
3903 start_sequence ();
3904 temp2 = copy_to_mode_reg (mode, op0);
3905 temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
3906 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3907 temp = force_reg (mode, temp);
3909 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3910 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3911 mode, temp, temp2, mode, 0);
3912 if (temp2)
3914 rtx_insn *seq = get_insns ();
3915 end_sequence ();
3916 emit_insn (seq);
3917 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3919 end_sequence ();
3922 if (BRANCH_COST (optimize_insn_for_speed_p (),
3923 false) >= 2)
3925 int ushift = GET_MODE_BITSIZE (mode) - logd;
3927 temp = gen_reg_rtx (mode);
3928 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3929 if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
3930 || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3931 > COSTS_N_INSNS (1))
3932 temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
3933 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3934 else
3935 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3936 ushift, NULL_RTX, 1);
3937 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3938 0, OPTAB_LIB_WIDEN);
3939 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3942 label = gen_label_rtx ();
3943 temp = copy_to_mode_reg (mode, op0);
3944 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3945 expand_inc (temp, gen_int_mode (d - 1, mode));
3946 emit_label (label);
3947 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
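/* Illustrative sketch (not part of the original source): the
   store-flag-based path above, for a 32-bit operand and D == 8
   (logd == 3), amounts to biasing negative operands by D - 1 before the
   arithmetic shift so that the shift truncates towards zero:

     #include <stdint.h>

     int32_t sdiv8 (int32_t x)
     {
       int32_t neg = x < 0 ? -1 : 0;                 // emit_store_flag result
       uint32_t bias = (uint32_t) neg >> (32 - 3);   // 0 or 7, i.e. neg & 7
       return (int32_t) ((uint32_t) x + bias) >> 3;  // assumes arithmetic >>
     }

   e.g. sdiv8 (-9) == -1 and sdiv8 (9) == 1, matching truncating
   division.  */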
3950 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3951 if that is convenient, and returning where the result is.
3952 You may request either the quotient or the remainder as the result;
3953 specify REM_FLAG nonzero to get the remainder.
3955 CODE is the expression code for which kind of division this is;
3956 it controls how rounding is done. MODE is the machine mode to use.
3957 UNSIGNEDP nonzero means do unsigned division. */
3959 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3960 and then correct it by or'ing in missing high bits
3961 if result of ANDI is nonzero.
3962 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3963 This could optimize to a bfexts instruction.
3964 But C doesn't use these operations, so their optimizations are
3965 left for later. */
3966 /* ??? For modulo, we don't actually need the highpart of the first product,
3967 the low part will do nicely. And for small divisors, the second multiply
3968 can also be a low-part only multiply or even be completely left out.
3969 E.g. to calculate the remainder of a division by 3 with a 32 bit
3970 multiply, multiply with 0x55555556 and extract the upper two bits;
3971 the result is exact for inputs up to 0x1fffffff.
3972 The input range can be reduced by using cross-sum rules.
3973 For odd divisors >= 3, the following table gives right shift counts
3974 so that if a number is shifted by an integer multiple of the given
3975 amount, the remainder stays the same:
3976 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3977 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3978 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3979 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3980 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3982 Cross-sum rules for even numbers can be derived by leaving as many bits
3983 to the right alone as the divisor has zeros to the right.
3984 E.g. if x is an unsigned 32 bit number:
3985 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
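/* Illustrative sketch (not part of the original source): the
   remainder-by-3 trick mentioned above, written out for 32-bit
   operands.  0x55555556 == (2^32 + 2)/3, so the top two bits of the low
   word of the product hold the remainder:

     #include <stdint.h>

     uint32_t rem3 (uint32_t x)    // exact for x <= 0x1fffffff
     {
       return (x * 0x55555556u) >> 30;
     }
*/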
3989 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
3990 rtx op0, rtx op1, rtx target, int unsignedp)
3992 machine_mode compute_mode;
3993 rtx tquotient;
3994 rtx quotient = 0, remainder = 0;
3995 rtx_insn *last;
3996 int size;
3997 rtx_insn *insn;
3998 optab optab1, optab2;
3999 int op1_is_constant, op1_is_pow2 = 0;
4000 int max_cost, extra_cost;
4001 static HOST_WIDE_INT last_div_const = 0;
4002 bool speed = optimize_insn_for_speed_p ();
4004 op1_is_constant = CONST_INT_P (op1);
4005 if (op1_is_constant)
4007 wide_int ext_op1 = rtx_mode_t (op1, mode);
4008 op1_is_pow2 = (wi::popcount (ext_op1) == 1
4009 || (! unsignedp
4010 && wi::popcount (wi::neg (ext_op1)) == 1));
4014 This is the structure of expand_divmod:
4016 First comes code to fix up the operands so we can perform the operations
4017 correctly and efficiently.
4019 Second comes a switch statement with code specific for each rounding mode.
4020 For some special operands this code emits all RTL for the desired
4021 operation; for other cases, it generates only a quotient and stores it in
4022 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
4023 to indicate that it has not done anything.
4025 Last comes code that finishes the operation. If QUOTIENT is set and
4026 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
4027 QUOTIENT is not set, it is computed using trunc rounding.
4029 We try to generate special code for division and remainder when OP1 is a
4030 constant. If |OP1| = 2**n we can use shifts and some other fast
4031 operations. For other values of OP1, we compute a carefully selected
4032 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4033 by m.
4035 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4036 half of the product. Different strategies for generating the product are
4037 implemented in expmed_mult_highpart.
4039 If what we actually want is the remainder, we generate that by another
4040 by-constant multiplication and a subtraction. */
4042 /* We shouldn't be called with OP1 == const1_rtx, but some of the
4043 code below will malfunction if we are, so check here and handle
4044 the special case if so. */
4045 if (op1 == const1_rtx)
4046 return rem_flag ? const0_rtx : op0;
4048 /* When dividing by -1, we could get an overflow.
4049 negv_optab can handle overflows. */
4050 if (! unsignedp && op1 == constm1_rtx)
4052 if (rem_flag)
4053 return const0_rtx;
4054 return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4055 ? negv_optab : neg_optab, op0, target, 0);
4058 if (target
4059 /* Don't use the function value register as a target
4060 since we have to read it as well as write it,
4061 and function-inlining gets confused by this. */
4062 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4063 /* Don't clobber an operand while doing a multi-step calculation. */
4064 || ((rem_flag || op1_is_constant)
4065 && (reg_mentioned_p (target, op0)
4066 || (MEM_P (op0) && MEM_P (target))))
4067 || reg_mentioned_p (target, op1)
4068 || (MEM_P (op1) && MEM_P (target))))
4069 target = 0;
4071 /* Get the mode in which to perform this computation. Normally it will
4072 be MODE, but sometimes we can't do the desired operation in MODE.
4073 If so, pick a wider mode in which we can do the operation. Convert
4074 to that mode at the start to avoid repeated conversions.
4076 First see what operations we need. These depend on the expression
4077 we are evaluating. (We assume that divxx3 insns exist under the
4078 same conditions as modxx3 insns, and that these insns don't normally
4079 fail. If these assumptions are not correct, we may generate less
4080 efficient code in some cases.)
4082 Then see if we find a mode in which we can open-code that operation
4083 (either a division, modulus, or shift). Finally, check for the smallest
4084 mode for which we can do the operation with a library call. */
4086 /* We might want to refine this now that we have division-by-constant
4087 optimization. Since expmed_mult_highpart tries so many variants, it is
4088 not straightforward to generalize this. Maybe we should make an array
4089 of possible modes in init_expmed? Save this for GCC 2.7. */
4091 optab1 = (op1_is_pow2
4092 ? (unsignedp ? lshr_optab : ashr_optab)
4093 : (unsignedp ? udiv_optab : sdiv_optab));
4094 optab2 = (op1_is_pow2 ? optab1
4095 : (unsignedp ? udivmod_optab : sdivmod_optab));
4097 for (compute_mode = mode; compute_mode != VOIDmode;
4098 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4099 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4100 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4101 break;
4103 if (compute_mode == VOIDmode)
4104 for (compute_mode = mode; compute_mode != VOIDmode;
4105 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4106 if (optab_libfunc (optab1, compute_mode)
4107 || optab_libfunc (optab2, compute_mode))
4108 break;
4110 /* If we still couldn't find a mode, use MODE, but expand_binop will
4111 probably die. */
4112 if (compute_mode == VOIDmode)
4113 compute_mode = mode;
4115 if (target && GET_MODE (target) == compute_mode)
4116 tquotient = target;
4117 else
4118 tquotient = gen_reg_rtx (compute_mode);
4120 size = GET_MODE_BITSIZE (compute_mode);
4121 #if 0
4122 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4123 (mode), and thereby get better code when OP1 is a constant. Do that
4124 later. It will require going over all usages of SIZE below. */
4125 size = GET_MODE_BITSIZE (mode);
4126 #endif
4128 /* Only deduct something for a REM if the last divide done was
4129 for a different constant. Then set the constant of the last
4130 divide. */
4131 max_cost = (unsignedp
4132 ? udiv_cost (speed, compute_mode)
4133 : sdiv_cost (speed, compute_mode));
4134 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4135 && INTVAL (op1) == last_div_const))
4136 max_cost -= (mul_cost (speed, compute_mode)
4137 + add_cost (speed, compute_mode));
4139 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4141 /* Now convert to the best mode to use. */
4142 if (compute_mode != mode)
4144 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4145 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4147 /* convert_modes may have placed op1 into a register, so we
4148 must recompute the following. */
4149 op1_is_constant = CONST_INT_P (op1);
4150 if (op1_is_constant)
4152 wide_int ext_op1 = rtx_mode_t (op1, compute_mode);
4153 op1_is_pow2 = (wi::popcount (ext_op1) == 1
4154 || (! unsignedp
4155 && wi::popcount (wi::neg (ext_op1)) == 1));
4157 else
4158 op1_is_pow2 = 0;
4161 /* If one of the operands is a volatile MEM, copy it into a register. */
4163 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4164 op0 = force_reg (compute_mode, op0);
4165 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4166 op1 = force_reg (compute_mode, op1);
4168 /* If we need the remainder or if OP1 is constant, we need to
4169 put OP0 in a register in case it has any queued subexpressions. */
4170 if (rem_flag || op1_is_constant)
4171 op0 = force_reg (compute_mode, op0);
4173 last = get_last_insn ();
4175 /* Promote floor rounding to trunc rounding for unsigned operations. */
4176 if (unsignedp)
4178 if (code == FLOOR_DIV_EXPR)
4179 code = TRUNC_DIV_EXPR;
4180 if (code == FLOOR_MOD_EXPR)
4181 code = TRUNC_MOD_EXPR;
4182 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4183 code = TRUNC_DIV_EXPR;
4186 if (op1 != const0_rtx)
4187 switch (code)
4189 case TRUNC_MOD_EXPR:
4190 case TRUNC_DIV_EXPR:
4191 if (op1_is_constant)
4193 if (unsignedp)
4195 unsigned HOST_WIDE_INT mh, ml;
4196 int pre_shift, post_shift;
4197 int dummy;
4198 wide_int wd = rtx_mode_t (op1, compute_mode);
4199 unsigned HOST_WIDE_INT d = wd.to_uhwi ();
4201 if (wi::popcount (wd) == 1)
4203 pre_shift = floor_log2 (d);
4204 if (rem_flag)
4206 unsigned HOST_WIDE_INT mask
4207 = (HOST_WIDE_INT_1U << pre_shift) - 1;
4208 remainder
4209 = expand_binop (compute_mode, and_optab, op0,
4210 gen_int_mode (mask, compute_mode),
4211 remainder, 1,
4212 OPTAB_LIB_WIDEN);
4213 if (remainder)
4214 return gen_lowpart (mode, remainder);
4216 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4217 pre_shift, tquotient, 1);
4219 else if (size <= HOST_BITS_PER_WIDE_INT)
4221 if (d >= (HOST_WIDE_INT_1U << (size - 1)))
4223 /* Most significant bit of divisor is set; emit an scc
4224 insn. */
4225 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4226 compute_mode, 1, 1);
4228 else
4230 /* Find a suitable multiplier and right shift count
4231 instead of multiplying with D. */
4233 mh = choose_multiplier (d, size, size,
4234 &ml, &post_shift, &dummy);
4236 /* If the suggested multiplier is more than SIZE bits,
4237 we can do better for even divisors, using an
4238 initial right shift. */
4239 if (mh != 0 && (d & 1) == 0)
4241 pre_shift = ctz_or_zero (d);
4242 mh = choose_multiplier (d >> pre_shift, size,
4243 size - pre_shift,
4244 &ml, &post_shift, &dummy);
4245 gcc_assert (!mh);
4247 else
4248 pre_shift = 0;
4250 if (mh != 0)
4252 rtx t1, t2, t3, t4;
4254 if (post_shift - 1 >= BITS_PER_WORD)
4255 goto fail1;
4257 extra_cost
4258 = (shift_cost (speed, compute_mode, post_shift - 1)
4259 + shift_cost (speed, compute_mode, 1)
4260 + 2 * add_cost (speed, compute_mode));
4261 t1 = expmed_mult_highpart
4262 (compute_mode, op0,
4263 gen_int_mode (ml, compute_mode),
4264 NULL_RTX, 1, max_cost - extra_cost);
4265 if (t1 == 0)
4266 goto fail1;
4267 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4268 op0, t1),
4269 NULL_RTX);
4270 t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4271 t2, 1, NULL_RTX, 1);
4272 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4273 t1, t3),
4274 NULL_RTX);
4275 quotient = expand_shift
4276 (RSHIFT_EXPR, compute_mode, t4,
4277 post_shift - 1, tquotient, 1);
4279 else
4281 rtx t1, t2;
4283 if (pre_shift >= BITS_PER_WORD
4284 || post_shift >= BITS_PER_WORD)
4285 goto fail1;
4287 t1 = expand_shift
4288 (RSHIFT_EXPR, compute_mode, op0,
4289 pre_shift, NULL_RTX, 1);
4290 extra_cost
4291 = (shift_cost (speed, compute_mode, pre_shift)
4292 + shift_cost (speed, compute_mode, post_shift));
4293 t2 = expmed_mult_highpart
4294 (compute_mode, t1,
4295 gen_int_mode (ml, compute_mode),
4296 NULL_RTX, 1, max_cost - extra_cost);
4297 if (t2 == 0)
4298 goto fail1;
4299 quotient = expand_shift
4300 (RSHIFT_EXPR, compute_mode, t2,
4301 post_shift, tquotient, 1);
4305 else /* Too wide mode to use tricky code */
4306 break;
4308 insn = get_last_insn ();
4309 if (insn != last)
4310 set_dst_reg_note (insn, REG_EQUAL,
4311 gen_rtx_UDIV (compute_mode, op0, op1),
4312 quotient);
4314 else /* TRUNC_DIV, signed */
4316 unsigned HOST_WIDE_INT ml;
4317 int lgup, post_shift;
4318 rtx mlr;
4319 HOST_WIDE_INT d = INTVAL (op1);
4320 unsigned HOST_WIDE_INT abs_d;
4322 /* Since d might be INT_MIN, we have to cast to
4323 unsigned HOST_WIDE_INT before negating to avoid
4324 undefined signed overflow. */
4325 abs_d = (d >= 0
4326 ? (unsigned HOST_WIDE_INT) d
4327 : - (unsigned HOST_WIDE_INT) d);
4329 /* n rem d = n rem -d */
4330 if (rem_flag && d < 0)
4332 d = abs_d;
4333 op1 = gen_int_mode (abs_d, compute_mode);
4336 if (d == 1)
4337 quotient = op0;
4338 else if (d == -1)
4339 quotient = expand_unop (compute_mode, neg_optab, op0,
4340 tquotient, 0);
4341 else if (size <= HOST_BITS_PER_WIDE_INT
4342 && abs_d == HOST_WIDE_INT_1U << (size - 1))
4344 /* This case is not handled correctly below. */
4345 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4346 compute_mode, 1, 1);
4347 if (quotient == 0)
4348 goto fail1;
4350 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4351 && (size <= HOST_BITS_PER_WIDE_INT || d >= 0)
4352 && (rem_flag
4353 ? smod_pow2_cheap (speed, compute_mode)
4354 : sdiv_pow2_cheap (speed, compute_mode))
4355 /* We assume that cheap metric is true if the
4356 optab has an expander for this mode. */
4357 && ((optab_handler ((rem_flag ? smod_optab
4358 : sdiv_optab),
4359 compute_mode)
4360 != CODE_FOR_nothing)
4361 || (optab_handler (sdivmod_optab,
4362 compute_mode)
4363 != CODE_FOR_nothing)))
4365 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d)
4366 && (size <= HOST_BITS_PER_WIDE_INT
4367 || abs_d != (unsigned HOST_WIDE_INT) d))
4369 if (rem_flag)
4371 remainder = expand_smod_pow2 (compute_mode, op0, d);
4372 if (remainder)
4373 return gen_lowpart (mode, remainder);
4376 if (sdiv_pow2_cheap (speed, compute_mode)
4377 && ((optab_handler (sdiv_optab, compute_mode)
4378 != CODE_FOR_nothing)
4379 || (optab_handler (sdivmod_optab, compute_mode)
4380 != CODE_FOR_nothing)))
4381 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4382 compute_mode, op0,
4383 gen_int_mode (abs_d,
4384 compute_mode),
4385 NULL_RTX, 0);
4386 else
4387 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4389 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4390 negate the quotient. */
4391 if (d < 0)
4393 insn = get_last_insn ();
4394 if (insn != last
4395 && abs_d < (HOST_WIDE_INT_1U
4396 << (HOST_BITS_PER_WIDE_INT - 1)))
4397 set_dst_reg_note (insn, REG_EQUAL,
4398 gen_rtx_DIV (compute_mode, op0,
4399 gen_int_mode
4400 (abs_d,
4401 compute_mode)),
4402 quotient);
4404 quotient = expand_unop (compute_mode, neg_optab,
4405 quotient, quotient, 0);
4408 else if (size <= HOST_BITS_PER_WIDE_INT)
4410 choose_multiplier (abs_d, size, size - 1,
4411 &ml, &post_shift, &lgup);
4412 if (ml < HOST_WIDE_INT_1U << (size - 1))
4414 rtx t1, t2, t3;
4416 if (post_shift >= BITS_PER_WORD
4417 || size - 1 >= BITS_PER_WORD)
4418 goto fail1;
4420 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4421 + shift_cost (speed, compute_mode, size - 1)
4422 + add_cost (speed, compute_mode));
4423 t1 = expmed_mult_highpart
4424 (compute_mode, op0, gen_int_mode (ml, compute_mode),
4425 NULL_RTX, 0, max_cost - extra_cost);
4426 if (t1 == 0)
4427 goto fail1;
4428 t2 = expand_shift
4429 (RSHIFT_EXPR, compute_mode, t1,
4430 post_shift, NULL_RTX, 0);
4431 t3 = expand_shift
4432 (RSHIFT_EXPR, compute_mode, op0,
4433 size - 1, NULL_RTX, 0);
4434 if (d < 0)
4435 quotient
4436 = force_operand (gen_rtx_MINUS (compute_mode,
4437 t3, t2),
4438 tquotient);
4439 else
4440 quotient
4441 = force_operand (gen_rtx_MINUS (compute_mode,
4442 t2, t3),
4443 tquotient);
4445 else
4447 rtx t1, t2, t3, t4;
4449 if (post_shift >= BITS_PER_WORD
4450 || size - 1 >= BITS_PER_WORD)
4451 goto fail1;
4453 ml |= HOST_WIDE_INT_M1U << (size - 1);
4454 mlr = gen_int_mode (ml, compute_mode);
4455 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4456 + shift_cost (speed, compute_mode, size - 1)
4457 + 2 * add_cost (speed, compute_mode));
4458 t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4459 NULL_RTX, 0,
4460 max_cost - extra_cost);
4461 if (t1 == 0)
4462 goto fail1;
4463 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4464 t1, op0),
4465 NULL_RTX);
4466 t3 = expand_shift
4467 (RSHIFT_EXPR, compute_mode, t2,
4468 post_shift, NULL_RTX, 0);
4469 t4 = expand_shift
4470 (RSHIFT_EXPR, compute_mode, op0,
4471 size - 1, NULL_RTX, 0);
4472 if (d < 0)
4473 quotient
4474 = force_operand (gen_rtx_MINUS (compute_mode,
4475 t4, t3),
4476 tquotient);
4477 else
4478 quotient
4479 = force_operand (gen_rtx_MINUS (compute_mode,
4480 t3, t4),
4481 tquotient);
4484 else /* Too wide mode to use tricky code */
4485 break;
4487 insn = get_last_insn ();
4488 if (insn != last)
4489 set_dst_reg_note (insn, REG_EQUAL,
4490 gen_rtx_DIV (compute_mode, op0, op1),
4491 quotient);
4493 break;
4495 fail1:
4496 delete_insns_since (last);
4497 break;
4499 case FLOOR_DIV_EXPR:
4500 case FLOOR_MOD_EXPR:
4501 /* We will come here only for signed operations. */
4502 if (op1_is_constant && size <= HOST_BITS_PER_WIDE_INT)
4504 unsigned HOST_WIDE_INT mh, ml;
4505 int pre_shift, lgup, post_shift;
4506 HOST_WIDE_INT d = INTVAL (op1);
4508 if (d > 0)
4510 /* We could just as easily deal with negative constants here,
4511 but it does not seem worth the trouble for GCC 2.6. */
4512 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4514 pre_shift = floor_log2 (d);
4515 if (rem_flag)
4517 unsigned HOST_WIDE_INT mask
4518 = (HOST_WIDE_INT_1U << pre_shift) - 1;
4519 remainder = expand_binop
4520 (compute_mode, and_optab, op0,
4521 gen_int_mode (mask, compute_mode),
4522 remainder, 0, OPTAB_LIB_WIDEN);
4523 if (remainder)
4524 return gen_lowpart (mode, remainder);
4526 quotient = expand_shift
4527 (RSHIFT_EXPR, compute_mode, op0,
4528 pre_shift, tquotient, 0);
4530 else
4532 rtx t1, t2, t3, t4;
4534 mh = choose_multiplier (d, size, size - 1,
4535 &ml, &post_shift, &lgup);
4536 gcc_assert (!mh);
4538 if (post_shift < BITS_PER_WORD
4539 && size - 1 < BITS_PER_WORD)
4541 t1 = expand_shift
4542 (RSHIFT_EXPR, compute_mode, op0,
4543 size - 1, NULL_RTX, 0);
4544 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4545 NULL_RTX, 0, OPTAB_WIDEN);
4546 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4547 + shift_cost (speed, compute_mode, size - 1)
4548 + 2 * add_cost (speed, compute_mode));
4549 t3 = expmed_mult_highpart
4550 (compute_mode, t2, gen_int_mode (ml, compute_mode),
4551 NULL_RTX, 1, max_cost - extra_cost);
4552 if (t3 != 0)
4554 t4 = expand_shift
4555 (RSHIFT_EXPR, compute_mode, t3,
4556 post_shift, NULL_RTX, 1);
4557 quotient = expand_binop (compute_mode, xor_optab,
4558 t4, t1, tquotient, 0,
4559 OPTAB_WIDEN);
4564 else
4566 rtx nsign, t1, t2, t3, t4;
4567 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4568 op0, constm1_rtx), NULL_RTX);
4569 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4570 0, OPTAB_WIDEN);
4571 nsign = expand_shift (RSHIFT_EXPR, compute_mode, t2,
4572 size - 1, NULL_RTX, 0);
4573 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4574 NULL_RTX);
4575 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4576 NULL_RTX, 0);
4577 if (t4)
4579 rtx t5;
4580 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4581 NULL_RTX, 0);
4582 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4583 t4, t5),
4584 tquotient);
4589 if (quotient != 0)
4590 break;
4591 delete_insns_since (last);
4593 /* Try using an instruction that produces both the quotient and
4594 remainder, using truncation. We can easily compensate the quotient
4595 or remainder to get floor rounding, once we have the remainder.
4596 Notice that we also compute the final remainder value here,
4597 and return the result right away. */
4598 if (target == 0 || GET_MODE (target) != compute_mode)
4599 target = gen_reg_rtx (compute_mode);
4601 if (rem_flag)
4603 remainder
4604 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4605 quotient = gen_reg_rtx (compute_mode);
4607 else
4609 quotient
4610 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4611 remainder = gen_reg_rtx (compute_mode);
4614 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4615 quotient, remainder, 0))
4617 /* This could be computed with a branch-less sequence.
4618 Save that for later. */
4619 rtx tem;
4620 rtx_code_label *label = gen_label_rtx ();
4621 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4622 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4623 NULL_RTX, 0, OPTAB_WIDEN);
4624 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4625 expand_dec (quotient, const1_rtx);
4626 expand_inc (remainder, op1);
4627 emit_label (label);
4628 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4631 /* No luck with division elimination or divmod. Have to do it
4632 by conditionally adjusting op0 *and* the result. */
4634 rtx_code_label *label1, *label2, *label3, *label4, *label5;
4635 rtx adjusted_op0;
4636 rtx tem;
4638 quotient = gen_reg_rtx (compute_mode);
4639 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4640 label1 = gen_label_rtx ();
4641 label2 = gen_label_rtx ();
4642 label3 = gen_label_rtx ();
4643 label4 = gen_label_rtx ();
4644 label5 = gen_label_rtx ();
4645 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4646 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4647 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4648 quotient, 0, OPTAB_LIB_WIDEN);
4649 if (tem != quotient)
4650 emit_move_insn (quotient, tem);
4651 emit_jump_insn (targetm.gen_jump (label5));
4652 emit_barrier ();
4653 emit_label (label1);
4654 expand_inc (adjusted_op0, const1_rtx);
4655 emit_jump_insn (targetm.gen_jump (label4));
4656 emit_barrier ();
4657 emit_label (label2);
4658 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4659 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4660 quotient, 0, OPTAB_LIB_WIDEN);
4661 if (tem != quotient)
4662 emit_move_insn (quotient, tem);
4663 emit_jump_insn (targetm.gen_jump (label5));
4664 emit_barrier ();
4665 emit_label (label3);
4666 expand_dec (adjusted_op0, const1_rtx);
4667 emit_label (label4);
4668 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4669 quotient, 0, OPTAB_LIB_WIDEN);
4670 if (tem != quotient)
4671 emit_move_insn (quotient, tem);
4672 expand_dec (quotient, const1_rtx);
4673 emit_label (label5);
4675 break;
4677 case CEIL_DIV_EXPR:
4678 case CEIL_MOD_EXPR:
4679 if (unsignedp)
4681 if (op1_is_constant
4682 && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4683 && (size <= HOST_BITS_PER_WIDE_INT
4684 || INTVAL (op1) >= 0))
4686 rtx t1, t2, t3;
4687 unsigned HOST_WIDE_INT d = INTVAL (op1);
4688 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4689 floor_log2 (d), tquotient, 1);
4690 t2 = expand_binop (compute_mode, and_optab, op0,
4691 gen_int_mode (d - 1, compute_mode),
4692 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4693 t3 = gen_reg_rtx (compute_mode);
4694 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4695 compute_mode, 1, 1);
4696 if (t3 == 0)
4698 rtx_code_label *lab;
4699 lab = gen_label_rtx ();
4700 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4701 expand_inc (t1, const1_rtx);
4702 emit_label (lab);
4703 quotient = t1;
4705 else
4706 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4707 t1, t3),
4708 tquotient);
4709 break;
4712 /* Try using an instruction that produces both the quotient and
4713 remainder, using truncation. We can easily compensate the
4714 quotient or remainder to get ceiling rounding, once we have the
4715 remainder. Notice that we also compute the final remainder
4716 value here, and return the result right away. */
4717 if (target == 0 || GET_MODE (target) != compute_mode)
4718 target = gen_reg_rtx (compute_mode);
4720 if (rem_flag)
4722 remainder = (REG_P (target)
4723 ? target : gen_reg_rtx (compute_mode));
4724 quotient = gen_reg_rtx (compute_mode);
4726 else
4728 quotient = (REG_P (target)
4729 ? target : gen_reg_rtx (compute_mode));
4730 remainder = gen_reg_rtx (compute_mode);
4733 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4734 remainder, 1))
4736 /* This could be computed with a branch-less sequence.
4737 Save that for later. */
4738 rtx_code_label *label = gen_label_rtx ();
4739 do_cmp_and_jump (remainder, const0_rtx, EQ,
4740 compute_mode, label);
4741 expand_inc (quotient, const1_rtx);
4742 expand_dec (remainder, op1);
4743 emit_label (label);
4744 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4747 /* No luck with division elimination or divmod. Have to do it
4748 by conditionally adjusting op0 *and* the result. */
4750 rtx_code_label *label1, *label2;
4751 rtx adjusted_op0, tem;
4753 quotient = gen_reg_rtx (compute_mode);
4754 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4755 label1 = gen_label_rtx ();
4756 label2 = gen_label_rtx ();
4757 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4758 compute_mode, label1);
4759 emit_move_insn (quotient, const0_rtx);
4760 emit_jump_insn (targetm.gen_jump (label2));
4761 emit_barrier ();
4762 emit_label (label1);
4763 expand_dec (adjusted_op0, const1_rtx);
4764 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4765 quotient, 1, OPTAB_LIB_WIDEN);
4766 if (tem != quotient)
4767 emit_move_insn (quotient, tem);
4768 expand_inc (quotient, const1_rtx);
4769 emit_label (label2);
4772 else /* signed */
4774 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4775 && INTVAL (op1) >= 0)
4777 /* This is extremely similar to the code for the unsigned case
4778 above. For 2.7 we should merge these variants, but for
4779 2.6.1 I don't want to touch the code for unsigned since that
4780 gets used in C. The signed case will only be used by other
4781 languages (Ada). */
4783 rtx t1, t2, t3;
4784 unsigned HOST_WIDE_INT d = INTVAL (op1);
4785 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4786 floor_log2 (d), tquotient, 0);
4787 t2 = expand_binop (compute_mode, and_optab, op0,
4788 gen_int_mode (d - 1, compute_mode),
4789 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4790 t3 = gen_reg_rtx (compute_mode);
4791 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4792 compute_mode, 1, 1);
4793 if (t3 == 0)
4795 rtx_code_label *lab;
4796 lab = gen_label_rtx ();
4797 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4798 expand_inc (t1, const1_rtx);
4799 emit_label (lab);
4800 quotient = t1;
4802 else
4803 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4804 t1, t3),
4805 tquotient);
4806 break;
4809 /* Try using an instruction that produces both the quotient and
4810 remainder, using truncation. We can easily compensate the
4811 quotient or remainder to get ceiling rounding, once we have the
4812 remainder. Notice that we also compute the final remainder
4813 value here, and return the result right away. */
4814 if (target == 0 || GET_MODE (target) != compute_mode)
4815 target = gen_reg_rtx (compute_mode);
4816 if (rem_flag)
4818 remainder = (REG_P (target)
4819 ? target : gen_reg_rtx (compute_mode));
4820 quotient = gen_reg_rtx (compute_mode);
4822 else
4824 quotient = (REG_P (target)
4825 ? target : gen_reg_rtx (compute_mode));
4826 remainder = gen_reg_rtx (compute_mode);
4829 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4830 remainder, 0))
4832 /* This could be computed with a branch-less sequence.
4833 Save that for later. */
4834 rtx tem;
4835 rtx_code_label *label = gen_label_rtx ();
4836 do_cmp_and_jump (remainder, const0_rtx, EQ,
4837 compute_mode, label);
4838 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4839 NULL_RTX, 0, OPTAB_WIDEN);
4840 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4841 expand_inc (quotient, const1_rtx);
4842 expand_dec (remainder, op1);
4843 emit_label (label);
4844 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4847 /* No luck with division elimination or divmod. Have to do it
4848 by conditionally adjusting op0 *and* the result. */
4850 rtx_code_label *label1, *label2, *label3, *label4, *label5;
4851 rtx adjusted_op0;
4852 rtx tem;
4854 quotient = gen_reg_rtx (compute_mode);
4855 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4856 label1 = gen_label_rtx ();
4857 label2 = gen_label_rtx ();
4858 label3 = gen_label_rtx ();
4859 label4 = gen_label_rtx ();
4860 label5 = gen_label_rtx ();
4861 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4862 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4863 compute_mode, label1);
4864 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4865 quotient, 0, OPTAB_LIB_WIDEN);
4866 if (tem != quotient)
4867 emit_move_insn (quotient, tem);
4868 emit_jump_insn (targetm.gen_jump (label5));
4869 emit_barrier ();
4870 emit_label (label1);
4871 expand_dec (adjusted_op0, const1_rtx);
4872 emit_jump_insn (targetm.gen_jump (label4));
4873 emit_barrier ();
4874 emit_label (label2);
4875 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4876 compute_mode, label3);
4877 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4878 quotient, 0, OPTAB_LIB_WIDEN);
4879 if (tem != quotient)
4880 emit_move_insn (quotient, tem);
4881 emit_jump_insn (targetm.gen_jump (label5));
4882 emit_barrier ();
4883 emit_label (label3);
4884 expand_inc (adjusted_op0, const1_rtx);
4885 emit_label (label4);
4886 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4887 quotient, 0, OPTAB_LIB_WIDEN);
4888 if (tem != quotient)
4889 emit_move_insn (quotient, tem);
4890 expand_inc (quotient, const1_rtx);
4891 emit_label (label5);
4894 break;
4896 case EXACT_DIV_EXPR:
4897 if (op1_is_constant && size <= HOST_BITS_PER_WIDE_INT)
4899 HOST_WIDE_INT d = INTVAL (op1);
4900 unsigned HOST_WIDE_INT ml;
4901 int pre_shift;
4902 rtx t1;
4904 pre_shift = ctz_or_zero (d);
4905 ml = invert_mod2n (d >> pre_shift, size);
4906 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4907 pre_shift, NULL_RTX, unsignedp);
4908 quotient = expand_mult (compute_mode, t1,
4909 gen_int_mode (ml, compute_mode),
4910 NULL_RTX, 1);
4912 insn = get_last_insn ();
4913 set_dst_reg_note (insn, REG_EQUAL,
4914 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4915 compute_mode, op0, op1),
4916 quotient);
4918 break;
4920 case ROUND_DIV_EXPR:
4921 case ROUND_MOD_EXPR:
4922 if (unsignedp)
4924 rtx tem;
4925 rtx_code_label *label;
4926 label = gen_label_rtx ();
4927 quotient = gen_reg_rtx (compute_mode);
4928 remainder = gen_reg_rtx (compute_mode);
4929 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4931 rtx tem;
4932 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4933 quotient, 1, OPTAB_LIB_WIDEN);
4934 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4935 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4936 remainder, 1, OPTAB_LIB_WIDEN);
4938 tem = plus_constant (compute_mode, op1, -1);
4939 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4940 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4941 expand_inc (quotient, const1_rtx);
4942 expand_dec (remainder, op1);
4943 emit_label (label);
4945 else
4947 rtx abs_rem, abs_op1, tem, mask;
4948 rtx_code_label *label;
4949 label = gen_label_rtx ();
4950 quotient = gen_reg_rtx (compute_mode);
4951 remainder = gen_reg_rtx (compute_mode);
4952 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4954 rtx tem;
4955 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4956 quotient, 0, OPTAB_LIB_WIDEN);
4957 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4958 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4959 remainder, 0, OPTAB_LIB_WIDEN);
4961 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4962 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4963 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4964 1, NULL_RTX, 1);
4965 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4966 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4967 NULL_RTX, 0, OPTAB_WIDEN);
4968 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4969 size - 1, NULL_RTX, 0);
4970 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4971 NULL_RTX, 0, OPTAB_WIDEN);
4972 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4973 NULL_RTX, 0, OPTAB_WIDEN);
4974 expand_inc (quotient, tem);
4975 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4976 NULL_RTX, 0, OPTAB_WIDEN);
4977 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4978 NULL_RTX, 0, OPTAB_WIDEN);
4979 expand_dec (remainder, tem);
4980 emit_label (label);
4982 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4984 default:
4985 gcc_unreachable ();
4988 if (quotient == 0)
4990 if (target && GET_MODE (target) != compute_mode)
4991 target = 0;
4993 if (rem_flag)
4995 /* Try to produce the remainder without producing the quotient.
4996 If we seem to have a divmod pattern that does not require widening,
4997 don't try widening here. We should really have a WIDEN argument
4998 to expand_twoval_binop, since what we'd really like to do here is
4999 1) try a mod insn in compute_mode
5000 2) try a divmod insn in compute_mode
5001 3) try a div insn in compute_mode and multiply-subtract to get
5002 remainder
5003 4) try the same things with widening allowed. */
5004 remainder
5005 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5006 op0, op1, target,
5007 unsignedp,
5008 ((optab_handler (optab2, compute_mode)
5009 != CODE_FOR_nothing)
5010 ? OPTAB_DIRECT : OPTAB_WIDEN));
5011 if (remainder == 0)
5013 /* No luck there. Can we do remainder and divide at once
5014 without a library call? */
5015 remainder = gen_reg_rtx (compute_mode);
5016 if (! expand_twoval_binop ((unsignedp
5017 ? udivmod_optab
5018 : sdivmod_optab),
5019 op0, op1,
5020 NULL_RTX, remainder, unsignedp))
5021 remainder = 0;
5024 if (remainder)
5025 return gen_lowpart (mode, remainder);
5028 /* Produce the quotient. Try a quotient insn, but not a library call.
5029 If we have a divmod in this mode, use it in preference to widening
5030 the div (for this test we assume it will not fail). Note that optab2
5031 is set to the one of the two optabs that the call below will use. */
5032 quotient
5033 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5034 op0, op1, rem_flag ? NULL_RTX : target,
5035 unsignedp,
5036 ((optab_handler (optab2, compute_mode)
5037 != CODE_FOR_nothing)
5038 ? OPTAB_DIRECT : OPTAB_WIDEN));
5040 if (quotient == 0)
5042 /* No luck there. Try a quotient-and-remainder insn,
5043 keeping the quotient alone. */
5044 quotient = gen_reg_rtx (compute_mode);
5045 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5046 op0, op1,
5047 quotient, NULL_RTX, unsignedp))
5049 quotient = 0;
5050 if (! rem_flag)
5051 /* Still no luck. If we are not computing the remainder,
5052 use a library call for the quotient. */
5053 quotient = sign_expand_binop (compute_mode,
5054 udiv_optab, sdiv_optab,
5055 op0, op1, target,
5056 unsignedp, OPTAB_LIB_WIDEN);
5061 if (rem_flag)
5063 if (target && GET_MODE (target) != compute_mode)
5064 target = 0;
5066 if (quotient == 0)
5068 /* No divide instruction either. Use library for remainder. */
5069 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5070 op0, op1, target,
5071 unsignedp, OPTAB_LIB_WIDEN);
5072 /* No remainder function. Try a quotient-and-remainder
5073 function, keeping the remainder. */
5074 if (!remainder)
5076 remainder = gen_reg_rtx (compute_mode);
5077 if (!expand_twoval_binop_libfunc
5078 (unsignedp ? udivmod_optab : sdivmod_optab,
5079 op0, op1,
5080 NULL_RTX, remainder,
5081 unsignedp ? UMOD : MOD))
5082 remainder = NULL_RTX;
5085 else
5087 /* We divided. Now finish doing X - Y * (X / Y). */
5088 remainder = expand_mult (compute_mode, quotient, op1,
5089 NULL_RTX, unsignedp);
5090 remainder = expand_binop (compute_mode, sub_optab, op0,
5091 remainder, target, unsignedp,
5092 OPTAB_LIB_WIDEN);
5096 return gen_lowpart (mode, rem_flag ? remainder : quotient);
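/* Illustrative summary (not part of the original source): once a
   truncating quotient Q and remainder R = OP0 - Q * OP1 are available,
   the rounding modes handled above reduce to small corrections.  A C
   sketch for signed floor division, mirroring the FLOOR_DIV_EXPR fix-up
   emitted after the divmod (assuming the division itself does not
   overflow):

     #include <stdint.h>

     int32_t floor_div (int32_t a, int32_t b)
     {
       int32_t q = a / b;             // truncating quotient
       int32_t r = a % b;             // truncating remainder
       if (r != 0 && ((a ^ b) < 0))   // nonzero remainder, operands differ in sign
         q -= 1;                      // (the remainder, if wanted, gains +b)
       return q;
     }

   CEIL_DIV_EXPR adds 1 instead when the signs agree (always, for
   unsigned), and ROUND_DIV_EXPR compares 2*|R| against |OP1|.  */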
5099 /* Return a tree node with data type TYPE, describing the value of X.
5100 Usually this is a VAR_DECL, if there is no obvious better choice.
5101 X may be an expression; however, we only support those expressions
5102 generated by loop.c. */
5104 tree
5105 make_tree (tree type, rtx x)
5107 tree t;
5109 switch (GET_CODE (x))
5111 case CONST_INT:
5112 case CONST_WIDE_INT:
5113 t = wide_int_to_tree (type, rtx_mode_t (x, TYPE_MODE (type)));
5114 return t;
5116 case CONST_DOUBLE:
5117 STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5118 if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5119 t = wide_int_to_tree (type,
5120 wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5121 HOST_BITS_PER_WIDE_INT * 2));
5122 else
5123 t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5125 return t;
5127 case CONST_VECTOR:
5129 int units = CONST_VECTOR_NUNITS (x);
5130 tree itype = TREE_TYPE (type);
5131 tree *elts;
5132 int i;
5134 /* Build a tree with vector elements. */
5135 elts = XALLOCAVEC (tree, units);
5136 for (i = units - 1; i >= 0; --i)
5138 rtx elt = CONST_VECTOR_ELT (x, i);
5139 elts[i] = make_tree (itype, elt);
5142 return build_vector (type, elts);
5145 case PLUS:
5146 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5147 make_tree (type, XEXP (x, 1)));
5149 case MINUS:
5150 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5151 make_tree (type, XEXP (x, 1)));
5153 case NEG:
5154 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5156 case MULT:
5157 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5158 make_tree (type, XEXP (x, 1)));
5160 case ASHIFT:
5161 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5162 make_tree (type, XEXP (x, 1)));
5164 case LSHIFTRT:
5165 t = unsigned_type_for (type);
5166 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5167 make_tree (t, XEXP (x, 0)),
5168 make_tree (type, XEXP (x, 1))));
5170 case ASHIFTRT:
5171 t = signed_type_for (type);
5172 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5173 make_tree (t, XEXP (x, 0)),
5174 make_tree (type, XEXP (x, 1))));
5176 case DIV:
5177 if (TREE_CODE (type) != REAL_TYPE)
5178 t = signed_type_for (type);
5179 else
5180 t = type;
5182 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5183 make_tree (t, XEXP (x, 0)),
5184 make_tree (t, XEXP (x, 1))));
5185 case UDIV:
5186 t = unsigned_type_for (type);
5187 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5188 make_tree (t, XEXP (x, 0)),
5189 make_tree (t, XEXP (x, 1))));
5191 case SIGN_EXTEND:
5192 case ZERO_EXTEND:
5193 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5194 GET_CODE (x) == ZERO_EXTEND);
5195 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5197 case CONST:
5198 return make_tree (type, XEXP (x, 0));
5200 case SYMBOL_REF:
5201 t = SYMBOL_REF_DECL (x);
5202 if (t)
5203 return fold_convert (type, build_fold_addr_expr (t));
5204 /* fall through. */
5206 default:
5207 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5209 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5210 address mode to pointer mode. */
5211 if (POINTER_TYPE_P (type))
5212 x = convert_memory_address_addr_space
5213 (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5215 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5216 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5217 t->decl_with_rtl.rtl = x;
5219 return t;
5223 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5224 and returning TARGET.
5226 If TARGET is 0, a pseudo-register or constant is returned. */
5229 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5231 rtx tem = 0;
5233 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5234 tem = simplify_binary_operation (AND, mode, op0, op1);
5235 if (tem == 0)
5236 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5238 if (target == 0)
5239 target = tem;
5240 else if (tem != target)
5241 emit_move_insn (target, tem);
5242 return target;
5245 /* Helper function for emit_store_flag. */
5247 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5248 machine_mode mode, machine_mode compare_mode,
5249 int unsignedp, rtx x, rtx y, int normalizep,
5250 machine_mode target_mode)
5252 struct expand_operand ops[4];
5253 rtx op0, comparison, subtarget;
5254 rtx_insn *last;
5255 machine_mode result_mode = targetm.cstore_mode (icode);
5257 last = get_last_insn ();
5258 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5259 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5260 if (!x || !y)
5262 delete_insns_since (last);
5263 return NULL_RTX;
5266 if (target_mode == VOIDmode)
5267 target_mode = result_mode;
5268 if (!target)
5269 target = gen_reg_rtx (target_mode);
5271 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5273 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5274 create_fixed_operand (&ops[1], comparison);
5275 create_fixed_operand (&ops[2], x);
5276 create_fixed_operand (&ops[3], y);
5277 if (!maybe_expand_insn (icode, 4, ops))
5279 delete_insns_since (last);
5280 return NULL_RTX;
5282 subtarget = ops[0].value;
5284 /* If we are converting to a wider mode, first convert to
5285 TARGET_MODE, then normalize. This produces better combining
5286 opportunities on machines that have a SIGN_EXTRACT when we are
5287 testing a single bit. This mostly benefits the 68k.
5289 If STORE_FLAG_VALUE does not have the sign bit set when
5290 interpreted in MODE, we can do this conversion as unsigned, which
5291 is usually more efficient. */
5292 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5294 convert_move (target, subtarget,
5295 val_signbit_known_clear_p (result_mode,
5296 STORE_FLAG_VALUE));
5297 op0 = target;
5298 result_mode = target_mode;
5300 else
5301 op0 = subtarget;
5303 /* If we want to keep subexpressions around, don't reuse our last
5304 target. */
5305 if (optimize)
5306 subtarget = 0;
5308 /* Now normalize to the proper value in MODE. Sometimes we don't
5309 have to do anything. */
5310 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5312 /* STORE_FLAG_VALUE might be the most negative number, so write
5313 the comparison this way to avoid a compile-time warning. */
5314 else if (- normalizep == STORE_FLAG_VALUE)
5315 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5317 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5318 it hard to use a value of just the sign bit due to ANSI integer
5319 constant typing rules. */
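/* Illustrative example, assuming a target whose store-flag result is the
   sign-bit value: shifting that bit down to bit 0 yields 0/1 when the shift
   is logical (NORMALIZEP == 1) and 0/-1 when it is arithmetic. */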
5320 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5321 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5322 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5323 normalizep == 1);
5324 else
5326 gcc_assert (STORE_FLAG_VALUE & 1);
5328 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5329 if (normalizep == -1)
5330 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5333 /* If we were converting to a smaller mode, do the conversion now. */
5334 if (target_mode != result_mode)
5336 convert_move (target, op0, 0);
5337 return target;
5339 else
5340 return op0;
5344 /* A subroutine of emit_store_flag only including "tricks" that do not
5345 need a recursive call. These are kept separate to avoid infinite
5346 loops. */
5348 static rtx
5349 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5350 machine_mode mode, int unsignedp, int normalizep,
5351 machine_mode target_mode)
5353 rtx subtarget;
5354 enum insn_code icode;
5355 machine_mode compare_mode;
5356 enum mode_class mclass;
5357 enum rtx_code scode;
5359 if (unsignedp)
5360 code = unsigned_condition (code);
5361 scode = swap_condition (code);
5363 /* If one operand is constant, make it the second one. Only do this
5364 if the other operand is not constant as well. */
5366 if (swap_commutative_operands_p (op0, op1))
5368 std::swap (op0, op1);
5369 code = swap_condition (code);
5372 if (mode == VOIDmode)
5373 mode = GET_MODE (op0);
5375 /* For some comparisons with 1 and -1, we can convert this to
5376 comparisons with zero. This will often produce more opportunities for
5377 store-flag insns. */
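/* For example, "x < 1" becomes "x <= 0" and the unsigned "x >= 1" becomes
   "x != 0". */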
5379 switch (code)
5381 case LT:
5382 if (op1 == const1_rtx)
5383 op1 = const0_rtx, code = LE;
5384 break;
5385 case LE:
5386 if (op1 == constm1_rtx)
5387 op1 = const0_rtx, code = LT;
5388 break;
5389 case GE:
5390 if (op1 == const1_rtx)
5391 op1 = const0_rtx, code = GT;
5392 break;
5393 case GT:
5394 if (op1 == constm1_rtx)
5395 op1 = const0_rtx, code = GE;
5396 break;
5397 case GEU:
5398 if (op1 == const1_rtx)
5399 op1 = const0_rtx, code = NE;
5400 break;
5401 case LTU:
5402 if (op1 == const1_rtx)
5403 op1 = const0_rtx, code = EQ;
5404 break;
5405 default:
5406 break;
5409 /* If we are comparing a double-word integer with zero or -1, we can
5410 convert the comparison into one involving a single word. */
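/* For instance, a DImode "x == 0" on a 32-bit word target ORs the two
   SImode halves and tests the single-word result, while "x == -1" ANDs the
   halves and compares against -1. */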
5411 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5412 && GET_MODE_CLASS (mode) == MODE_INT
5413 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5415 rtx tem;
5416 if ((code == EQ || code == NE)
5417 && (op1 == const0_rtx || op1 == constm1_rtx))
5419 rtx op00, op01;
5421 /* Do a logical OR or AND of the two words and compare the
5422 result. */
5423 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5424 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5425 tem = expand_binop (word_mode,
5426 op1 == const0_rtx ? ior_optab : and_optab,
5427 op00, op01, NULL_RTX, unsignedp,
5428 OPTAB_DIRECT);
5430 if (tem != 0)
5431 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5432 unsignedp, normalizep);
5434 else if ((code == LT || code == GE) && op1 == const0_rtx)
5436 rtx op0h;
5438 /* If testing the sign bit, can just test on high word. */
5439 op0h = simplify_gen_subreg (word_mode, op0, mode,
5440 subreg_highpart_offset (word_mode,
5441 mode));
5442 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5443 unsignedp, normalizep);
5445 else
5446 tem = NULL_RTX;
5448 if (tem)
5450 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5451 return tem;
5452 if (!target)
5453 target = gen_reg_rtx (target_mode);
5455 convert_move (target, tem,
5456 !val_signbit_known_set_p (word_mode,
5457 (normalizep ? normalizep
5458 : STORE_FLAG_VALUE)));
5459 return target;
5463 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5464 complement of A (for GE) and shifting the sign bit to the low bit. */
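/* For example, "A < 0" reduces to shifting A right by
   GET_MODE_BITSIZE (mode) - 1: a logical shift gives 0/1, an arithmetic
   shift gives 0/-1. For "A >= 0" the operand is one's-complemented first,
   inverting the sign bit. */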
5465 if (op1 == const0_rtx && (code == LT || code == GE)
5466 && GET_MODE_CLASS (mode) == MODE_INT
5467 && (normalizep || STORE_FLAG_VALUE == 1
5468 || val_signbit_p (mode, STORE_FLAG_VALUE)))
5470 subtarget = target;
5472 if (!target)
5473 target_mode = mode;
5475 /* If the result is to be wider than OP0, it is best to convert it
5476 first. If it is to be narrower, it is *incorrect* to convert it
5477 first. */
5478 else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5480 op0 = convert_modes (target_mode, mode, op0, 0);
5481 mode = target_mode;
5484 if (target_mode != mode)
5485 subtarget = 0;
5487 if (code == GE)
5488 op0 = expand_unop (mode, one_cmpl_optab, op0,
5489 ((STORE_FLAG_VALUE == 1 || normalizep)
5490 ? 0 : subtarget), 0);
5492 if (STORE_FLAG_VALUE == 1 || normalizep)
5493 /* If we are supposed to produce a 0/1 value, we want to do
5494 a logical shift from the sign bit to the low-order bit; for
5495 a -1/0 value, we do an arithmetic shift. */
5496 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5497 GET_MODE_BITSIZE (mode) - 1,
5498 subtarget, normalizep != -1);
5500 if (mode != target_mode)
5501 op0 = convert_modes (target_mode, mode, op0, 0);
5503 return op0;
5506 mclass = GET_MODE_CLASS (mode);
5507 for (compare_mode = mode; compare_mode != VOIDmode;
5508 compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5510 machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5511 icode = optab_handler (cstore_optab, optab_mode);
5512 if (icode != CODE_FOR_nothing)
5514 do_pending_stack_adjust ();
5515 rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5516 unsignedp, op0, op1, normalizep, target_mode);
5517 if (tem)
5518 return tem;
5520 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5522 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5523 unsignedp, op1, op0, normalizep, target_mode);
5524 if (tem)
5525 return tem;
5527 break;
5531 return 0;
5534 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5535 and storing in TARGET. Normally return TARGET.
5536 Return 0 if that cannot be done.
5538 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5539 it is VOIDmode, they cannot both be CONST_INT.
5541 UNSIGNEDP is for the case where we have to widen the operands
5542 to perform the operation. It says to use zero-extension.
5544 NORMALIZEP is 1 if we should convert the result to be either zero
5545 or one. NORMALIZEP is -1 if we should convert the result to be
5546 either zero or -1. If NORMALIZEP is zero, the result will be left
5547 "raw" out of the scc insn. */
5549 rtx
5550 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5551 machine_mode mode, int unsignedp, int normalizep)
5553 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5554 enum rtx_code rcode;
5555 rtx subtarget;
5556 rtx tem, trueval;
5557 rtx_insn *last;
5559 /* If we compare constants, we shouldn't use a store-flag operation,
5560 but a constant load. We can get there via the vanilla route that
5561 usually generates a compare-branch sequence, but will in this case
5562 fold the comparison to a constant, and thus elide the branch. */
5563 if (CONSTANT_P (op0) && CONSTANT_P (op1))
5564 return NULL_RTX;
5566 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5567 target_mode);
5568 if (tem)
5569 return tem;
5571 /* If we reached here, we can't do this with a scc insn, however there
5572 are some comparisons that can be done in other ways. Don't do any
5573 of these cases if branches are very cheap. */
5574 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5575 return 0;
5577 /* See what we need to return. We can only return a 1, -1, or the
5578 sign bit. */
5580 if (normalizep == 0)
5582 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5583 normalizep = STORE_FLAG_VALUE;
5585 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5587 else
5588 return 0;
5591 last = get_last_insn ();
5593 /* If optimizing, use different pseudo registers for each insn, instead
5594 of reusing the same pseudo. This leads to better CSE, but slows
5595 down the compiler, since there are more pseudos. */
5596 subtarget = (!optimize
5597 && (target_mode == mode)) ? target : NULL_RTX;
5598 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5600 /* For floating-point comparisons, try the reverse comparison or try
5601 changing the "orderedness" of the comparison. */
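/* Illustrative note: when only the reversed comparison is available, its
   normalized result is flipped back below; with NORMALIZEP == 1 a reversed
   result of 1 XORed with the true value 1 gives 0, and 0 gives 1, recovering
   the original comparison. */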
5602 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5604 enum rtx_code first_code;
5605 bool and_them;
5607 rcode = reverse_condition_maybe_unordered (code);
5608 if (can_compare_p (rcode, mode, ccp_store_flag)
5609 && (code == ORDERED || code == UNORDERED
5610 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5611 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5613 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5614 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5616 /* For the reverse comparison, use either an addition or a XOR. */
5617 if (want_add
5618 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5619 optimize_insn_for_speed_p ()) == 0)
5621 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5622 STORE_FLAG_VALUE, target_mode);
5623 if (tem)
5624 return expand_binop (target_mode, add_optab, tem,
5625 gen_int_mode (normalizep, target_mode),
5626 target, 0, OPTAB_WIDEN);
5628 else if (!want_add
5629 && rtx_cost (trueval, mode, XOR, 1,
5630 optimize_insn_for_speed_p ()) == 0)
5632 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5633 normalizep, target_mode);
5634 if (tem)
5635 return expand_binop (target_mode, xor_optab, tem, trueval,
5636 target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5640 delete_insns_since (last);
5642 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5643 if (code == ORDERED || code == UNORDERED)
5644 return 0;
5646 and_them = split_comparison (code, mode, &first_code, &code);
5648 /* If there are no NaNs, the first comparison should always fall through.
5649 Effectively change the comparison to the other one. */
5650 if (!HONOR_NANS (mode))
5652 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5653 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5654 target_mode);
5657 if (!HAVE_conditional_move)
5658 return 0;
5660 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5661 conditional move. */
5662 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5663 normalizep, target_mode);
5664 if (tem == 0)
5665 return 0;
5667 if (and_them)
5668 tem = emit_conditional_move (target, code, op0, op1, mode,
5669 tem, const0_rtx, GET_MODE (tem), 0);
5670 else
5671 tem = emit_conditional_move (target, code, op0, op1, mode,
5672 trueval, tem, GET_MODE (tem), 0);
5674 if (tem == 0)
5675 delete_insns_since (last);
5676 return tem;
5679 /* The remaining tricks only apply to integer comparisons. */
5681 if (GET_MODE_CLASS (mode) != MODE_INT)
5682 return 0;
5684 /* If this is an equality comparison of integers, we can try to exclusive-or
5685 (or subtract) the two operands and use a recursive call to try the
5686 comparison with zero. Don't do any of these cases if branches are
5687 very cheap. */
5689 if ((code == EQ || code == NE) && op1 != const0_rtx)
5691 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5692 OPTAB_WIDEN);
5694 if (tem == 0)
5695 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5696 OPTAB_WIDEN);
5697 if (tem != 0)
5698 tem = emit_store_flag (target, code, tem, const0_rtx,
5699 mode, unsignedp, normalizep);
5700 if (tem != 0)
5701 return tem;
5703 delete_insns_since (last);
5706 /* For integer comparisons, try the reverse comparison. However, for
5707 small X, and if we'd have to extend anyway, implementing "X != 0"
5708 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5709 rcode = reverse_condition (code);
5710 if (can_compare_p (rcode, mode, ccp_store_flag)
5711 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5712 && code == NE
5713 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5714 && op1 == const0_rtx))
5716 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5717 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5719 /* Again, for the reverse comparison, use either an addition or a XOR. */
5720 if (want_add
5721 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5722 optimize_insn_for_speed_p ()) == 0)
5724 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5725 STORE_FLAG_VALUE, target_mode);
5726 if (tem != 0)
5727 tem = expand_binop (target_mode, add_optab, tem,
5728 gen_int_mode (normalizep, target_mode),
5729 target, 0, OPTAB_WIDEN);
5731 else if (!want_add
5732 && rtx_cost (trueval, mode, XOR, 1,
5733 optimize_insn_for_speed_p ()) == 0)
5735 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5736 normalizep, target_mode);
5737 if (tem != 0)
5738 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5739 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5742 if (tem != 0)
5743 return tem;
5744 delete_insns_since (last);
5747 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5748 the constant zero. Reject all other comparisons at this point. Only
5749 do LE and GT if branches are expensive since they are expensive on
5750 2-operand machines. */
5752 if (op1 != const0_rtx
5753 || (code != EQ && code != NE
5754 && (BRANCH_COST (optimize_insn_for_speed_p (),
5755 false) <= 1 || (code != LE && code != GT))))
5756 return 0;
5758 /* Try to put the result of the comparison in the sign bit. Assume we can't
5759 do the necessary operation below. */
5761 tem = 0;
5763 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5764 the sign bit set. */
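/* Worked 8-bit example: A == 5 gives 5 | 4 = 5 (sign clear, so A > 0),
   while A == 0 gives 0 | 0xff = 0xff and A == -3 gives 0xfd | 0xfc = 0xfd,
   both with the sign bit set. */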
5766 if (code == LE)
5768 /* This is destructive, so SUBTARGET can't be OP0. */
5769 if (rtx_equal_p (subtarget, op0))
5770 subtarget = 0;
5772 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5773 OPTAB_WIDEN);
5774 if (tem)
5775 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5776 OPTAB_WIDEN);
5779 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5780 number of bits in the mode of OP0, minus one. */
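/* Worked 8-bit example: A == 5 gives (5 >> 7) - 5 = 0 - 5 = -5 (sign set,
   so A > 0), while A == 0 gives 0 and A == -3 gives (-1) - (-3) = 2, both
   with the sign bit clear. */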
5782 if (code == GT)
5784 if (rtx_equal_p (subtarget, op0))
5785 subtarget = 0;
5787 tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
5788 GET_MODE_BITSIZE (mode) - 1,
5789 subtarget, 0);
5790 if (tem)
5791 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5792 OPTAB_WIDEN);
5795 if (code == EQ || code == NE)
5797 /* For EQ or NE, one way to do the comparison is to apply an operation
5798 that converts the operand into a positive number if it is nonzero
5799 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5800 for NE we negate. This puts the result in the sign bit. Then we
5801 normalize with a shift, if needed.
5803 Two operations that can do the above actions are ABS and FFS, so try
5804 them. If that doesn't work, and MODE is smaller than a full word,
5805 we can use zero-extension to the wider mode (an unsigned conversion)
5806 as the operation. */
5808 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5809 that is compensated by the subsequent overflow when subtracting
5810 one / negating. */
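/* Worked example for NE: abs (5) negated is -5 (sign set, so 5 != 0) while
   abs (0) negated stays 0. For EQ, abs (5) - 1 = 4 keeps the sign clear and
   abs (0) - 1 = -1 sets it. */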
5812 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5813 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5814 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5815 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5816 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5818 tem = convert_modes (word_mode, mode, op0, 1);
5819 mode = word_mode;
5822 if (tem != 0)
5824 if (code == EQ)
5825 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5826 0, OPTAB_WIDEN);
5827 else
5828 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5831 /* If we couldn't do it that way, for NE we can "or" the two's complement
5832 of the value with itself. For EQ, we take the one's complement of
5833 that "or", which is an extra insn, so we only handle EQ if branches
5834 are expensive. */
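/* Worked 8-bit example: A == 5 gives -5 | 5 = 0xfb | 0x05 = 0xff (sign set,
   so A != 0) and A == 0 gives 0 | 0 = 0; complementing that result gives the
   EQ form: ~0xff = 0 and ~0 = 0xff. */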
5836 if (tem == 0
5837 && (code == NE
5838 || BRANCH_COST (optimize_insn_for_speed_p (),
5839 false) > 1))
5841 if (rtx_equal_p (subtarget, op0))
5842 subtarget = 0;
5844 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5845 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5846 OPTAB_WIDEN);
5848 if (tem && code == EQ)
5849 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5853 if (tem && normalizep)
5854 tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem,
5855 GET_MODE_BITSIZE (mode) - 1,
5856 subtarget, normalizep == 1);
5858 if (tem)
5860 if (!target)
5862 else if (GET_MODE (tem) != target_mode)
5864 convert_move (target, tem, 0);
5865 tem = target;
5867 else if (!subtarget)
5869 emit_move_insn (target, tem);
5870 tem = target;
5873 else
5874 delete_insns_since (last);
5876 return tem;
5879 /* Like emit_store_flag, but always succeeds. */
5881 rtx
5882 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5883 machine_mode mode, int unsignedp, int normalizep)
5885 rtx tem;
5886 rtx_code_label *label;
5887 rtx trueval, falseval;
5889 /* First see if emit_store_flag can do the job. */
5890 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5891 if (tem != 0)
5892 return tem;
5894 if (!target)
5895 target = gen_reg_rtx (word_mode);
5897 /* If this failed, we have to do this with set/compare/jump/set code.
5898 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
5899 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5900 if (code == NE
5901 && GET_MODE_CLASS (mode) == MODE_INT
5902 && REG_P (target)
5903 && op0 == target
5904 && op1 == const0_rtx)
5906 label = gen_label_rtx ();
5907 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
5908 NULL_RTX, NULL, label,
5909 profile_probability::uninitialized ());
5910 emit_move_insn (target, trueval);
5911 emit_label (label);
5912 return target;
5915 if (!REG_P (target)
5916 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5917 target = gen_reg_rtx (GET_MODE (target));
5919 /* Jump in the right direction if the target cannot implement CODE
5920 but can jump on its reverse condition. */
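/* Illustrative note: if, say, ORDERED cannot be branched on directly but
   UNORDERED can, the code below branches on the reversed condition and swaps
   the roles of TRUEVAL and FALSEVAL. */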
5921 falseval = const0_rtx;
5922 if (! can_compare_p (code, mode, ccp_jump)
5923 && (! FLOAT_MODE_P (mode)
5924 || code == ORDERED || code == UNORDERED
5925 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5926 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5928 enum rtx_code rcode;
5929 if (FLOAT_MODE_P (mode))
5930 rcode = reverse_condition_maybe_unordered (code);
5931 else
5932 rcode = reverse_condition (code);
5934 /* Canonicalize to UNORDERED for the libcall. */
5935 if (can_compare_p (rcode, mode, ccp_jump)
5936 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5938 falseval = trueval;
5939 trueval = const0_rtx;
5940 code = rcode;
5944 emit_move_insn (target, trueval);
5945 label = gen_label_rtx ();
5946 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
5947 label, profile_probability::uninitialized ());
5949 emit_move_insn (target, falseval);
5950 emit_label (label);
5952 return target;
5955 /* Perform possibly multi-word comparison and conditional jump to LABEL
5956 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
5957 now a thin wrapper around do_compare_rtx_and_jump. */
5959 static void
5960 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
5961 rtx_code_label *label)
5963 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5964 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
5965 NULL, label, profile_probability::uninitialized ());