PR rtl-optimization/79386
[official-gcc.git] / gcc / expmed.c
blobbcc5922510006d74c135cd2a973d4350d19173b5
1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987-2017 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "predict.h"
30 #include "memmodel.h"
31 #include "tm_p.h"
32 #include "expmed.h"
33 #include "optabs.h"
34 #include "emit-rtl.h"
35 #include "diagnostic-core.h"
36 #include "fold-const.h"
37 #include "stor-layout.h"
38 #include "dojump.h"
39 #include "explow.h"
40 #include "expr.h"
41 #include "langhooks.h"
43 struct target_expmed default_target_expmed;
44 #if SWITCHABLE_TARGET
45 struct target_expmed *this_target_expmed = &default_target_expmed;
46 #endif
48 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
49 unsigned HOST_WIDE_INT,
50 unsigned HOST_WIDE_INT,
51 unsigned HOST_WIDE_INT,
52 rtx, bool);
53 static void store_fixed_bit_field_1 (rtx, unsigned HOST_WIDE_INT,
54 unsigned HOST_WIDE_INT,
55 rtx, bool);
56 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
57 unsigned HOST_WIDE_INT,
58 unsigned HOST_WIDE_INT,
59 unsigned HOST_WIDE_INT,
60 rtx, bool);
61 static rtx extract_fixed_bit_field (machine_mode, rtx,
62 unsigned HOST_WIDE_INT,
63 unsigned HOST_WIDE_INT, rtx, int, bool);
64 static rtx extract_fixed_bit_field_1 (machine_mode, rtx,
65 unsigned HOST_WIDE_INT,
66 unsigned HOST_WIDE_INT, rtx, int, bool);
67 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
68 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
69 unsigned HOST_WIDE_INT, int, bool);
70 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
71 static rtx expand_smod_pow2 (machine_mode, rtx, HOST_WIDE_INT);
72 static rtx expand_sdiv_pow2 (machine_mode, rtx, HOST_WIDE_INT);
74 /* Return a constant integer mask value of mode MODE with BITSIZE ones
75 followed by BITPOS zeros, or the complement of that if COMPLEMENT.
76 The mask is truncated if necessary to the width of mode MODE. The
77 mask is zero-extended if BITSIZE+BITPOS is too small for MODE. */
79 static inline rtx
80 mask_rtx (machine_mode mode, int bitpos, int bitsize, bool complement)
82 return immed_wide_int_const
83 (wi::shifted_mask (bitpos, bitsize, complement,
84 GET_MODE_PRECISION (mode)), mode);
87 /* Test whether a value is zero of a power of two. */
88 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
89 (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
91 struct init_expmed_rtl
93 rtx reg;
94 rtx plus;
95 rtx neg;
96 rtx mult;
97 rtx sdiv;
98 rtx udiv;
99 rtx sdiv_32;
100 rtx smod_32;
101 rtx wide_mult;
102 rtx wide_lshr;
103 rtx wide_trunc;
104 rtx shift;
105 rtx shift_mult;
106 rtx shift_add;
107 rtx shift_sub0;
108 rtx shift_sub1;
109 rtx zext;
110 rtx trunc;
112 rtx pow2[MAX_BITS_PER_WORD];
113 rtx cint[MAX_BITS_PER_WORD];
116 static void
117 init_expmed_one_conv (struct init_expmed_rtl *all, machine_mode to_mode,
118 machine_mode from_mode, bool speed)
120 int to_size, from_size;
121 rtx which;
123 to_size = GET_MODE_PRECISION (to_mode);
124 from_size = GET_MODE_PRECISION (from_mode);
126 /* Most partial integers have a precision less than the "full"
127 integer it requires for storage. In case one doesn't, for
128 comparison purposes here, reduce the bit size by one in that
129 case. */
130 if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
131 && pow2p_hwi (to_size))
132 to_size --;
133 if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
134 && pow2p_hwi (from_size))
135 from_size --;
137 /* Assume cost of zero-extend and sign-extend is the same. */
138 which = (to_size < from_size ? all->trunc : all->zext);
140 PUT_MODE (all->reg, from_mode);
141 set_convert_cost (to_mode, from_mode, speed,
142 set_src_cost (which, to_mode, speed));
145 static void
146 init_expmed_one_mode (struct init_expmed_rtl *all,
147 machine_mode mode, int speed)
149 int m, n, mode_bitsize;
150 machine_mode mode_from;
152 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
154 PUT_MODE (all->reg, mode);
155 PUT_MODE (all->plus, mode);
156 PUT_MODE (all->neg, mode);
157 PUT_MODE (all->mult, mode);
158 PUT_MODE (all->sdiv, mode);
159 PUT_MODE (all->udiv, mode);
160 PUT_MODE (all->sdiv_32, mode);
161 PUT_MODE (all->smod_32, mode);
162 PUT_MODE (all->wide_trunc, mode);
163 PUT_MODE (all->shift, mode);
164 PUT_MODE (all->shift_mult, mode);
165 PUT_MODE (all->shift_add, mode);
166 PUT_MODE (all->shift_sub0, mode);
167 PUT_MODE (all->shift_sub1, mode);
168 PUT_MODE (all->zext, mode);
169 PUT_MODE (all->trunc, mode);
171 set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
172 set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
173 set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
174 set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
175 set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
177 set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
178 <= 2 * add_cost (speed, mode)));
179 set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
180 <= 4 * add_cost (speed, mode)));
182 set_shift_cost (speed, mode, 0, 0);
184 int cost = add_cost (speed, mode);
185 set_shiftadd_cost (speed, mode, 0, cost);
186 set_shiftsub0_cost (speed, mode, 0, cost);
187 set_shiftsub1_cost (speed, mode, 0, cost);
190 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
191 for (m = 1; m < n; m++)
193 XEXP (all->shift, 1) = all->cint[m];
194 XEXP (all->shift_mult, 1) = all->pow2[m];
196 set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
197 set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
198 speed));
199 set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
200 speed));
201 set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
202 speed));
205 if (SCALAR_INT_MODE_P (mode))
207 for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
208 mode_from = (machine_mode)(mode_from + 1))
209 init_expmed_one_conv (all, mode, mode_from, speed);
211 if (GET_MODE_CLASS (mode) == MODE_INT)
213 machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
214 if (wider_mode != VOIDmode)
216 PUT_MODE (all->zext, wider_mode);
217 PUT_MODE (all->wide_mult, wider_mode);
218 PUT_MODE (all->wide_lshr, wider_mode);
219 XEXP (all->wide_lshr, 1) = GEN_INT (mode_bitsize);
221 set_mul_widen_cost (speed, wider_mode,
222 set_src_cost (all->wide_mult, wider_mode, speed));
223 set_mul_highpart_cost (speed, mode,
224 set_src_cost (all->wide_trunc, mode, speed));
229 void
230 init_expmed (void)
232 struct init_expmed_rtl all;
233 machine_mode mode = QImode;
234 int m, speed;
236 memset (&all, 0, sizeof all);
237 for (m = 1; m < MAX_BITS_PER_WORD; m++)
239 all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
240 all.cint[m] = GEN_INT (m);
243 /* Avoid using hard regs in ways which may be unsupported. */
244 all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
245 all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
246 all.neg = gen_rtx_NEG (mode, all.reg);
247 all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
248 all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
249 all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
250 all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
251 all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
252 all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
253 all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
254 all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
255 all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
256 all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
257 all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
258 all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
259 all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
260 all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
261 all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
263 for (speed = 0; speed < 2; speed++)
265 crtl->maybe_hot_insn_p = speed;
266 set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
268 for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
269 mode = (machine_mode)(mode + 1))
270 init_expmed_one_mode (&all, mode, speed);
272 if (MIN_MODE_PARTIAL_INT != VOIDmode)
273 for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
274 mode = (machine_mode)(mode + 1))
275 init_expmed_one_mode (&all, mode, speed);
277 if (MIN_MODE_VECTOR_INT != VOIDmode)
278 for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
279 mode = (machine_mode)(mode + 1))
280 init_expmed_one_mode (&all, mode, speed);
283 if (alg_hash_used_p ())
285 struct alg_hash_entry *p = alg_hash_entry_ptr (0);
286 memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
288 else
289 set_alg_hash_used_p (true);
290 default_rtl_profile ();
292 ggc_free (all.trunc);
293 ggc_free (all.shift_sub1);
294 ggc_free (all.shift_sub0);
295 ggc_free (all.shift_add);
296 ggc_free (all.shift_mult);
297 ggc_free (all.shift);
298 ggc_free (all.wide_trunc);
299 ggc_free (all.wide_lshr);
300 ggc_free (all.wide_mult);
301 ggc_free (all.zext);
302 ggc_free (all.smod_32);
303 ggc_free (all.sdiv_32);
304 ggc_free (all.udiv);
305 ggc_free (all.sdiv);
306 ggc_free (all.mult);
307 ggc_free (all.neg);
308 ggc_free (all.plus);
309 ggc_free (all.reg);
312 /* Return an rtx representing minus the value of X.
313 MODE is the intended mode of the result,
314 useful if X is a CONST_INT. */
317 negate_rtx (machine_mode mode, rtx x)
319 rtx result = simplify_unary_operation (NEG, mode, x, mode);
321 if (result == 0)
322 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
324 return result;
327 /* Whether reverse storage order is supported on the target. */
328 static int reverse_storage_order_supported = -1;
330 /* Check whether reverse storage order is supported on the target. */
332 static void
333 check_reverse_storage_order_support (void)
335 if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
337 reverse_storage_order_supported = 0;
338 sorry ("reverse scalar storage order");
340 else
341 reverse_storage_order_supported = 1;
344 /* Whether reverse FP storage order is supported on the target. */
345 static int reverse_float_storage_order_supported = -1;
347 /* Check whether reverse FP storage order is supported on the target. */
349 static void
350 check_reverse_float_storage_order_support (void)
352 if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
354 reverse_float_storage_order_supported = 0;
355 sorry ("reverse floating-point scalar storage order");
357 else
358 reverse_float_storage_order_supported = 1;
361 /* Return an rtx representing value of X with reverse storage order.
362 MODE is the intended mode of the result,
363 useful if X is a CONST_INT. */
366 flip_storage_order (enum machine_mode mode, rtx x)
368 enum machine_mode int_mode;
369 rtx result;
371 if (mode == QImode)
372 return x;
374 if (COMPLEX_MODE_P (mode))
376 rtx real = read_complex_part (x, false);
377 rtx imag = read_complex_part (x, true);
379 real = flip_storage_order (GET_MODE_INNER (mode), real);
380 imag = flip_storage_order (GET_MODE_INNER (mode), imag);
382 return gen_rtx_CONCAT (mode, real, imag);
385 if (__builtin_expect (reverse_storage_order_supported < 0, 0))
386 check_reverse_storage_order_support ();
388 if (SCALAR_INT_MODE_P (mode))
389 int_mode = mode;
390 else
392 if (FLOAT_MODE_P (mode)
393 && __builtin_expect (reverse_float_storage_order_supported < 0, 0))
394 check_reverse_float_storage_order_support ();
396 int_mode = mode_for_size (GET_MODE_PRECISION (mode), MODE_INT, 0);
397 if (int_mode == BLKmode)
399 sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
400 return x;
402 x = gen_lowpart (int_mode, x);
405 result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
406 if (result == 0)
407 result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
409 if (int_mode != mode)
410 result = gen_lowpart (mode, result);
412 return result;
415 /* Adjust bitfield memory MEM so that it points to the first unit of mode
416 MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
417 If MODE is BLKmode, return a reference to every byte in the bitfield.
418 Set *NEW_BITNUM to the bit position of the field within the new memory. */
420 static rtx
421 narrow_bit_field_mem (rtx mem, machine_mode mode,
422 unsigned HOST_WIDE_INT bitsize,
423 unsigned HOST_WIDE_INT bitnum,
424 unsigned HOST_WIDE_INT *new_bitnum)
426 if (mode == BLKmode)
428 *new_bitnum = bitnum % BITS_PER_UNIT;
429 HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
430 HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
431 / BITS_PER_UNIT);
432 return adjust_bitfield_address_size (mem, mode, offset, size);
434 else
436 unsigned int unit = GET_MODE_BITSIZE (mode);
437 *new_bitnum = bitnum % unit;
438 HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
439 return adjust_bitfield_address (mem, mode, offset);
443 /* The caller wants to perform insertion or extraction PATTERN on a
444 bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
445 BITREGION_START and BITREGION_END are as for store_bit_field
446 and FIELDMODE is the natural mode of the field.
448 Search for a mode that is compatible with the memory access
449 restrictions and (where applicable) with a register insertion or
450 extraction. Return the new memory on success, storing the adjusted
451 bit position in *NEW_BITNUM. Return null otherwise. */
453 static rtx
454 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
455 rtx op0, HOST_WIDE_INT bitsize,
456 HOST_WIDE_INT bitnum,
457 unsigned HOST_WIDE_INT bitregion_start,
458 unsigned HOST_WIDE_INT bitregion_end,
459 machine_mode fieldmode,
460 unsigned HOST_WIDE_INT *new_bitnum)
462 bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
463 bitregion_end, MEM_ALIGN (op0),
464 MEM_VOLATILE_P (op0));
465 machine_mode best_mode;
466 if (iter.next_mode (&best_mode))
468 /* We can use a memory in BEST_MODE. See whether this is true for
469 any wider modes. All other things being equal, we prefer to
470 use the widest mode possible because it tends to expose more
471 CSE opportunities. */
472 if (!iter.prefer_smaller_modes ())
474 /* Limit the search to the mode required by the corresponding
475 register insertion or extraction instruction, if any. */
476 machine_mode limit_mode = word_mode;
477 extraction_insn insn;
478 if (get_best_reg_extraction_insn (&insn, pattern,
479 GET_MODE_BITSIZE (best_mode),
480 fieldmode))
481 limit_mode = insn.field_mode;
483 machine_mode wider_mode;
484 while (iter.next_mode (&wider_mode)
485 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
486 best_mode = wider_mode;
488 return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
489 new_bitnum);
491 return NULL_RTX;
494 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
495 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
496 offset is then BITNUM / BITS_PER_UNIT. */
498 static bool
499 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
500 unsigned HOST_WIDE_INT bitsize,
501 machine_mode struct_mode)
503 if (BYTES_BIG_ENDIAN)
504 return (bitnum % BITS_PER_UNIT == 0
505 && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
506 || (bitnum + bitsize) % BITS_PER_WORD == 0));
507 else
508 return bitnum % BITS_PER_WORD == 0;
511 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
512 containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
513 Return false if the access would touch memory outside the range
514 BITREGION_START to BITREGION_END for conformance to the C++ memory
515 model. */
517 static bool
518 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
519 unsigned HOST_WIDE_INT bitnum,
520 machine_mode fieldmode,
521 unsigned HOST_WIDE_INT bitregion_start,
522 unsigned HOST_WIDE_INT bitregion_end)
524 unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
526 /* -fstrict-volatile-bitfields must be enabled and we must have a
527 volatile MEM. */
528 if (!MEM_P (op0)
529 || !MEM_VOLATILE_P (op0)
530 || flag_strict_volatile_bitfields <= 0)
531 return false;
533 /* Non-integral modes likely only happen with packed structures.
534 Punt. */
535 if (!SCALAR_INT_MODE_P (fieldmode))
536 return false;
538 /* The bit size must not be larger than the field mode, and
539 the field mode must not be larger than a word. */
540 if (bitsize > modesize || modesize > BITS_PER_WORD)
541 return false;
543 /* Check for cases of unaligned fields that must be split. */
544 if (bitnum % modesize + bitsize > modesize)
545 return false;
547 /* The memory must be sufficiently aligned for a MODESIZE access.
548 This condition guarantees, that the memory access will not
549 touch anything after the end of the structure. */
550 if (MEM_ALIGN (op0) < modesize)
551 return false;
553 /* Check for cases where the C++ memory model applies. */
554 if (bitregion_end != 0
555 && (bitnum - bitnum % modesize < bitregion_start
556 || bitnum - bitnum % modesize + modesize - 1 > bitregion_end))
557 return false;
559 return true;
562 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
563 bit number BITNUM can be treated as a simple value of mode MODE. */
565 static bool
566 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
567 unsigned HOST_WIDE_INT bitnum, machine_mode mode)
569 return (MEM_P (op0)
570 && bitnum % BITS_PER_UNIT == 0
571 && bitsize == GET_MODE_BITSIZE (mode)
572 && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
573 || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
574 && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
577 /* Try to use instruction INSV to store VALUE into a field of OP0.
578 BITSIZE and BITNUM are as for store_bit_field. */
580 static bool
581 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
582 unsigned HOST_WIDE_INT bitsize,
583 unsigned HOST_WIDE_INT bitnum,
584 rtx value)
586 struct expand_operand ops[4];
587 rtx value1;
588 rtx xop0 = op0;
589 rtx_insn *last = get_last_insn ();
590 bool copy_back = false;
592 machine_mode op_mode = insv->field_mode;
593 unsigned int unit = GET_MODE_BITSIZE (op_mode);
594 if (bitsize == 0 || bitsize > unit)
595 return false;
597 if (MEM_P (xop0))
598 /* Get a reference to the first byte of the field. */
599 xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
600 &bitnum);
601 else
603 /* Convert from counting within OP0 to counting in OP_MODE. */
604 if (BYTES_BIG_ENDIAN)
605 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
607 /* If xop0 is a register, we need it in OP_MODE
608 to make it acceptable to the format of insv. */
609 if (GET_CODE (xop0) == SUBREG)
610 /* We can't just change the mode, because this might clobber op0,
611 and we will need the original value of op0 if insv fails. */
612 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
613 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
614 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
617 /* If the destination is a paradoxical subreg such that we need a
618 truncate to the inner mode, perform the insertion on a temporary and
619 truncate the result to the original destination. Note that we can't
620 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
621 X) 0)) is (reg:N X). */
622 if (GET_CODE (xop0) == SUBREG
623 && REG_P (SUBREG_REG (xop0))
624 && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
625 op_mode))
627 rtx tem = gen_reg_rtx (op_mode);
628 emit_move_insn (tem, xop0);
629 xop0 = tem;
630 copy_back = true;
633 /* There are similar overflow check at the start of store_bit_field_1,
634 but that only check the situation where the field lies completely
635 outside the register, while there do have situation where the field
636 lies partialy in the register, we need to adjust bitsize for this
637 partial overflow situation. Without this fix, pr48335-2.c on big-endian
638 will broken on those arch support bit insert instruction, like arm, aarch64
639 etc. */
640 if (bitsize + bitnum > unit && bitnum < unit)
642 warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
643 "destination object, data truncated into %wu-bit",
644 bitsize, unit - bitnum);
645 bitsize = unit - bitnum;
648 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
649 "backwards" from the size of the unit we are inserting into.
650 Otherwise, we count bits from the most significant on a
651 BYTES/BITS_BIG_ENDIAN machine. */
653 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
654 bitnum = unit - bitsize - bitnum;
656 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
657 value1 = value;
658 if (GET_MODE (value) != op_mode)
660 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
662 rtx tmp;
663 /* Optimization: Don't bother really extending VALUE
664 if it has all the bits we will actually use. However,
665 if we must narrow it, be sure we do it correctly. */
667 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
669 tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
670 if (! tmp)
671 tmp = simplify_gen_subreg (op_mode,
672 force_reg (GET_MODE (value),
673 value1),
674 GET_MODE (value), 0);
676 else
678 tmp = gen_lowpart_if_possible (op_mode, value1);
679 if (! tmp)
680 tmp = gen_lowpart (op_mode, force_reg (GET_MODE (value),
681 value1));
683 value1 = tmp;
685 else if (CONST_INT_P (value))
686 value1 = gen_int_mode (INTVAL (value), op_mode);
687 else
688 /* Parse phase is supposed to make VALUE's data type
689 match that of the component reference, which is a type
690 at least as wide as the field; so VALUE should have
691 a mode that corresponds to that type. */
692 gcc_assert (CONSTANT_P (value));
695 create_fixed_operand (&ops[0], xop0);
696 create_integer_operand (&ops[1], bitsize);
697 create_integer_operand (&ops[2], bitnum);
698 create_input_operand (&ops[3], value1, op_mode);
699 if (maybe_expand_insn (insv->icode, 4, ops))
701 if (copy_back)
702 convert_move (op0, xop0, true);
703 return true;
705 delete_insns_since (last);
706 return false;
709 /* A subroutine of store_bit_field, with the same arguments. Return true
710 if the operation could be implemented.
712 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
713 no other way of implementing the operation. If FALLBACK_P is false,
714 return false instead. */
716 static bool
717 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
718 unsigned HOST_WIDE_INT bitnum,
719 unsigned HOST_WIDE_INT bitregion_start,
720 unsigned HOST_WIDE_INT bitregion_end,
721 machine_mode fieldmode,
722 rtx value, bool reverse, bool fallback_p)
724 rtx op0 = str_rtx;
725 rtx orig_value;
727 while (GET_CODE (op0) == SUBREG)
729 /* The following line once was done only if WORDS_BIG_ENDIAN,
730 but I think that is a mistake. WORDS_BIG_ENDIAN is
731 meaningful at a much higher level; when structures are copied
732 between memory and regs, the higher-numbered regs
733 always get higher addresses. */
734 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
735 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
736 int byte_offset = 0;
738 /* Paradoxical subregs need special handling on big-endian machines. */
739 if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
741 int difference = inner_mode_size - outer_mode_size;
743 if (WORDS_BIG_ENDIAN)
744 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
745 if (BYTES_BIG_ENDIAN)
746 byte_offset += difference % UNITS_PER_WORD;
748 else
749 byte_offset = SUBREG_BYTE (op0);
751 bitnum += byte_offset * BITS_PER_UNIT;
752 op0 = SUBREG_REG (op0);
755 /* No action is needed if the target is a register and if the field
756 lies completely outside that register. This can occur if the source
757 code contains an out-of-bounds access to a small array. */
758 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
759 return true;
761 /* Use vec_set patterns for inserting parts of vectors whenever
762 available. */
763 if (VECTOR_MODE_P (GET_MODE (op0))
764 && !MEM_P (op0)
765 && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
766 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
767 && bitsize == GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
768 && !(bitnum % GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
770 struct expand_operand ops[3];
771 machine_mode outermode = GET_MODE (op0);
772 machine_mode innermode = GET_MODE_INNER (outermode);
773 enum insn_code icode = optab_handler (vec_set_optab, outermode);
774 int pos = bitnum / GET_MODE_BITSIZE (innermode);
776 create_fixed_operand (&ops[0], op0);
777 create_input_operand (&ops[1], value, innermode);
778 create_integer_operand (&ops[2], pos);
779 if (maybe_expand_insn (icode, 3, ops))
780 return true;
783 /* If the target is a register, overwriting the entire object, or storing
784 a full-word or multi-word field can be done with just a SUBREG. */
785 if (!MEM_P (op0)
786 && bitsize == GET_MODE_BITSIZE (fieldmode)
787 && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
788 || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
790 /* Use the subreg machinery either to narrow OP0 to the required
791 words or to cope with mode punning between equal-sized modes.
792 In the latter case, use subreg on the rhs side, not lhs. */
793 rtx sub;
795 if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
797 sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
798 if (sub)
800 if (reverse)
801 sub = flip_storage_order (GET_MODE (op0), sub);
802 emit_move_insn (op0, sub);
803 return true;
806 else
808 sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
809 bitnum / BITS_PER_UNIT);
810 if (sub)
812 if (reverse)
813 value = flip_storage_order (fieldmode, value);
814 emit_move_insn (sub, value);
815 return true;
820 /* If the target is memory, storing any naturally aligned field can be
821 done with a simple store. For targets that support fast unaligned
822 memory, any naturally sized, unit aligned field can be done directly. */
823 if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
825 op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
826 if (reverse)
827 value = flip_storage_order (fieldmode, value);
828 emit_move_insn (op0, value);
829 return true;
832 /* Make sure we are playing with integral modes. Pun with subregs
833 if we aren't. This must come after the entire register case above,
834 since that case is valid for any mode. The following cases are only
835 valid for integral modes. */
837 machine_mode imode = int_mode_for_mode (GET_MODE (op0));
838 if (imode != GET_MODE (op0))
840 if (MEM_P (op0))
841 op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
842 else
844 gcc_assert (imode != BLKmode);
845 op0 = gen_lowpart (imode, op0);
850 /* Storing an lsb-aligned field in a register
851 can be done with a movstrict instruction. */
853 if (!MEM_P (op0)
854 && !reverse
855 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
856 && bitsize == GET_MODE_BITSIZE (fieldmode)
857 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
859 struct expand_operand ops[2];
860 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
861 rtx arg0 = op0;
862 unsigned HOST_WIDE_INT subreg_off;
864 if (GET_CODE (arg0) == SUBREG)
866 /* Else we've got some float mode source being extracted into
867 a different float mode destination -- this combination of
868 subregs results in Severe Tire Damage. */
869 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
870 || GET_MODE_CLASS (fieldmode) == MODE_INT
871 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
872 arg0 = SUBREG_REG (arg0);
875 subreg_off = bitnum / BITS_PER_UNIT;
876 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
878 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
880 create_fixed_operand (&ops[0], arg0);
881 /* Shrink the source operand to FIELDMODE. */
882 create_convert_operand_to (&ops[1], value, fieldmode, false);
883 if (maybe_expand_insn (icode, 2, ops))
884 return true;
888 /* Handle fields bigger than a word. */
890 if (bitsize > BITS_PER_WORD)
892 /* Here we transfer the words of the field
893 in the order least significant first.
894 This is because the most significant word is the one which may
895 be less than full.
896 However, only do that if the value is not BLKmode. */
898 const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
899 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
900 unsigned int i;
901 rtx_insn *last;
903 /* This is the mode we must force value to, so that there will be enough
904 subwords to extract. Note that fieldmode will often (always?) be
905 VOIDmode, because that is what store_field uses to indicate that this
906 is a bit field, but passing VOIDmode to operand_subword_force
907 is not allowed. */
908 fieldmode = GET_MODE (value);
909 if (fieldmode == VOIDmode)
910 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
912 last = get_last_insn ();
913 for (i = 0; i < nwords; i++)
915 /* If I is 0, use the low-order word in both field and target;
916 if I is 1, use the next to lowest word; and so on. */
917 unsigned int wordnum = (backwards
918 ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
919 - i - 1
920 : i);
921 unsigned int bit_offset = (backwards ^ reverse
922 ? MAX ((int) bitsize - ((int) i + 1)
923 * BITS_PER_WORD,
925 : (int) i * BITS_PER_WORD);
926 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
927 unsigned HOST_WIDE_INT new_bitsize =
928 MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
930 /* If the remaining chunk doesn't have full wordsize we have
931 to make sure that for big-endian machines the higher order
932 bits are used. */
933 if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
934 value_word = simplify_expand_binop (word_mode, lshr_optab,
935 value_word,
936 GEN_INT (BITS_PER_WORD
937 - new_bitsize),
938 NULL_RTX, true,
939 OPTAB_LIB_WIDEN);
941 if (!store_bit_field_1 (op0, new_bitsize,
942 bitnum + bit_offset,
943 bitregion_start, bitregion_end,
944 word_mode,
945 value_word, reverse, fallback_p))
947 delete_insns_since (last);
948 return false;
951 return true;
954 /* If VALUE has a floating-point or complex mode, access it as an
955 integer of the corresponding size. This can occur on a machine
956 with 64 bit registers that uses SFmode for float. It can also
957 occur for unaligned float or complex fields. */
958 orig_value = value;
959 if (GET_MODE (value) != VOIDmode
960 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
961 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
963 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
964 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
967 /* If OP0 is a multi-word register, narrow it to the affected word.
968 If the region spans two words, defer to store_split_bit_field. */
969 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
971 if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
973 if (!fallback_p)
974 return false;
976 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
977 bitregion_end, value, reverse);
978 return true;
980 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
981 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
982 gcc_assert (op0);
983 bitnum %= BITS_PER_WORD;
986 /* From here on we can assume that the field to be stored in fits
987 within a word. If the destination is a register, it too fits
988 in a word. */
990 extraction_insn insv;
991 if (!MEM_P (op0)
992 && !reverse
993 && get_best_reg_extraction_insn (&insv, EP_insv,
994 GET_MODE_BITSIZE (GET_MODE (op0)),
995 fieldmode)
996 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
997 return true;
999 /* If OP0 is a memory, try copying it to a register and seeing if a
1000 cheap register alternative is available. */
1001 if (MEM_P (op0) && !reverse)
1003 if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
1004 fieldmode)
1005 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
1006 return true;
1008 rtx_insn *last = get_last_insn ();
1010 /* Try loading part of OP0 into a register, inserting the bitfield
1011 into that, and then copying the result back to OP0. */
1012 unsigned HOST_WIDE_INT bitpos;
1013 rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1014 bitregion_start, bitregion_end,
1015 fieldmode, &bitpos);
1016 if (xop0)
1018 rtx tempreg = copy_to_reg (xop0);
1019 if (store_bit_field_1 (tempreg, bitsize, bitpos,
1020 bitregion_start, bitregion_end,
1021 fieldmode, orig_value, reverse, false))
1023 emit_move_insn (xop0, tempreg);
1024 return true;
1026 delete_insns_since (last);
1030 if (!fallback_p)
1031 return false;
1033 store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
1034 bitregion_end, value, reverse);
1035 return true;
1038 /* Generate code to store value from rtx VALUE
1039 into a bit-field within structure STR_RTX
1040 containing BITSIZE bits starting at bit BITNUM.
1042 BITREGION_START is bitpos of the first bitfield in this region.
1043 BITREGION_END is the bitpos of the ending bitfield in this region.
1044 These two fields are 0, if the C++ memory model does not apply,
1045 or we are not interested in keeping track of bitfield regions.
1047 FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1049 If REVERSE is true, the store is to be done in reverse order. */
1051 void
1052 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1053 unsigned HOST_WIDE_INT bitnum,
1054 unsigned HOST_WIDE_INT bitregion_start,
1055 unsigned HOST_WIDE_INT bitregion_end,
1056 machine_mode fieldmode,
1057 rtx value, bool reverse)
1059 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1060 if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode,
1061 bitregion_start, bitregion_end))
1063 /* Storing of a full word can be done with a simple store.
1064 We know here that the field can be accessed with one single
1065 instruction. For targets that support unaligned memory,
1066 an unaligned access may be necessary. */
1067 if (bitsize == GET_MODE_BITSIZE (fieldmode))
1069 str_rtx = adjust_bitfield_address (str_rtx, fieldmode,
1070 bitnum / BITS_PER_UNIT);
1071 if (reverse)
1072 value = flip_storage_order (fieldmode, value);
1073 gcc_assert (bitnum % BITS_PER_UNIT == 0);
1074 emit_move_insn (str_rtx, value);
1076 else
1078 rtx temp;
1080 str_rtx = narrow_bit_field_mem (str_rtx, fieldmode, bitsize, bitnum,
1081 &bitnum);
1082 gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (fieldmode));
1083 temp = copy_to_reg (str_rtx);
1084 if (!store_bit_field_1 (temp, bitsize, bitnum, 0, 0,
1085 fieldmode, value, reverse, true))
1086 gcc_unreachable ();
1088 emit_move_insn (str_rtx, temp);
1091 return;
1094 /* Under the C++0x memory model, we must not touch bits outside the
1095 bit region. Adjust the address to start at the beginning of the
1096 bit region. */
1097 if (MEM_P (str_rtx) && bitregion_start > 0)
1099 machine_mode bestmode;
1100 HOST_WIDE_INT offset, size;
1102 gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
1104 offset = bitregion_start / BITS_PER_UNIT;
1105 bitnum -= bitregion_start;
1106 size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
1107 bitregion_end -= bitregion_start;
1108 bitregion_start = 0;
1109 bestmode = get_best_mode (bitsize, bitnum,
1110 bitregion_start, bitregion_end,
1111 MEM_ALIGN (str_rtx), VOIDmode,
1112 MEM_VOLATILE_P (str_rtx));
1113 str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
1116 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1117 bitregion_start, bitregion_end,
1118 fieldmode, value, reverse, true))
1119 gcc_unreachable ();
1122 /* Use shifts and boolean operations to store VALUE into a bit field of
1123 width BITSIZE in OP0, starting at bit BITNUM.
1125 If REVERSE is true, the store is to be done in reverse order. */
1127 static void
1128 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1129 unsigned HOST_WIDE_INT bitnum,
1130 unsigned HOST_WIDE_INT bitregion_start,
1131 unsigned HOST_WIDE_INT bitregion_end,
1132 rtx value, bool reverse)
1134 /* There is a case not handled here:
1135 a structure with a known alignment of just a halfword
1136 and a field split across two aligned halfwords within the structure.
1137 Or likewise a structure with a known alignment of just a byte
1138 and a field split across two bytes.
1139 Such cases are not supposed to be able to occur. */
1141 if (MEM_P (op0))
1143 machine_mode mode = GET_MODE (op0);
1144 if (GET_MODE_BITSIZE (mode) == 0
1145 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
1146 mode = word_mode;
1147 mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1148 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
1150 if (mode == VOIDmode)
1152 /* The only way this should occur is if the field spans word
1153 boundaries. */
1154 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
1155 bitregion_end, value, reverse);
1156 return;
1159 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1162 store_fixed_bit_field_1 (op0, bitsize, bitnum, value, reverse);
1165 /* Helper function for store_fixed_bit_field, stores
1166 the bit field always using the MODE of OP0. */
1168 static void
1169 store_fixed_bit_field_1 (rtx op0, unsigned HOST_WIDE_INT bitsize,
1170 unsigned HOST_WIDE_INT bitnum,
1171 rtx value, bool reverse)
1173 machine_mode mode;
1174 rtx temp;
1175 int all_zero = 0;
1176 int all_one = 0;
1178 mode = GET_MODE (op0);
1179 gcc_assert (SCALAR_INT_MODE_P (mode));
1181 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1182 for invalid input, such as f5 from gcc.dg/pr48335-2.c. */
1184 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1185 /* BITNUM is the distance between our msb
1186 and that of the containing datum.
1187 Convert it to the distance from the lsb. */
1188 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1190 /* Now BITNUM is always the distance between our lsb
1191 and that of OP0. */
1193 /* Shift VALUE left by BITNUM bits. If VALUE is not constant,
1194 we must first convert its mode to MODE. */
1196 if (CONST_INT_P (value))
1198 unsigned HOST_WIDE_INT v = UINTVAL (value);
1200 if (bitsize < HOST_BITS_PER_WIDE_INT)
1201 v &= (HOST_WIDE_INT_1U << bitsize) - 1;
1203 if (v == 0)
1204 all_zero = 1;
1205 else if ((bitsize < HOST_BITS_PER_WIDE_INT
1206 && v == (HOST_WIDE_INT_1U << bitsize) - 1)
1207 || (bitsize == HOST_BITS_PER_WIDE_INT
1208 && v == HOST_WIDE_INT_M1U))
1209 all_one = 1;
1211 value = lshift_value (mode, v, bitnum);
1213 else
1215 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1216 && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1218 if (GET_MODE (value) != mode)
1219 value = convert_to_mode (mode, value, 1);
1221 if (must_and)
1222 value = expand_binop (mode, and_optab, value,
1223 mask_rtx (mode, 0, bitsize, 0),
1224 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1225 if (bitnum > 0)
1226 value = expand_shift (LSHIFT_EXPR, mode, value,
1227 bitnum, NULL_RTX, 1);
1230 if (reverse)
1231 value = flip_storage_order (mode, value);
1233 /* Now clear the chosen bits in OP0,
1234 except that if VALUE is -1 we need not bother. */
1235 /* We keep the intermediates in registers to allow CSE to combine
1236 consecutive bitfield assignments. */
1238 temp = force_reg (mode, op0);
1240 if (! all_one)
1242 rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
1243 if (reverse)
1244 mask = flip_storage_order (mode, mask);
1245 temp = expand_binop (mode, and_optab, temp, mask,
1246 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1247 temp = force_reg (mode, temp);
1250 /* Now logical-or VALUE into OP0, unless it is zero. */
1252 if (! all_zero)
1254 temp = expand_binop (mode, ior_optab, temp, value,
1255 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1256 temp = force_reg (mode, temp);
1259 if (op0 != temp)
1261 op0 = copy_rtx (op0);
1262 emit_move_insn (op0, temp);
1266 /* Store a bit field that is split across multiple accessible memory objects.
1268 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1269 BITSIZE is the field width; BITPOS the position of its first bit
1270 (within the word).
1271 VALUE is the value to store.
1273 If REVERSE is true, the store is to be done in reverse order.
1275 This does not yet handle fields wider than BITS_PER_WORD. */
1277 static void
1278 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1279 unsigned HOST_WIDE_INT bitpos,
1280 unsigned HOST_WIDE_INT bitregion_start,
1281 unsigned HOST_WIDE_INT bitregion_end,
1282 rtx value, bool reverse)
1284 unsigned int unit, total_bits, bitsdone = 0;
1286 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1287 much at a time. */
1288 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1289 unit = BITS_PER_WORD;
1290 else
1291 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1293 /* If OP0 is a memory with a mode, then UNIT must not be larger than
1294 OP0's mode as well. Otherwise, store_fixed_bit_field will call us
1295 again, and we will mutually recurse forever. */
1296 if (MEM_P (op0) && GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1297 unit = MIN (unit, GET_MODE_BITSIZE (GET_MODE (op0)));
1299 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1300 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1301 that VALUE might be a floating-point constant. */
1302 if (CONSTANT_P (value) && !CONST_INT_P (value))
1304 rtx word = gen_lowpart_common (word_mode, value);
1306 if (word && (value != word))
1307 value = word;
1308 else
1309 value = gen_lowpart_common (word_mode,
1310 force_reg (GET_MODE (value) != VOIDmode
1311 ? GET_MODE (value)
1312 : word_mode, value));
1315 total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1317 while (bitsdone < bitsize)
1319 unsigned HOST_WIDE_INT thissize;
1320 unsigned HOST_WIDE_INT thispos;
1321 unsigned HOST_WIDE_INT offset;
1322 rtx part, word;
1324 offset = (bitpos + bitsdone) / unit;
1325 thispos = (bitpos + bitsdone) % unit;
1327 /* When region of bytes we can touch is restricted, decrease
1328 UNIT close to the end of the region as needed. If op0 is a REG
1329 or SUBREG of REG, don't do this, as there can't be data races
1330 on a register and we can expand shorter code in some cases. */
1331 if (bitregion_end
1332 && unit > BITS_PER_UNIT
1333 && bitpos + bitsdone - thispos + unit > bitregion_end + 1
1334 && !REG_P (op0)
1335 && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1337 unit = unit / 2;
1338 continue;
1341 /* THISSIZE must not overrun a word boundary. Otherwise,
1342 store_fixed_bit_field will call us again, and we will mutually
1343 recurse forever. */
1344 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1345 thissize = MIN (thissize, unit - thispos);
1347 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1349 /* Fetch successively less significant portions. */
1350 if (CONST_INT_P (value))
1351 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1352 >> (bitsize - bitsdone - thissize))
1353 & ((HOST_WIDE_INT_1 << thissize) - 1));
1354 /* Likewise, but the source is little-endian. */
1355 else if (reverse)
1356 part = extract_fixed_bit_field (word_mode, value, thissize,
1357 bitsize - bitsdone - thissize,
1358 NULL_RTX, 1, false);
1359 else
1361 int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1362 /* The args are chosen so that the last part includes the
1363 lsb. Give extract_bit_field the value it needs (with
1364 endianness compensation) to fetch the piece we want. */
1365 part = extract_fixed_bit_field (word_mode, value, thissize,
1366 total_bits - bitsize + bitsdone,
1367 NULL_RTX, 1, false);
1370 else
1372 /* Fetch successively more significant portions. */
1373 if (CONST_INT_P (value))
1374 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1375 >> bitsdone)
1376 & ((HOST_WIDE_INT_1 << thissize) - 1));
1377 /* Likewise, but the source is big-endian. */
1378 else if (reverse)
1379 part = extract_fixed_bit_field (word_mode, value, thissize,
1380 total_bits - bitsdone - thissize,
1381 NULL_RTX, 1, false);
1382 else
1383 part = extract_fixed_bit_field (word_mode, value, thissize,
1384 bitsdone, NULL_RTX, 1, false);
1387 /* If OP0 is a register, then handle OFFSET here. */
1388 if (SUBREG_P (op0) || REG_P (op0))
1390 machine_mode op0_mode = GET_MODE (op0);
1391 if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1392 word = offset ? const0_rtx : op0;
1393 else
1394 word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
1395 GET_MODE (op0));
1396 offset &= BITS_PER_WORD / unit - 1;
1398 else
1399 word = op0;
1401 /* OFFSET is in UNITs, and UNIT is in bits. If WORD is const0_rtx,
1402 it is just an out-of-bounds access. Ignore it. */
1403 if (word != const0_rtx)
1404 store_fixed_bit_field (word, thissize, offset * unit + thispos,
1405 bitregion_start, bitregion_end, part,
1406 reverse);
1407 bitsdone += thissize;
1411 /* A subroutine of extract_bit_field_1 that converts return value X
1412 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1413 to extract_bit_field. */
1415 static rtx
1416 convert_extracted_bit_field (rtx x, machine_mode mode,
1417 machine_mode tmode, bool unsignedp)
1419 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1420 return x;
1422 /* If the x mode is not a scalar integral, first convert to the
1423 integer mode of that size and then access it as a floating-point
1424 value via a SUBREG. */
1425 if (!SCALAR_INT_MODE_P (tmode))
1427 machine_mode smode;
1429 smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1430 x = convert_to_mode (smode, x, unsignedp);
1431 x = force_reg (smode, x);
1432 return gen_lowpart (tmode, x);
1435 return convert_to_mode (tmode, x, unsignedp);
1438 /* Try to use an ext(z)v pattern to extract a field from OP0.
1439 Return the extracted value on success, otherwise return null.
1440 EXT_MODE is the mode of the extraction and the other arguments
1441 are as for extract_bit_field. */
1443 static rtx
1444 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1445 unsigned HOST_WIDE_INT bitsize,
1446 unsigned HOST_WIDE_INT bitnum,
1447 int unsignedp, rtx target,
1448 machine_mode mode, machine_mode tmode)
1450 struct expand_operand ops[4];
1451 rtx spec_target = target;
1452 rtx spec_target_subreg = 0;
1453 machine_mode ext_mode = extv->field_mode;
1454 unsigned unit = GET_MODE_BITSIZE (ext_mode);
1456 if (bitsize == 0 || unit < bitsize)
1457 return NULL_RTX;
1459 if (MEM_P (op0))
1460 /* Get a reference to the first byte of the field. */
1461 op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1462 &bitnum);
1463 else
1465 /* Convert from counting within OP0 to counting in EXT_MODE. */
1466 if (BYTES_BIG_ENDIAN)
1467 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1469 /* If op0 is a register, we need it in EXT_MODE to make it
1470 acceptable to the format of ext(z)v. */
1471 if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1472 return NULL_RTX;
1473 if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1474 op0 = gen_lowpart_SUBREG (ext_mode, op0);
1477 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1478 "backwards" from the size of the unit we are extracting from.
1479 Otherwise, we count bits from the most significant on a
1480 BYTES/BITS_BIG_ENDIAN machine. */
1482 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1483 bitnum = unit - bitsize - bitnum;
1485 if (target == 0)
1486 target = spec_target = gen_reg_rtx (tmode);
1488 if (GET_MODE (target) != ext_mode)
1490 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1491 between the mode of the extraction (word_mode) and the target
1492 mode. Instead, create a temporary and use convert_move to set
1493 the target. */
1494 if (REG_P (target)
1495 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1497 target = gen_lowpart (ext_mode, target);
1498 if (GET_MODE_PRECISION (ext_mode)
1499 > GET_MODE_PRECISION (GET_MODE (spec_target)))
1500 spec_target_subreg = target;
1502 else
1503 target = gen_reg_rtx (ext_mode);
1506 create_output_operand (&ops[0], target, ext_mode);
1507 create_fixed_operand (&ops[1], op0);
1508 create_integer_operand (&ops[2], bitsize);
1509 create_integer_operand (&ops[3], bitnum);
1510 if (maybe_expand_insn (extv->icode, 4, ops))
1512 target = ops[0].value;
1513 if (target == spec_target)
1514 return target;
1515 if (target == spec_target_subreg)
1516 return spec_target;
1517 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1519 return NULL_RTX;
1522 /* A subroutine of extract_bit_field, with the same arguments.
1523 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1524 if we can find no other means of implementing the operation.
1525 if FALLBACK_P is false, return NULL instead. */
1527 static rtx
1528 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1529 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1530 machine_mode mode, machine_mode tmode,
1531 bool reverse, bool fallback_p)
1533 rtx op0 = str_rtx;
1534 machine_mode int_mode;
1535 machine_mode mode1;
1537 if (tmode == VOIDmode)
1538 tmode = mode;
1540 while (GET_CODE (op0) == SUBREG)
1542 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1543 op0 = SUBREG_REG (op0);
1546 /* If we have an out-of-bounds access to a register, just return an
1547 uninitialized register of the required mode. This can occur if the
1548 source code contains an out-of-bounds access to a small array. */
1549 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1550 return gen_reg_rtx (tmode);
1552 if (REG_P (op0)
1553 && mode == GET_MODE (op0)
1554 && bitnum == 0
1555 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1557 if (reverse)
1558 op0 = flip_storage_order (mode, op0);
1559 /* We're trying to extract a full register from itself. */
1560 return op0;
1563 /* See if we can get a better vector mode before extracting. */
1564 if (VECTOR_MODE_P (GET_MODE (op0))
1565 && !MEM_P (op0)
1566 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1568 machine_mode new_mode;
1570 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1571 new_mode = MIN_MODE_VECTOR_FLOAT;
1572 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1573 new_mode = MIN_MODE_VECTOR_FRACT;
1574 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1575 new_mode = MIN_MODE_VECTOR_UFRACT;
1576 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1577 new_mode = MIN_MODE_VECTOR_ACCUM;
1578 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1579 new_mode = MIN_MODE_VECTOR_UACCUM;
1580 else
1581 new_mode = MIN_MODE_VECTOR_INT;
1583 for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1584 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1585 && GET_MODE_UNIT_SIZE (new_mode) == GET_MODE_SIZE (tmode)
1586 && targetm.vector_mode_supported_p (new_mode))
1587 break;
1588 if (new_mode != VOIDmode)
1589 op0 = gen_lowpart (new_mode, op0);
1592 /* Use vec_extract patterns for extracting parts of vectors whenever
1593 available. */
1594 if (VECTOR_MODE_P (GET_MODE (op0))
1595 && !MEM_P (op0)
1596 && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1597 && ((bitnum + bitsize - 1) / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
1598 == bitnum / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
1600 struct expand_operand ops[3];
1601 machine_mode outermode = GET_MODE (op0);
1602 machine_mode innermode = GET_MODE_INNER (outermode);
1603 enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1604 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1606 create_output_operand (&ops[0], target, innermode);
1607 create_input_operand (&ops[1], op0, outermode);
1608 create_integer_operand (&ops[2], pos);
1609 if (maybe_expand_insn (icode, 3, ops))
1611 target = ops[0].value;
1612 if (GET_MODE (target) != mode)
1613 return gen_lowpart (tmode, target);
1614 return target;
1618 /* Make sure we are playing with integral modes. Pun with subregs
1619 if we aren't. */
1621 machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1622 if (imode != GET_MODE (op0))
1624 if (MEM_P (op0))
1625 op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
1626 else if (imode != BLKmode)
1628 op0 = gen_lowpart (imode, op0);
1630 /* If we got a SUBREG, force it into a register since we
1631 aren't going to be able to do another SUBREG on it. */
1632 if (GET_CODE (op0) == SUBREG)
1633 op0 = force_reg (imode, op0);
1635 else
1637 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1638 rtx mem = assign_stack_temp (GET_MODE (op0), size);
1639 emit_move_insn (mem, op0);
1640 op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1645 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1646 If that's wrong, the solution is to test for it and set TARGET to 0
1647 if needed. */
1649 /* Get the mode of the field to use for atomic access or subreg
1650 conversion. */
1651 mode1 = mode;
1652 if (SCALAR_INT_MODE_P (tmode))
1654 machine_mode try_mode = mode_for_size (bitsize,
1655 GET_MODE_CLASS (tmode), 0);
1656 if (try_mode != BLKmode)
1657 mode1 = try_mode;
1659 gcc_assert (mode1 != BLKmode);
1661 /* Extraction of a full MODE1 value can be done with a subreg as long
1662 as the least significant bit of the value is the least significant
1663 bit of either OP0 or a word of OP0. */
1664 if (!MEM_P (op0)
1665 && !reverse
1666 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1667 && bitsize == GET_MODE_BITSIZE (mode1)
1668 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1670 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1671 bitnum / BITS_PER_UNIT);
1672 if (sub)
1673 return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1676 /* Extraction of a full MODE1 value can be done with a load as long as
1677 the field is on a byte boundary and is sufficiently aligned. */
1678 if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1680 op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1681 if (reverse)
1682 op0 = flip_storage_order (mode1, op0);
1683 return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1686 /* Handle fields bigger than a word. */
1688 if (bitsize > BITS_PER_WORD)
1690 /* Here we transfer the words of the field
1691 in the order least significant first.
1692 This is because the most significant word is the one which may
1693 be less than full. */
1695 const bool backwards = WORDS_BIG_ENDIAN;
1696 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1697 unsigned int i;
1698 rtx_insn *last;
1700 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1701 target = gen_reg_rtx (mode);
1703 /* In case we're about to clobber a base register or something
1704 (see gcc.c-torture/execute/20040625-1.c). */
1705 if (reg_mentioned_p (target, str_rtx))
1706 target = gen_reg_rtx (mode);
1708 /* Indicate for flow that the entire target reg is being set. */
1709 emit_clobber (target);
1711 last = get_last_insn ();
1712 for (i = 0; i < nwords; i++)
1714 /* If I is 0, use the low-order word in both field and target;
1715 if I is 1, use the next to lowest word; and so on. */
1716 /* Word number in TARGET to use. */
1717 unsigned int wordnum
1718 = (backwards
1719 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1720 : i);
1721 /* Offset from start of field in OP0. */
1722 unsigned int bit_offset = (backwards ^ reverse
1723 ? MAX ((int) bitsize - ((int) i + 1)
1724 * BITS_PER_WORD,
1726 : (int) i * BITS_PER_WORD);
1727 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1728 rtx result_part
1729 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1730 bitsize - i * BITS_PER_WORD),
1731 bitnum + bit_offset, 1, target_part,
1732 mode, word_mode, reverse, fallback_p);
1734 gcc_assert (target_part);
1735 if (!result_part)
1737 delete_insns_since (last);
1738 return NULL;
1741 if (result_part != target_part)
1742 emit_move_insn (target_part, result_part);
1745 if (unsignedp)
1747 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1748 need to be zero'd out. */
1749 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1751 unsigned int i, total_words;
1753 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1754 for (i = nwords; i < total_words; i++)
1755 emit_move_insn
1756 (operand_subword (target,
1757 backwards ? total_words - i - 1 : i,
1758 1, VOIDmode),
1759 const0_rtx);
1761 return target;
1764 /* Signed bit field: sign-extend with two arithmetic shifts. */
1765 target = expand_shift (LSHIFT_EXPR, mode, target,
1766 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1767 return expand_shift (RSHIFT_EXPR, mode, target,
1768 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1771 /* If OP0 is a multi-word register, narrow it to the affected word.
1772 If the region spans two words, defer to extract_split_bit_field. */
1773 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1775 if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1777 if (!fallback_p)
1778 return NULL_RTX;
1779 target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp,
1780 reverse);
1781 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1783 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1784 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1785 bitnum %= BITS_PER_WORD;
1788 /* From here on we know the desired field is smaller than a word.
1789 If OP0 is a register, it too fits within a word. */
1790 enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1791 extraction_insn extv;
1792 if (!MEM_P (op0)
1793 && !reverse
1794 /* ??? We could limit the structure size to the part of OP0 that
1795 contains the field, with appropriate checks for endianness
1796 and TRULY_NOOP_TRUNCATION. */
1797 && get_best_reg_extraction_insn (&extv, pattern,
1798 GET_MODE_BITSIZE (GET_MODE (op0)),
1799 tmode))
1801 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1802 unsignedp, target, mode,
1803 tmode);
1804 if (result)
1805 return result;
1808 /* If OP0 is a memory, try copying it to a register and seeing if a
1809 cheap register alternative is available. */
1810 if (MEM_P (op0) & !reverse)
1812 if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1813 tmode))
1815 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1816 bitnum, unsignedp,
1817 target, mode,
1818 tmode);
1819 if (result)
1820 return result;
1823 rtx_insn *last = get_last_insn ();
1825 /* Try loading part of OP0 into a register and extracting the
1826 bitfield from that. */
1827 unsigned HOST_WIDE_INT bitpos;
1828 rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1829 0, 0, tmode, &bitpos);
1830 if (xop0)
1832 xop0 = copy_to_reg (xop0);
1833 rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1834 unsignedp, target,
1835 mode, tmode, reverse, false);
1836 if (result)
1837 return result;
1838 delete_insns_since (last);
1842 if (!fallback_p)
1843 return NULL;
1845 /* Find a correspondingly-sized integer field, so we can apply
1846 shifts and masks to it. */
1847 int_mode = int_mode_for_mode (tmode);
1848 if (int_mode == BLKmode)
1849 int_mode = int_mode_for_mode (mode);
1850 /* Should probably push op0 out to memory and then do a load. */
1851 gcc_assert (int_mode != BLKmode);
1853 target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum, target,
1854 unsignedp, reverse);
1856 /* Complex values must be reversed piecewise, so we need to undo the global
1857 reversal, convert to the complex mode and reverse again. */
1858 if (reverse && COMPLEX_MODE_P (tmode))
1860 target = flip_storage_order (int_mode, target);
1861 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1862 target = flip_storage_order (tmode, target);
1864 else
1865 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1867 return target;
1870 /* Generate code to extract a byte-field from STR_RTX
1871 containing BITSIZE bits, starting at BITNUM,
1872 and put it in TARGET if possible (if TARGET is nonzero).
1873 Regardless of TARGET, we return the rtx for where the value is placed.
1875 STR_RTX is the structure containing the byte (a REG or MEM).
1876 UNSIGNEDP is nonzero if this is an unsigned bit field.
1877 MODE is the natural mode of the field value once extracted.
1878 TMODE is the mode the caller would like the value to have;
1879 but the value may be returned with type MODE instead.
1881 If REVERSE is true, the extraction is to be done in reverse order.
1883 If a TARGET is specified and we can store in it at no extra cost,
1884 we do so, and return TARGET.
1885 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1886 if they are equally easy. */
1889 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1890 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1891 machine_mode mode, machine_mode tmode, bool reverse)
1893 machine_mode mode1;
1895 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1896 if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1897 mode1 = GET_MODE (str_rtx);
1898 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1899 mode1 = GET_MODE (target);
1900 else
1901 mode1 = tmode;
1903 if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, mode1, 0, 0))
1905 /* Extraction of a full MODE1 value can be done with a simple load.
1906 We know here that the field can be accessed with one single
1907 instruction. For targets that support unaligned memory,
1908 an unaligned access may be necessary. */
1909 if (bitsize == GET_MODE_BITSIZE (mode1))
1911 rtx result = adjust_bitfield_address (str_rtx, mode1,
1912 bitnum / BITS_PER_UNIT);
1913 if (reverse)
1914 result = flip_storage_order (mode1, result);
1915 gcc_assert (bitnum % BITS_PER_UNIT == 0);
1916 return convert_extracted_bit_field (result, mode, tmode, unsignedp);
1919 str_rtx = narrow_bit_field_mem (str_rtx, mode1, bitsize, bitnum,
1920 &bitnum);
1921 gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (mode1));
1922 str_rtx = copy_to_reg (str_rtx);
1925 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1926 target, mode, tmode, reverse, true);
1929 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1930 from bit BITNUM of OP0.
1932 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1933 If REVERSE is true, the extraction is to be done in reverse order.
1935 If TARGET is nonzero, attempts to store the value there
1936 and return TARGET, but this is not guaranteed.
1937 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
1939 static rtx
1940 extract_fixed_bit_field (machine_mode tmode, rtx op0,
1941 unsigned HOST_WIDE_INT bitsize,
1942 unsigned HOST_WIDE_INT bitnum, rtx target,
1943 int unsignedp, bool reverse)
1945 if (MEM_P (op0))
1947 machine_mode mode
1948 = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), word_mode,
1949 MEM_VOLATILE_P (op0));
1951 if (mode == VOIDmode)
1952 /* The only way this should occur is if the field spans word
1953 boundaries. */
1954 return extract_split_bit_field (op0, bitsize, bitnum, unsignedp,
1955 reverse);
1957 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1960 return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
1961 target, unsignedp, reverse);
1964 /* Helper function for extract_fixed_bit_field, extracts
1965 the bit field always using the MODE of OP0. */
1967 static rtx
1968 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0,
1969 unsigned HOST_WIDE_INT bitsize,
1970 unsigned HOST_WIDE_INT bitnum, rtx target,
1971 int unsignedp, bool reverse)
1973 machine_mode mode = GET_MODE (op0);
1974 gcc_assert (SCALAR_INT_MODE_P (mode));
1976 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1977 for invalid input, such as extract equivalent of f5 from
1978 gcc.dg/pr48335-2.c. */
1980 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1981 /* BITNUM is the distance between our msb and that of OP0.
1982 Convert it to the distance from the lsb. */
1983 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1985 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
1986 We have reduced the big-endian case to the little-endian case. */
1987 if (reverse)
1988 op0 = flip_storage_order (mode, op0);
1990 if (unsignedp)
1992 if (bitnum)
1994 /* If the field does not already start at the lsb,
1995 shift it so it does. */
1996 /* Maybe propagate the target for the shift. */
1997 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1998 if (tmode != mode)
1999 subtarget = 0;
2000 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
2002 /* Convert the value to the desired mode. */
2003 if (mode != tmode)
2004 op0 = convert_to_mode (tmode, op0, 1);
2006 /* Unless the msb of the field used to be the msb when we shifted,
2007 mask out the upper bits. */
2009 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
2010 return expand_binop (GET_MODE (op0), and_optab, op0,
2011 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
2012 target, 1, OPTAB_LIB_WIDEN);
2013 return op0;
2016 /* To extract a signed bit-field, first shift its msb to the msb of the word,
2017 then arithmetic-shift its lsb to the lsb of the word. */
2018 op0 = force_reg (mode, op0);
2020 /* Find the narrowest integer mode that contains the field. */
2022 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
2023 mode = GET_MODE_WIDER_MODE (mode))
2024 if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
2026 op0 = convert_to_mode (mode, op0, 0);
2027 break;
2030 if (mode != tmode)
2031 target = 0;
2033 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2035 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2036 /* Maybe propagate the target for the shift. */
2037 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2038 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2041 return expand_shift (RSHIFT_EXPR, mode, op0,
2042 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
2045 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2046 VALUE << BITPOS. */
2048 static rtx
2049 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2050 int bitpos)
2052 return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2055 /* Extract a bit field that is split across two words
2056 and return an RTX for the result.
2058 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2059 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2060 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2062 If REVERSE is true, the extraction is to be done in reverse order. */
2064 static rtx
2065 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
2066 unsigned HOST_WIDE_INT bitpos, int unsignedp,
2067 bool reverse)
2069 unsigned int unit;
2070 unsigned int bitsdone = 0;
2071 rtx result = NULL_RTX;
2072 int first = 1;
2074 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2075 much at a time. */
2076 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2077 unit = BITS_PER_WORD;
2078 else
2079 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2081 while (bitsdone < bitsize)
2083 unsigned HOST_WIDE_INT thissize;
2084 rtx part, word;
2085 unsigned HOST_WIDE_INT thispos;
2086 unsigned HOST_WIDE_INT offset;
2088 offset = (bitpos + bitsdone) / unit;
2089 thispos = (bitpos + bitsdone) % unit;
2091 /* THISSIZE must not overrun a word boundary. Otherwise,
2092 extract_fixed_bit_field will call us again, and we will mutually
2093 recurse forever. */
2094 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2095 thissize = MIN (thissize, unit - thispos);
2097 /* If OP0 is a register, then handle OFFSET here. */
2098 if (SUBREG_P (op0) || REG_P (op0))
2100 word = operand_subword_force (op0, offset, GET_MODE (op0));
2101 offset = 0;
2103 else
2104 word = op0;
2106 /* Extract the parts in bit-counting order,
2107 whose meaning is determined by BYTES_PER_UNIT.
2108 OFFSET is in UNITs, and UNIT is in bits. */
2109 part = extract_fixed_bit_field (word_mode, word, thissize,
2110 offset * unit + thispos, 0, 1, reverse);
2111 bitsdone += thissize;
2113 /* Shift this part into place for the result. */
2114 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2116 if (bitsize != bitsdone)
2117 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2118 bitsize - bitsdone, 0, 1);
2120 else
2122 if (bitsdone != thissize)
2123 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2124 bitsdone - thissize, 0, 1);
2127 if (first)
2128 result = part;
2129 else
2130 /* Combine the parts with bitwise or. This works
2131 because we extracted each part as an unsigned bit field. */
2132 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2133 OPTAB_LIB_WIDEN);
2135 first = 0;
2138 /* Unsigned bit field: we are done. */
2139 if (unsignedp)
2140 return result;
2141 /* Signed bit field: sign-extend with two arithmetic shifts. */
2142 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2143 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2144 return expand_shift (RSHIFT_EXPR, word_mode, result,
2145 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2148 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2149 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2150 MODE, fill the upper bits with zeros. Fail if the layout of either
2151 mode is unknown (as for CC modes) or if the extraction would involve
2152 unprofitable mode punning. Return the value on success, otherwise
2153 return null.
2155 This is different from gen_lowpart* in these respects:
2157 - the returned value must always be considered an rvalue
2159 - when MODE is wider than SRC_MODE, the extraction involves
2160 a zero extension
2162 - when MODE is smaller than SRC_MODE, the extraction involves
2163 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2165 In other words, this routine performs a computation, whereas the
2166 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2167 operations. */
2170 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2172 machine_mode int_mode, src_int_mode;
2174 if (mode == src_mode)
2175 return src;
2177 if (CONSTANT_P (src))
2179 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2180 fails, it will happily create (subreg (symbol_ref)) or similar
2181 invalid SUBREGs. */
2182 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2183 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2184 if (ret)
2185 return ret;
2187 if (GET_MODE (src) == VOIDmode
2188 || !validate_subreg (mode, src_mode, src, byte))
2189 return NULL_RTX;
2191 src = force_reg (GET_MODE (src), src);
2192 return gen_rtx_SUBREG (mode, src, byte);
2195 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2196 return NULL_RTX;
2198 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2199 && MODES_TIEABLE_P (mode, src_mode))
2201 rtx x = gen_lowpart_common (mode, src);
2202 if (x)
2203 return x;
2206 src_int_mode = int_mode_for_mode (src_mode);
2207 int_mode = int_mode_for_mode (mode);
2208 if (src_int_mode == BLKmode || int_mode == BLKmode)
2209 return NULL_RTX;
2211 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2212 return NULL_RTX;
2213 if (!MODES_TIEABLE_P (int_mode, mode))
2214 return NULL_RTX;
2216 src = gen_lowpart (src_int_mode, src);
2217 src = convert_modes (int_mode, src_int_mode, src, true);
2218 src = gen_lowpart (mode, src);
2219 return src;
2222 /* Add INC into TARGET. */
2224 void
2225 expand_inc (rtx target, rtx inc)
2227 rtx value = expand_binop (GET_MODE (target), add_optab,
2228 target, inc,
2229 target, 0, OPTAB_LIB_WIDEN);
2230 if (value != target)
2231 emit_move_insn (target, value);
2234 /* Subtract DEC from TARGET. */
2236 void
2237 expand_dec (rtx target, rtx dec)
2239 rtx value = expand_binop (GET_MODE (target), sub_optab,
2240 target, dec,
2241 target, 0, OPTAB_LIB_WIDEN);
2242 if (value != target)
2243 emit_move_insn (target, value);
2246 /* Output a shift instruction for expression code CODE,
2247 with SHIFTED being the rtx for the value to shift,
2248 and AMOUNT the rtx for the amount to shift by.
2249 Store the result in the rtx TARGET, if that is convenient.
2250 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2251 Return the rtx for where the value is.
2252 If that cannot be done, abort the compilation unless MAY_FAIL is true,
2253 in which case 0 is returned. */
2255 static rtx
2256 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2257 rtx amount, rtx target, int unsignedp, bool may_fail = false)
2259 rtx op1, temp = 0;
2260 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2261 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2262 optab lshift_optab = ashl_optab;
2263 optab rshift_arith_optab = ashr_optab;
2264 optab rshift_uns_optab = lshr_optab;
2265 optab lrotate_optab = rotl_optab;
2266 optab rrotate_optab = rotr_optab;
2267 machine_mode op1_mode;
2268 machine_mode scalar_mode = mode;
2269 int attempt;
2270 bool speed = optimize_insn_for_speed_p ();
2272 if (VECTOR_MODE_P (mode))
2273 scalar_mode = GET_MODE_INNER (mode);
2274 op1 = amount;
2275 op1_mode = GET_MODE (op1);
2277 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2278 shift amount is a vector, use the vector/vector shift patterns. */
2279 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2281 lshift_optab = vashl_optab;
2282 rshift_arith_optab = vashr_optab;
2283 rshift_uns_optab = vlshr_optab;
2284 lrotate_optab = vrotl_optab;
2285 rrotate_optab = vrotr_optab;
2288 /* Previously detected shift-counts computed by NEGATE_EXPR
2289 and shifted in the other direction; but that does not work
2290 on all machines. */
2292 if (SHIFT_COUNT_TRUNCATED)
2294 if (CONST_INT_P (op1)
2295 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2296 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2297 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2298 % GET_MODE_BITSIZE (scalar_mode));
2299 else if (GET_CODE (op1) == SUBREG
2300 && subreg_lowpart_p (op1)
2301 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2302 && SCALAR_INT_MODE_P (GET_MODE (op1)))
2303 op1 = SUBREG_REG (op1);
2306 /* Canonicalize rotates by constant amount. If op1 is bitsize / 2,
2307 prefer left rotation, if op1 is from bitsize / 2 + 1 to
2308 bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2309 amount instead. */
2310 if (rotate
2311 && CONST_INT_P (op1)
2312 && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2313 GET_MODE_BITSIZE (scalar_mode) - 1))
2315 op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2316 left = !left;
2317 code = left ? LROTATE_EXPR : RROTATE_EXPR;
2320 /* Rotation of 16bit values by 8 bits is effectively equivalent to a bswaphi.
2321 Note that this is not the case for bigger values. For instance a rotation
2322 of 0x01020304 by 16 bits gives 0x03040102 which is different from
2323 0x04030201 (bswapsi). */
2324 if (rotate
2325 && CONST_INT_P (op1)
2326 && INTVAL (op1) == BITS_PER_UNIT
2327 && GET_MODE_SIZE (scalar_mode) == 2
2328 && optab_handler (bswap_optab, HImode) != CODE_FOR_nothing)
2329 return expand_unop (HImode, bswap_optab, shifted, NULL_RTX,
2330 unsignedp);
2332 if (op1 == const0_rtx)
2333 return shifted;
2335 /* Check whether its cheaper to implement a left shift by a constant
2336 bit count by a sequence of additions. */
2337 if (code == LSHIFT_EXPR
2338 && CONST_INT_P (op1)
2339 && INTVAL (op1) > 0
2340 && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2341 && INTVAL (op1) < MAX_BITS_PER_WORD
2342 && (shift_cost (speed, mode, INTVAL (op1))
2343 > INTVAL (op1) * add_cost (speed, mode))
2344 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2346 int i;
2347 for (i = 0; i < INTVAL (op1); i++)
2349 temp = force_reg (mode, shifted);
2350 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2351 unsignedp, OPTAB_LIB_WIDEN);
2353 return shifted;
2356 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2358 enum optab_methods methods;
2360 if (attempt == 0)
2361 methods = OPTAB_DIRECT;
2362 else if (attempt == 1)
2363 methods = OPTAB_WIDEN;
2364 else
2365 methods = OPTAB_LIB_WIDEN;
2367 if (rotate)
2369 /* Widening does not work for rotation. */
2370 if (methods == OPTAB_WIDEN)
2371 continue;
2372 else if (methods == OPTAB_LIB_WIDEN)
2374 /* If we have been unable to open-code this by a rotation,
2375 do it as the IOR of two shifts. I.e., to rotate A
2376 by N bits, compute
2377 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2378 where C is the bitsize of A.
2380 It is theoretically possible that the target machine might
2381 not be able to perform either shift and hence we would
2382 be making two libcalls rather than just the one for the
2383 shift (similarly if IOR could not be done). We will allow
2384 this extremely unlikely lossage to avoid complicating the
2385 code below. */
2387 rtx subtarget = target == shifted ? 0 : target;
2388 rtx new_amount, other_amount;
2389 rtx temp1;
2391 new_amount = op1;
2392 if (op1 == const0_rtx)
2393 return shifted;
2394 else if (CONST_INT_P (op1))
2395 other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode)
2396 - INTVAL (op1));
2397 else
2399 other_amount
2400 = simplify_gen_unary (NEG, GET_MODE (op1),
2401 op1, GET_MODE (op1));
2402 HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2403 other_amount
2404 = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2405 gen_int_mode (mask, GET_MODE (op1)));
2408 shifted = force_reg (mode, shifted);
2410 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2411 mode, shifted, new_amount, 0, 1);
2412 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2413 mode, shifted, other_amount,
2414 subtarget, 1);
2415 return expand_binop (mode, ior_optab, temp, temp1, target,
2416 unsignedp, methods);
2419 temp = expand_binop (mode,
2420 left ? lrotate_optab : rrotate_optab,
2421 shifted, op1, target, unsignedp, methods);
2423 else if (unsignedp)
2424 temp = expand_binop (mode,
2425 left ? lshift_optab : rshift_uns_optab,
2426 shifted, op1, target, unsignedp, methods);
2428 /* Do arithmetic shifts.
2429 Also, if we are going to widen the operand, we can just as well
2430 use an arithmetic right-shift instead of a logical one. */
2431 if (temp == 0 && ! rotate
2432 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2434 enum optab_methods methods1 = methods;
2436 /* If trying to widen a log shift to an arithmetic shift,
2437 don't accept an arithmetic shift of the same size. */
2438 if (unsignedp)
2439 methods1 = OPTAB_MUST_WIDEN;
2441 /* Arithmetic shift */
2443 temp = expand_binop (mode,
2444 left ? lshift_optab : rshift_arith_optab,
2445 shifted, op1, target, unsignedp, methods1);
2448 /* We used to try extzv here for logical right shifts, but that was
2449 only useful for one machine, the VAX, and caused poor code
2450 generation there for lshrdi3, so the code was deleted and a
2451 define_expand for lshrsi3 was added to vax.md. */
2454 gcc_assert (temp != NULL_RTX || may_fail);
2455 return temp;
2458 /* Output a shift instruction for expression code CODE,
2459 with SHIFTED being the rtx for the value to shift,
2460 and AMOUNT the amount to shift by.
2461 Store the result in the rtx TARGET, if that is convenient.
2462 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2463 Return the rtx for where the value is. */
2466 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2467 int amount, rtx target, int unsignedp)
2469 return expand_shift_1 (code, mode,
2470 shifted, GEN_INT (amount), target, unsignedp);
2473 /* Likewise, but return 0 if that cannot be done. */
2475 static rtx
2476 maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2477 int amount, rtx target, int unsignedp)
2479 return expand_shift_1 (code, mode,
2480 shifted, GEN_INT (amount), target, unsignedp, true);
2483 /* Output a shift instruction for expression code CODE,
2484 with SHIFTED being the rtx for the value to shift,
2485 and AMOUNT the tree for the amount to shift by.
2486 Store the result in the rtx TARGET, if that is convenient.
2487 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2488 Return the rtx for where the value is. */
2491 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2492 tree amount, rtx target, int unsignedp)
2494 return expand_shift_1 (code, mode,
2495 shifted, expand_normal (amount), target, unsignedp);
2499 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2500 const struct mult_cost *, machine_mode mode);
2501 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2502 const struct algorithm *, enum mult_variant);
2503 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2504 static rtx extract_high_half (machine_mode, rtx);
2505 static rtx expmed_mult_highpart (machine_mode, rtx, rtx, rtx, int, int);
2506 static rtx expmed_mult_highpart_optab (machine_mode, rtx, rtx, rtx,
2507 int, int);
2508 /* Compute and return the best algorithm for multiplying by T.
2509 The algorithm must cost less than cost_limit
2510 If retval.cost >= COST_LIMIT, no algorithm was found and all
2511 other field of the returned struct are undefined.
2512 MODE is the machine mode of the multiplication. */
2514 static void
2515 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2516 const struct mult_cost *cost_limit, machine_mode mode)
2518 int m;
2519 struct algorithm *alg_in, *best_alg;
2520 struct mult_cost best_cost;
2521 struct mult_cost new_limit;
2522 int op_cost, op_latency;
2523 unsigned HOST_WIDE_INT orig_t = t;
2524 unsigned HOST_WIDE_INT q;
2525 int maxm, hash_index;
2526 bool cache_hit = false;
2527 enum alg_code cache_alg = alg_zero;
2528 bool speed = optimize_insn_for_speed_p ();
2529 machine_mode imode;
2530 struct alg_hash_entry *entry_ptr;
2532 /* Indicate that no algorithm is yet found. If no algorithm
2533 is found, this value will be returned and indicate failure. */
2534 alg_out->cost.cost = cost_limit->cost + 1;
2535 alg_out->cost.latency = cost_limit->latency + 1;
2537 if (cost_limit->cost < 0
2538 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2539 return;
2541 /* Be prepared for vector modes. */
2542 imode = GET_MODE_INNER (mode);
2544 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2546 /* Restrict the bits of "t" to the multiplication's mode. */
2547 t &= GET_MODE_MASK (imode);
2549 /* t == 1 can be done in zero cost. */
2550 if (t == 1)
2552 alg_out->ops = 1;
2553 alg_out->cost.cost = 0;
2554 alg_out->cost.latency = 0;
2555 alg_out->op[0] = alg_m;
2556 return;
2559 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2560 fail now. */
2561 if (t == 0)
2563 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2564 return;
2565 else
2567 alg_out->ops = 1;
2568 alg_out->cost.cost = zero_cost (speed);
2569 alg_out->cost.latency = zero_cost (speed);
2570 alg_out->op[0] = alg_zero;
2571 return;
2575 /* We'll be needing a couple extra algorithm structures now. */
2577 alg_in = XALLOCA (struct algorithm);
2578 best_alg = XALLOCA (struct algorithm);
2579 best_cost = *cost_limit;
2581 /* Compute the hash index. */
2582 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2584 /* See if we already know what to do for T. */
2585 entry_ptr = alg_hash_entry_ptr (hash_index);
2586 if (entry_ptr->t == t
2587 && entry_ptr->mode == mode
2588 && entry_ptr->speed == speed
2589 && entry_ptr->alg != alg_unknown)
2591 cache_alg = entry_ptr->alg;
2593 if (cache_alg == alg_impossible)
2595 /* The cache tells us that it's impossible to synthesize
2596 multiplication by T within entry_ptr->cost. */
2597 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2598 /* COST_LIMIT is at least as restrictive as the one
2599 recorded in the hash table, in which case we have no
2600 hope of synthesizing a multiplication. Just
2601 return. */
2602 return;
2604 /* If we get here, COST_LIMIT is less restrictive than the
2605 one recorded in the hash table, so we may be able to
2606 synthesize a multiplication. Proceed as if we didn't
2607 have the cache entry. */
2609 else
2611 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2612 /* The cached algorithm shows that this multiplication
2613 requires more cost than COST_LIMIT. Just return. This
2614 way, we don't clobber this cache entry with
2615 alg_impossible but retain useful information. */
2616 return;
2618 cache_hit = true;
2620 switch (cache_alg)
2622 case alg_shift:
2623 goto do_alg_shift;
2625 case alg_add_t_m2:
2626 case alg_sub_t_m2:
2627 goto do_alg_addsub_t_m2;
2629 case alg_add_factor:
2630 case alg_sub_factor:
2631 goto do_alg_addsub_factor;
2633 case alg_add_t2_m:
2634 goto do_alg_add_t2_m;
2636 case alg_sub_t2_m:
2637 goto do_alg_sub_t2_m;
2639 default:
2640 gcc_unreachable ();
2645 /* If we have a group of zero bits at the low-order part of T, try
2646 multiplying by the remaining bits and then doing a shift. */
2648 if ((t & 1) == 0)
2650 do_alg_shift:
2651 m = ctz_or_zero (t); /* m = number of low zero bits */
2652 if (m < maxm)
2654 q = t >> m;
2655 /* The function expand_shift will choose between a shift and
2656 a sequence of additions, so the observed cost is given as
2657 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2658 op_cost = m * add_cost (speed, mode);
2659 if (shift_cost (speed, mode, m) < op_cost)
2660 op_cost = shift_cost (speed, mode, m);
2661 new_limit.cost = best_cost.cost - op_cost;
2662 new_limit.latency = best_cost.latency - op_cost;
2663 synth_mult (alg_in, q, &new_limit, mode);
2665 alg_in->cost.cost += op_cost;
2666 alg_in->cost.latency += op_cost;
2667 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2669 best_cost = alg_in->cost;
2670 std::swap (alg_in, best_alg);
2671 best_alg->log[best_alg->ops] = m;
2672 best_alg->op[best_alg->ops] = alg_shift;
2675 /* See if treating ORIG_T as a signed number yields a better
2676 sequence. Try this sequence only for a negative ORIG_T
2677 as it would be useless for a non-negative ORIG_T. */
2678 if ((HOST_WIDE_INT) orig_t < 0)
2680 /* Shift ORIG_T as follows because a right shift of a
2681 negative-valued signed type is implementation
2682 defined. */
2683 q = ~(~orig_t >> m);
2684 /* The function expand_shift will choose between a shift
2685 and a sequence of additions, so the observed cost is
2686 given as MIN (m * add_cost(speed, mode),
2687 shift_cost(speed, mode, m)). */
2688 op_cost = m * add_cost (speed, mode);
2689 if (shift_cost (speed, mode, m) < op_cost)
2690 op_cost = shift_cost (speed, mode, m);
2691 new_limit.cost = best_cost.cost - op_cost;
2692 new_limit.latency = best_cost.latency - op_cost;
2693 synth_mult (alg_in, q, &new_limit, mode);
2695 alg_in->cost.cost += op_cost;
2696 alg_in->cost.latency += op_cost;
2697 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2699 best_cost = alg_in->cost;
2700 std::swap (alg_in, best_alg);
2701 best_alg->log[best_alg->ops] = m;
2702 best_alg->op[best_alg->ops] = alg_shift;
2706 if (cache_hit)
2707 goto done;
2710 /* If we have an odd number, add or subtract one. */
2711 if ((t & 1) != 0)
2713 unsigned HOST_WIDE_INT w;
2715 do_alg_addsub_t_m2:
2716 for (w = 1; (w & t) != 0; w <<= 1)
2718 /* If T was -1, then W will be zero after the loop. This is another
2719 case where T ends with ...111. Handling this with (T + 1) and
2720 subtract 1 produces slightly better code and results in algorithm
2721 selection much faster than treating it like the ...0111 case
2722 below. */
2723 if (w == 0
2724 || (w > 2
2725 /* Reject the case where t is 3.
2726 Thus we prefer addition in that case. */
2727 && t != 3))
2729 /* T ends with ...111. Multiply by (T + 1) and subtract T. */
2731 op_cost = add_cost (speed, mode);
2732 new_limit.cost = best_cost.cost - op_cost;
2733 new_limit.latency = best_cost.latency - op_cost;
2734 synth_mult (alg_in, t + 1, &new_limit, mode);
2736 alg_in->cost.cost += op_cost;
2737 alg_in->cost.latency += op_cost;
2738 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2740 best_cost = alg_in->cost;
2741 std::swap (alg_in, best_alg);
2742 best_alg->log[best_alg->ops] = 0;
2743 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2746 else
2748 /* T ends with ...01 or ...011. Multiply by (T - 1) and add T. */
2750 op_cost = add_cost (speed, mode);
2751 new_limit.cost = best_cost.cost - op_cost;
2752 new_limit.latency = best_cost.latency - op_cost;
2753 synth_mult (alg_in, t - 1, &new_limit, mode);
2755 alg_in->cost.cost += op_cost;
2756 alg_in->cost.latency += op_cost;
2757 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2759 best_cost = alg_in->cost;
2760 std::swap (alg_in, best_alg);
2761 best_alg->log[best_alg->ops] = 0;
2762 best_alg->op[best_alg->ops] = alg_add_t_m2;
2766 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2767 quickly with a - a * n for some appropriate constant n. */
2768 m = exact_log2 (-orig_t + 1);
2769 if (m >= 0 && m < maxm)
2771 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2772 /* If the target has a cheap shift-and-subtract insn use
2773 that in preference to a shift insn followed by a sub insn.
2774 Assume that the shift-and-sub is "atomic" with a latency
2775 equal to it's cost, otherwise assume that on superscalar
2776 hardware the shift may be executed concurrently with the
2777 earlier steps in the algorithm. */
2778 if (shiftsub1_cost (speed, mode, m) <= op_cost)
2780 op_cost = shiftsub1_cost (speed, mode, m);
2781 op_latency = op_cost;
2783 else
2784 op_latency = add_cost (speed, mode);
2786 new_limit.cost = best_cost.cost - op_cost;
2787 new_limit.latency = best_cost.latency - op_latency;
2788 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2789 &new_limit, mode);
2791 alg_in->cost.cost += op_cost;
2792 alg_in->cost.latency += op_latency;
2793 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2795 best_cost = alg_in->cost;
2796 std::swap (alg_in, best_alg);
2797 best_alg->log[best_alg->ops] = m;
2798 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2802 if (cache_hit)
2803 goto done;
2806 /* Look for factors of t of the form
2807 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2808 If we find such a factor, we can multiply by t using an algorithm that
2809 multiplies by q, shift the result by m and add/subtract it to itself.
2811 We search for large factors first and loop down, even if large factors
2812 are less probable than small; if we find a large factor we will find a
2813 good sequence quickly, and therefore be able to prune (by decreasing
2814 COST_LIMIT) the search. */
2816 do_alg_addsub_factor:
2817 for (m = floor_log2 (t - 1); m >= 2; m--)
2819 unsigned HOST_WIDE_INT d;
2821 d = (HOST_WIDE_INT_1U << m) + 1;
2822 if (t % d == 0 && t > d && m < maxm
2823 && (!cache_hit || cache_alg == alg_add_factor))
2825 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2826 if (shiftadd_cost (speed, mode, m) <= op_cost)
2827 op_cost = shiftadd_cost (speed, mode, m);
2829 op_latency = op_cost;
2832 new_limit.cost = best_cost.cost - op_cost;
2833 new_limit.latency = best_cost.latency - op_latency;
2834 synth_mult (alg_in, t / d, &new_limit, mode);
2836 alg_in->cost.cost += op_cost;
2837 alg_in->cost.latency += op_latency;
2838 if (alg_in->cost.latency < op_cost)
2839 alg_in->cost.latency = op_cost;
2840 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2842 best_cost = alg_in->cost;
2843 std::swap (alg_in, best_alg);
2844 best_alg->log[best_alg->ops] = m;
2845 best_alg->op[best_alg->ops] = alg_add_factor;
2847 /* Other factors will have been taken care of in the recursion. */
2848 break;
2851 d = (HOST_WIDE_INT_1U << m) - 1;
2852 if (t % d == 0 && t > d && m < maxm
2853 && (!cache_hit || cache_alg == alg_sub_factor))
2855 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2856 if (shiftsub0_cost (speed, mode, m) <= op_cost)
2857 op_cost = shiftsub0_cost (speed, mode, m);
2859 op_latency = op_cost;
2861 new_limit.cost = best_cost.cost - op_cost;
2862 new_limit.latency = best_cost.latency - op_latency;
2863 synth_mult (alg_in, t / d, &new_limit, mode);
2865 alg_in->cost.cost += op_cost;
2866 alg_in->cost.latency += op_latency;
2867 if (alg_in->cost.latency < op_cost)
2868 alg_in->cost.latency = op_cost;
2869 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2871 best_cost = alg_in->cost;
2872 std::swap (alg_in, best_alg);
2873 best_alg->log[best_alg->ops] = m;
2874 best_alg->op[best_alg->ops] = alg_sub_factor;
2876 break;
2879 if (cache_hit)
2880 goto done;
2882 /* Try shift-and-add (load effective address) instructions,
2883 i.e. do a*3, a*5, a*9. */
2884 if ((t & 1) != 0)
2886 do_alg_add_t2_m:
2887 q = t - 1;
2888 m = ctz_hwi (q);
2889 if (q && m < maxm)
2891 op_cost = shiftadd_cost (speed, mode, m);
2892 new_limit.cost = best_cost.cost - op_cost;
2893 new_limit.latency = best_cost.latency - op_cost;
2894 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2896 alg_in->cost.cost += op_cost;
2897 alg_in->cost.latency += op_cost;
2898 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2900 best_cost = alg_in->cost;
2901 std::swap (alg_in, best_alg);
2902 best_alg->log[best_alg->ops] = m;
2903 best_alg->op[best_alg->ops] = alg_add_t2_m;
2906 if (cache_hit)
2907 goto done;
2909 do_alg_sub_t2_m:
2910 q = t + 1;
2911 m = ctz_hwi (q);
2912 if (q && m < maxm)
2914 op_cost = shiftsub0_cost (speed, mode, m);
2915 new_limit.cost = best_cost.cost - op_cost;
2916 new_limit.latency = best_cost.latency - op_cost;
2917 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2919 alg_in->cost.cost += op_cost;
2920 alg_in->cost.latency += op_cost;
2921 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2923 best_cost = alg_in->cost;
2924 std::swap (alg_in, best_alg);
2925 best_alg->log[best_alg->ops] = m;
2926 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2929 if (cache_hit)
2930 goto done;
2933 done:
2934 /* If best_cost has not decreased, we have not found any algorithm. */
2935 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2937 /* We failed to find an algorithm. Record alg_impossible for
2938 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2939 we are asked to find an algorithm for T within the same or
2940 lower COST_LIMIT, we can immediately return to the
2941 caller. */
2942 entry_ptr->t = t;
2943 entry_ptr->mode = mode;
2944 entry_ptr->speed = speed;
2945 entry_ptr->alg = alg_impossible;
2946 entry_ptr->cost = *cost_limit;
2947 return;
2950 /* Cache the result. */
2951 if (!cache_hit)
2953 entry_ptr->t = t;
2954 entry_ptr->mode = mode;
2955 entry_ptr->speed = speed;
2956 entry_ptr->alg = best_alg->op[best_alg->ops];
2957 entry_ptr->cost.cost = best_cost.cost;
2958 entry_ptr->cost.latency = best_cost.latency;
2961 /* If we are getting a too long sequence for `struct algorithm'
2962 to record, make this search fail. */
2963 if (best_alg->ops == MAX_BITS_PER_WORD)
2964 return;
2966 /* Copy the algorithm from temporary space to the space at alg_out.
2967 We avoid using structure assignment because the majority of
2968 best_alg is normally undefined, and this is a critical function. */
2969 alg_out->ops = best_alg->ops + 1;
2970 alg_out->cost = best_cost;
2971 memcpy (alg_out->op, best_alg->op,
2972 alg_out->ops * sizeof *alg_out->op);
2973 memcpy (alg_out->log, best_alg->log,
2974 alg_out->ops * sizeof *alg_out->log);
2977 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2978 Try three variations:
2980 - a shift/add sequence based on VAL itself
2981 - a shift/add sequence based on -VAL, followed by a negation
2982 - a shift/add sequence based on VAL - 1, followed by an addition.
2984 Return true if the cheapest of these cost less than MULT_COST,
2985 describing the algorithm in *ALG and final fixup in *VARIANT. */
2987 bool
2988 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
2989 struct algorithm *alg, enum mult_variant *variant,
2990 int mult_cost)
2992 struct algorithm alg2;
2993 struct mult_cost limit;
2994 int op_cost;
2995 bool speed = optimize_insn_for_speed_p ();
2997 /* Fail quickly for impossible bounds. */
2998 if (mult_cost < 0)
2999 return false;
3001 /* Ensure that mult_cost provides a reasonable upper bound.
3002 Any constant multiplication can be performed with less
3003 than 2 * bits additions. */
3004 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
3005 if (mult_cost > op_cost)
3006 mult_cost = op_cost;
3008 *variant = basic_variant;
3009 limit.cost = mult_cost;
3010 limit.latency = mult_cost;
3011 synth_mult (alg, val, &limit, mode);
3013 /* This works only if the inverted value actually fits in an
3014 `unsigned int' */
3015 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
3017 op_cost = neg_cost (speed, mode);
3018 if (MULT_COST_LESS (&alg->cost, mult_cost))
3020 limit.cost = alg->cost.cost - op_cost;
3021 limit.latency = alg->cost.latency - op_cost;
3023 else
3025 limit.cost = mult_cost - op_cost;
3026 limit.latency = mult_cost - op_cost;
3029 synth_mult (&alg2, -val, &limit, mode);
3030 alg2.cost.cost += op_cost;
3031 alg2.cost.latency += op_cost;
3032 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3033 *alg = alg2, *variant = negate_variant;
3036 /* This proves very useful for division-by-constant. */
3037 op_cost = add_cost (speed, mode);
3038 if (MULT_COST_LESS (&alg->cost, mult_cost))
3040 limit.cost = alg->cost.cost - op_cost;
3041 limit.latency = alg->cost.latency - op_cost;
3043 else
3045 limit.cost = mult_cost - op_cost;
3046 limit.latency = mult_cost - op_cost;
3049 synth_mult (&alg2, val - 1, &limit, mode);
3050 alg2.cost.cost += op_cost;
3051 alg2.cost.latency += op_cost;
3052 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3053 *alg = alg2, *variant = add_variant;
3055 return MULT_COST_LESS (&alg->cost, mult_cost);
3058 /* A subroutine of expand_mult, used for constant multiplications.
3059 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3060 convenient. Use the shift/add sequence described by ALG and apply
3061 the final fixup specified by VARIANT. */
3063 static rtx
3064 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
3065 rtx target, const struct algorithm *alg,
3066 enum mult_variant variant)
3068 unsigned HOST_WIDE_INT val_so_far;
3069 rtx_insn *insn;
3070 rtx accum, tem;
3071 int opno;
3072 machine_mode nmode;
3074 /* Avoid referencing memory over and over and invalid sharing
3075 on SUBREGs. */
3076 op0 = force_reg (mode, op0);
3078 /* ACCUM starts out either as OP0 or as a zero, depending on
3079 the first operation. */
3081 if (alg->op[0] == alg_zero)
3083 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3084 val_so_far = 0;
3086 else if (alg->op[0] == alg_m)
3088 accum = copy_to_mode_reg (mode, op0);
3089 val_so_far = 1;
3091 else
3092 gcc_unreachable ();
3094 for (opno = 1; opno < alg->ops; opno++)
3096 int log = alg->log[opno];
3097 rtx shift_subtarget = optimize ? 0 : accum;
3098 rtx add_target
3099 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3100 && !optimize)
3101 ? target : 0;
3102 rtx accum_target = optimize ? 0 : accum;
3103 rtx accum_inner;
3105 switch (alg->op[opno])
3107 case alg_shift:
3108 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3109 /* REG_EQUAL note will be attached to the following insn. */
3110 emit_move_insn (accum, tem);
3111 val_so_far <<= log;
3112 break;
3114 case alg_add_t_m2:
3115 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3116 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3117 add_target ? add_target : accum_target);
3118 val_so_far += HOST_WIDE_INT_1U << log;
3119 break;
3121 case alg_sub_t_m2:
3122 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3123 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3124 add_target ? add_target : accum_target);
3125 val_so_far -= HOST_WIDE_INT_1U << log;
3126 break;
3128 case alg_add_t2_m:
3129 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3130 log, shift_subtarget, 0);
3131 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3132 add_target ? add_target : accum_target);
3133 val_so_far = (val_so_far << log) + 1;
3134 break;
3136 case alg_sub_t2_m:
3137 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3138 log, shift_subtarget, 0);
3139 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3140 add_target ? add_target : accum_target);
3141 val_so_far = (val_so_far << log) - 1;
3142 break;
3144 case alg_add_factor:
3145 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3146 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3147 add_target ? add_target : accum_target);
3148 val_so_far += val_so_far << log;
3149 break;
3151 case alg_sub_factor:
3152 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3153 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3154 (add_target
3155 ? add_target : (optimize ? 0 : tem)));
3156 val_so_far = (val_so_far << log) - val_so_far;
3157 break;
3159 default:
3160 gcc_unreachable ();
3163 if (SCALAR_INT_MODE_P (mode))
3165 /* Write a REG_EQUAL note on the last insn so that we can cse
3166 multiplication sequences. Note that if ACCUM is a SUBREG,
3167 we've set the inner register and must properly indicate that. */
3168 tem = op0, nmode = mode;
3169 accum_inner = accum;
3170 if (GET_CODE (accum) == SUBREG)
3172 accum_inner = SUBREG_REG (accum);
3173 nmode = GET_MODE (accum_inner);
3174 tem = gen_lowpart (nmode, op0);
3177 insn = get_last_insn ();
3178 set_dst_reg_note (insn, REG_EQUAL,
3179 gen_rtx_MULT (nmode, tem,
3180 gen_int_mode (val_so_far, nmode)),
3181 accum_inner);
3185 if (variant == negate_variant)
3187 val_so_far = -val_so_far;
3188 accum = expand_unop (mode, neg_optab, accum, target, 0);
3190 else if (variant == add_variant)
3192 val_so_far = val_so_far + 1;
3193 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3196 /* Compare only the bits of val and val_so_far that are significant
3197 in the result mode, to avoid sign-/zero-extension confusion. */
3198 nmode = GET_MODE_INNER (mode);
3199 val &= GET_MODE_MASK (nmode);
3200 val_so_far &= GET_MODE_MASK (nmode);
3201 gcc_assert (val == (HOST_WIDE_INT) val_so_far);
3203 return accum;
3206 /* Perform a multiplication and return an rtx for the result.
3207 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3208 TARGET is a suggestion for where to store the result (an rtx).
3210 We check specially for a constant integer as OP1.
3211 If you want this check for OP0 as well, then before calling
3212 you should swap the two operands if OP0 would be constant. */
3215 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3216 int unsignedp)
3218 enum mult_variant variant;
3219 struct algorithm algorithm;
3220 rtx scalar_op1;
3221 int max_cost;
3222 bool speed = optimize_insn_for_speed_p ();
3223 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3225 if (CONSTANT_P (op0))
3226 std::swap (op0, op1);
3228 /* For vectors, there are several simplifications that can be made if
3229 all elements of the vector constant are identical. */
3230 scalar_op1 = unwrap_const_vec_duplicate (op1);
3232 if (INTEGRAL_MODE_P (mode))
3234 rtx fake_reg;
3235 HOST_WIDE_INT coeff;
3236 bool is_neg;
3237 int mode_bitsize;
3239 if (op1 == CONST0_RTX (mode))
3240 return op1;
3241 if (op1 == CONST1_RTX (mode))
3242 return op0;
3243 if (op1 == CONSTM1_RTX (mode))
3244 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3245 op0, target, 0);
3247 if (do_trapv)
3248 goto skip_synth;
3250 /* If mode is integer vector mode, check if the backend supports
3251 vector lshift (by scalar or vector) at all. If not, we can't use
3252 synthetized multiply. */
3253 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3254 && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3255 && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3256 goto skip_synth;
3258 /* These are the operations that are potentially turned into
3259 a sequence of shifts and additions. */
3260 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3262 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3263 less than or equal in size to `unsigned int' this doesn't matter.
3264 If the mode is larger than `unsigned int', then synth_mult works
3265 only if the constant value exactly fits in an `unsigned int' without
3266 any truncation. This means that multiplying by negative values does
3267 not work; results are off by 2^32 on a 32 bit machine. */
3268 if (CONST_INT_P (scalar_op1))
3270 coeff = INTVAL (scalar_op1);
3271 is_neg = coeff < 0;
3273 #if TARGET_SUPPORTS_WIDE_INT
3274 else if (CONST_WIDE_INT_P (scalar_op1))
3275 #else
3276 else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3277 #endif
3279 int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode));
3280 /* Perfect power of 2 (other than 1, which is handled above). */
3281 if (shift > 0)
3282 return expand_shift (LSHIFT_EXPR, mode, op0,
3283 shift, target, unsignedp);
3284 else
3285 goto skip_synth;
3287 else
3288 goto skip_synth;
3290 /* We used to test optimize here, on the grounds that it's better to
3291 produce a smaller program when -O is not used. But this causes
3292 such a terrible slowdown sometimes that it seems better to always
3293 use synth_mult. */
3295 /* Special case powers of two. */
3296 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3297 && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3298 return expand_shift (LSHIFT_EXPR, mode, op0,
3299 floor_log2 (coeff), target, unsignedp);
3301 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3303 /* Attempt to handle multiplication of DImode values by negative
3304 coefficients, by performing the multiplication by a positive
3305 multiplier and then inverting the result. */
3306 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3308 /* Its safe to use -coeff even for INT_MIN, as the
3309 result is interpreted as an unsigned coefficient.
3310 Exclude cost of op0 from max_cost to match the cost
3311 calculation of the synth_mult. */
3312 coeff = -(unsigned HOST_WIDE_INT) coeff;
3313 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3314 mode, speed)
3315 - neg_cost (speed, mode));
3316 if (max_cost <= 0)
3317 goto skip_synth;
3319 /* Special case powers of two. */
3320 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3322 rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3323 floor_log2 (coeff), target, unsignedp);
3324 return expand_unop (mode, neg_optab, temp, target, 0);
3327 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3328 max_cost))
3330 rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3331 &algorithm, variant);
3332 return expand_unop (mode, neg_optab, temp, target, 0);
3334 goto skip_synth;
3337 /* Exclude cost of op0 from max_cost to match the cost
3338 calculation of the synth_mult. */
3339 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3340 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3341 return expand_mult_const (mode, op0, coeff, target,
3342 &algorithm, variant);
3344 skip_synth:
3346 /* Expand x*2.0 as x+x. */
3347 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3348 && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
3350 op0 = force_reg (GET_MODE (op0), op0);
3351 return expand_binop (mode, add_optab, op0, op0,
3352 target, unsignedp, OPTAB_LIB_WIDEN);
3355 /* This used to use umul_optab if unsigned, but for non-widening multiply
3356 there is no difference between signed and unsigned. */
3357 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3358 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3359 gcc_assert (op0);
3360 return op0;
3363 /* Return a cost estimate for multiplying a register by the given
3364 COEFFicient in the given MODE and SPEED. */
3367 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3369 int max_cost;
3370 struct algorithm algorithm;
3371 enum mult_variant variant;
3373 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3374 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3375 mode, speed);
3376 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3377 return algorithm.cost.cost;
3378 else
3379 return max_cost;
3382 /* Perform a widening multiplication and return an rtx for the result.
3383 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3384 TARGET is a suggestion for where to store the result (an rtx).
3385 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3386 or smul_widen_optab.
3388 We check specially for a constant integer as OP1, comparing the
3389 cost of a widening multiply against the cost of a sequence of shifts
3390 and adds. */
3393 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3394 int unsignedp, optab this_optab)
3396 bool speed = optimize_insn_for_speed_p ();
3397 rtx cop1;
3399 if (CONST_INT_P (op1)
3400 && GET_MODE (op0) != VOIDmode
3401 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3402 this_optab == umul_widen_optab))
3403 && CONST_INT_P (cop1)
3404 && (INTVAL (cop1) >= 0
3405 || HWI_COMPUTABLE_MODE_P (mode)))
3407 HOST_WIDE_INT coeff = INTVAL (cop1);
3408 int max_cost;
3409 enum mult_variant variant;
3410 struct algorithm algorithm;
3412 if (coeff == 0)
3413 return CONST0_RTX (mode);
3415 /* Special case powers of two. */
3416 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3418 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3419 return expand_shift (LSHIFT_EXPR, mode, op0,
3420 floor_log2 (coeff), target, unsignedp);
3423 /* Exclude cost of op0 from max_cost to match the cost
3424 calculation of the synth_mult. */
3425 max_cost = mul_widen_cost (speed, mode);
3426 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3427 max_cost))
3429 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3430 return expand_mult_const (mode, op0, coeff, target,
3431 &algorithm, variant);
3434 return expand_binop (mode, this_optab, op0, op1, target,
3435 unsignedp, OPTAB_LIB_WIDEN);
3438 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3439 replace division by D, and put the least significant N bits of the result
3440 in *MULTIPLIER_PTR and return the most significant bit.
3442 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3443 needed precision is in PRECISION (should be <= N).
3445 PRECISION should be as small as possible so this function can choose
3446 multiplier more freely.
3448 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3449 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3451 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3452 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
3454 unsigned HOST_WIDE_INT
3455 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3456 unsigned HOST_WIDE_INT *multiplier_ptr,
3457 int *post_shift_ptr, int *lgup_ptr)
3459 int lgup, post_shift;
3460 int pow, pow2;
3462 /* lgup = ceil(log2(divisor)); */
3463 lgup = ceil_log2 (d);
3465 gcc_assert (lgup <= n);
3467 pow = n + lgup;
3468 pow2 = n + lgup - precision;
3470 /* mlow = 2^(N + lgup)/d */
3471 wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3472 wide_int mlow = wi::udiv_trunc (val, d);
3474 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3475 val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3476 wide_int mhigh = wi::udiv_trunc (val, d);
3478 /* If precision == N, then mlow, mhigh exceed 2^N
3479 (but they do not exceed 2^(N+1)). */
3481 /* Reduce to lowest terms. */
3482 for (post_shift = lgup; post_shift > 0; post_shift--)
3484 unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3485 HOST_BITS_PER_WIDE_INT);
3486 unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3487 HOST_BITS_PER_WIDE_INT);
3488 if (ml_lo >= mh_lo)
3489 break;
3491 mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3492 mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3495 *post_shift_ptr = post_shift;
3496 *lgup_ptr = lgup;
3497 if (n < HOST_BITS_PER_WIDE_INT)
3499 unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;
3500 *multiplier_ptr = mhigh.to_uhwi () & mask;
3501 return mhigh.to_uhwi () >= mask;
3503 else
3505 *multiplier_ptr = mhigh.to_uhwi ();
3506 return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3510 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3511 congruent to 1 (mod 2**N). */
3513 static unsigned HOST_WIDE_INT
3514 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3516 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3518 /* The algorithm notes that the choice y = x satisfies
3519 x*y == 1 mod 2^3, since x is assumed odd.
3520 Each iteration doubles the number of bits of significance in y. */
3522 unsigned HOST_WIDE_INT mask;
3523 unsigned HOST_WIDE_INT y = x;
3524 int nbit = 3;
3526 mask = (n == HOST_BITS_PER_WIDE_INT
3527 ? HOST_WIDE_INT_M1U
3528 : (HOST_WIDE_INT_1U << n) - 1);
3530 while (nbit < n)
3532 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3533 nbit *= 2;
3535 return y;
3538 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3539 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3540 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3541 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3542 become signed.
3544 The result is put in TARGET if that is convenient.
3546 MODE is the mode of operation. */
3549 expand_mult_highpart_adjust (machine_mode mode, rtx adj_operand, rtx op0,
3550 rtx op1, rtx target, int unsignedp)
3552 rtx tem;
3553 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3555 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3556 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3557 tem = expand_and (mode, tem, op1, NULL_RTX);
3558 adj_operand
3559 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3560 adj_operand);
3562 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3563 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3564 tem = expand_and (mode, tem, op0, NULL_RTX);
3565 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3566 target);
3568 return target;
3571 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3573 static rtx
3574 extract_high_half (machine_mode mode, rtx op)
3576 machine_mode wider_mode;
3578 if (mode == word_mode)
3579 return gen_highpart (mode, op);
3581 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3583 wider_mode = GET_MODE_WIDER_MODE (mode);
3584 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3585 GET_MODE_BITSIZE (mode), 0, 1);
3586 return convert_modes (mode, wider_mode, op, 0);
3589 /* Like expmed_mult_highpart, but only consider using a multiplication
3590 optab. OP1 is an rtx for the constant operand. */
3592 static rtx
3593 expmed_mult_highpart_optab (machine_mode mode, rtx op0, rtx op1,
3594 rtx target, int unsignedp, int max_cost)
3596 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3597 machine_mode wider_mode;
3598 optab moptab;
3599 rtx tem;
3600 int size;
3601 bool speed = optimize_insn_for_speed_p ();
3603 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3605 wider_mode = GET_MODE_WIDER_MODE (mode);
3606 size = GET_MODE_BITSIZE (mode);
3608 /* Firstly, try using a multiplication insn that only generates the needed
3609 high part of the product, and in the sign flavor of unsignedp. */
3610 if (mul_highpart_cost (speed, mode) < max_cost)
3612 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3613 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3614 unsignedp, OPTAB_DIRECT);
3615 if (tem)
3616 return tem;
3619 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3620 Need to adjust the result after the multiplication. */
3621 if (size - 1 < BITS_PER_WORD
3622 && (mul_highpart_cost (speed, mode)
3623 + 2 * shift_cost (speed, mode, size-1)
3624 + 4 * add_cost (speed, mode) < max_cost))
3626 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3627 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3628 unsignedp, OPTAB_DIRECT);
3629 if (tem)
3630 /* We used the wrong signedness. Adjust the result. */
3631 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3632 tem, unsignedp);
3635 /* Try widening multiplication. */
3636 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3637 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3638 && mul_widen_cost (speed, wider_mode) < max_cost)
3640 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3641 unsignedp, OPTAB_WIDEN);
3642 if (tem)
3643 return extract_high_half (mode, tem);
3646 /* Try widening the mode and perform a non-widening multiplication. */
3647 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3648 && size - 1 < BITS_PER_WORD
3649 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3650 < max_cost))
3652 rtx_insn *insns;
3653 rtx wop0, wop1;
3655 /* We need to widen the operands, for example to ensure the
3656 constant multiplier is correctly sign or zero extended.
3657 Use a sequence to clean-up any instructions emitted by
3658 the conversions if things don't work out. */
3659 start_sequence ();
3660 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3661 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3662 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3663 unsignedp, OPTAB_WIDEN);
3664 insns = get_insns ();
3665 end_sequence ();
3667 if (tem)
3669 emit_insn (insns);
3670 return extract_high_half (mode, tem);
3674 /* Try widening multiplication of opposite signedness, and adjust. */
3675 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3676 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3677 && size - 1 < BITS_PER_WORD
3678 && (mul_widen_cost (speed, wider_mode)
3679 + 2 * shift_cost (speed, mode, size-1)
3680 + 4 * add_cost (speed, mode) < max_cost))
3682 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3683 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3684 if (tem != 0)
3686 tem = extract_high_half (mode, tem);
3687 /* We used the wrong signedness. Adjust the result. */
3688 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3689 target, unsignedp);
3693 return 0;
3696 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3697 putting the high half of the result in TARGET if that is convenient,
3698 and return where the result is. If the operation can not be performed,
3699 0 is returned.
3701 MODE is the mode of operation and result.
3703 UNSIGNEDP nonzero means unsigned multiply.
3705 MAX_COST is the total allowed cost for the expanded RTL. */
3707 static rtx
3708 expmed_mult_highpart (machine_mode mode, rtx op0, rtx op1,
3709 rtx target, int unsignedp, int max_cost)
3711 machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3712 unsigned HOST_WIDE_INT cnst1;
3713 int extra_cost;
3714 bool sign_adjust = false;
3715 enum mult_variant variant;
3716 struct algorithm alg;
3717 rtx tem;
3718 bool speed = optimize_insn_for_speed_p ();
3720 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3721 /* We can't support modes wider than HOST_BITS_PER_INT. */
3722 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3724 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3726 /* We can't optimize modes wider than BITS_PER_WORD.
3727 ??? We might be able to perform double-word arithmetic if
3728 mode == word_mode, however all the cost calculations in
3729 synth_mult etc. assume single-word operations. */
3730 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3731 return expmed_mult_highpart_optab (mode, op0, op1, target,
3732 unsignedp, max_cost);
3734 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3736 /* Check whether we try to multiply by a negative constant. */
3737 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3739 sign_adjust = true;
3740 extra_cost += add_cost (speed, mode);
3743 /* See whether shift/add multiplication is cheap enough. */
3744 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3745 max_cost - extra_cost))
3747 /* See whether the specialized multiplication optabs are
3748 cheaper than the shift/add version. */
3749 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3750 alg.cost.cost + extra_cost);
3751 if (tem)
3752 return tem;
3754 tem = convert_to_mode (wider_mode, op0, unsignedp);
3755 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3756 tem = extract_high_half (mode, tem);
3758 /* Adjust result for signedness. */
3759 if (sign_adjust)
3760 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3762 return tem;
3764 return expmed_mult_highpart_optab (mode, op0, op1, target,
3765 unsignedp, max_cost);
3769 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3771 static rtx
3772 expand_smod_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3774 rtx result, temp, shift;
3775 rtx_code_label *label;
3776 int logd;
3777 int prec = GET_MODE_PRECISION (mode);
3779 logd = floor_log2 (d);
3780 result = gen_reg_rtx (mode);
3782 /* Avoid conditional branches when they're expensive. */
3783 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3784 && optimize_insn_for_speed_p ())
3786 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3787 mode, 0, -1);
3788 if (signmask)
3790 HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
3791 signmask = force_reg (mode, signmask);
3792 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3794 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3795 which instruction sequence to use. If logical right shifts
3796 are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3797 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
3799 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3800 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3801 || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
3802 > COSTS_N_INSNS (2)))
3804 temp = expand_binop (mode, xor_optab, op0, signmask,
3805 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3806 temp = expand_binop (mode, sub_optab, temp, signmask,
3807 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3808 temp = expand_binop (mode, and_optab, temp,
3809 gen_int_mode (masklow, mode),
3810 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3811 temp = expand_binop (mode, xor_optab, temp, signmask,
3812 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3813 temp = expand_binop (mode, sub_optab, temp, signmask,
3814 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3816 else
3818 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3819 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3820 signmask = force_reg (mode, signmask);
3822 temp = expand_binop (mode, add_optab, op0, signmask,
3823 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3824 temp = expand_binop (mode, and_optab, temp,
3825 gen_int_mode (masklow, mode),
3826 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3827 temp = expand_binop (mode, sub_optab, temp, signmask,
3828 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3830 return temp;
3834 /* Mask contains the mode's signbit and the significant bits of the
3835 modulus. By including the signbit in the operation, many targets
3836 can avoid an explicit compare operation in the following comparison
3837 against zero. */
3838 wide_int mask = wi::mask (logd, false, prec);
3839 mask = wi::set_bit (mask, prec - 1);
3841 temp = expand_binop (mode, and_optab, op0,
3842 immed_wide_int_const (mask, mode),
3843 result, 1, OPTAB_LIB_WIDEN);
3844 if (temp != result)
3845 emit_move_insn (result, temp);
3847 label = gen_label_rtx ();
3848 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3850 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3851 0, OPTAB_LIB_WIDEN);
3853 mask = wi::mask (logd, true, prec);
3854 temp = expand_binop (mode, ior_optab, temp,
3855 immed_wide_int_const (mask, mode),
3856 result, 1, OPTAB_LIB_WIDEN);
3857 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3858 0, OPTAB_LIB_WIDEN);
3859 if (temp != result)
3860 emit_move_insn (result, temp);
3861 emit_label (label);
3862 return result;
3865 /* Expand signed division of OP0 by a power of two D in mode MODE.
3866 This routine is only called for positive values of D. */
3868 static rtx
3869 expand_sdiv_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3871 rtx temp;
3872 rtx_code_label *label;
3873 int logd;
3875 logd = floor_log2 (d);
3877 if (d == 2
3878 && BRANCH_COST (optimize_insn_for_speed_p (),
3879 false) >= 1)
3881 temp = gen_reg_rtx (mode);
3882 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3883 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3884 0, OPTAB_LIB_WIDEN);
3885 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3888 if (HAVE_conditional_move
3889 && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
3891 rtx temp2;
3893 start_sequence ();
3894 temp2 = copy_to_mode_reg (mode, op0);
3895 temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
3896 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3897 temp = force_reg (mode, temp);
3899 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3900 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3901 mode, temp, temp2, mode, 0);
3902 if (temp2)
3904 rtx_insn *seq = get_insns ();
3905 end_sequence ();
3906 emit_insn (seq);
3907 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3909 end_sequence ();
3912 if (BRANCH_COST (optimize_insn_for_speed_p (),
3913 false) >= 2)
3915 int ushift = GET_MODE_BITSIZE (mode) - logd;
3917 temp = gen_reg_rtx (mode);
3918 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3919 if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
3920 || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3921 > COSTS_N_INSNS (1))
3922 temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
3923 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3924 else
3925 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3926 ushift, NULL_RTX, 1);
3927 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3928 0, OPTAB_LIB_WIDEN);
3929 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3932 label = gen_label_rtx ();
3933 temp = copy_to_mode_reg (mode, op0);
3934 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3935 expand_inc (temp, gen_int_mode (d - 1, mode));
3936 emit_label (label);
3937 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3940 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3941 if that is convenient, and returning where the result is.
3942 You may request either the quotient or the remainder as the result;
3943 specify REM_FLAG nonzero to get the remainder.
3945 CODE is the expression code for which kind of division this is;
3946 it controls how rounding is done. MODE is the machine mode to use.
3947 UNSIGNEDP nonzero means do unsigned division. */
3949 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3950 and then correct it by or'ing in missing high bits
3951 if result of ANDI is nonzero.
3952 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3953 This could optimize to a bfexts instruction.
3954 But C doesn't use these operations, so their optimizations are
3955 left for later. */
3956 /* ??? For modulo, we don't actually need the highpart of the first product,
3957 the low part will do nicely. And for small divisors, the second multiply
3958 can also be a low-part only multiply or even be completely left out.
3959 E.g. to calculate the remainder of a division by 3 with a 32 bit
3960 multiply, multiply with 0x55555556 and extract the upper two bits;
3961 the result is exact for inputs up to 0x1fffffff.
3962 The input range can be reduced by using cross-sum rules.
3963 For odd divisors >= 3, the following table gives right shift counts
3964 so that if a number is shifted by an integer multiple of the given
3965 amount, the remainder stays the same:
3966 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3967 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3968 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3969 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3970 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3972 Cross-sum rules for even numbers can be derived by leaving as many bits
3973 to the right alone as the divisor has zeros to the right.
3974 E.g. if x is an unsigned 32 bit number:
3975 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3979 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
3980 rtx op0, rtx op1, rtx target, int unsignedp)
3982 machine_mode compute_mode;
3983 rtx tquotient;
3984 rtx quotient = 0, remainder = 0;
3985 rtx_insn *last;
3986 int size;
3987 rtx_insn *insn;
3988 optab optab1, optab2;
3989 int op1_is_constant, op1_is_pow2 = 0;
3990 int max_cost, extra_cost;
3991 static HOST_WIDE_INT last_div_const = 0;
3992 bool speed = optimize_insn_for_speed_p ();
3994 op1_is_constant = CONST_INT_P (op1);
3995 if (op1_is_constant)
3997 wide_int ext_op1 = rtx_mode_t (op1, mode);
3998 op1_is_pow2 = (wi::popcount (ext_op1) == 1
3999 || (! unsignedp
4000 && wi::popcount (wi::neg (ext_op1)) == 1));
4004 This is the structure of expand_divmod:
4006 First comes code to fix up the operands so we can perform the operations
4007 correctly and efficiently.
4009 Second comes a switch statement with code specific for each rounding mode.
4010 For some special operands this code emits all RTL for the desired
4011 operation, for other cases, it generates only a quotient and stores it in
4012 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
4013 to indicate that it has not done anything.
4015 Last comes code that finishes the operation. If QUOTIENT is set and
4016 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
4017 QUOTIENT is not set, it is computed using trunc rounding.
4019 We try to generate special code for division and remainder when OP1 is a
4020 constant. If |OP1| = 2**n we can use shifts and some other fast
4021 operations. For other values of OP1, we compute a carefully selected
4022 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4023 by m.
4025 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4026 half of the product. Different strategies for generating the product are
4027 implemented in expmed_mult_highpart.
4029 If what we actually want is the remainder, we generate that by another
4030 by-constant multiplication and a subtraction. */
4032 /* We shouldn't be called with OP1 == const1_rtx, but some of the
4033 code below will malfunction if we are, so check here and handle
4034 the special case if so. */
4035 if (op1 == const1_rtx)
4036 return rem_flag ? const0_rtx : op0;
4038 /* When dividing by -1, we could get an overflow.
4039 negv_optab can handle overflows. */
4040 if (! unsignedp && op1 == constm1_rtx)
4042 if (rem_flag)
4043 return const0_rtx;
4044 return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4045 ? negv_optab : neg_optab, op0, target, 0);
4048 if (target
4049 /* Don't use the function value register as a target
4050 since we have to read it as well as write it,
4051 and function-inlining gets confused by this. */
4052 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4053 /* Don't clobber an operand while doing a multi-step calculation. */
4054 || ((rem_flag || op1_is_constant)
4055 && (reg_mentioned_p (target, op0)
4056 || (MEM_P (op0) && MEM_P (target))))
4057 || reg_mentioned_p (target, op1)
4058 || (MEM_P (op1) && MEM_P (target))))
4059 target = 0;
4061 /* Get the mode in which to perform this computation. Normally it will
4062 be MODE, but sometimes we can't do the desired operation in MODE.
4063 If so, pick a wider mode in which we can do the operation. Convert
4064 to that mode at the start to avoid repeated conversions.
4066 First see what operations we need. These depend on the expression
4067 we are evaluating. (We assume that divxx3 insns exist under the
4068 same conditions that modxx3 insns and that these insns don't normally
4069 fail. If these assumptions are not correct, we may generate less
4070 efficient code in some cases.)
4072 Then see if we find a mode in which we can open-code that operation
4073 (either a division, modulus, or shift). Finally, check for the smallest
4074 mode for which we can do the operation with a library call. */
4076 /* We might want to refine this now that we have division-by-constant
4077 optimization. Since expmed_mult_highpart tries so many variants, it is
4078 not straightforward to generalize this. Maybe we should make an array
4079 of possible modes in init_expmed? Save this for GCC 2.7. */
4081 optab1 = (op1_is_pow2
4082 ? (unsignedp ? lshr_optab : ashr_optab)
4083 : (unsignedp ? udiv_optab : sdiv_optab));
4084 optab2 = (op1_is_pow2 ? optab1
4085 : (unsignedp ? udivmod_optab : sdivmod_optab));
4087 for (compute_mode = mode; compute_mode != VOIDmode;
4088 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4089 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4090 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4091 break;
4093 if (compute_mode == VOIDmode)
4094 for (compute_mode = mode; compute_mode != VOIDmode;
4095 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4096 if (optab_libfunc (optab1, compute_mode)
4097 || optab_libfunc (optab2, compute_mode))
4098 break;
4100 /* If we still couldn't find a mode, use MODE, but expand_binop will
4101 probably die. */
4102 if (compute_mode == VOIDmode)
4103 compute_mode = mode;
4105 if (target && GET_MODE (target) == compute_mode)
4106 tquotient = target;
4107 else
4108 tquotient = gen_reg_rtx (compute_mode);
4110 size = GET_MODE_BITSIZE (compute_mode);
4111 #if 0
4112 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4113 (mode), and thereby get better code when OP1 is a constant. Do that
4114 later. It will require going over all usages of SIZE below. */
4115 size = GET_MODE_BITSIZE (mode);
4116 #endif
4118 /* Only deduct something for a REM if the last divide done was
4119 for a different constant. Then set the constant of the last
4120 divide. */
4121 max_cost = (unsignedp
4122 ? udiv_cost (speed, compute_mode)
4123 : sdiv_cost (speed, compute_mode));
4124 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4125 && INTVAL (op1) == last_div_const))
4126 max_cost -= (mul_cost (speed, compute_mode)
4127 + add_cost (speed, compute_mode));
4129 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4131 /* Now convert to the best mode to use. */
4132 if (compute_mode != mode)
4134 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4135 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4137 /* convert_modes may have placed op1 into a register, so we
4138 must recompute the following. */
4139 op1_is_constant = CONST_INT_P (op1);
4140 if (op1_is_constant)
4142 wide_int ext_op1 = rtx_mode_t (op1, compute_mode);
4143 op1_is_pow2 = (wi::popcount (ext_op1) == 1
4144 || (! unsignedp
4145 && wi::popcount (wi::neg (ext_op1)) == 1));
4147 else
4148 op1_is_pow2 = 0;
4151 /* If one of the operands is a volatile MEM, copy it into a register. */
4153 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4154 op0 = force_reg (compute_mode, op0);
4155 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4156 op1 = force_reg (compute_mode, op1);
4158 /* If we need the remainder or if OP1 is constant, we need to
4159 put OP0 in a register in case it has any queued subexpressions. */
4160 if (rem_flag || op1_is_constant)
4161 op0 = force_reg (compute_mode, op0);
4163 last = get_last_insn ();
4165 /* Promote floor rounding to trunc rounding for unsigned operations. */
4166 if (unsignedp)
4168 if (code == FLOOR_DIV_EXPR)
4169 code = TRUNC_DIV_EXPR;
4170 if (code == FLOOR_MOD_EXPR)
4171 code = TRUNC_MOD_EXPR;
4172 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4173 code = TRUNC_DIV_EXPR;
4176 if (op1 != const0_rtx)
4177 switch (code)
4179 case TRUNC_MOD_EXPR:
4180 case TRUNC_DIV_EXPR:
4181 if (op1_is_constant)
4183 if (unsignedp)
4185 unsigned HOST_WIDE_INT mh, ml;
4186 int pre_shift, post_shift;
4187 int dummy;
4188 wide_int wd = rtx_mode_t (op1, compute_mode);
4189 unsigned HOST_WIDE_INT d = wd.to_uhwi ();
4191 if (wi::popcount (wd) == 1)
4193 pre_shift = floor_log2 (d);
4194 if (rem_flag)
4196 unsigned HOST_WIDE_INT mask
4197 = (HOST_WIDE_INT_1U << pre_shift) - 1;
4198 remainder
4199 = expand_binop (compute_mode, and_optab, op0,
4200 gen_int_mode (mask, compute_mode),
4201 remainder, 1,
4202 OPTAB_LIB_WIDEN);
4203 if (remainder)
4204 return gen_lowpart (mode, remainder);
4206 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4207 pre_shift, tquotient, 1);
4209 else if (size <= HOST_BITS_PER_WIDE_INT)
4211 if (d >= (HOST_WIDE_INT_1U << (size - 1)))
4213 /* Most significant bit of divisor is set; emit an scc
4214 insn. */
4215 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4216 compute_mode, 1, 1);
4218 else
4220 /* Find a suitable multiplier and right shift count
4221 instead of multiplying with D. */
4223 mh = choose_multiplier (d, size, size,
4224 &ml, &post_shift, &dummy);
4226 /* If the suggested multiplier is more than SIZE bits,
4227 we can do better for even divisors, using an
4228 initial right shift. */
4229 if (mh != 0 && (d & 1) == 0)
4231 pre_shift = ctz_or_zero (d);
4232 mh = choose_multiplier (d >> pre_shift, size,
4233 size - pre_shift,
4234 &ml, &post_shift, &dummy);
4235 gcc_assert (!mh);
4237 else
4238 pre_shift = 0;
4240 if (mh != 0)
4242 rtx t1, t2, t3, t4;
4244 if (post_shift - 1 >= BITS_PER_WORD)
4245 goto fail1;
4247 extra_cost
4248 = (shift_cost (speed, compute_mode, post_shift - 1)
4249 + shift_cost (speed, compute_mode, 1)
4250 + 2 * add_cost (speed, compute_mode));
4251 t1 = expmed_mult_highpart
4252 (compute_mode, op0,
4253 gen_int_mode (ml, compute_mode),
4254 NULL_RTX, 1, max_cost - extra_cost);
4255 if (t1 == 0)
4256 goto fail1;
4257 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4258 op0, t1),
4259 NULL_RTX);
4260 t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4261 t2, 1, NULL_RTX, 1);
4262 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4263 t1, t3),
4264 NULL_RTX);
4265 quotient = expand_shift
4266 (RSHIFT_EXPR, compute_mode, t4,
4267 post_shift - 1, tquotient, 1);
4269 else
4271 rtx t1, t2;
4273 if (pre_shift >= BITS_PER_WORD
4274 || post_shift >= BITS_PER_WORD)
4275 goto fail1;
4277 t1 = expand_shift
4278 (RSHIFT_EXPR, compute_mode, op0,
4279 pre_shift, NULL_RTX, 1);
4280 extra_cost
4281 = (shift_cost (speed, compute_mode, pre_shift)
4282 + shift_cost (speed, compute_mode, post_shift));
4283 t2 = expmed_mult_highpart
4284 (compute_mode, t1,
4285 gen_int_mode (ml, compute_mode),
4286 NULL_RTX, 1, max_cost - extra_cost);
4287 if (t2 == 0)
4288 goto fail1;
4289 quotient = expand_shift
4290 (RSHIFT_EXPR, compute_mode, t2,
4291 post_shift, tquotient, 1);
4295 else /* Too wide mode to use tricky code */
4296 break;
4298 insn = get_last_insn ();
4299 if (insn != last)
4300 set_dst_reg_note (insn, REG_EQUAL,
4301 gen_rtx_UDIV (compute_mode, op0, op1),
4302 quotient);
4304 else /* TRUNC_DIV, signed */
4306 unsigned HOST_WIDE_INT ml;
4307 int lgup, post_shift;
4308 rtx mlr;
4309 HOST_WIDE_INT d = INTVAL (op1);
4310 unsigned HOST_WIDE_INT abs_d;
4312 /* Since d might be INT_MIN, we have to cast to
4313 unsigned HOST_WIDE_INT before negating to avoid
4314 undefined signed overflow. */
4315 abs_d = (d >= 0
4316 ? (unsigned HOST_WIDE_INT) d
4317 : - (unsigned HOST_WIDE_INT) d);
4319 /* n rem d = n rem -d */
4320 if (rem_flag && d < 0)
4322 d = abs_d;
4323 op1 = gen_int_mode (abs_d, compute_mode);
4326 if (d == 1)
4327 quotient = op0;
4328 else if (d == -1)
4329 quotient = expand_unop (compute_mode, neg_optab, op0,
4330 tquotient, 0);
4331 else if (size <= HOST_BITS_PER_WIDE_INT
4332 && abs_d == HOST_WIDE_INT_1U << (size - 1))
4334 /* This case is not handled correctly below. */
4335 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4336 compute_mode, 1, 1);
4337 if (quotient == 0)
4338 goto fail1;
4340 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4341 && (size <= HOST_BITS_PER_WIDE_INT || d >= 0)
4342 && (rem_flag
4343 ? smod_pow2_cheap (speed, compute_mode)
4344 : sdiv_pow2_cheap (speed, compute_mode))
4345 /* We assume that cheap metric is true if the
4346 optab has an expander for this mode. */
4347 && ((optab_handler ((rem_flag ? smod_optab
4348 : sdiv_optab),
4349 compute_mode)
4350 != CODE_FOR_nothing)
4351 || (optab_handler (sdivmod_optab,
4352 compute_mode)
4353 != CODE_FOR_nothing)))
4355 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d)
4356 && (size <= HOST_BITS_PER_WIDE_INT
4357 || abs_d != (unsigned HOST_WIDE_INT) d))
4359 if (rem_flag)
4361 remainder = expand_smod_pow2 (compute_mode, op0, d);
4362 if (remainder)
4363 return gen_lowpart (mode, remainder);
4366 if (sdiv_pow2_cheap (speed, compute_mode)
4367 && ((optab_handler (sdiv_optab, compute_mode)
4368 != CODE_FOR_nothing)
4369 || (optab_handler (sdivmod_optab, compute_mode)
4370 != CODE_FOR_nothing)))
4371 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4372 compute_mode, op0,
4373 gen_int_mode (abs_d,
4374 compute_mode),
4375 NULL_RTX, 0);
4376 else
4377 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4379 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4380 negate the quotient. */
4381 if (d < 0)
4383 insn = get_last_insn ();
4384 if (insn != last
4385 && abs_d < (HOST_WIDE_INT_1U
4386 << (HOST_BITS_PER_WIDE_INT - 1)))
4387 set_dst_reg_note (insn, REG_EQUAL,
4388 gen_rtx_DIV (compute_mode, op0,
4389 gen_int_mode
4390 (abs_d,
4391 compute_mode)),
4392 quotient);
4394 quotient = expand_unop (compute_mode, neg_optab,
4395 quotient, quotient, 0);
4398 else if (size <= HOST_BITS_PER_WIDE_INT)
4400 choose_multiplier (abs_d, size, size - 1,
4401 &ml, &post_shift, &lgup);
4402 if (ml < HOST_WIDE_INT_1U << (size - 1))
4404 rtx t1, t2, t3;
4406 if (post_shift >= BITS_PER_WORD
4407 || size - 1 >= BITS_PER_WORD)
4408 goto fail1;
4410 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4411 + shift_cost (speed, compute_mode, size - 1)
4412 + add_cost (speed, compute_mode));
4413 t1 = expmed_mult_highpart
4414 (compute_mode, op0, gen_int_mode (ml, compute_mode),
4415 NULL_RTX, 0, max_cost - extra_cost);
4416 if (t1 == 0)
4417 goto fail1;
4418 t2 = expand_shift
4419 (RSHIFT_EXPR, compute_mode, t1,
4420 post_shift, NULL_RTX, 0);
4421 t3 = expand_shift
4422 (RSHIFT_EXPR, compute_mode, op0,
4423 size - 1, NULL_RTX, 0);
4424 if (d < 0)
4425 quotient
4426 = force_operand (gen_rtx_MINUS (compute_mode,
4427 t3, t2),
4428 tquotient);
4429 else
4430 quotient
4431 = force_operand (gen_rtx_MINUS (compute_mode,
4432 t2, t3),
4433 tquotient);
4435 else
4437 rtx t1, t2, t3, t4;
4439 if (post_shift >= BITS_PER_WORD
4440 || size - 1 >= BITS_PER_WORD)
4441 goto fail1;
4443 ml |= HOST_WIDE_INT_M1U << (size - 1);
4444 mlr = gen_int_mode (ml, compute_mode);
4445 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4446 + shift_cost (speed, compute_mode, size - 1)
4447 + 2 * add_cost (speed, compute_mode));
4448 t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4449 NULL_RTX, 0,
4450 max_cost - extra_cost);
4451 if (t1 == 0)
4452 goto fail1;
4453 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4454 t1, op0),
4455 NULL_RTX);
4456 t3 = expand_shift
4457 (RSHIFT_EXPR, compute_mode, t2,
4458 post_shift, NULL_RTX, 0);
4459 t4 = expand_shift
4460 (RSHIFT_EXPR, compute_mode, op0,
4461 size - 1, NULL_RTX, 0);
4462 if (d < 0)
4463 quotient
4464 = force_operand (gen_rtx_MINUS (compute_mode,
4465 t4, t3),
4466 tquotient);
4467 else
4468 quotient
4469 = force_operand (gen_rtx_MINUS (compute_mode,
4470 t3, t4),
4471 tquotient);
4474 else /* Too wide mode to use tricky code */
4475 break;
4477 insn = get_last_insn ();
4478 if (insn != last)
4479 set_dst_reg_note (insn, REG_EQUAL,
4480 gen_rtx_DIV (compute_mode, op0, op1),
4481 quotient);
4483 break;
4485 fail1:
4486 delete_insns_since (last);
4487 break;
4489 case FLOOR_DIV_EXPR:
4490 case FLOOR_MOD_EXPR:
4491 /* We will come here only for signed operations. */
4492 if (op1_is_constant && size <= HOST_BITS_PER_WIDE_INT)
4494 unsigned HOST_WIDE_INT mh, ml;
4495 int pre_shift, lgup, post_shift;
4496 HOST_WIDE_INT d = INTVAL (op1);
4498 if (d > 0)
4500 /* We could just as easily deal with negative constants here,
4501 but it does not seem worth the trouble for GCC 2.6. */
4502 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4504 pre_shift = floor_log2 (d);
4505 if (rem_flag)
4507 unsigned HOST_WIDE_INT mask
4508 = (HOST_WIDE_INT_1U << pre_shift) - 1;
4509 remainder = expand_binop
4510 (compute_mode, and_optab, op0,
4511 gen_int_mode (mask, compute_mode),
4512 remainder, 0, OPTAB_LIB_WIDEN);
4513 if (remainder)
4514 return gen_lowpart (mode, remainder);
4516 quotient = expand_shift
4517 (RSHIFT_EXPR, compute_mode, op0,
4518 pre_shift, tquotient, 0);
4520 else
4522 rtx t1, t2, t3, t4;
4524 mh = choose_multiplier (d, size, size - 1,
4525 &ml, &post_shift, &lgup);
4526 gcc_assert (!mh);
4528 if (post_shift < BITS_PER_WORD
4529 && size - 1 < BITS_PER_WORD)
4531 t1 = expand_shift
4532 (RSHIFT_EXPR, compute_mode, op0,
4533 size - 1, NULL_RTX, 0);
4534 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4535 NULL_RTX, 0, OPTAB_WIDEN);
4536 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4537 + shift_cost (speed, compute_mode, size - 1)
4538 + 2 * add_cost (speed, compute_mode));
4539 t3 = expmed_mult_highpart
4540 (compute_mode, t2, gen_int_mode (ml, compute_mode),
4541 NULL_RTX, 1, max_cost - extra_cost);
4542 if (t3 != 0)
4544 t4 = expand_shift
4545 (RSHIFT_EXPR, compute_mode, t3,
4546 post_shift, NULL_RTX, 1);
4547 quotient = expand_binop (compute_mode, xor_optab,
4548 t4, t1, tquotient, 0,
4549 OPTAB_WIDEN);
4554 else
4556 rtx nsign, t1, t2, t3, t4;
4557 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4558 op0, constm1_rtx), NULL_RTX);
4559 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4560 0, OPTAB_WIDEN);
4561 nsign = expand_shift (RSHIFT_EXPR, compute_mode, t2,
4562 size - 1, NULL_RTX, 0);
4563 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4564 NULL_RTX);
4565 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4566 NULL_RTX, 0);
4567 if (t4)
4569 rtx t5;
4570 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4571 NULL_RTX, 0);
4572 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4573 t4, t5),
4574 tquotient);
4579 if (quotient != 0)
4580 break;
4581 delete_insns_since (last);
4583 /* Try using an instruction that produces both the quotient and
4584 remainder, using truncation. We can easily compensate the quotient
4585 or remainder to get floor rounding, once we have the remainder.
4586 Notice that we compute also the final remainder value here,
4587 and return the result right away. */
4588 if (target == 0 || GET_MODE (target) != compute_mode)
4589 target = gen_reg_rtx (compute_mode);
4591 if (rem_flag)
4593 remainder
4594 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4595 quotient = gen_reg_rtx (compute_mode);
4597 else
4599 quotient
4600 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4601 remainder = gen_reg_rtx (compute_mode);
4604 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4605 quotient, remainder, 0))
4607 /* This could be computed with a branch-less sequence.
4608 Save that for later. */
4609 rtx tem;
4610 rtx_code_label *label = gen_label_rtx ();
4611 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4612 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4613 NULL_RTX, 0, OPTAB_WIDEN);
4614 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4615 expand_dec (quotient, const1_rtx);
4616 expand_inc (remainder, op1);
4617 emit_label (label);
4618 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4621 /* No luck with division elimination or divmod. Have to do it
4622 by conditionally adjusting op0 *and* the result. */
4624 rtx_code_label *label1, *label2, *label3, *label4, *label5;
4625 rtx adjusted_op0;
4626 rtx tem;
4628 quotient = gen_reg_rtx (compute_mode);
4629 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4630 label1 = gen_label_rtx ();
4631 label2 = gen_label_rtx ();
4632 label3 = gen_label_rtx ();
4633 label4 = gen_label_rtx ();
4634 label5 = gen_label_rtx ();
4635 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4636 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4637 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4638 quotient, 0, OPTAB_LIB_WIDEN);
4639 if (tem != quotient)
4640 emit_move_insn (quotient, tem);
4641 emit_jump_insn (targetm.gen_jump (label5));
4642 emit_barrier ();
4643 emit_label (label1);
4644 expand_inc (adjusted_op0, const1_rtx);
4645 emit_jump_insn (targetm.gen_jump (label4));
4646 emit_barrier ();
4647 emit_label (label2);
4648 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4649 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4650 quotient, 0, OPTAB_LIB_WIDEN);
4651 if (tem != quotient)
4652 emit_move_insn (quotient, tem);
4653 emit_jump_insn (targetm.gen_jump (label5));
4654 emit_barrier ();
4655 emit_label (label3);
4656 expand_dec (adjusted_op0, const1_rtx);
4657 emit_label (label4);
4658 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4659 quotient, 0, OPTAB_LIB_WIDEN);
4660 if (tem != quotient)
4661 emit_move_insn (quotient, tem);
4662 expand_dec (quotient, const1_rtx);
4663 emit_label (label5);
4665 break;
4667 case CEIL_DIV_EXPR:
4668 case CEIL_MOD_EXPR:
4669 if (unsignedp)
4671 if (op1_is_constant
4672 && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4673 && (size <= HOST_BITS_PER_WIDE_INT
4674 || INTVAL (op1) >= 0))
4676 rtx t1, t2, t3;
4677 unsigned HOST_WIDE_INT d = INTVAL (op1);
4678 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4679 floor_log2 (d), tquotient, 1);
4680 t2 = expand_binop (compute_mode, and_optab, op0,
4681 gen_int_mode (d - 1, compute_mode),
4682 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4683 t3 = gen_reg_rtx (compute_mode);
4684 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4685 compute_mode, 1, 1);
4686 if (t3 == 0)
4688 rtx_code_label *lab;
4689 lab = gen_label_rtx ();
4690 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4691 expand_inc (t1, const1_rtx);
4692 emit_label (lab);
4693 quotient = t1;
4695 else
4696 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4697 t1, t3),
4698 tquotient);
4699 break;
4702 /* Try using an instruction that produces both the quotient and
4703 remainder, using truncation. We can easily compensate the
4704 quotient or remainder to get ceiling rounding, once we have the
4705 remainder. Notice that we compute also the final remainder
4706 value here, and return the result right away. */
4707 if (target == 0 || GET_MODE (target) != compute_mode)
4708 target = gen_reg_rtx (compute_mode);
4710 if (rem_flag)
4712 remainder = (REG_P (target)
4713 ? target : gen_reg_rtx (compute_mode));
4714 quotient = gen_reg_rtx (compute_mode);
4716 else
4718 quotient = (REG_P (target)
4719 ? target : gen_reg_rtx (compute_mode));
4720 remainder = gen_reg_rtx (compute_mode);
4723 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4724 remainder, 1))
4726 /* This could be computed with a branch-less sequence.
4727 Save that for later. */
4728 rtx_code_label *label = gen_label_rtx ();
4729 do_cmp_and_jump (remainder, const0_rtx, EQ,
4730 compute_mode, label);
4731 expand_inc (quotient, const1_rtx);
4732 expand_dec (remainder, op1);
4733 emit_label (label);
4734 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4737 /* No luck with division elimination or divmod. Have to do it
4738 by conditionally adjusting op0 *and* the result. */
4740 rtx_code_label *label1, *label2;
4741 rtx adjusted_op0, tem;
4743 quotient = gen_reg_rtx (compute_mode);
4744 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4745 label1 = gen_label_rtx ();
4746 label2 = gen_label_rtx ();
4747 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4748 compute_mode, label1);
4749 emit_move_insn (quotient, const0_rtx);
4750 emit_jump_insn (targetm.gen_jump (label2));
4751 emit_barrier ();
4752 emit_label (label1);
4753 expand_dec (adjusted_op0, const1_rtx);
4754 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4755 quotient, 1, OPTAB_LIB_WIDEN);
4756 if (tem != quotient)
4757 emit_move_insn (quotient, tem);
4758 expand_inc (quotient, const1_rtx);
4759 emit_label (label2);
4762 else /* signed */
4764 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4765 && INTVAL (op1) >= 0)
4767 /* This is extremely similar to the code for the unsigned case
4768 above. For 2.7 we should merge these variants, but for
4769 2.6.1 I don't want to touch the code for unsigned since that
4770 get used in C. The signed case will only be used by other
4771 languages (Ada). */
4773 rtx t1, t2, t3;
4774 unsigned HOST_WIDE_INT d = INTVAL (op1);
4775 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4776 floor_log2 (d), tquotient, 0);
4777 t2 = expand_binop (compute_mode, and_optab, op0,
4778 gen_int_mode (d - 1, compute_mode),
4779 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4780 t3 = gen_reg_rtx (compute_mode);
4781 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4782 compute_mode, 1, 1);
4783 if (t3 == 0)
4785 rtx_code_label *lab;
4786 lab = gen_label_rtx ();
4787 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4788 expand_inc (t1, const1_rtx);
4789 emit_label (lab);
4790 quotient = t1;
4792 else
4793 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4794 t1, t3),
4795 tquotient);
4796 break;
4799 /* Try using an instruction that produces both the quotient and
4800 remainder, using truncation. We can easily compensate the
4801 quotient or remainder to get ceiling rounding, once we have the
4802 remainder. Notice that we compute also the final remainder
4803 value here, and return the result right away. */
4804 if (target == 0 || GET_MODE (target) != compute_mode)
4805 target = gen_reg_rtx (compute_mode);
4806 if (rem_flag)
4808 remainder= (REG_P (target)
4809 ? target : gen_reg_rtx (compute_mode));
4810 quotient = gen_reg_rtx (compute_mode);
4812 else
4814 quotient = (REG_P (target)
4815 ? target : gen_reg_rtx (compute_mode));
4816 remainder = gen_reg_rtx (compute_mode);
4819 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4820 remainder, 0))
4822 /* This could be computed with a branch-less sequence.
4823 Save that for later. */
4824 rtx tem;
4825 rtx_code_label *label = gen_label_rtx ();
4826 do_cmp_and_jump (remainder, const0_rtx, EQ,
4827 compute_mode, label);
4828 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4829 NULL_RTX, 0, OPTAB_WIDEN);
4830 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4831 expand_inc (quotient, const1_rtx);
4832 expand_dec (remainder, op1);
4833 emit_label (label);
4834 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4837 /* No luck with division elimination or divmod. Have to do it
4838 by conditionally adjusting op0 *and* the result. */
4840 rtx_code_label *label1, *label2, *label3, *label4, *label5;
4841 rtx adjusted_op0;
4842 rtx tem;
4844 quotient = gen_reg_rtx (compute_mode);
4845 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4846 label1 = gen_label_rtx ();
4847 label2 = gen_label_rtx ();
4848 label3 = gen_label_rtx ();
4849 label4 = gen_label_rtx ();
4850 label5 = gen_label_rtx ();
4851 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4852 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4853 compute_mode, label1);
4854 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4855 quotient, 0, OPTAB_LIB_WIDEN);
4856 if (tem != quotient)
4857 emit_move_insn (quotient, tem);
4858 emit_jump_insn (targetm.gen_jump (label5));
4859 emit_barrier ();
4860 emit_label (label1);
4861 expand_dec (adjusted_op0, const1_rtx);
4862 emit_jump_insn (targetm.gen_jump (label4));
4863 emit_barrier ();
4864 emit_label (label2);
4865 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4866 compute_mode, label3);
4867 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4868 quotient, 0, OPTAB_LIB_WIDEN);
4869 if (tem != quotient)
4870 emit_move_insn (quotient, tem);
4871 emit_jump_insn (targetm.gen_jump (label5));
4872 emit_barrier ();
4873 emit_label (label3);
4874 expand_inc (adjusted_op0, const1_rtx);
4875 emit_label (label4);
4876 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4877 quotient, 0, OPTAB_LIB_WIDEN);
4878 if (tem != quotient)
4879 emit_move_insn (quotient, tem);
4880 expand_inc (quotient, const1_rtx);
4881 emit_label (label5);
4884 break;
4886 case EXACT_DIV_EXPR:
4887 if (op1_is_constant && size <= HOST_BITS_PER_WIDE_INT)
4889 HOST_WIDE_INT d = INTVAL (op1);
4890 unsigned HOST_WIDE_INT ml;
4891 int pre_shift;
4892 rtx t1;
4894 pre_shift = ctz_or_zero (d);
4895 ml = invert_mod2n (d >> pre_shift, size);
4896 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4897 pre_shift, NULL_RTX, unsignedp);
4898 quotient = expand_mult (compute_mode, t1,
4899 gen_int_mode (ml, compute_mode),
4900 NULL_RTX, 1);
4902 insn = get_last_insn ();
4903 set_dst_reg_note (insn, REG_EQUAL,
4904 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4905 compute_mode, op0, op1),
4906 quotient);
4908 break;
4910 case ROUND_DIV_EXPR:
4911 case ROUND_MOD_EXPR:
4912 if (unsignedp)
4914 rtx tem;
4915 rtx_code_label *label;
4916 label = gen_label_rtx ();
4917 quotient = gen_reg_rtx (compute_mode);
4918 remainder = gen_reg_rtx (compute_mode);
4919 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4921 rtx tem;
4922 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4923 quotient, 1, OPTAB_LIB_WIDEN);
4924 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4925 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4926 remainder, 1, OPTAB_LIB_WIDEN);
4928 tem = plus_constant (compute_mode, op1, -1);
4929 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4930 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4931 expand_inc (quotient, const1_rtx);
4932 expand_dec (remainder, op1);
4933 emit_label (label);
4935 else
4937 rtx abs_rem, abs_op1, tem, mask;
4938 rtx_code_label *label;
4939 label = gen_label_rtx ();
4940 quotient = gen_reg_rtx (compute_mode);
4941 remainder = gen_reg_rtx (compute_mode);
4942 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4944 rtx tem;
4945 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4946 quotient, 0, OPTAB_LIB_WIDEN);
4947 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4948 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4949 remainder, 0, OPTAB_LIB_WIDEN);
4951 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4952 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4953 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4954 1, NULL_RTX, 1);
4955 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4956 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4957 NULL_RTX, 0, OPTAB_WIDEN);
4958 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4959 size - 1, NULL_RTX, 0);
4960 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4961 NULL_RTX, 0, OPTAB_WIDEN);
4962 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4963 NULL_RTX, 0, OPTAB_WIDEN);
4964 expand_inc (quotient, tem);
4965 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4966 NULL_RTX, 0, OPTAB_WIDEN);
4967 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4968 NULL_RTX, 0, OPTAB_WIDEN);
4969 expand_dec (remainder, tem);
4970 emit_label (label);
4972 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4974 default:
4975 gcc_unreachable ();
4978 if (quotient == 0)
4980 if (target && GET_MODE (target) != compute_mode)
4981 target = 0;
4983 if (rem_flag)
4985 /* Try to produce the remainder without producing the quotient.
4986 If we seem to have a divmod pattern that does not require widening,
4987 don't try widening here. We should really have a WIDEN argument
4988 to expand_twoval_binop, since what we'd really like to do here is
4989 1) try a mod insn in compute_mode
4990 2) try a divmod insn in compute_mode
4991 3) try a div insn in compute_mode and multiply-subtract to get
4992 remainder
4993 4) try the same things with widening allowed. */
4994 remainder
4995 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4996 op0, op1, target,
4997 unsignedp,
4998 ((optab_handler (optab2, compute_mode)
4999 != CODE_FOR_nothing)
5000 ? OPTAB_DIRECT : OPTAB_WIDEN));
5001 if (remainder == 0)
5003 /* No luck there. Can we do remainder and divide at once
5004 without a library call? */
5005 remainder = gen_reg_rtx (compute_mode);
5006 if (! expand_twoval_binop ((unsignedp
5007 ? udivmod_optab
5008 : sdivmod_optab),
5009 op0, op1,
5010 NULL_RTX, remainder, unsignedp))
5011 remainder = 0;
5014 if (remainder)
5015 return gen_lowpart (mode, remainder);
5018 /* Produce the quotient. Try a quotient insn, but not a library call.
5019 If we have a divmod in this mode, use it in preference to widening
5020 the div (for this test we assume it will not fail). Note that optab2
5021 is set to the one of the two optabs that the call below will use. */
5022 quotient
5023 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5024 op0, op1, rem_flag ? NULL_RTX : target,
5025 unsignedp,
5026 ((optab_handler (optab2, compute_mode)
5027 != CODE_FOR_nothing)
5028 ? OPTAB_DIRECT : OPTAB_WIDEN));
5030 if (quotient == 0)
5032 /* No luck there. Try a quotient-and-remainder insn,
5033 keeping the quotient alone. */
5034 quotient = gen_reg_rtx (compute_mode);
5035 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5036 op0, op1,
5037 quotient, NULL_RTX, unsignedp))
5039 quotient = 0;
5040 if (! rem_flag)
5041 /* Still no luck. If we are not computing the remainder,
5042 use a library call for the quotient. */
5043 quotient = sign_expand_binop (compute_mode,
5044 udiv_optab, sdiv_optab,
5045 op0, op1, target,
5046 unsignedp, OPTAB_LIB_WIDEN);
5051 if (rem_flag)
5053 if (target && GET_MODE (target) != compute_mode)
5054 target = 0;
5056 if (quotient == 0)
5058 /* No divide instruction either. Use library for remainder. */
5059 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5060 op0, op1, target,
5061 unsignedp, OPTAB_LIB_WIDEN);
5062 /* No remainder function. Try a quotient-and-remainder
5063 function, keeping the remainder. */
5064 if (!remainder)
5066 remainder = gen_reg_rtx (compute_mode);
5067 if (!expand_twoval_binop_libfunc
5068 (unsignedp ? udivmod_optab : sdivmod_optab,
5069 op0, op1,
5070 NULL_RTX, remainder,
5071 unsignedp ? UMOD : MOD))
5072 remainder = NULL_RTX;
5075 else
5077 /* We divided. Now finish doing X - Y * (X / Y). */
5078 remainder = expand_mult (compute_mode, quotient, op1,
5079 NULL_RTX, unsignedp);
5080 remainder = expand_binop (compute_mode, sub_optab, op0,
5081 remainder, target, unsignedp,
5082 OPTAB_LIB_WIDEN);
5086 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5089 /* Return a tree node with data type TYPE, describing the value of X.
5090 Usually this is an VAR_DECL, if there is no obvious better choice.
5091 X may be an expression, however we only support those expressions
5092 generated by loop.c. */
5094 tree
5095 make_tree (tree type, rtx x)
5097 tree t;
5099 switch (GET_CODE (x))
5101 case CONST_INT:
5102 case CONST_WIDE_INT:
5103 t = wide_int_to_tree (type, rtx_mode_t (x, TYPE_MODE (type)));
5104 return t;
5106 case CONST_DOUBLE:
5107 STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5108 if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5109 t = wide_int_to_tree (type,
5110 wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5111 HOST_BITS_PER_WIDE_INT * 2));
5112 else
5113 t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5115 return t;
5117 case CONST_VECTOR:
5119 int units = CONST_VECTOR_NUNITS (x);
5120 tree itype = TREE_TYPE (type);
5121 tree *elts;
5122 int i;
5124 /* Build a tree with vector elements. */
5125 elts = XALLOCAVEC (tree, units);
5126 for (i = units - 1; i >= 0; --i)
5128 rtx elt = CONST_VECTOR_ELT (x, i);
5129 elts[i] = make_tree (itype, elt);
5132 return build_vector (type, elts);
5135 case PLUS:
5136 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5137 make_tree (type, XEXP (x, 1)));
5139 case MINUS:
5140 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5141 make_tree (type, XEXP (x, 1)));
5143 case NEG:
5144 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5146 case MULT:
5147 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5148 make_tree (type, XEXP (x, 1)));
5150 case ASHIFT:
5151 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5152 make_tree (type, XEXP (x, 1)));
5154 case LSHIFTRT:
5155 t = unsigned_type_for (type);
5156 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5157 make_tree (t, XEXP (x, 0)),
5158 make_tree (type, XEXP (x, 1))));
5160 case ASHIFTRT:
5161 t = signed_type_for (type);
5162 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5163 make_tree (t, XEXP (x, 0)),
5164 make_tree (type, XEXP (x, 1))));
5166 case DIV:
5167 if (TREE_CODE (type) != REAL_TYPE)
5168 t = signed_type_for (type);
5169 else
5170 t = type;
5172 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5173 make_tree (t, XEXP (x, 0)),
5174 make_tree (t, XEXP (x, 1))));
5175 case UDIV:
5176 t = unsigned_type_for (type);
5177 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5178 make_tree (t, XEXP (x, 0)),
5179 make_tree (t, XEXP (x, 1))));
5181 case SIGN_EXTEND:
5182 case ZERO_EXTEND:
5183 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5184 GET_CODE (x) == ZERO_EXTEND);
5185 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5187 case CONST:
5188 return make_tree (type, XEXP (x, 0));
5190 case SYMBOL_REF:
5191 t = SYMBOL_REF_DECL (x);
5192 if (t)
5193 return fold_convert (type, build_fold_addr_expr (t));
5194 /* fall through. */
5196 default:
5197 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5199 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5200 address mode to pointer mode. */
5201 if (POINTER_TYPE_P (type))
5202 x = convert_memory_address_addr_space
5203 (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5205 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5206 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5207 t->decl_with_rtl.rtl = x;
5209 return t;
5213 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5214 and returning TARGET.
5216 If TARGET is 0, a pseudo-register or constant is returned. */
5219 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5221 rtx tem = 0;
5223 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5224 tem = simplify_binary_operation (AND, mode, op0, op1);
5225 if (tem == 0)
5226 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5228 if (target == 0)
5229 target = tem;
5230 else if (tem != target)
5231 emit_move_insn (target, tem);
5232 return target;
5235 /* Helper function for emit_store_flag. */
5237 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5238 machine_mode mode, machine_mode compare_mode,
5239 int unsignedp, rtx x, rtx y, int normalizep,
5240 machine_mode target_mode)
5242 struct expand_operand ops[4];
5243 rtx op0, comparison, subtarget;
5244 rtx_insn *last;
5245 machine_mode result_mode = targetm.cstore_mode (icode);
5247 last = get_last_insn ();
5248 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5249 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5250 if (!x || !y)
5252 delete_insns_since (last);
5253 return NULL_RTX;
5256 if (target_mode == VOIDmode)
5257 target_mode = result_mode;
5258 if (!target)
5259 target = gen_reg_rtx (target_mode);
5261 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5263 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5264 create_fixed_operand (&ops[1], comparison);
5265 create_fixed_operand (&ops[2], x);
5266 create_fixed_operand (&ops[3], y);
5267 if (!maybe_expand_insn (icode, 4, ops))
5269 delete_insns_since (last);
5270 return NULL_RTX;
5272 subtarget = ops[0].value;
5274 /* If we are converting to a wider mode, first convert to
5275 TARGET_MODE, then normalize. This produces better combining
5276 opportunities on machines that have a SIGN_EXTRACT when we are
5277 testing a single bit. This mostly benefits the 68k.
5279 If STORE_FLAG_VALUE does not have the sign bit set when
5280 interpreted in MODE, we can do this conversion as unsigned, which
5281 is usually more efficient. */
5282 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5284 convert_move (target, subtarget,
5285 val_signbit_known_clear_p (result_mode,
5286 STORE_FLAG_VALUE));
5287 op0 = target;
5288 result_mode = target_mode;
5290 else
5291 op0 = subtarget;
5293 /* If we want to keep subexpressions around, don't reuse our last
5294 target. */
5295 if (optimize)
5296 subtarget = 0;
5298 /* Now normalize to the proper value in MODE. Sometimes we don't
5299 have to do anything. */
5300 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5302 /* STORE_FLAG_VALUE might be the most negative number, so write
5303 the comparison this way to avoid a compiler-time warning. */
5304 else if (- normalizep == STORE_FLAG_VALUE)
5305 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5307 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5308 it hard to use a value of just the sign bit due to ANSI integer
5309 constant typing rules. */
5310 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5311 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5312 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5313 normalizep == 1);
5314 else
5316 gcc_assert (STORE_FLAG_VALUE & 1);
5318 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5319 if (normalizep == -1)
5320 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5323 /* If we were converting to a smaller mode, do the conversion now. */
5324 if (target_mode != result_mode)
5326 convert_move (target, op0, 0);
5327 return target;
5329 else
5330 return op0;
5334 /* A subroutine of emit_store_flag only including "tricks" that do not
5335 need a recursive call. These are kept separate to avoid infinite
5336 loops. */
5338 static rtx
5339 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5340 machine_mode mode, int unsignedp, int normalizep,
5341 machine_mode target_mode)
5343 rtx subtarget;
5344 enum insn_code icode;
5345 machine_mode compare_mode;
5346 enum mode_class mclass;
5347 enum rtx_code scode;
5349 if (unsignedp)
5350 code = unsigned_condition (code);
5351 scode = swap_condition (code);
5353 /* If one operand is constant, make it the second one. Only do this
5354 if the other operand is not constant as well. */
5356 if (swap_commutative_operands_p (op0, op1))
5358 std::swap (op0, op1);
5359 code = swap_condition (code);
5362 if (mode == VOIDmode)
5363 mode = GET_MODE (op0);
5365 /* For some comparisons with 1 and -1, we can convert this to
5366 comparisons with zero. This will often produce more opportunities for
5367 store-flag insns. */
5369 switch (code)
5371 case LT:
5372 if (op1 == const1_rtx)
5373 op1 = const0_rtx, code = LE;
5374 break;
5375 case LE:
5376 if (op1 == constm1_rtx)
5377 op1 = const0_rtx, code = LT;
5378 break;
5379 case GE:
5380 if (op1 == const1_rtx)
5381 op1 = const0_rtx, code = GT;
5382 break;
5383 case GT:
5384 if (op1 == constm1_rtx)
5385 op1 = const0_rtx, code = GE;
5386 break;
5387 case GEU:
5388 if (op1 == const1_rtx)
5389 op1 = const0_rtx, code = NE;
5390 break;
5391 case LTU:
5392 if (op1 == const1_rtx)
5393 op1 = const0_rtx, code = EQ;
5394 break;
5395 default:
5396 break;
5399 /* If we are comparing a double-word integer with zero or -1, we can
5400 convert the comparison into one involving a single word. */
5401 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5402 && GET_MODE_CLASS (mode) == MODE_INT
5403 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5405 rtx tem;
5406 if ((code == EQ || code == NE)
5407 && (op1 == const0_rtx || op1 == constm1_rtx))
5409 rtx op00, op01;
5411 /* Do a logical OR or AND of the two words and compare the
5412 result. */
5413 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5414 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5415 tem = expand_binop (word_mode,
5416 op1 == const0_rtx ? ior_optab : and_optab,
5417 op00, op01, NULL_RTX, unsignedp,
5418 OPTAB_DIRECT);
5420 if (tem != 0)
5421 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5422 unsignedp, normalizep);
5424 else if ((code == LT || code == GE) && op1 == const0_rtx)
5426 rtx op0h;
5428 /* If testing the sign bit, can just test on high word. */
5429 op0h = simplify_gen_subreg (word_mode, op0, mode,
5430 subreg_highpart_offset (word_mode,
5431 mode));
5432 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5433 unsignedp, normalizep);
5435 else
5436 tem = NULL_RTX;
5438 if (tem)
5440 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5441 return tem;
5442 if (!target)
5443 target = gen_reg_rtx (target_mode);
5445 convert_move (target, tem,
5446 !val_signbit_known_set_p (word_mode,
5447 (normalizep ? normalizep
5448 : STORE_FLAG_VALUE)));
5449 return target;
5453 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5454 complement of A (for GE) and shifting the sign bit to the low bit. */
5455 if (op1 == const0_rtx && (code == LT || code == GE)
5456 && GET_MODE_CLASS (mode) == MODE_INT
5457 && (normalizep || STORE_FLAG_VALUE == 1
5458 || val_signbit_p (mode, STORE_FLAG_VALUE)))
5460 subtarget = target;
5462 if (!target)
5463 target_mode = mode;
5465 /* If the result is to be wider than OP0, it is best to convert it
5466 first. If it is to be narrower, it is *incorrect* to convert it
5467 first. */
5468 else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5470 op0 = convert_modes (target_mode, mode, op0, 0);
5471 mode = target_mode;
5474 if (target_mode != mode)
5475 subtarget = 0;
5477 if (code == GE)
5478 op0 = expand_unop (mode, one_cmpl_optab, op0,
5479 ((STORE_FLAG_VALUE == 1 || normalizep)
5480 ? 0 : subtarget), 0);
5482 if (STORE_FLAG_VALUE == 1 || normalizep)
5483 /* If we are supposed to produce a 0/1 value, we want to do
5484 a logical shift from the sign bit to the low-order bit; for
5485 a -1/0 value, we do an arithmetic shift. */
5486 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5487 GET_MODE_BITSIZE (mode) - 1,
5488 subtarget, normalizep != -1);
5490 if (mode != target_mode)
5491 op0 = convert_modes (target_mode, mode, op0, 0);
5493 return op0;
5496 mclass = GET_MODE_CLASS (mode);
5497 for (compare_mode = mode; compare_mode != VOIDmode;
5498 compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5500 machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5501 icode = optab_handler (cstore_optab, optab_mode);
5502 if (icode != CODE_FOR_nothing)
5504 do_pending_stack_adjust ();
5505 rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5506 unsignedp, op0, op1, normalizep, target_mode);
5507 if (tem)
5508 return tem;
5510 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5512 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5513 unsignedp, op1, op0, normalizep, target_mode);
5514 if (tem)
5515 return tem;
5517 break;
5521 return 0;
5524 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5525 and storing in TARGET. Normally return TARGET.
5526 Return 0 if that cannot be done.
5528 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5529 it is VOIDmode, they cannot both be CONST_INT.
5531 UNSIGNEDP is for the case where we have to widen the operands
5532 to perform the operation. It says to use zero-extension.
5534 NORMALIZEP is 1 if we should convert the result to be either zero
5535 or one. Normalize is -1 if we should convert the result to be
5536 either zero or -1. If NORMALIZEP is zero, the result will be left
5537 "raw" out of the scc insn. */
5540 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5541 machine_mode mode, int unsignedp, int normalizep)
5543 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5544 enum rtx_code rcode;
5545 rtx subtarget;
5546 rtx tem, trueval;
5547 rtx_insn *last;
5549 /* If we compare constants, we shouldn't use a store-flag operation,
5550 but a constant load. We can get there via the vanilla route that
5551 usually generates a compare-branch sequence, but will in this case
5552 fold the comparison to a constant, and thus elide the branch. */
5553 if (CONSTANT_P (op0) && CONSTANT_P (op1))
5554 return NULL_RTX;
5556 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5557 target_mode);
5558 if (tem)
5559 return tem;
5561 /* If we reached here, we can't do this with a scc insn, however there
5562 are some comparisons that can be done in other ways. Don't do any
5563 of these cases if branches are very cheap. */
5564 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5565 return 0;
5567 /* See what we need to return. We can only return a 1, -1, or the
5568 sign bit. */
5570 if (normalizep == 0)
5572 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5573 normalizep = STORE_FLAG_VALUE;
5575 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5577 else
5578 return 0;
5581 last = get_last_insn ();
5583 /* If optimizing, use different pseudo registers for each insn, instead
5584 of reusing the same pseudo. This leads to better CSE, but slows
5585 down the compiler, since there are more pseudos */
5586 subtarget = (!optimize
5587 && (target_mode == mode)) ? target : NULL_RTX;
5588 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5590 /* For floating-point comparisons, try the reverse comparison or try
5591 changing the "orderedness" of the comparison. */
5592 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5594 enum rtx_code first_code;
5595 bool and_them;
5597 rcode = reverse_condition_maybe_unordered (code);
5598 if (can_compare_p (rcode, mode, ccp_store_flag)
5599 && (code == ORDERED || code == UNORDERED
5600 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5601 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5603 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5604 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5606 /* For the reverse comparison, use either an addition or a XOR. */
5607 if (want_add
5608 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5609 optimize_insn_for_speed_p ()) == 0)
5611 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5612 STORE_FLAG_VALUE, target_mode);
5613 if (tem)
5614 return expand_binop (target_mode, add_optab, tem,
5615 gen_int_mode (normalizep, target_mode),
5616 target, 0, OPTAB_WIDEN);
5618 else if (!want_add
5619 && rtx_cost (trueval, mode, XOR, 1,
5620 optimize_insn_for_speed_p ()) == 0)
5622 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5623 normalizep, target_mode);
5624 if (tem)
5625 return expand_binop (target_mode, xor_optab, tem, trueval,
5626 target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5630 delete_insns_since (last);
5632 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5633 if (code == ORDERED || code == UNORDERED)
5634 return 0;
5636 and_them = split_comparison (code, mode, &first_code, &code);
5638 /* If there are no NaNs, the first comparison should always fall through.
5639 Effectively change the comparison to the other one. */
5640 if (!HONOR_NANS (mode))
5642 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5643 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5644 target_mode);
5647 if (!HAVE_conditional_move)
5648 return 0;
5650 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5651 conditional move. */
5652 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5653 normalizep, target_mode);
5654 if (tem == 0)
5655 return 0;
5657 if (and_them)
5658 tem = emit_conditional_move (target, code, op0, op1, mode,
5659 tem, const0_rtx, GET_MODE (tem), 0);
5660 else
5661 tem = emit_conditional_move (target, code, op0, op1, mode,
5662 trueval, tem, GET_MODE (tem), 0);
5664 if (tem == 0)
5665 delete_insns_since (last);
5666 return tem;
5669 /* The remaining tricks only apply to integer comparisons. */
5671 if (GET_MODE_CLASS (mode) != MODE_INT)
5672 return 0;
5674 /* If this is an equality comparison of integers, we can try to exclusive-or
5675 (or subtract) the two operands and use a recursive call to try the
5676 comparison with zero. Don't do any of these cases if branches are
5677 very cheap. */
5679 if ((code == EQ || code == NE) && op1 != const0_rtx)
5681 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5682 OPTAB_WIDEN);
5684 if (tem == 0)
5685 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5686 OPTAB_WIDEN);
5687 if (tem != 0)
5688 tem = emit_store_flag (target, code, tem, const0_rtx,
5689 mode, unsignedp, normalizep);
5690 if (tem != 0)
5691 return tem;
5693 delete_insns_since (last);
5696 /* For integer comparisons, try the reverse comparison. However, for
5697 small X and if we'd have anyway to extend, implementing "X != 0"
5698 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5699 rcode = reverse_condition (code);
5700 if (can_compare_p (rcode, mode, ccp_store_flag)
5701 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5702 && code == NE
5703 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5704 && op1 == const0_rtx))
5706 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5707 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5709 /* Again, for the reverse comparison, use either an addition or a XOR. */
5710 if (want_add
5711 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5712 optimize_insn_for_speed_p ()) == 0)
5714 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5715 STORE_FLAG_VALUE, target_mode);
5716 if (tem != 0)
5717 tem = expand_binop (target_mode, add_optab, tem,
5718 gen_int_mode (normalizep, target_mode),
5719 target, 0, OPTAB_WIDEN);
5721 else if (!want_add
5722 && rtx_cost (trueval, mode, XOR, 1,
5723 optimize_insn_for_speed_p ()) == 0)
5725 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5726 normalizep, target_mode);
5727 if (tem != 0)
5728 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5729 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5732 if (tem != 0)
5733 return tem;
5734 delete_insns_since (last);
5737 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5738 the constant zero. Reject all other comparisons at this point. Only
5739 do LE and GT if branches are expensive since they are expensive on
5740 2-operand machines. */
5742 if (op1 != const0_rtx
5743 || (code != EQ && code != NE
5744 && (BRANCH_COST (optimize_insn_for_speed_p (),
5745 false) <= 1 || (code != LE && code != GT))))
5746 return 0;
5748 /* Try to put the result of the comparison in the sign bit. Assume we can't
5749 do the necessary operation below. */
5751 tem = 0;
5753 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5754 the sign bit set. */
5756 if (code == LE)
5758 /* This is destructive, so SUBTARGET can't be OP0. */
5759 if (rtx_equal_p (subtarget, op0))
5760 subtarget = 0;
5762 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5763 OPTAB_WIDEN);
5764 if (tem)
5765 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5766 OPTAB_WIDEN);
5769 /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5770 number of bits in the mode of OP0, minus one. */
5772 if (code == GT)
5774 if (rtx_equal_p (subtarget, op0))
5775 subtarget = 0;
5777 tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
5778 GET_MODE_BITSIZE (mode) - 1,
5779 subtarget, 0);
5780 if (tem)
5781 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5782 OPTAB_WIDEN);
5785 if (code == EQ || code == NE)
5787 /* For EQ or NE, one way to do the comparison is to apply an operation
5788 that converts the operand into a positive number if it is nonzero
5789 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5790 for NE we negate. This puts the result in the sign bit. Then we
5791 normalize with a shift, if needed.
5793 Two operations that can do the above actions are ABS and FFS, so try
5794 them. If that doesn't work, and MODE is smaller than a full word,
5795 we can use zero-extension to the wider mode (an unsigned conversion)
5796 as the operation. */
5798 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5799 that is compensated by the subsequent overflow when subtracting
5800 one / negating. */
5802 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5803 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5804 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5805 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5806 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5808 tem = convert_modes (word_mode, mode, op0, 1);
5809 mode = word_mode;
5812 if (tem != 0)
5814 if (code == EQ)
5815 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5816 0, OPTAB_WIDEN);
5817 else
5818 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5821 /* If we couldn't do it that way, for NE we can "or" the two's complement
5822 of the value with itself. For EQ, we take the one's complement of
5823 that "or", which is an extra insn, so we only handle EQ if branches
5824 are expensive. */
5826 if (tem == 0
5827 && (code == NE
5828 || BRANCH_COST (optimize_insn_for_speed_p (),
5829 false) > 1))
5831 if (rtx_equal_p (subtarget, op0))
5832 subtarget = 0;
5834 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5835 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5836 OPTAB_WIDEN);
5838 if (tem && code == EQ)
5839 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5843 if (tem && normalizep)
5844 tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem,
5845 GET_MODE_BITSIZE (mode) - 1,
5846 subtarget, normalizep == 1);
5848 if (tem)
5850 if (!target)
5852 else if (GET_MODE (tem) != target_mode)
5854 convert_move (target, tem, 0);
5855 tem = target;
5857 else if (!subtarget)
5859 emit_move_insn (target, tem);
5860 tem = target;
5863 else
5864 delete_insns_since (last);
5866 return tem;
5869 /* Like emit_store_flag, but always succeeds. */
5872 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5873 machine_mode mode, int unsignedp, int normalizep)
5875 rtx tem;
5876 rtx_code_label *label;
5877 rtx trueval, falseval;
5879 /* First see if emit_store_flag can do the job. */
5880 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5881 if (tem != 0)
5882 return tem;
5884 if (!target)
5885 target = gen_reg_rtx (word_mode);
5887 /* If this failed, we have to do this with set/compare/jump/set code.
5888 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
5889 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5890 if (code == NE
5891 && GET_MODE_CLASS (mode) == MODE_INT
5892 && REG_P (target)
5893 && op0 == target
5894 && op1 == const0_rtx)
5896 label = gen_label_rtx ();
5897 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
5898 NULL_RTX, NULL, label, -1);
5899 emit_move_insn (target, trueval);
5900 emit_label (label);
5901 return target;
5904 if (!REG_P (target)
5905 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5906 target = gen_reg_rtx (GET_MODE (target));
5908 /* Jump in the right direction if the target cannot implement CODE
5909 but can jump on its reverse condition. */
5910 falseval = const0_rtx;
5911 if (! can_compare_p (code, mode, ccp_jump)
5912 && (! FLOAT_MODE_P (mode)
5913 || code == ORDERED || code == UNORDERED
5914 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5915 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5917 enum rtx_code rcode;
5918 if (FLOAT_MODE_P (mode))
5919 rcode = reverse_condition_maybe_unordered (code);
5920 else
5921 rcode = reverse_condition (code);
5923 /* Canonicalize to UNORDERED for the libcall. */
5924 if (can_compare_p (rcode, mode, ccp_jump)
5925 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5927 falseval = trueval;
5928 trueval = const0_rtx;
5929 code = rcode;
5933 emit_move_insn (target, trueval);
5934 label = gen_label_rtx ();
5935 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
5936 label, -1);
5938 emit_move_insn (target, falseval);
5939 emit_label (label);
5941 return target;
5944 /* Perform possibly multi-word comparison and conditional jump to LABEL
5945 if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE. This is
5946 now a thin wrapper around do_compare_rtx_and_jump. */
5948 static void
5949 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
5950 rtx_code_label *label)
5952 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5953 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
5954 NULL, label, -1);