Simplify X / X, 0 / X and X % X
[official-gcc.git] / gcc / expmed.c
blob a21a632ab189f9226879ccf3704401060e61df0e
1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987-2016 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "predict.h"
30 #include "memmodel.h"
31 #include "tm_p.h"
32 #include "expmed.h"
33 #include "optabs.h"
34 #include "emit-rtl.h"
35 #include "diagnostic-core.h"
36 #include "fold-const.h"
37 #include "stor-layout.h"
38 #include "dojump.h"
39 #include "explow.h"
40 #include "expr.h"
41 #include "langhooks.h"
/* Per-target cost tables for expmed.  With SWITCHABLE_TARGET there can be
   several instances; this_target_expmed selects the active one.  */
43 struct target_expmed default_target_expmed;
44 #if SWITCHABLE_TARGET
45 struct target_expmed *this_target_expmed = &default_target_expmed;
46 #endif
/* Forward declarations of the file-local bit-field store/extract helpers
   and the power-of-two division/modulus expanders defined later in this
   file.  */
48 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
49 unsigned HOST_WIDE_INT,
50 unsigned HOST_WIDE_INT,
51 unsigned HOST_WIDE_INT,
52 rtx, bool);
53 static void store_fixed_bit_field_1 (rtx, unsigned HOST_WIDE_INT,
54 unsigned HOST_WIDE_INT,
55 rtx, bool);
56 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
57 unsigned HOST_WIDE_INT,
58 unsigned HOST_WIDE_INT,
59 unsigned HOST_WIDE_INT,
60 rtx, bool);
61 static rtx extract_fixed_bit_field (machine_mode, rtx,
62 unsigned HOST_WIDE_INT,
63 unsigned HOST_WIDE_INT, rtx, int, bool);
64 static rtx extract_fixed_bit_field_1 (machine_mode, rtx,
65 unsigned HOST_WIDE_INT,
66 unsigned HOST_WIDE_INT, rtx, int, bool);
67 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
68 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
69 unsigned HOST_WIDE_INT, int, bool);
70 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
71 static rtx expand_smod_pow2 (machine_mode, rtx, HOST_WIDE_INT);
72 static rtx expand_sdiv_pow2 (machine_mode, rtx, HOST_WIDE_INT);
74 /* Return a constant integer mask value of mode MODE with BITSIZE ones
75 followed by BITPOS zeros, or the complement of that if COMPLEMENT.
76 The mask is truncated if necessary to the width of mode MODE. The
77 mask is zero-extended if BITSIZE+BITPOS is too small for MODE. */
79 static inline rtx
80 mask_rtx (machine_mode mode, int bitpos, int bitsize, bool complement)
82 return immed_wide_int_const
83 (wi::shifted_mask (bitpos, bitsize, complement,
84 GET_MODE_PRECISION (mode)), mode);
87 /* Test whether a value is zero or a power of two.  Note that x == 0
   satisfies the test, hence the "_OR_ZERO" in the name.  */
88 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
89 (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
/* Scratch RTL expressions used by init_expmed to probe the cost of each
   operation shape.  The modes of these nodes are rewritten (PUT_MODE) for
   every mode whose costs are being measured.  */
91 struct init_expmed_rtl
/* A pseudo register operand shared by all the expressions below.  */
93 rtx reg;
94 rtx plus;
95 rtx neg;
96 rtx mult;
97 rtx sdiv;
98 rtx udiv;
/* Division/modulus by the constant 32 (pow2[5]), used to decide whether
   sdiv/smod by a power of two is cheap.  */
99 rtx sdiv_32;
100 rtx smod_32;
/* Widening multiply and the shift/truncate pair used to cost the
   high-part multiply.  */
101 rtx wide_mult;
102 rtx wide_lshr;
103 rtx wide_trunc;
/* Shift and shift-combined-with-add/sub shapes.  */
104 rtx shift;
105 rtx shift_mult;
106 rtx shift_add;
107 rtx shift_sub0;
108 rtx shift_sub1;
109 rtx zext;
110 rtx trunc;
/* Cached constants: pow2[m] is (1 << m), cint[m] is m itself.  */
112 rtx pow2[MAX_BITS_PER_WORD];
113 rtx cint[MAX_BITS_PER_WORD];
/* Record in the cost tables the cost of converting FROM_MODE to TO_MODE,
   for the given SPEED (true = optimize for speed, false = for size).
   Uses ALL's scratch zext/trunc expressions to measure the cost.  */
116 static void
117 init_expmed_one_conv (struct init_expmed_rtl *all, machine_mode to_mode,
118 machine_mode from_mode, bool speed)
120 int to_size, from_size;
121 rtx which;
123 to_size = GET_MODE_PRECISION (to_mode);
124 from_size = GET_MODE_PRECISION (from_mode);
126 /* Most partial integers have a precision less than the "full"
127 integer it requires for storage.  In case one doesn't, for
128 comparison purposes here, reduce the bit size by one in that
129 case.  */
130 if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
131 && pow2p_hwi (to_size))
132 to_size --;
133 if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
134 && pow2p_hwi (from_size))
135 from_size --;
137 /* Assume cost of zero-extend and sign-extend is the same.  */
138 which = (to_size < from_size ? all->trunc : all->zext);
140 PUT_MODE (all->reg, from_mode);
141 set_convert_cost (to_mode, from_mode, speed,
142 set_src_cost (which, to_mode, speed));
/* Fill in the cost tables for operations in a single mode MODE, for the
   given SPEED setting.  ALL provides the scratch RTL; the PUT_MODE calls
   below retarget those shared nodes at MODE before each cost query.  */
145 static void
146 init_expmed_one_mode (struct init_expmed_rtl *all,
147 machine_mode mode, int speed)
149 int m, n, mode_bitsize;
150 machine_mode mode_from;
152 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
/* Retarget every scratch expression at MODE.  */
154 PUT_MODE (all->reg, mode);
155 PUT_MODE (all->plus, mode);
156 PUT_MODE (all->neg, mode);
157 PUT_MODE (all->mult, mode);
158 PUT_MODE (all->sdiv, mode);
159 PUT_MODE (all->udiv, mode);
160 PUT_MODE (all->sdiv_32, mode);
161 PUT_MODE (all->smod_32, mode);
162 PUT_MODE (all->wide_trunc, mode);
163 PUT_MODE (all->shift, mode);
164 PUT_MODE (all->shift_mult, mode);
165 PUT_MODE (all->shift_add, mode);
166 PUT_MODE (all->shift_sub0, mode);
167 PUT_MODE (all->shift_sub1, mode);
168 PUT_MODE (all->zext, mode);
169 PUT_MODE (all->trunc, mode);
/* Basic arithmetic costs.  */
171 set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
172 set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
173 set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
174 set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
175 set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
/* Decide whether division/modulus by a power of two is cheap enough to
   keep as-is, by comparing against a small multiple of the add cost.  */
177 set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
178 <= 2 * add_cost (speed, mode)))
179 set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
180 <= 4 * add_cost (speed, mode)));
/* A shift by zero is free; shift-and-add by zero costs one add.  */
182 set_shift_cost (speed, mode, 0, 0);
184 int cost = add_cost (speed, mode);
185 set_shiftadd_cost (speed, mode, 0, cost);
186 set_shiftsub0_cost (speed, mode, 0, cost);
187 set_shiftsub1_cost (speed, mode, 0, cost);
/* Cost each shift count 1..n-1 by patching the count operand of the
   shared shift expressions.  */
190 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
191 for (m = 1; m < n; m++)
193 XEXP (all->shift, 1) = all->cint[m];
194 XEXP (all->shift_mult, 1) = all->pow2[m];
196 set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
197 set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
198 speed));
199 set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
200 speed));
201 set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
202 speed));
/* For scalar integer modes, also record conversion costs to/from every
   other integer mode.  */
205 if (SCALAR_INT_MODE_P (mode))
207 for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
208 mode_from = (machine_mode)(mode_from + 1))
209 init_expmed_one_conv (all, mode, mode_from, speed);
/* For plain MODE_INT modes with a wider mode available, record the cost
   of a widening multiply and of extracting its high part.  */
211 if (GET_MODE_CLASS (mode) == MODE_INT)
213 machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
214 if (wider_mode != VOIDmode)
216 PUT_MODE (all->zext, wider_mode);
217 PUT_MODE (all->wide_mult, wider_mode);
218 PUT_MODE (all->wide_lshr, wider_mode);
219 XEXP (all->wide_lshr, 1) = GEN_INT (mode_bitsize);
221 set_mul_widen_cost (speed, wider_mode,
222 set_src_cost (all->wide_mult, wider_mode, speed));
223 set_mul_highpart_cost (speed, mode,
224 set_src_cost (all->wide_trunc, mode, speed));
/* One-time (per target reinit) initialization of all the expmed cost
   tables: builds the scratch RTL, measures costs for both the
   optimize-for-size and optimize-for-speed profiles, resets the
   multiplication-algorithm hash, then frees the scratch RTL.  */
229 void
230 init_expmed (void)
232 struct init_expmed_rtl all;
233 machine_mode mode = QImode;
234 int m, speed;
236 memset (&all, 0, sizeof all);
/* Pre-build the constants 1<<m and m used as shift/multiply operands.  */
237 for (m = 1; m < MAX_BITS_PER_WORD; m++)
239 all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
240 all.cint[m] = GEN_INT (m);
243 /* Avoid using hard regs in ways which may be unsupported.  */
244 all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
245 all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
246 all.neg = gen_rtx_NEG (mode, all.reg);
247 all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
248 all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
249 all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
250 all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
251 all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
252 all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
253 all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
254 all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
255 all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
256 all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
257 all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
258 all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
259 all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
260 all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
261 all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
/* speed == 0 measures size costs, speed == 1 measures speed costs; the
   hot-insn flag steers set_src_cost accordingly.  */
263 for (speed = 0; speed < 2; speed++)
265 crtl->maybe_hot_insn_p = speed;
266 set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
268 for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
269 mode = (machine_mode)(mode + 1))
270 init_expmed_one_mode (&all, mode, speed);
272 if (MIN_MODE_PARTIAL_INT != VOIDmode)
273 for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
274 mode = (machine_mode)(mode + 1))
275 init_expmed_one_mode (&all, mode, speed);
277 if (MIN_MODE_VECTOR_INT != VOIDmode)
278 for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
279 mode = (machine_mode)(mode + 1))
280 init_expmed_one_mode (&all, mode, speed);
/* Reset (or mark in-use) the cache of synthesized multiply algorithms;
   stale entries would be wrong after a target switch.  */
283 if (alg_hash_used_p ())
285 struct alg_hash_entry *p = alg_hash_entry_ptr (0);
286 memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
288 else
289 set_alg_hash_used_p (true);
290 default_rtl_profile ();
/* Free the scratch RTL in reverse creation order.  */
292 ggc_free (all.trunc);
293 ggc_free (all.shift_sub1);
294 ggc_free (all.shift_sub0);
295 ggc_free (all.shift_add);
296 ggc_free (all.shift_mult);
297 ggc_free (all.shift);
298 ggc_free (all.wide_trunc);
299 ggc_free (all.wide_lshr);
300 ggc_free (all.wide_mult);
301 ggc_free (all.zext);
302 ggc_free (all.smod_32);
303 ggc_free (all.sdiv_32);
304 ggc_free (all.udiv);
305 ggc_free (all.sdiv);
306 ggc_free (all.mult);
307 ggc_free (all.neg);
308 ggc_free (all.plus);
309 ggc_free (all.reg);
312 /* Return an rtx representing minus the value of X.
313 MODE is the intended mode of the result,
314 useful if X is a CONST_INT. */
317 negate_rtx (machine_mode mode, rtx x)
319 rtx result = simplify_unary_operation (NEG, mode, x, mode);
321 if (result == 0)
322 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
324 return result;
327 /* Whether reverse storage order is supported on the target. */
328 static int reverse_storage_order_supported = -1;
330 /* Check whether reverse storage order is supported on the target. */
332 static void
333 check_reverse_storage_order_support (void)
335 if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
337 reverse_storage_order_supported = 0;
338 sorry ("reverse scalar storage order");
340 else
341 reverse_storage_order_supported = 1;
344 /* Whether reverse FP storage order is supported on the target. */
345 static int reverse_float_storage_order_supported = -1;
347 /* Check whether reverse FP storage order is supported on the target. */
349 static void
350 check_reverse_float_storage_order_support (void)
352 if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
354 reverse_float_storage_order_supported = 0;
355 sorry ("reverse floating-point scalar storage order");
357 else
358 reverse_float_storage_order_supported = 1;
361 /* Return an rtx representing value of X with reverse storage order.
362 MODE is the intended mode of the result,
363 useful if X is a CONST_INT.  */
/* NOTE(review): the lone "rtx" return-type line of this definition was
   lost in extraction; the function returns rtx.  */
366 flip_storage_order (enum machine_mode mode, rtx x)
368 enum machine_mode int_mode;
369 rtx result;
/* A single byte has no storage order to flip.  */
371 if (mode == QImode)
372 return x;
/* Complex values: flip each part independently and reassemble.  */
374 if (COMPLEX_MODE_P (mode))
376 rtx real = read_complex_part (x, false);
377 rtx imag = read_complex_part (x, true);
379 real = flip_storage_order (GET_MODE_INNER (mode), real);
380 imag = flip_storage_order (GET_MODE_INNER (mode), imag);
382 return gen_rtx_CONCAT (mode, real, imag);
/* Lazily verify target support the first time through.  */
385 if (__builtin_expect (reverse_storage_order_supported < 0, 0))
386 check_reverse_storage_order_support ();
388 if (SCALAR_INT_MODE_P (mode))
389 int_mode = mode;
390 else
392 if (FLOAT_MODE_P (mode)
393 && __builtin_expect (reverse_float_storage_order_supported < 0, 0))
394 check_reverse_float_storage_order_support ();
/* Pun non-integer values through an equal-precision integer mode so
   the flip can be done as a byte swap.  */
396 int_mode = mode_for_size (GET_MODE_PRECISION (mode), MODE_INT, 0);
397 if (int_mode == BLKmode)
399 sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
400 return x;
402 x = gen_lowpart (int_mode, x);
/* The flip itself is a byte swap: fold it if possible, otherwise emit
   a bswap instruction/libcall.  */
405 result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
406 if (result == 0)
407 result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
409 if (int_mode != mode)
410 result = gen_lowpart (mode, result);
412 return result;
415 /* Adjust bitfield memory MEM so that it points to the first unit of mode
416 MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
417 If MODE is BLKmode, return a reference to every byte in the bitfield.
418 Set *NEW_BITNUM to the bit position of the field within the new memory. */
420 static rtx
421 narrow_bit_field_mem (rtx mem, machine_mode mode,
422 unsigned HOST_WIDE_INT bitsize,
423 unsigned HOST_WIDE_INT bitnum,
424 unsigned HOST_WIDE_INT *new_bitnum)
426 if (mode == BLKmode)
428 *new_bitnum = bitnum % BITS_PER_UNIT;
429 HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
430 HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
431 / BITS_PER_UNIT);
432 return adjust_bitfield_address_size (mem, mode, offset, size);
434 else
436 unsigned int unit = GET_MODE_BITSIZE (mode);
437 *new_bitnum = bitnum % unit;
438 HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
439 return adjust_bitfield_address (mem, mode, offset);
443 /* The caller wants to perform insertion or extraction PATTERN on a
444 bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
445 BITREGION_START and BITREGION_END are as for store_bit_field
446 and FIELDMODE is the natural mode of the field.
448 Search for a mode that is compatible with the memory access
449 restrictions and (where applicable) with a register insertion or
450 extraction.  Return the new memory on success, storing the adjusted
451 bit position in *NEW_BITNUM.  Return null otherwise.  */
453 static rtx
454 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
455 rtx op0, HOST_WIDE_INT bitsize,
456 HOST_WIDE_INT bitnum,
457 unsigned HOST_WIDE_INT bitregion_start,
458 unsigned HOST_WIDE_INT bitregion_end,
459 machine_mode fieldmode,
460 unsigned HOST_WIDE_INT *new_bitnum)
/* The iterator yields, smallest first, each mode whose access would stay
   inside the bit region and satisfy alignment/volatility constraints.  */
462 bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
463 bitregion_end, MEM_ALIGN (op0),
464 MEM_VOLATILE_P (op0));
465 machine_mode best_mode;
466 if (iter.next_mode (&best_mode))
468 /* We can use a memory in BEST_MODE.  See whether this is true for
469 any wider modes.  All other things being equal, we prefer to
470 use the widest mode possible because it tends to expose more
471 CSE opportunities.  */
472 if (!iter.prefer_smaller_modes ())
474 /* Limit the search to the mode required by the corresponding
475 register insertion or extraction instruction, if any.  */
476 machine_mode limit_mode = word_mode;
477 extraction_insn insn;
478 if (get_best_reg_extraction_insn (&insn, pattern,
479 GET_MODE_BITSIZE (best_mode),
480 fieldmode))
481 limit_mode = insn.field_mode;
483 machine_mode wider_mode;
484 while (iter.next_mode (&wider_mode)
485 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
486 best_mode = wider_mode;
488 return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
489 new_bitnum);
491 return NULL_RTX;
494 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
495 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
496 offset is then BITNUM / BITS_PER_UNIT. */
498 static bool
499 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
500 unsigned HOST_WIDE_INT bitsize,
501 machine_mode struct_mode)
503 if (BYTES_BIG_ENDIAN)
504 return (bitnum % BITS_PER_UNIT == 0
505 && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
506 || (bitnum + bitsize) % BITS_PER_WORD == 0));
507 else
508 return bitnum % BITS_PER_WORD == 0;
511 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
512 containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
513 Return false if the access would touch memory outside the range
514 BITREGION_START to BITREGION_END for conformance to the C++ memory
515 model.  */
517 static bool
518 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
519 unsigned HOST_WIDE_INT bitnum,
520 machine_mode fieldmode,
521 unsigned HOST_WIDE_INT bitregion_start,
522 unsigned HOST_WIDE_INT bitregion_end)
524 unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
526 /* -fstrict-volatile-bitfields must be enabled and we must have a
527 volatile MEM.  */
528 if (!MEM_P (op0)
529 || !MEM_VOLATILE_P (op0)
530 || flag_strict_volatile_bitfields <= 0)
531 return false;
533 /* Non-integral modes likely only happen with packed structures.
534 Punt.  */
535 if (!SCALAR_INT_MODE_P (fieldmode))
536 return false;
538 /* The bit size must not be larger than the field mode, and
539 the field mode must not be larger than a word.  */
540 if (bitsize > modesize || modesize > BITS_PER_WORD)
541 return false;
543 /* Check for cases of unaligned fields that must be split.  */
/* I.e. the field would straddle two MODESIZE-sized units.  */
544 if (bitnum % modesize + bitsize > modesize)
545 return false;
547 /* The memory must be sufficiently aligned for a MODESIZE access.
548 This condition guarantees, that the memory access will not
549 touch anything after the end of the structure.  */
550 if (MEM_ALIGN (op0) < modesize)
551 return false;
553 /* Check for cases where the C++ memory model applies.  */
/* The MODESIZE-aligned unit containing the field must lie wholly within
   [BITREGION_START, BITREGION_END]; a zero BITREGION_END means there is
   no region restriction.  */
554 if (bitregion_end != 0
555 && (bitnum - bitnum % modesize < bitregion_start
556 || bitnum - bitnum % modesize + modesize - 1 > bitregion_end))
557 return false;
559 return true;
562 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
563 bit number BITNUM can be treated as a simple value of mode MODE. */
565 static bool
566 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
567 unsigned HOST_WIDE_INT bitnum, machine_mode mode)
569 return (MEM_P (op0)
570 && bitnum % BITS_PER_UNIT == 0
571 && bitsize == GET_MODE_BITSIZE (mode)
572 && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
573 || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
574 && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
577 /* Try to use instruction INSV to store VALUE into a field of OP0.
578 BITSIZE and BITNUM are as for store_bit_field.  Returns true and emits
   the insertion on success; on failure deletes any insns emitted since
   entry and returns false.  */
580 static bool
581 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
582 unsigned HOST_WIDE_INT bitsize,
583 unsigned HOST_WIDE_INT bitnum,
584 rtx value)
586 struct expand_operand ops[4];
587 rtx value1;
588 rtx xop0 = op0;
/* Remember the insn stream position so a failed attempt can be undone.  */
589 rtx_insn *last = get_last_insn ();
590 bool copy_back = false;
592 machine_mode op_mode = insv->field_mode;
593 unsigned int unit = GET_MODE_BITSIZE (op_mode);
594 if (bitsize == 0 || bitsize > unit)
595 return false;
597 if (MEM_P (xop0))
598 /* Get a reference to the first byte of the field.  */
599 xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
600 &bitnum);
601 else
603 /* Convert from counting within OP0 to counting in OP_MODE.  */
604 if (BYTES_BIG_ENDIAN)
605 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
607 /* If xop0 is a register, we need it in OP_MODE
608 to make it acceptable to the format of insv.  */
609 if (GET_CODE (xop0) == SUBREG)
610 /* We can't just change the mode, because this might clobber op0,
611 and we will need the original value of op0 if insv fails.  */
612 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
613 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
614 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
617 /* If the destination is a paradoxical subreg such that we need a
618 truncate to the inner mode, perform the insertion on a temporary and
619 truncate the result to the original destination.  Note that we can't
620 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
621 X) 0)) is (reg:N X).  */
622 if (GET_CODE (xop0) == SUBREG
623 && REG_P (SUBREG_REG (xop0))
624 && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
625 op_mode))
627 rtx tem = gen_reg_rtx (op_mode);
628 emit_move_insn (tem, xop0);
629 xop0 = tem;
630 copy_back = true;
633 /* There is a similar overflow check at the start of store_bit_field_1,
634 but it only handles the case where the field lies completely
635 outside the register.  The field can also lie partially in the
636 register, and then BITSIZE must be adjusted for the partial
637 overflow.  Without this fix, pr48335-2.c would be broken on
638 big-endian targets that have a bit-insert instruction, such as arm
639 and aarch64.  */
640 if (bitsize + bitnum > unit && bitnum < unit)
642 warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
643 "destination object, data truncated into %wu-bit",
644 bitsize, unit - bitnum);
645 bitsize = unit - bitnum;
648 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
649 "backwards" from the size of the unit we are inserting into.
650 Otherwise, we count bits from the most significant on a
651 BYTES/BITS_BIG_ENDIAN machine.  */
653 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
654 bitnum = unit - bitsize - bitnum;
656 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
657 value1 = value;
658 if (GET_MODE (value) != op_mode)
660 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
662 rtx tmp;
663 /* Optimization: Don't bother really extending VALUE
664 if it has all the bits we will actually use.  However,
665 if we must narrow it, be sure we do it correctly.  */
667 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
669 tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
670 if (! tmp)
671 tmp = simplify_gen_subreg (op_mode,
672 force_reg (GET_MODE (value),
673 value1),
674 GET_MODE (value), 0);
676 else
678 tmp = gen_lowpart_if_possible (op_mode, value1);
679 if (! tmp)
680 tmp = gen_lowpart (op_mode, force_reg (GET_MODE (value),
681 value1));
683 value1 = tmp;
685 else if (CONST_INT_P (value))
686 value1 = gen_int_mode (INTVAL (value), op_mode);
687 else
688 /* Parse phase is supposed to make VALUE's data type
689 match that of the component reference, which is a type
690 at least as wide as the field; so VALUE should have
691 a mode that corresponds to that type.  */
692 gcc_assert (CONSTANT_P (value));
/* Expand the insv pattern: operands are (field, size, position, value).  */
695 create_fixed_operand (&ops[0], xop0);
696 create_integer_operand (&ops[1], bitsize);
697 create_integer_operand (&ops[2], bitnum);
698 create_input_operand (&ops[3], value1, op_mode);
699 if (maybe_expand_insn (insv->icode, 4, ops))
701 if (copy_back)
702 convert_move (op0, xop0, true);
703 return true;
/* The pattern didn't match; discard any insns we emitted trying.  */
705 delete_insns_since (last);
706 return false;
709 /* A subroutine of store_bit_field, with the same arguments.  Return true
710 if the operation could be implemented.
712 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
713 no other way of implementing the operation.  If FALLBACK_P is false,
714 return false instead.

   The function tries, in order: vec_set patterns, a plain SUBREG move,
   a simple aligned memory store, a movstrict insn, word-by-word recursion
   for multi-word fields, the target's insv pattern, a copy-to-register
   retry for memory operands, and finally the fixed-shift fallback.  */
716 static bool
717 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
718 unsigned HOST_WIDE_INT bitnum,
719 unsigned HOST_WIDE_INT bitregion_start,
720 unsigned HOST_WIDE_INT bitregion_end,
721 machine_mode fieldmode,
722 rtx value, bool reverse, bool fallback_p)
724 rtx op0 = str_rtx;
725 rtx orig_value;
/* Strip SUBREGs, folding their byte offsets into BITNUM.  */
727 while (GET_CODE (op0) == SUBREG)
729 /* The following line once was done only if WORDS_BIG_ENDIAN,
730 but I think that is a mistake.  WORDS_BIG_ENDIAN is
731 meaningful at a much higher level; when structures are copied
732 between memory and regs, the higher-numbered regs
733 always get higher addresses.  */
734 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
735 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
736 int byte_offset = 0;
738 /* Paradoxical subregs need special handling on big-endian machines.  */
739 if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
741 int difference = inner_mode_size - outer_mode_size;
743 if (WORDS_BIG_ENDIAN)
744 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
745 if (BYTES_BIG_ENDIAN)
746 byte_offset += difference % UNITS_PER_WORD;
748 else
749 byte_offset = SUBREG_BYTE (op0);
751 bitnum += byte_offset * BITS_PER_UNIT;
752 op0 = SUBREG_REG (op0);
755 /* No action is needed if the target is a register and if the field
756 lies completely outside that register.  This can occur if the source
757 code contains an out-of-bounds access to a small array.  */
758 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
759 return true;
761 /* Use vec_set patterns for inserting parts of vectors whenever
762 available.  */
763 if (VECTOR_MODE_P (GET_MODE (op0))
764 && !MEM_P (op0)
765 && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
766 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
767 && bitsize == GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
768 && !(bitnum % GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
770 struct expand_operand ops[3];
771 machine_mode outermode = GET_MODE (op0);
772 machine_mode innermode = GET_MODE_INNER (outermode);
773 enum insn_code icode = optab_handler (vec_set_optab, outermode);
774 int pos = bitnum / GET_MODE_BITSIZE (innermode);
776 create_fixed_operand (&ops[0], op0);
777 create_input_operand (&ops[1], value, innermode);
778 create_integer_operand (&ops[2], pos);
779 if (maybe_expand_insn (icode, 3, ops))
780 return true;
783 /* If the target is a register, overwriting the entire object, or storing
784 a full-word or multi-word field can be done with just a SUBREG.  */
785 if (!MEM_P (op0)
786 && bitsize == GET_MODE_BITSIZE (fieldmode)
787 && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
788 || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
790 /* Use the subreg machinery either to narrow OP0 to the required
791 words or to cope with mode punning between equal-sized modes.
792 In the latter case, use subreg on the rhs side, not lhs.  */
793 rtx sub;
795 if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
797 sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
798 if (sub)
800 if (reverse)
801 sub = flip_storage_order (GET_MODE (op0), sub);
802 emit_move_insn (op0, sub);
803 return true;
806 else
808 sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
809 bitnum / BITS_PER_UNIT);
810 if (sub)
812 if (reverse)
813 value = flip_storage_order (fieldmode, value);
814 emit_move_insn (sub, value);
815 return true;
820 /* If the target is memory, storing any naturally aligned field can be
821 done with a simple store.  For targets that support fast unaligned
822 memory, any naturally sized, unit aligned field can be done directly.  */
823 if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
825 op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
826 if (reverse)
827 value = flip_storage_order (fieldmode, value);
828 emit_move_insn (op0, value);
829 return true;
832 /* Make sure we are playing with integral modes.  Pun with subregs
833 if we aren't.  This must come after the entire register case above,
834 since that case is valid for any mode.  The following cases are only
835 valid for integral modes.  */
837 machine_mode imode = int_mode_for_mode (GET_MODE (op0));
838 if (imode != GET_MODE (op0))
840 if (MEM_P (op0))
841 op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
842 else
844 gcc_assert (imode != BLKmode);
845 op0 = gen_lowpart (imode, op0);
850 /* Storing an lsb-aligned field in a register
851 can be done with a movstrict instruction.  */
853 if (!MEM_P (op0)
854 && !reverse
855 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
856 && bitsize == GET_MODE_BITSIZE (fieldmode)
857 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
859 struct expand_operand ops[2];
860 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
861 rtx arg0 = op0;
862 unsigned HOST_WIDE_INT subreg_off;
864 if (GET_CODE (arg0) == SUBREG)
866 /* Else we've got some float mode source being extracted into
867 a different float mode destination -- this combination of
868 subregs results in Severe Tire Damage.  */
869 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
870 || GET_MODE_CLASS (fieldmode) == MODE_INT
871 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
872 arg0 = SUBREG_REG (arg0);
875 subreg_off = bitnum / BITS_PER_UNIT;
876 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
878 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
880 create_fixed_operand (&ops[0], arg0);
881 /* Shrink the source operand to FIELDMODE.  */
882 create_convert_operand_to (&ops[1], value, fieldmode, false);
883 if (maybe_expand_insn (icode, 2, ops))
884 return true;
888 /* Handle fields bigger than a word.  */
890 if (bitsize > BITS_PER_WORD)
892 /* Here we transfer the words of the field
893 in the order least significant first.
894 This is because the most significant word is the one which may
895 be less than full.
896 However, only do that if the value is not BLKmode.  */
898 const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
899 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
900 unsigned int i;
901 rtx_insn *last;
903 /* This is the mode we must force value to, so that there will be enough
904 subwords to extract.  Note that fieldmode will often (always?) be
905 VOIDmode, because that is what store_field uses to indicate that this
906 is a bit field, but passing VOIDmode to operand_subword_force
907 is not allowed.  */
908 fieldmode = GET_MODE (value);
909 if (fieldmode == VOIDmode)
910 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
912 last = get_last_insn ();
913 for (i = 0; i < nwords; i++)
915 /* If I is 0, use the low-order word in both field and target;
916 if I is 1, use the next to lowest word; and so on.  */
917 unsigned int wordnum = (backwards
918 ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
919 - i - 1
920 : i);
921 unsigned int bit_offset = (backwards ^ reverse
922 ? MAX ((int) bitsize - ((int) i + 1)
923 * BITS_PER_WORD,
925 : (int) i * BITS_PER_WORD);
926 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
927 unsigned HOST_WIDE_INT new_bitsize =
928 MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
930 /* If the remaining chunk doesn't have full wordsize we have
931 to make sure that for big-endian machines the higher order
932 bits are used.  */
933 if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
934 value_word = simplify_expand_binop (word_mode, lshr_optab,
935 value_word,
936 GEN_INT (BITS_PER_WORD
937 - new_bitsize),
938 NULL_RTX, true,
939 OPTAB_LIB_WIDEN);
/* Recurse for each word; if any word fails, undo everything.  */
941 if (!store_bit_field_1 (op0, new_bitsize,
942 bitnum + bit_offset,
943 bitregion_start, bitregion_end,
944 word_mode,
945 value_word, reverse, fallback_p))
947 delete_insns_since (last);
948 return false;
951 return true;
954 /* If VALUE has a floating-point or complex mode, access it as an
955 integer of the corresponding size.  This can occur on a machine
956 with 64 bit registers that uses SFmode for float.  It can also
957 occur for unaligned float or complex fields.  */
958 orig_value = value;
959 if (GET_MODE (value) != VOIDmode
960 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
961 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
963 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
964 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
967 /* If OP0 is a multi-word register, narrow it to the affected word.
968 If the region spans two words, defer to store_split_bit_field.  */
969 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
971 if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
973 if (!fallback_p)
974 return false;
976 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
977 bitregion_end, value, reverse);
978 return true;
980 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
981 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
982 gcc_assert (op0);
983 bitnum %= BITS_PER_WORD;
986 /* From here on we can assume that the field to be stored in fits
987 within a word.  If the destination is a register, it too fits
988 in a word.  */
990 extraction_insn insv;
991 if (!MEM_P (op0)
992 && !reverse
993 && get_best_reg_extraction_insn (&insv, EP_insv,
994 GET_MODE_BITSIZE (GET_MODE (op0)),
995 fieldmode)
996 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
997 return true;
999 /* If OP0 is a memory, try copying it to a register and seeing if a
1000 cheap register alternative is available.  */
1001 if (MEM_P (op0) && !reverse)
1003 if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
1004 fieldmode)
1005 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
1006 return true;
1008 rtx_insn *last = get_last_insn ();
1010 /* Try loading part of OP0 into a register, inserting the bitfield
1011 into that, and then copying the result back to OP0.  */
1012 unsigned HOST_WIDE_INT bitpos;
1013 rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1014 bitregion_start, bitregion_end,
1015 fieldmode, &bitpos);
1016 if (xop0)
1018 rtx tempreg = copy_to_reg (xop0);
/* Recurse without fallback: only accept if the register path works.  */
1019 if (store_bit_field_1 (tempreg, bitsize, bitpos,
1020 bitregion_start, bitregion_end,
1021 fieldmode, orig_value, reverse, false))
1023 emit_move_insn (xop0, tempreg);
1024 return true;
1026 delete_insns_since (last);
1030 if (!fallback_p)
1031 return false;
/* Last resort: synthesize the store with shifts and masks.  */
1033 store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
1034 bitregion_end, value, reverse);
1035 return true;
1038 /* Generate code to store value from rtx VALUE
1039 into a bit-field within structure STR_RTX
1040 containing BITSIZE bits starting at bit BITNUM.
1042 BITREGION_START is bitpos of the first bitfield in this region.
1043 BITREGION_END is the bitpos of the ending bitfield in this region.
1044 These two fields are 0, if the C++ memory model does not apply,
1045 or we are not interested in keeping track of bitfield regions.
1047 FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1049 If REVERSE is true, the store is to be done in reverse order. */
1051 void
1052 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1053 unsigned HOST_WIDE_INT bitnum,
1054 unsigned HOST_WIDE_INT bitregion_start,
1055 unsigned HOST_WIDE_INT bitregion_end,
1056 machine_mode fieldmode,
1057 rtx value, bool reverse)
1059 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1060 if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode,
1061 bitregion_start, bitregion_end))
1063 /* Storing of a full word can be done with a simple store.
1064 We know here that the field can be accessed with one single
1065 instruction. For targets that support unaligned memory,
1066 an unaligned access may be necessary. */
1067 if (bitsize == GET_MODE_BITSIZE (fieldmode))
1069 str_rtx = adjust_bitfield_address (str_rtx, fieldmode,
1070 bitnum / BITS_PER_UNIT);
1071 if (reverse)
1072 value = flip_storage_order (fieldmode, value);
1073 gcc_assert (bitnum % BITS_PER_UNIT == 0);
1074 emit_move_insn (str_rtx, value);
1076 else
1078 rtx temp;
1080 str_rtx = narrow_bit_field_mem (str_rtx, fieldmode, bitsize, bitnum,
1081 &bitnum);
1082 gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (fieldmode));
1083 temp = copy_to_reg (str_rtx);
1084 if (!store_bit_field_1 (temp, bitsize, bitnum, 0, 0,
1085 fieldmode, value, reverse, true))
1086 gcc_unreachable ();
1088 emit_move_insn (str_rtx, temp);
1091 return;
1094 /* Under the C++0x memory model, we must not touch bits outside the
1095 bit region. Adjust the address to start at the beginning of the
1096 bit region. */
1097 if (MEM_P (str_rtx) && bitregion_start > 0)
1099 machine_mode bestmode;
1100 HOST_WIDE_INT offset, size;
1102 gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
1104 offset = bitregion_start / BITS_PER_UNIT;
1105 bitnum -= bitregion_start;
1106 size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
1107 bitregion_end -= bitregion_start;
1108 bitregion_start = 0;
1109 bestmode = get_best_mode (bitsize, bitnum,
1110 bitregion_start, bitregion_end,
1111 MEM_ALIGN (str_rtx), VOIDmode,
1112 MEM_VOLATILE_P (str_rtx));
1113 str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
1116 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1117 bitregion_start, bitregion_end,
1118 fieldmode, value, reverse, true))
1119 gcc_unreachable ();
1122 /* Use shifts and boolean operations to store VALUE into a bit field of
1123 width BITSIZE in OP0, starting at bit BITNUM.
1125 If REVERSE is true, the store is to be done in reverse order. */
1127 static void
1128 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1129 unsigned HOST_WIDE_INT bitnum,
1130 unsigned HOST_WIDE_INT bitregion_start,
1131 unsigned HOST_WIDE_INT bitregion_end,
1132 rtx value, bool reverse)
1134 /* There is a case not handled here:
1135 a structure with a known alignment of just a halfword
1136 and a field split across two aligned halfwords within the structure.
1137 Or likewise a structure with a known alignment of just a byte
1138 and a field split across two bytes.
1139 Such cases are not supposed to be able to occur. */
1141 if (MEM_P (op0))
1143 machine_mode mode = GET_MODE (op0);
1144 if (GET_MODE_BITSIZE (mode) == 0
1145 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
1146 mode = word_mode;
1147 mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1148 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
1150 if (mode == VOIDmode)
1152 /* The only way this should occur is if the field spans word
1153 boundaries. */
1154 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
1155 bitregion_end, value, reverse);
1156 return;
1159 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1162 store_fixed_bit_field_1 (op0, bitsize, bitnum, value, reverse);
1165 /* Helper function for store_fixed_bit_field, stores
1166 the bit field always using the MODE of OP0. */
1168 static void
1169 store_fixed_bit_field_1 (rtx op0, unsigned HOST_WIDE_INT bitsize,
1170 unsigned HOST_WIDE_INT bitnum,
1171 rtx value, bool reverse)
1173 machine_mode mode;
1174 rtx temp;
1175 int all_zero = 0;
1176 int all_one = 0;
1178 mode = GET_MODE (op0);
1179 gcc_assert (SCALAR_INT_MODE_P (mode));
1181 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1182 for invalid input, such as f5 from gcc.dg/pr48335-2.c. */
1184 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1185 /* BITNUM is the distance between our msb
1186 and that of the containing datum.
1187 Convert it to the distance from the lsb. */
1188 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1190 /* Now BITNUM is always the distance between our lsb
1191 and that of OP0. */
1193 /* Shift VALUE left by BITNUM bits. If VALUE is not constant,
1194 we must first convert its mode to MODE. */
1196 if (CONST_INT_P (value))
1198 unsigned HOST_WIDE_INT v = UINTVAL (value);
1200 if (bitsize < HOST_BITS_PER_WIDE_INT)
1201 v &= (HOST_WIDE_INT_1U << bitsize) - 1;
1203 if (v == 0)
1204 all_zero = 1;
1205 else if ((bitsize < HOST_BITS_PER_WIDE_INT
1206 && v == (HOST_WIDE_INT_1U << bitsize) - 1)
1207 || (bitsize == HOST_BITS_PER_WIDE_INT
1208 && v == HOST_WIDE_INT_M1U))
1209 all_one = 1;
1211 value = lshift_value (mode, v, bitnum);
1213 else
1215 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1216 && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1218 if (GET_MODE (value) != mode)
1219 value = convert_to_mode (mode, value, 1);
1221 if (must_and)
1222 value = expand_binop (mode, and_optab, value,
1223 mask_rtx (mode, 0, bitsize, 0),
1224 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1225 if (bitnum > 0)
1226 value = expand_shift (LSHIFT_EXPR, mode, value,
1227 bitnum, NULL_RTX, 1);
1230 if (reverse)
1231 value = flip_storage_order (mode, value);
1233 /* Now clear the chosen bits in OP0,
1234 except that if VALUE is -1 we need not bother. */
1235 /* We keep the intermediates in registers to allow CSE to combine
1236 consecutive bitfield assignments. */
1238 temp = force_reg (mode, op0);
1240 if (! all_one)
1242 rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
1243 if (reverse)
1244 mask = flip_storage_order (mode, mask);
1245 temp = expand_binop (mode, and_optab, temp, mask,
1246 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1247 temp = force_reg (mode, temp);
1250 /* Now logical-or VALUE into OP0, unless it is zero. */
1252 if (! all_zero)
1254 temp = expand_binop (mode, ior_optab, temp, value,
1255 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1256 temp = force_reg (mode, temp);
1259 if (op0 != temp)
1261 op0 = copy_rtx (op0);
1262 emit_move_insn (op0, temp);
1266 /* Store a bit field that is split across multiple accessible memory objects.
1268 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1269 BITSIZE is the field width; BITPOS the position of its first bit
1270 (within the word).
1271 VALUE is the value to store.
1273 If REVERSE is true, the store is to be done in reverse order.
1275 This does not yet handle fields wider than BITS_PER_WORD. */
1277 static void
1278 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1279 unsigned HOST_WIDE_INT bitpos,
1280 unsigned HOST_WIDE_INT bitregion_start,
1281 unsigned HOST_WIDE_INT bitregion_end,
1282 rtx value, bool reverse)
1284 unsigned int unit, total_bits, bitsdone = 0;
1286 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1287 much at a time. */
1288 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1289 unit = BITS_PER_WORD;
1290 else
1291 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1293 /* If OP0 is a memory with a mode, then UNIT must not be larger than
1294 OP0's mode as well. Otherwise, store_fixed_bit_field will call us
1295 again, and we will mutually recurse forever. */
1296 if (MEM_P (op0) && GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1297 unit = MIN (unit, GET_MODE_BITSIZE (GET_MODE (op0)));
1299 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1300 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1301 that VALUE might be a floating-point constant. */
1302 if (CONSTANT_P (value) && !CONST_INT_P (value))
1304 rtx word = gen_lowpart_common (word_mode, value);
1306 if (word && (value != word))
1307 value = word;
1308 else
1309 value = gen_lowpart_common (word_mode,
1310 force_reg (GET_MODE (value) != VOIDmode
1311 ? GET_MODE (value)
1312 : word_mode, value));
1315 total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1317 while (bitsdone < bitsize)
1319 unsigned HOST_WIDE_INT thissize;
1320 unsigned HOST_WIDE_INT thispos;
1321 unsigned HOST_WIDE_INT offset;
1322 rtx part, word;
1324 offset = (bitpos + bitsdone) / unit;
1325 thispos = (bitpos + bitsdone) % unit;
1327 /* When region of bytes we can touch is restricted, decrease
1328 UNIT close to the end of the region as needed. If op0 is a REG
1329 or SUBREG of REG, don't do this, as there can't be data races
1330 on a register and we can expand shorter code in some cases. */
1331 if (bitregion_end
1332 && unit > BITS_PER_UNIT
1333 && bitpos + bitsdone - thispos + unit > bitregion_end + 1
1334 && !REG_P (op0)
1335 && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1337 unit = unit / 2;
1338 continue;
1341 /* THISSIZE must not overrun a word boundary. Otherwise,
1342 store_fixed_bit_field will call us again, and we will mutually
1343 recurse forever. */
1344 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1345 thissize = MIN (thissize, unit - thispos);
1347 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1349 /* Fetch successively less significant portions. */
1350 if (CONST_INT_P (value))
1351 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1352 >> (bitsize - bitsdone - thissize))
1353 & ((HOST_WIDE_INT_1 << thissize) - 1));
1354 /* Likewise, but the source is little-endian. */
1355 else if (reverse)
1356 part = extract_fixed_bit_field (word_mode, value, thissize,
1357 bitsize - bitsdone - thissize,
1358 NULL_RTX, 1, false);
1359 else
1361 int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1362 /* The args are chosen so that the last part includes the
1363 lsb. Give extract_bit_field the value it needs (with
1364 endianness compensation) to fetch the piece we want. */
1365 part = extract_fixed_bit_field (word_mode, value, thissize,
1366 total_bits - bitsize + bitsdone,
1367 NULL_RTX, 1, false);
1370 else
1372 /* Fetch successively more significant portions. */
1373 if (CONST_INT_P (value))
1374 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1375 >> bitsdone)
1376 & ((HOST_WIDE_INT_1 << thissize) - 1));
1377 /* Likewise, but the source is big-endian. */
1378 else if (reverse)
1379 part = extract_fixed_bit_field (word_mode, value, thissize,
1380 total_bits - bitsdone - thissize,
1381 NULL_RTX, 1, false);
1382 else
1383 part = extract_fixed_bit_field (word_mode, value, thissize,
1384 bitsdone, NULL_RTX, 1, false);
1387 /* If OP0 is a register, then handle OFFSET here. */
1388 if (SUBREG_P (op0) || REG_P (op0))
1390 machine_mode op0_mode = GET_MODE (op0);
1391 if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1392 word = offset ? const0_rtx : op0;
1393 else
1394 word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
1395 GET_MODE (op0));
1396 offset &= BITS_PER_WORD / unit - 1;
1398 else
1399 word = op0;
1401 /* OFFSET is in UNITs, and UNIT is in bits. If WORD is const0_rtx,
1402 it is just an out-of-bounds access. Ignore it. */
1403 if (word != const0_rtx)
1404 store_fixed_bit_field (word, thissize, offset * unit + thispos,
1405 bitregion_start, bitregion_end, part,
1406 reverse);
1407 bitsdone += thissize;
1411 /* A subroutine of extract_bit_field_1 that converts return value X
1412 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1413 to extract_bit_field. */
1415 static rtx
1416 convert_extracted_bit_field (rtx x, machine_mode mode,
1417 machine_mode tmode, bool unsignedp)
1419 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1420 return x;
1422 /* If the x mode is not a scalar integral, first convert to the
1423 integer mode of that size and then access it as a floating-point
1424 value via a SUBREG. */
1425 if (!SCALAR_INT_MODE_P (tmode))
1427 machine_mode smode;
1429 smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1430 x = convert_to_mode (smode, x, unsignedp);
1431 x = force_reg (smode, x);
1432 return gen_lowpart (tmode, x);
1435 return convert_to_mode (tmode, x, unsignedp);
1438 /* Try to use an ext(z)v pattern to extract a field from OP0.
1439 Return the extracted value on success, otherwise return null.
1440 EXT_MODE is the mode of the extraction and the other arguments
1441 are as for extract_bit_field. */
1443 static rtx
1444 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1445 unsigned HOST_WIDE_INT bitsize,
1446 unsigned HOST_WIDE_INT bitnum,
1447 int unsignedp, rtx target,
1448 machine_mode mode, machine_mode tmode)
1450 struct expand_operand ops[4];
1451 rtx spec_target = target;
1452 rtx spec_target_subreg = 0;
1453 machine_mode ext_mode = extv->field_mode;
1454 unsigned unit = GET_MODE_BITSIZE (ext_mode);
1456 if (bitsize == 0 || unit < bitsize)
1457 return NULL_RTX;
1459 if (MEM_P (op0))
1460 /* Get a reference to the first byte of the field. */
1461 op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1462 &bitnum);
1463 else
1465 /* Convert from counting within OP0 to counting in EXT_MODE. */
1466 if (BYTES_BIG_ENDIAN)
1467 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1469 /* If op0 is a register, we need it in EXT_MODE to make it
1470 acceptable to the format of ext(z)v. */
1471 if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1472 return NULL_RTX;
1473 if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1474 op0 = gen_lowpart_SUBREG (ext_mode, op0);
1477 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1478 "backwards" from the size of the unit we are extracting from.
1479 Otherwise, we count bits from the most significant on a
1480 BYTES/BITS_BIG_ENDIAN machine. */
1482 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1483 bitnum = unit - bitsize - bitnum;
1485 if (target == 0)
1486 target = spec_target = gen_reg_rtx (tmode);
1488 if (GET_MODE (target) != ext_mode)
1490 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1491 between the mode of the extraction (word_mode) and the target
1492 mode. Instead, create a temporary and use convert_move to set
1493 the target. */
1494 if (REG_P (target)
1495 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1497 target = gen_lowpart (ext_mode, target);
1498 if (GET_MODE_PRECISION (ext_mode)
1499 > GET_MODE_PRECISION (GET_MODE (spec_target)))
1500 spec_target_subreg = target;
1502 else
1503 target = gen_reg_rtx (ext_mode);
1506 create_output_operand (&ops[0], target, ext_mode);
1507 create_fixed_operand (&ops[1], op0);
1508 create_integer_operand (&ops[2], bitsize);
1509 create_integer_operand (&ops[3], bitnum);
1510 if (maybe_expand_insn (extv->icode, 4, ops))
1512 target = ops[0].value;
1513 if (target == spec_target)
1514 return target;
1515 if (target == spec_target_subreg)
1516 return spec_target;
1517 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1519 return NULL_RTX;
1522 /* A subroutine of extract_bit_field, with the same arguments.
1523 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1524 if we can find no other means of implementing the operation.
1525 if FALLBACK_P is false, return NULL instead. */
1527 static rtx
1528 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1529 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1530 machine_mode mode, machine_mode tmode,
1531 bool reverse, bool fallback_p)
1533 rtx op0 = str_rtx;
1534 machine_mode int_mode;
1535 machine_mode mode1;
1537 if (tmode == VOIDmode)
1538 tmode = mode;
1540 while (GET_CODE (op0) == SUBREG)
1542 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1543 op0 = SUBREG_REG (op0);
1546 /* If we have an out-of-bounds access to a register, just return an
1547 uninitialized register of the required mode. This can occur if the
1548 source code contains an out-of-bounds access to a small array. */
1549 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1550 return gen_reg_rtx (tmode);
1552 if (REG_P (op0)
1553 && mode == GET_MODE (op0)
1554 && bitnum == 0
1555 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1557 if (reverse)
1558 op0 = flip_storage_order (mode, op0);
1559 /* We're trying to extract a full register from itself. */
1560 return op0;
1563 /* See if we can get a better vector mode before extracting. */
1564 if (VECTOR_MODE_P (GET_MODE (op0))
1565 && !MEM_P (op0)
1566 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1568 machine_mode new_mode;
1570 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1571 new_mode = MIN_MODE_VECTOR_FLOAT;
1572 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1573 new_mode = MIN_MODE_VECTOR_FRACT;
1574 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1575 new_mode = MIN_MODE_VECTOR_UFRACT;
1576 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1577 new_mode = MIN_MODE_VECTOR_ACCUM;
1578 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1579 new_mode = MIN_MODE_VECTOR_UACCUM;
1580 else
1581 new_mode = MIN_MODE_VECTOR_INT;
1583 for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1584 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1585 && GET_MODE_UNIT_SIZE (new_mode) == GET_MODE_SIZE (tmode)
1586 && targetm.vector_mode_supported_p (new_mode))
1587 break;
1588 if (new_mode != VOIDmode)
1589 op0 = gen_lowpart (new_mode, op0);
1592 /* Use vec_extract patterns for extracting parts of vectors whenever
1593 available. */
1594 if (VECTOR_MODE_P (GET_MODE (op0))
1595 && !MEM_P (op0)
1596 && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1597 && ((bitnum + bitsize - 1) / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
1598 == bitnum / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
1600 struct expand_operand ops[3];
1601 machine_mode outermode = GET_MODE (op0);
1602 machine_mode innermode = GET_MODE_INNER (outermode);
1603 enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1604 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1606 create_output_operand (&ops[0], target, innermode);
1607 create_input_operand (&ops[1], op0, outermode);
1608 create_integer_operand (&ops[2], pos);
1609 if (maybe_expand_insn (icode, 3, ops))
1611 target = ops[0].value;
1612 if (GET_MODE (target) != mode)
1613 return gen_lowpart (tmode, target);
1614 return target;
1618 /* Make sure we are playing with integral modes. Pun with subregs
1619 if we aren't. */
1621 machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1622 if (imode != GET_MODE (op0))
1624 if (MEM_P (op0))
1625 op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
1626 else if (imode != BLKmode)
1628 op0 = gen_lowpart (imode, op0);
1630 /* If we got a SUBREG, force it into a register since we
1631 aren't going to be able to do another SUBREG on it. */
1632 if (GET_CODE (op0) == SUBREG)
1633 op0 = force_reg (imode, op0);
1635 else
1637 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1638 rtx mem = assign_stack_temp (GET_MODE (op0), size);
1639 emit_move_insn (mem, op0);
1640 op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1645 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1646 If that's wrong, the solution is to test for it and set TARGET to 0
1647 if needed. */
1649 /* Get the mode of the field to use for atomic access or subreg
1650 conversion. */
1651 mode1 = mode;
1652 if (SCALAR_INT_MODE_P (tmode))
1654 machine_mode try_mode = mode_for_size (bitsize,
1655 GET_MODE_CLASS (tmode), 0);
1656 if (try_mode != BLKmode)
1657 mode1 = try_mode;
1659 gcc_assert (mode1 != BLKmode);
1661 /* Extraction of a full MODE1 value can be done with a subreg as long
1662 as the least significant bit of the value is the least significant
1663 bit of either OP0 or a word of OP0. */
1664 if (!MEM_P (op0)
1665 && !reverse
1666 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1667 && bitsize == GET_MODE_BITSIZE (mode1)
1668 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1670 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1671 bitnum / BITS_PER_UNIT);
1672 if (sub)
1673 return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1676 /* Extraction of a full MODE1 value can be done with a load as long as
1677 the field is on a byte boundary and is sufficiently aligned. */
1678 if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1680 op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1681 if (reverse)
1682 op0 = flip_storage_order (mode1, op0);
1683 return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1686 /* Handle fields bigger than a word. */
1688 if (bitsize > BITS_PER_WORD)
1690 /* Here we transfer the words of the field
1691 in the order least significant first.
1692 This is because the most significant word is the one which may
1693 be less than full. */
1695 const bool backwards = WORDS_BIG_ENDIAN;
1696 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1697 unsigned int i;
1698 rtx_insn *last;
1700 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1701 target = gen_reg_rtx (mode);
1703 /* In case we're about to clobber a base register or something
1704 (see gcc.c-torture/execute/20040625-1.c). */
1705 if (reg_mentioned_p (target, str_rtx))
1706 target = gen_reg_rtx (mode);
1708 /* Indicate for flow that the entire target reg is being set. */
1709 emit_clobber (target);
1711 last = get_last_insn ();
1712 for (i = 0; i < nwords; i++)
1714 /* If I is 0, use the low-order word in both field and target;
1715 if I is 1, use the next to lowest word; and so on. */
1716 /* Word number in TARGET to use. */
1717 unsigned int wordnum
1718 = (backwards
1719 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1720 : i);
1721 /* Offset from start of field in OP0. */
1722 unsigned int bit_offset = (backwards ^ reverse
1723 ? MAX ((int) bitsize - ((int) i + 1)
1724 * BITS_PER_WORD,
1726 : (int) i * BITS_PER_WORD);
1727 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1728 rtx result_part
1729 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1730 bitsize - i * BITS_PER_WORD),
1731 bitnum + bit_offset, 1, target_part,
1732 mode, word_mode, reverse, fallback_p);
1734 gcc_assert (target_part);
1735 if (!result_part)
1737 delete_insns_since (last);
1738 return NULL;
1741 if (result_part != target_part)
1742 emit_move_insn (target_part, result_part);
1745 if (unsignedp)
1747 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1748 need to be zero'd out. */
1749 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1751 unsigned int i, total_words;
1753 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1754 for (i = nwords; i < total_words; i++)
1755 emit_move_insn
1756 (operand_subword (target,
1757 backwards ? total_words - i - 1 : i,
1758 1, VOIDmode),
1759 const0_rtx);
1761 return target;
1764 /* Signed bit field: sign-extend with two arithmetic shifts. */
1765 target = expand_shift (LSHIFT_EXPR, mode, target,
1766 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1767 return expand_shift (RSHIFT_EXPR, mode, target,
1768 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1771 /* If OP0 is a multi-word register, narrow it to the affected word.
1772 If the region spans two words, defer to extract_split_bit_field. */
1773 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1775 if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1777 if (!fallback_p)
1778 return NULL_RTX;
1779 target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp,
1780 reverse);
1781 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1783 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1784 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1785 bitnum %= BITS_PER_WORD;
1788 /* From here on we know the desired field is smaller than a word.
1789 If OP0 is a register, it too fits within a word. */
1790 enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1791 extraction_insn extv;
1792 if (!MEM_P (op0)
1793 && !reverse
1794 /* ??? We could limit the structure size to the part of OP0 that
1795 contains the field, with appropriate checks for endianness
1796 and TRULY_NOOP_TRUNCATION. */
1797 && get_best_reg_extraction_insn (&extv, pattern,
1798 GET_MODE_BITSIZE (GET_MODE (op0)),
1799 tmode))
1801 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1802 unsignedp, target, mode,
1803 tmode);
1804 if (result)
1805 return result;
1808 /* If OP0 is a memory, try copying it to a register and seeing if a
1809 cheap register alternative is available. */
1810 if (MEM_P (op0) & !reverse)
1812 if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1813 tmode))
1815 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1816 bitnum, unsignedp,
1817 target, mode,
1818 tmode);
1819 if (result)
1820 return result;
1823 rtx_insn *last = get_last_insn ();
1825 /* Try loading part of OP0 into a register and extracting the
1826 bitfield from that. */
1827 unsigned HOST_WIDE_INT bitpos;
1828 rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1829 0, 0, tmode, &bitpos);
1830 if (xop0)
1832 xop0 = copy_to_reg (xop0);
1833 rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1834 unsignedp, target,
1835 mode, tmode, reverse, false);
1836 if (result)
1837 return result;
1838 delete_insns_since (last);
1842 if (!fallback_p)
1843 return NULL;
1845 /* Find a correspondingly-sized integer field, so we can apply
1846 shifts and masks to it. */
1847 int_mode = int_mode_for_mode (tmode);
1848 if (int_mode == BLKmode)
1849 int_mode = int_mode_for_mode (mode);
1850 /* Should probably push op0 out to memory and then do a load. */
1851 gcc_assert (int_mode != BLKmode);
1853 target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum, target,
1854 unsignedp, reverse);
1856 /* Complex values must be reversed piecewise, so we need to undo the global
1857 reversal, convert to the complex mode and reverse again. */
1858 if (reverse && COMPLEX_MODE_P (tmode))
1860 target = flip_storage_order (int_mode, target);
1861 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1862 target = flip_storage_order (tmode, target);
1864 else
1865 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1867 return target;
1870 /* Generate code to extract a byte-field from STR_RTX
1871 containing BITSIZE bits, starting at BITNUM,
1872 and put it in TARGET if possible (if TARGET is nonzero).
1873 Regardless of TARGET, we return the rtx for where the value is placed.
1875 STR_RTX is the structure containing the byte (a REG or MEM).
1876 UNSIGNEDP is nonzero if this is an unsigned bit field.
1877 MODE is the natural mode of the field value once extracted.
1878 TMODE is the mode the caller would like the value to have;
1879 but the value may be returned with type MODE instead.
1881 If REVERSE is true, the extraction is to be done in reverse order.
1883 If a TARGET is specified and we can store in it at no extra cost,
1884 we do so, and return TARGET.
1885 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1886 if they are equally easy. */
1889 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1890 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1891 machine_mode mode, machine_mode tmode, bool reverse)
1893 machine_mode mode1;
1895 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1896 if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1897 mode1 = GET_MODE (str_rtx);
1898 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1899 mode1 = GET_MODE (target);
1900 else
1901 mode1 = tmode;
1903 if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, mode1, 0, 0))
1905 /* Extraction of a full MODE1 value can be done with a simple load.
1906 We know here that the field can be accessed with one single
1907 instruction. For targets that support unaligned memory,
1908 an unaligned access may be necessary. */
1909 if (bitsize == GET_MODE_BITSIZE (mode1))
1911 rtx result = adjust_bitfield_address (str_rtx, mode1,
1912 bitnum / BITS_PER_UNIT);
1913 if (reverse)
1914 result = flip_storage_order (mode1, result);
1915 gcc_assert (bitnum % BITS_PER_UNIT == 0);
1916 return convert_extracted_bit_field (result, mode, tmode, unsignedp);
1919 str_rtx = narrow_bit_field_mem (str_rtx, mode1, bitsize, bitnum,
1920 &bitnum);
1921 gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (mode1));
1922 str_rtx = copy_to_reg (str_rtx);
1925 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1926 target, mode, tmode, reverse, true);
1929 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1930 from bit BITNUM of OP0.
1932 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1933 If REVERSE is true, the extraction is to be done in reverse order.
1935 If TARGET is nonzero, attempts to store the value there
1936 and return TARGET, but this is not guaranteed.
1937 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
1939 static rtx
1940 extract_fixed_bit_field (machine_mode tmode, rtx op0,
1941 unsigned HOST_WIDE_INT bitsize,
1942 unsigned HOST_WIDE_INT bitnum, rtx target,
1943 int unsignedp, bool reverse)
1945 if (MEM_P (op0))
1947 machine_mode mode
1948 = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), word_mode,
1949 MEM_VOLATILE_P (op0));
1951 if (mode == VOIDmode)
1952 /* The only way this should occur is if the field spans word
1953 boundaries. */
1954 return extract_split_bit_field (op0, bitsize, bitnum, unsignedp,
1955 reverse);
1957 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1960 return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
1961 target, unsignedp, reverse);
1964 /* Helper function for extract_fixed_bit_field, extracts
1965 the bit field always using the MODE of OP0. */
/* OP0 must be a scalar-integer-mode value (asserted below); the caller
   has already reduced MEMs to a register-like operand of a single mode.
   Extraction is done with shifts and, for the unsigned case, an AND mask.  */
1967 static rtx
1968 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0,
1969 unsigned HOST_WIDE_INT bitsize,
1970 unsigned HOST_WIDE_INT bitnum, rtx target,
1971 int unsignedp, bool reverse)
1973 machine_mode mode = GET_MODE (op0);
1974 gcc_assert (SCALAR_INT_MODE_P (mode));
1976 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1977 for invalid input, such as extract equivalent of f5 from
1978 gcc.dg/pr48335-2.c. */
1980 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1981 /* BITNUM is the distance between our msb and that of OP0.
1982 Convert it to the distance from the lsb. */
1983 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1985 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
1986 We have reduced the big-endian case to the little-endian case. */
1987 if (reverse)
1988 op0 = flip_storage_order (mode, op0);
/* Unsigned path: shift the field down to the lsb, then mask off any
   bits above it.  */
1990 if (unsignedp)
1992 if (bitnum)
1994 /* If the field does not already start at the lsb,
1995 shift it so it does. */
1996 /* Maybe propagate the target for the shift. */
1997 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1998 if (tmode != mode)
1999 subtarget = 0;
2000 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
2002 /* Convert the value to the desired mode. */
2003 if (mode != tmode)
2004 op0 = convert_to_mode (tmode, op0, 1);
2006 /* Unless the msb of the field used to be the msb when we shifted,
2007 mask out the upper bits. */
/* If the field reached the mode's msb, the logical right shift already
   cleared the high bits, so no AND is needed.  */
2009 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
2010 return expand_binop (GET_MODE (op0), and_optab, op0,
2011 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
2012 target, 1, OPTAB_LIB_WIDEN);
2013 return op0;
2016 /* To extract a signed bit-field, first shift its msb to the msb of the word,
2017 then arithmetic-shift its lsb to the lsb of the word. */
2018 op0 = force_reg (mode, op0);
2020 /* Find the narrowest integer mode that contains the field. */
/* NOTE(review): MODE is repurposed here to name that narrower mode;
   the left-shift/arithmetic-right-shift pair below performs the
   sign extension.  */
2022 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
2023 mode = GET_MODE_WIDER_MODE (mode))
2024 if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
2026 op0 = convert_to_mode (mode, op0, 0);
2027 break;
2030 if (mode != tmode)
2031 target = 0;
2033 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2035 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2036 /* Maybe propagate the target for the shift. */
2037 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2038 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2041 return expand_shift (RSHIFT_EXPR, mode, op0,
2042 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
2045 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2046 VALUE << BITPOS. */
2048 static rtx
2049 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2050 int bitpos)
/* The shift is performed in wide-int arithmetic, so BITPOS may be as
   large as MODE's precision without host-integer overflow concerns.  */
2052 return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2055 /* Extract a bit field that is split across two words
2056 and return an RTX for the result.
2058 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2059 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2060 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2062 If REVERSE is true, the extraction is to be done in reverse order. */
2064 static rtx
2065 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
2066 unsigned HOST_WIDE_INT bitpos, int unsignedp,
2067 bool reverse)
2069 unsigned int unit;
2070 unsigned int bitsdone = 0;
2071 rtx result = NULL_RTX;
2072 int first = 1;
2074 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2075 much at a time. */
2076 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2077 unit = BITS_PER_WORD;
2078 else
2079 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
/* Extract the field UNIT-sized piece by piece, each piece as an
   unsigned sub-field, and OR the pieces together.  */
2081 while (bitsdone < bitsize)
2083 unsigned HOST_WIDE_INT thissize;
2084 rtx part, word;
2085 unsigned HOST_WIDE_INT thispos;
2086 unsigned HOST_WIDE_INT offset;
2088 offset = (bitpos + bitsdone) / unit;
2089 thispos = (bitpos + bitsdone) % unit;
2091 /* THISSIZE must not overrun a word boundary. Otherwise,
2092 extract_fixed_bit_field will call us again, and we will mutually
2093 recurse forever. */
2094 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2095 thissize = MIN (thissize, unit - thispos);
2097 /* If OP0 is a register, then handle OFFSET here. */
2098 if (SUBREG_P (op0) || REG_P (op0))
2100 word = operand_subword_force (op0, offset, GET_MODE (op0));
2101 offset = 0;
2103 else
2104 word = op0;
2106 /* Extract the parts in bit-counting order,
2107 whose meaning is determined by BYTES_PER_UNIT.
2108 OFFSET is in UNITs, and UNIT is in bits. */
/* Each part is extracted zero-extended (unsignedp = 1) so that ORing
   them below cannot smear sign bits into other parts.  */
2109 part = extract_fixed_bit_field (word_mode, word, thissize,
2110 offset * unit + thispos, 0, 1, reverse);
2111 bitsdone += thissize;
2113 /* Shift this part into place for the result. */
2114 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2116 if (bitsize != bitsdone)
2117 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2118 bitsize - bitsdone, 0, 1);
2120 else
2122 if (bitsdone != thissize)
2123 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2124 bitsdone - thissize, 0, 1);
2127 if (first)
2128 result = part;
2129 else
2130 /* Combine the parts with bitwise or. This works
2131 because we extracted each part as an unsigned bit field. */
2132 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2133 OPTAB_LIB_WIDEN);
2135 first = 0;
2138 /* Unsigned bit field: we are done. */
2139 if (unsignedp)
2140 return result;
2141 /* Signed bit field: sign-extend with two arithmetic shifts. */
2142 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2143 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2144 return expand_shift (RSHIFT_EXPR, word_mode, result,
2145 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2148 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2149 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2150 MODE, fill the upper bits with zeros. Fail if the layout of either
2151 mode is unknown (as for CC modes) or if the extraction would involve
2152 unprofitable mode punning. Return the value on success, otherwise
2153 return null.
2155 This is different from gen_lowpart* in these respects:
2157 - the returned value must always be considered an rvalue
2159 - when MODE is wider than SRC_MODE, the extraction involves
2160 a zero extension
2162 - when MODE is smaller than SRC_MODE, the extraction involves
2163 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2165 In other words, this routine performs a computation, whereas the
2166 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2167 operations. */
/* NOTE(review): the return-type line (presumably "rtx") appears to have
   been lost in this extraction, along with brace-only lines — confirm
   against the upstream file before editing.  */
2170 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2172 machine_mode int_mode, src_int_mode;
2174 if (mode == src_mode)
2175 return src;
2177 if (CONSTANT_P (src))
2179 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2180 fails, it will happily create (subreg (symbol_ref)) or similar
2181 invalid SUBREGs. */
2182 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2183 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2184 if (ret)
2185 return ret;
2187 if (GET_MODE (src) == VOIDmode
2188 || !validate_subreg (mode, src_mode, src, byte))
2189 return NULL_RTX;
2191 src = force_reg (GET_MODE (src), src);
2192 return gen_rtx_SUBREG (mode, src, byte);
/* CC modes have no defined bit layout, so punning through them is
   meaningless.  */
2195 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2196 return NULL_RTX;
2198 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2199 && MODES_TIEABLE_P (mode, src_mode))
2201 rtx x = gen_lowpart_common (mode, src);
2202 if (x)
2203 return x;
/* General case: pun both sides to integer modes, zero-extend or
   truncate between them, then pun the result back to MODE.  */
2206 src_int_mode = int_mode_for_mode (src_mode);
2207 int_mode = int_mode_for_mode (mode);
2208 if (src_int_mode == BLKmode || int_mode == BLKmode)
2209 return NULL_RTX;
2211 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2212 return NULL_RTX;
2213 if (!MODES_TIEABLE_P (int_mode, mode))
2214 return NULL_RTX;
2216 src = gen_lowpart (src_int_mode, src);
2217 src = convert_modes (int_mode, src_int_mode, src, true);
2218 src = gen_lowpart (mode, src);
2219 return src;
2222 /* Add INC into TARGET. */
2224 void
2225 expand_inc (rtx target, rtx inc)
2227 rtx value = expand_binop (GET_MODE (target), add_optab,
2228 target, inc,
2229 target, 0, OPTAB_LIB_WIDEN);
2230 if (value != target)
2231 emit_move_insn (target, value);
2234 /* Subtract DEC from TARGET. */
2236 void
2237 expand_dec (rtx target, rtx dec)
2239 rtx value = expand_binop (GET_MODE (target), sub_optab,
2240 target, dec,
2241 target, 0, OPTAB_LIB_WIDEN);
2242 if (value != target)
2243 emit_move_insn (target, value);
2246 /* Output a shift instruction for expression code CODE,
2247 with SHIFTED being the rtx for the value to shift,
2248 and AMOUNT the rtx for the amount to shift by.
2249 Store the result in the rtx TARGET, if that is convenient.
2250 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2251 Return the rtx for where the value is.
2252 If that cannot be done, abort the compilation unless MAY_FAIL is true,
2253 in which case 0 is returned. */
2255 static rtx
2256 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2257 rtx amount, rtx target, int unsignedp, bool may_fail = false)
2259 rtx op1, temp = 0;
2260 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2261 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2262 optab lshift_optab = ashl_optab;
2263 optab rshift_arith_optab = ashr_optab;
2264 optab rshift_uns_optab = lshr_optab;
2265 optab lrotate_optab = rotl_optab;
2266 optab rrotate_optab = rotr_optab;
2267 machine_mode op1_mode;
2268 machine_mode scalar_mode = mode;
2269 int attempt;
2270 bool speed = optimize_insn_for_speed_p ();
2272 if (VECTOR_MODE_P (mode))
2273 scalar_mode = GET_MODE_INNER (mode);
2274 op1 = amount;
2275 op1_mode = GET_MODE (op1);
2277 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2278 shift amount is a vector, use the vector/vector shift patterns. */
2279 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2281 lshift_optab = vashl_optab;
2282 rshift_arith_optab = vashr_optab;
2283 rshift_uns_optab = vlshr_optab;
2284 lrotate_optab = vrotl_optab;
2285 rrotate_optab = vrotr_optab;
2288 /* Previously detected shift-counts computed by NEGATE_EXPR
2289 and shifted in the other direction; but that does not work
2290 on all machines. */
2292 if (SHIFT_COUNT_TRUNCATED)
2294 if (CONST_INT_P (op1)
2295 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2296 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2297 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2298 % GET_MODE_BITSIZE (scalar_mode));
2299 else if (GET_CODE (op1) == SUBREG
2300 && subreg_lowpart_p (op1)
2301 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2302 && SCALAR_INT_MODE_P (GET_MODE (op1)))
2303 op1 = SUBREG_REG (op1);
2306 /* Canonicalize rotates by constant amount. If op1 is bitsize / 2,
2307 prefer left rotation, if op1 is from bitsize / 2 + 1 to
2308 bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2309 amount instead. */
2310 if (rotate
2311 && CONST_INT_P (op1)
2312 && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2313 GET_MODE_BITSIZE (scalar_mode) - 1))
2315 op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2316 left = !left;
2317 code = left ? LROTATE_EXPR : RROTATE_EXPR;
2320 /* Rotation of 16bit values by 8 bits is effectively equivalent to a bswaphi.
2321 Note that this is not the case for bigger values. For instance a rotation
2322 of 0x01020304 by 16 bits gives 0x03040102 which is different from
2323 0x04030201 (bswapsi). */
2324 if (rotate
2325 && CONST_INT_P (op1)
2326 && INTVAL (op1) == BITS_PER_UNIT
2327 && GET_MODE_SIZE (scalar_mode) == 2
2328 && optab_handler (bswap_optab, HImode) != CODE_FOR_nothing)
2329 return expand_unop (HImode, bswap_optab, shifted, NULL_RTX,
2330 unsignedp);
/* A shift by zero is a no-op.  */
2332 if (op1 == const0_rtx)
2333 return shifted;
2335 /* Check whether its cheaper to implement a left shift by a constant
2336 bit count by a sequence of additions. */
2337 if (code == LSHIFT_EXPR
2338 && CONST_INT_P (op1)
2339 && INTVAL (op1) > 0
2340 && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2341 && INTVAL (op1) < MAX_BITS_PER_WORD
2342 && (shift_cost (speed, mode, INTVAL (op1))
2343 > INTVAL (op1) * add_cost (speed, mode))
2344 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2346 int i;
2347 for (i = 0; i < INTVAL (op1); i++)
/* Each x + x doubles the value, i.e. shifts left by one.  */
2349 temp = force_reg (mode, shifted);
2350 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2351 unsignedp, OPTAB_LIB_WIDEN);
2353 return shifted;
/* Try progressively more expensive optab strategies: direct insn,
   widening, then library call.  */
2356 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2358 enum optab_methods methods;
2360 if (attempt == 0)
2361 methods = OPTAB_DIRECT;
2362 else if (attempt == 1)
2363 methods = OPTAB_WIDEN;
2364 else
2365 methods = OPTAB_LIB_WIDEN;
2367 if (rotate)
2369 /* Widening does not work for rotation. */
2370 if (methods == OPTAB_WIDEN)
2371 continue;
2372 else if (methods == OPTAB_LIB_WIDEN)
2374 /* If we have been unable to open-code this by a rotation,
2375 do it as the IOR of two shifts. I.e., to rotate A
2376 by N bits, compute
2377 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2378 where C is the bitsize of A.
2380 It is theoretically possible that the target machine might
2381 not be able to perform either shift and hence we would
2382 be making two libcalls rather than just the one for the
2383 shift (similarly if IOR could not be done). We will allow
2384 this extremely unlikely lossage to avoid complicating the
2385 code below. */
2387 rtx subtarget = target == shifted ? 0 : target;
2388 rtx new_amount, other_amount;
2389 rtx temp1;
2391 new_amount = op1;
2392 if (op1 == const0_rtx)
2393 return shifted;
2394 else if (CONST_INT_P (op1))
2395 other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode)
2396 - INTVAL (op1));
2397 else
2399 other_amount
2400 = simplify_gen_unary (NEG, GET_MODE (op1),
2401 op1, GET_MODE (op1));
2402 HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2403 other_amount
2404 = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2405 gen_int_mode (mask, GET_MODE (op1)));
2408 shifted = force_reg (mode, shifted);
2410 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2411 mode, shifted, new_amount, 0, 1);
2412 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2413 mode, shifted, other_amount,
2414 subtarget, 1);
2415 return expand_binop (mode, ior_optab, temp, temp1, target,
2416 unsignedp, methods);
2419 temp = expand_binop (mode,
2420 left ? lrotate_optab : rrotate_optab,
2421 shifted, op1, target, unsignedp, methods);
2423 else if (unsignedp)
2424 temp = expand_binop (mode,
2425 left ? lshift_optab : rshift_uns_optab,
2426 shifted, op1, target, unsignedp, methods);
2428 /* Do arithmetic shifts.
2429 Also, if we are going to widen the operand, we can just as well
2430 use an arithmetic right-shift instead of a logical one. */
2431 if (temp == 0 && ! rotate
2432 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2434 enum optab_methods methods1 = methods;
2436 /* If trying to widen a log shift to an arithmetic shift,
2437 don't accept an arithmetic shift of the same size. */
2438 if (unsignedp)
2439 methods1 = OPTAB_MUST_WIDEN;
2441 /* Arithmetic shift */
2443 temp = expand_binop (mode,
2444 left ? lshift_optab : rshift_arith_optab,
2445 shifted, op1, target, unsignedp, methods1);
2448 /* We used to try extzv here for logical right shifts, but that was
2449 only useful for one machine, the VAX, and caused poor code
2450 generation there for lshrdi3, so the code was deleted and a
2451 define_expand for lshrsi3 was added to vax.md. */
2454 gcc_assert (temp != NULL_RTX || may_fail);
2455 return temp;
2458 /* Output a shift instruction for expression code CODE,
2459 with SHIFTED being the rtx for the value to shift,
2460 and AMOUNT the amount to shift by.
2461 Store the result in the rtx TARGET, if that is convenient.
2462 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2463 Return the rtx for where the value is. */
2466 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2467 int amount, rtx target, int unsignedp)
2469 return expand_shift_1 (code, mode,
2470 shifted, GEN_INT (amount), target, unsignedp);
2473 /* Likewise, but return 0 if that cannot be done. */
2475 static rtx
2476 maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2477 int amount, rtx target, int unsignedp)
2479 return expand_shift_1 (code, mode,
2480 shifted, GEN_INT (amount), target, unsignedp, true);
2483 /* Output a shift instruction for expression code CODE,
2484 with SHIFTED being the rtx for the value to shift,
2485 and AMOUNT the tree for the amount to shift by.
2486 Store the result in the rtx TARGET, if that is convenient.
2487 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2488 Return the rtx for where the value is. */
2491 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2492 tree amount, rtx target, int unsignedp)
2494 return expand_shift_1 (code, mode,
2495 shifted, expand_normal (amount), target, unsignedp);
2499 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2500 const struct mult_cost *, machine_mode mode);
2501 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2502 const struct algorithm *, enum mult_variant);
2503 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2504 static rtx extract_high_half (machine_mode, rtx);
2505 static rtx expmed_mult_highpart (machine_mode, rtx, rtx, rtx, int, int);
2506 static rtx expmed_mult_highpart_optab (machine_mode, rtx, rtx, rtx,
2507 int, int);
2508 /* Compute and return the best algorithm for multiplying by T.
2509 The algorithm must cost less than cost_limit
2510 If retval.cost >= COST_LIMIT, no algorithm was found and all
2511 other field of the returned struct are undefined.
2512 MODE is the machine mode of the multiplication. */
/* This is a recursive branch-and-bound search over shift/add/sub
   decompositions of T, memoized through the alg_hash_entry cache.  */
2514 static void
2515 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2516 const struct mult_cost *cost_limit, machine_mode mode)
2518 int m;
2519 struct algorithm *alg_in, *best_alg;
2520 struct mult_cost best_cost;
2521 struct mult_cost new_limit;
2522 int op_cost, op_latency;
2523 unsigned HOST_WIDE_INT orig_t = t;
2524 unsigned HOST_WIDE_INT q;
2525 int maxm, hash_index;
2526 bool cache_hit = false;
2527 enum alg_code cache_alg = alg_zero;
2528 bool speed = optimize_insn_for_speed_p ();
2529 machine_mode imode;
2530 struct alg_hash_entry *entry_ptr;
2532 /* Indicate that no algorithm is yet found. If no algorithm
2533 is found, this value will be returned and indicate failure. */
2534 alg_out->cost.cost = cost_limit->cost + 1;
2535 alg_out->cost.latency = cost_limit->latency + 1;
2537 if (cost_limit->cost < 0
2538 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2539 return;
2541 /* Be prepared for vector modes. */
2542 imode = GET_MODE_INNER (mode);
2544 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2546 /* Restrict the bits of "t" to the multiplication's mode. */
2547 t &= GET_MODE_MASK (imode);
2549 /* t == 1 can be done in zero cost. */
2550 if (t == 1)
2552 alg_out->ops = 1;
2553 alg_out->cost.cost = 0;
2554 alg_out->cost.latency = 0;
2555 alg_out->op[0] = alg_m;
2556 return;
2559 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2560 fail now. */
2561 if (t == 0)
2563 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2564 return;
2565 else
2567 alg_out->ops = 1;
2568 alg_out->cost.cost = zero_cost (speed);
2569 alg_out->cost.latency = zero_cost (speed);
2570 alg_out->op[0] = alg_zero;
2571 return;
2575 /* We'll be needing a couple extra algorithm structures now. */
2577 alg_in = XALLOCA (struct algorithm);
2578 best_alg = XALLOCA (struct algorithm);
2579 best_cost = *cost_limit;
2581 /* Compute the hash index. */
2582 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2584 /* See if we already know what to do for T. */
2585 entry_ptr = alg_hash_entry_ptr (hash_index);
2586 if (entry_ptr->t == t
2587 && entry_ptr->mode == mode
2588 && entry_ptr->speed == speed
2589 && entry_ptr->alg != alg_unknown)
2591 cache_alg = entry_ptr->alg;
2593 if (cache_alg == alg_impossible)
2595 /* The cache tells us that it's impossible to synthesize
2596 multiplication by T within entry_ptr->cost. */
2597 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2598 /* COST_LIMIT is at least as restrictive as the one
2599 recorded in the hash table, in which case we have no
2600 hope of synthesizing a multiplication. Just
2601 return. */
2602 return;
2604 /* If we get here, COST_LIMIT is less restrictive than the
2605 one recorded in the hash table, so we may be able to
2606 synthesize a multiplication. Proceed as if we didn't
2607 have the cache entry. */
2609 else
2611 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2612 /* The cached algorithm shows that this multiplication
2613 requires more cost than COST_LIMIT. Just return. This
2614 way, we don't clobber this cache entry with
2615 alg_impossible but retain useful information. */
2616 return;
2618 cache_hit = true;
/* On a cache hit, jump straight to the strategy the cache recorded
   as best, skipping the other candidates.  */
2620 switch (cache_alg)
2622 case alg_shift:
2623 goto do_alg_shift;
2625 case alg_add_t_m2:
2626 case alg_sub_t_m2:
2627 goto do_alg_addsub_t_m2;
2629 case alg_add_factor:
2630 case alg_sub_factor:
2631 goto do_alg_addsub_factor;
2633 case alg_add_t2_m:
2634 goto do_alg_add_t2_m;
2636 case alg_sub_t2_m:
2637 goto do_alg_sub_t2_m;
2639 default:
2640 gcc_unreachable ();
2645 /* If we have a group of zero bits at the low-order part of T, try
2646 multiplying by the remaining bits and then doing a shift. */
2648 if ((t & 1) == 0)
2650 do_alg_shift:
2651 m = ctz_or_zero (t); /* m = number of low zero bits */
2652 if (m < maxm)
2654 q = t >> m;
2655 /* The function expand_shift will choose between a shift and
2656 a sequence of additions, so the observed cost is given as
2657 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2658 op_cost = m * add_cost (speed, mode);
2659 if (shift_cost (speed, mode, m) < op_cost)
2660 op_cost = shift_cost (speed, mode, m);
2661 new_limit.cost = best_cost.cost - op_cost;
2662 new_limit.latency = best_cost.latency - op_cost;
2663 synth_mult (alg_in, q, &new_limit, mode);
2665 alg_in->cost.cost += op_cost;
2666 alg_in->cost.latency += op_cost;
2667 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2669 best_cost = alg_in->cost;
2670 std::swap (alg_in, best_alg);
2671 best_alg->log[best_alg->ops] = m;
2672 best_alg->op[best_alg->ops] = alg_shift;
2675 /* See if treating ORIG_T as a signed number yields a better
2676 sequence. Try this sequence only for a negative ORIG_T
2677 as it would be useless for a non-negative ORIG_T. */
2678 if ((HOST_WIDE_INT) orig_t < 0)
2680 /* Shift ORIG_T as follows because a right shift of a
2681 negative-valued signed type is implementation
2682 defined. */
2683 q = ~(~orig_t >> m);
2684 /* The function expand_shift will choose between a shift
2685 and a sequence of additions, so the observed cost is
2686 given as MIN (m * add_cost(speed, mode),
2687 shift_cost(speed, mode, m)). */
2688 op_cost = m * add_cost (speed, mode);
2689 if (shift_cost (speed, mode, m) < op_cost)
2690 op_cost = shift_cost (speed, mode, m);
2691 new_limit.cost = best_cost.cost - op_cost;
2692 new_limit.latency = best_cost.latency - op_cost;
2693 synth_mult (alg_in, q, &new_limit, mode);
2695 alg_in->cost.cost += op_cost;
2696 alg_in->cost.latency += op_cost;
2697 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2699 best_cost = alg_in->cost;
2700 std::swap (alg_in, best_alg);
2701 best_alg->log[best_alg->ops] = m;
2702 best_alg->op[best_alg->ops] = alg_shift;
2706 if (cache_hit)
2707 goto done;
2710 /* If we have an odd number, add or subtract one. */
2711 if ((t & 1) != 0)
2713 unsigned HOST_WIDE_INT w;
2715 do_alg_addsub_t_m2:
/* W becomes the lowest set of contiguous one-bits of T plus one,
   i.e. it tells how T's low bits end.  */
2716 for (w = 1; (w & t) != 0; w <<= 1)
2718 /* If T was -1, then W will be zero after the loop. This is another
2719 case where T ends with ...111. Handling this with (T + 1) and
2720 subtract 1 produces slightly better code and results in algorithm
2721 selection much faster than treating it like the ...0111 case
2722 below. */
2723 if (w == 0
2724 || (w > 2
2725 /* Reject the case where t is 3.
2726 Thus we prefer addition in that case. */
2727 && t != 3))
2729 /* T ends with ...111. Multiply by (T + 1) and subtract T. */
2731 op_cost = add_cost (speed, mode);
2732 new_limit.cost = best_cost.cost - op_cost;
2733 new_limit.latency = best_cost.latency - op_cost;
2734 synth_mult (alg_in, t + 1, &new_limit, mode);
2736 alg_in->cost.cost += op_cost;
2737 alg_in->cost.latency += op_cost;
2738 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2740 best_cost = alg_in->cost;
2741 std::swap (alg_in, best_alg);
2742 best_alg->log[best_alg->ops] = 0;
2743 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2746 else
2748 /* T ends with ...01 or ...011. Multiply by (T - 1) and add T. */
2750 op_cost = add_cost (speed, mode);
2751 new_limit.cost = best_cost.cost - op_cost;
2752 new_limit.latency = best_cost.latency - op_cost;
2753 synth_mult (alg_in, t - 1, &new_limit, mode);
2755 alg_in->cost.cost += op_cost;
2756 alg_in->cost.latency += op_cost;
2757 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2759 best_cost = alg_in->cost;
2760 std::swap (alg_in, best_alg);
2761 best_alg->log[best_alg->ops] = 0;
2762 best_alg->op[best_alg->ops] = alg_add_t_m2;
2766 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2767 quickly with a - a * n for some appropriate constant n. */
2768 m = exact_log2 (-orig_t + 1);
2769 if (m >= 0 && m < maxm)
2771 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2772 /* If the target has a cheap shift-and-subtract insn use
2773 that in preference to a shift insn followed by a sub insn.
2774 Assume that the shift-and-sub is "atomic" with a latency
2775 equal to it's cost, otherwise assume that on superscalar
2776 hardware the shift may be executed concurrently with the
2777 earlier steps in the algorithm. */
2778 if (shiftsub1_cost (speed, mode, m) <= op_cost)
2780 op_cost = shiftsub1_cost (speed, mode, m);
2781 op_latency = op_cost;
2783 else
2784 op_latency = add_cost (speed, mode);
2786 new_limit.cost = best_cost.cost - op_cost;
2787 new_limit.latency = best_cost.latency - op_latency;
2788 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2789 &new_limit, mode);
2791 alg_in->cost.cost += op_cost;
2792 alg_in->cost.latency += op_latency;
2793 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2795 best_cost = alg_in->cost;
2796 std::swap (alg_in, best_alg);
2797 best_alg->log[best_alg->ops] = m;
2798 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2802 if (cache_hit)
2803 goto done;
2806 /* Look for factors of t of the form
2807 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2808 If we find such a factor, we can multiply by t using an algorithm that
2809 multiplies by q, shift the result by m and add/subtract it to itself.
2811 We search for large factors first and loop down, even if large factors
2812 are less probable than small; if we find a large factor we will find a
2813 good sequence quickly, and therefore be able to prune (by decreasing
2814 COST_LIMIT) the search. */
2816 do_alg_addsub_factor:
2817 for (m = floor_log2 (t - 1); m >= 2; m--)
2819 unsigned HOST_WIDE_INT d;
2821 d = (HOST_WIDE_INT_1U << m) + 1;
2822 if (t % d == 0 && t > d && m < maxm
2823 && (!cache_hit || cache_alg == alg_add_factor))
2825 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2826 if (shiftadd_cost (speed, mode, m) <= op_cost)
2827 op_cost = shiftadd_cost (speed, mode, m);
2829 op_latency = op_cost;
2832 new_limit.cost = best_cost.cost - op_cost;
2833 new_limit.latency = best_cost.latency - op_latency;
2834 synth_mult (alg_in, t / d, &new_limit, mode);
2836 alg_in->cost.cost += op_cost;
2837 alg_in->cost.latency += op_latency;
/* The factored sub-sequence finishes before this step starts, so
   the overall latency is at least this step's own cost.  */
2838 if (alg_in->cost.latency < op_cost)
2839 alg_in->cost.latency = op_cost;
2840 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2842 best_cost = alg_in->cost;
2843 std::swap (alg_in, best_alg);
2844 best_alg->log[best_alg->ops] = m;
2845 best_alg->op[best_alg->ops] = alg_add_factor;
2847 /* Other factors will have been taken care of in the recursion. */
2848 break;
2851 d = (HOST_WIDE_INT_1U << m) - 1;
2852 if (t % d == 0 && t > d && m < maxm
2853 && (!cache_hit || cache_alg == alg_sub_factor))
2855 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2856 if (shiftsub0_cost (speed, mode, m) <= op_cost)
2857 op_cost = shiftsub0_cost (speed, mode, m);
2859 op_latency = op_cost;
2861 new_limit.cost = best_cost.cost - op_cost;
2862 new_limit.latency = best_cost.latency - op_latency;
2863 synth_mult (alg_in, t / d, &new_limit, mode);
2865 alg_in->cost.cost += op_cost;
2866 alg_in->cost.latency += op_latency;
2867 if (alg_in->cost.latency < op_cost)
2868 alg_in->cost.latency = op_cost;
2869 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2871 best_cost = alg_in->cost;
2872 std::swap (alg_in, best_alg);
2873 best_alg->log[best_alg->ops] = m;
2874 best_alg->op[best_alg->ops] = alg_sub_factor;
2876 break;
2879 if (cache_hit)
2880 goto done;
2882 /* Try shift-and-add (load effective address) instructions,
2883 i.e. do a*3, a*5, a*9. */
2884 if ((t & 1) != 0)
2886 do_alg_add_t2_m:
2887 q = t - 1;
2888 m = ctz_hwi (q);
2889 if (q && m < maxm)
2891 op_cost = shiftadd_cost (speed, mode, m);
2892 new_limit.cost = best_cost.cost - op_cost;
2893 new_limit.latency = best_cost.latency - op_cost;
2894 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2896 alg_in->cost.cost += op_cost;
2897 alg_in->cost.latency += op_cost;
2898 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2900 best_cost = alg_in->cost;
2901 std::swap (alg_in, best_alg);
2902 best_alg->log[best_alg->ops] = m;
2903 best_alg->op[best_alg->ops] = alg_add_t2_m;
2906 if (cache_hit)
2907 goto done;
2909 do_alg_sub_t2_m:
2910 q = t + 1;
2911 m = ctz_hwi (q);
2912 if (q && m < maxm)
2914 op_cost = shiftsub0_cost (speed, mode, m);
2915 new_limit.cost = best_cost.cost - op_cost;
2916 new_limit.latency = best_cost.latency - op_cost;
2917 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2919 alg_in->cost.cost += op_cost;
2920 alg_in->cost.latency += op_cost;
2921 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2923 best_cost = alg_in->cost;
2924 std::swap (alg_in, best_alg);
2925 best_alg->log[best_alg->ops] = m;
2926 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2929 if (cache_hit)
2930 goto done;
2933 done:
2934 /* If best_cost has not decreased, we have not found any algorithm. */
2935 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2937 /* We failed to find an algorithm. Record alg_impossible for
2938 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2939 we are asked to find an algorithm for T within the same or
2940 lower COST_LIMIT, we can immediately return to the
2941 caller. */
2942 entry_ptr->t = t;
2943 entry_ptr->mode = mode;
2944 entry_ptr->speed = speed;
2945 entry_ptr->alg = alg_impossible;
2946 entry_ptr->cost = *cost_limit;
2947 return;
2950 /* Cache the result. */
2951 if (!cache_hit)
2953 entry_ptr->t = t;
2954 entry_ptr->mode = mode;
2955 entry_ptr->speed = speed;
2956 entry_ptr->alg = best_alg->op[best_alg->ops];
2957 entry_ptr->cost.cost = best_cost.cost;
2958 entry_ptr->cost.latency = best_cost.latency;
2961 /* If we are getting a too long sequence for `struct algorithm'
2962 to record, make this search fail. */
2963 if (best_alg->ops == MAX_BITS_PER_WORD)
2964 return;
2966 /* Copy the algorithm from temporary space to the space at alg_out.
2967 We avoid using structure assignment because the majority of
2968 best_alg is normally undefined, and this is a critical function. */
2969 alg_out->ops = best_alg->ops + 1;
2970 alg_out->cost = best_cost;
2971 memcpy (alg_out->op, best_alg->op,
2972 alg_out->ops * sizeof *alg_out->op);
2973 memcpy (alg_out->log, best_alg->log,
2974 alg_out->ops * sizeof *alg_out->log);
2977 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2978 Try three variations:
2980 - a shift/add sequence based on VAL itself
2981 - a shift/add sequence based on -VAL, followed by a negation
2982 - a shift/add sequence based on VAL - 1, followed by an addition.
2984 Return true if the cheapest of these cost less than MULT_COST,
2985 describing the algorithm in *ALG and final fixup in *VARIANT. */
2987 bool
2988 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
2989 struct algorithm *alg, enum mult_variant *variant,
2990 int mult_cost)
2992 struct algorithm alg2;
2993 struct mult_cost limit;
2994 int op_cost;
2995 bool speed = optimize_insn_for_speed_p ();
2997 /* Fail quickly for impossible bounds. */
2998 if (mult_cost < 0)
2999 return false;
3001 /* Ensure that mult_cost provides a reasonable upper bound.
3002 Any constant multiplication can be performed with less
3003 than 2 * bits additions. */
3004 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
3005 if (mult_cost > op_cost)
3006 mult_cost = op_cost;
3008 *variant = basic_variant;
3009 limit.cost = mult_cost;
3010 limit.latency = mult_cost;
3011 synth_mult (alg, val, &limit, mode);
3013 /* This works only if the inverted value actually fits in an
3014 `unsigned int' */
3015 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
3017 op_cost = neg_cost (speed, mode);
3018 if (MULT_COST_LESS (&alg->cost, mult_cost))
3020 limit.cost = alg->cost.cost - op_cost;
3021 limit.latency = alg->cost.latency - op_cost;
3023 else
3025 limit.cost = mult_cost - op_cost;
3026 limit.latency = mult_cost - op_cost;
3029 synth_mult (&alg2, -val, &limit, mode);
3030 alg2.cost.cost += op_cost;
3031 alg2.cost.latency += op_cost;
3032 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3033 *alg = alg2, *variant = negate_variant;
3036 /* This proves very useful for division-by-constant. */
3037 op_cost = add_cost (speed, mode);
3038 if (MULT_COST_LESS (&alg->cost, mult_cost))
3040 limit.cost = alg->cost.cost - op_cost;
3041 limit.latency = alg->cost.latency - op_cost;
3043 else
3045 limit.cost = mult_cost - op_cost;
3046 limit.latency = mult_cost - op_cost;
3049 synth_mult (&alg2, val - 1, &limit, mode);
3050 alg2.cost.cost += op_cost;
3051 alg2.cost.latency += op_cost;
3052 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3053 *alg = alg2, *variant = add_variant;
3055 return MULT_COST_LESS (&alg->cost, mult_cost);
3058 /* A subroutine of expand_mult, used for constant multiplications.
3059 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3060 convenient. Use the shift/add sequence described by ALG and apply
3061 the final fixup specified by VARIANT. */
3063 static rtx
3064 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
3065 rtx target, const struct algorithm *alg,
3066 enum mult_variant variant)
3068 unsigned HOST_WIDE_INT val_so_far;
3069 rtx_insn *insn;
3070 rtx accum, tem;
3071 int opno;
3072 machine_mode nmode;
3074 /* Avoid referencing memory over and over and invalid sharing
3075 on SUBREGs. */
3076 op0 = force_reg (mode, op0);
3078 /* ACCUM starts out either as OP0 or as a zero, depending on
3079 the first operation. */
3081 if (alg->op[0] == alg_zero)
3083 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3084 val_so_far = 0;
3086 else if (alg->op[0] == alg_m)
3088 accum = copy_to_mode_reg (mode, op0);
3089 val_so_far = 1;
3091 else
3092 gcc_unreachable ();
3094 for (opno = 1; opno < alg->ops; opno++)
3096 int log = alg->log[opno];
3097 rtx shift_subtarget = optimize ? 0 : accum;
3098 rtx add_target
3099 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3100 && !optimize)
3101 ? target : 0;
3102 rtx accum_target = optimize ? 0 : accum;
3103 rtx accum_inner;
3105 switch (alg->op[opno])
3107 case alg_shift:
3108 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3109 /* REG_EQUAL note will be attached to the following insn. */
3110 emit_move_insn (accum, tem);
3111 val_so_far <<= log;
3112 break;
3114 case alg_add_t_m2:
3115 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3116 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3117 add_target ? add_target : accum_target);
3118 val_so_far += HOST_WIDE_INT_1U << log;
3119 break;
3121 case alg_sub_t_m2:
3122 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3123 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3124 add_target ? add_target : accum_target);
3125 val_so_far -= HOST_WIDE_INT_1U << log;
3126 break;
3128 case alg_add_t2_m:
3129 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3130 log, shift_subtarget, 0);
3131 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3132 add_target ? add_target : accum_target);
3133 val_so_far = (val_so_far << log) + 1;
3134 break;
3136 case alg_sub_t2_m:
3137 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3138 log, shift_subtarget, 0);
3139 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3140 add_target ? add_target : accum_target);
3141 val_so_far = (val_so_far << log) - 1;
3142 break;
3144 case alg_add_factor:
3145 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3146 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3147 add_target ? add_target : accum_target);
3148 val_so_far += val_so_far << log;
3149 break;
3151 case alg_sub_factor:
3152 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3153 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3154 (add_target
3155 ? add_target : (optimize ? 0 : tem)));
3156 val_so_far = (val_so_far << log) - val_so_far;
3157 break;
3159 default:
3160 gcc_unreachable ();
3163 if (SCALAR_INT_MODE_P (mode))
3165 /* Write a REG_EQUAL note on the last insn so that we can cse
3166 multiplication sequences. Note that if ACCUM is a SUBREG,
3167 we've set the inner register and must properly indicate that. */
3168 tem = op0, nmode = mode;
3169 accum_inner = accum;
3170 if (GET_CODE (accum) == SUBREG)
3172 accum_inner = SUBREG_REG (accum);
3173 nmode = GET_MODE (accum_inner);
3174 tem = gen_lowpart (nmode, op0);
3177 insn = get_last_insn ();
3178 set_dst_reg_note (insn, REG_EQUAL,
3179 gen_rtx_MULT (nmode, tem,
3180 gen_int_mode (val_so_far, nmode)),
3181 accum_inner);
3185 if (variant == negate_variant)
3187 val_so_far = -val_so_far;
3188 accum = expand_unop (mode, neg_optab, accum, target, 0);
3190 else if (variant == add_variant)
3192 val_so_far = val_so_far + 1;
3193 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3196 /* Compare only the bits of val and val_so_far that are significant
3197 in the result mode, to avoid sign-/zero-extension confusion. */
3198 nmode = GET_MODE_INNER (mode);
3199 val &= GET_MODE_MASK (nmode);
3200 val_so_far &= GET_MODE_MASK (nmode);
3201 gcc_assert (val == (HOST_WIDE_INT) val_so_far);
3203 return accum;
3206 /* Perform a multiplication and return an rtx for the result.
3207 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3208 TARGET is a suggestion for where to store the result (an rtx).
3210 We check specially for a constant integer as OP1.
3211 If you want this check for OP0 as well, then before calling
3212 you should swap the two operands if OP0 would be constant. */
3215 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3216 int unsignedp)
3218 enum mult_variant variant;
3219 struct algorithm algorithm;
3220 rtx scalar_op1;
3221 int max_cost;
3222 bool speed = optimize_insn_for_speed_p ();
3223 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3225 if (CONSTANT_P (op0))
3226 std::swap (op0, op1);
3228 /* For vectors, there are several simplifications that can be made if
3229 all elements of the vector constant are identical. */
3230 scalar_op1 = unwrap_const_vec_duplicate (op1);
3232 if (INTEGRAL_MODE_P (mode))
3234 rtx fake_reg;
3235 HOST_WIDE_INT coeff;
3236 bool is_neg;
3237 int mode_bitsize;
3239 if (op1 == CONST0_RTX (mode))
3240 return op1;
3241 if (op1 == CONST1_RTX (mode))
3242 return op0;
3243 if (op1 == CONSTM1_RTX (mode))
3244 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3245 op0, target, 0);
3247 if (do_trapv)
3248 goto skip_synth;
3250 /* If mode is integer vector mode, check if the backend supports
3251 vector lshift (by scalar or vector) at all. If not, we can't use
3252 synthetized multiply. */
3253 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3254 && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3255 && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3256 goto skip_synth;
3258 /* These are the operations that are potentially turned into
3259 a sequence of shifts and additions. */
3260 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3262 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3263 less than or equal in size to `unsigned int' this doesn't matter.
3264 If the mode is larger than `unsigned int', then synth_mult works
3265 only if the constant value exactly fits in an `unsigned int' without
3266 any truncation. This means that multiplying by negative values does
3267 not work; results are off by 2^32 on a 32 bit machine. */
3268 if (CONST_INT_P (scalar_op1))
3270 coeff = INTVAL (scalar_op1);
3271 is_neg = coeff < 0;
3273 #if TARGET_SUPPORTS_WIDE_INT
3274 else if (CONST_WIDE_INT_P (scalar_op1))
3275 #else
3276 else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3277 #endif
3279 int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode));
3280 /* Perfect power of 2 (other than 1, which is handled above). */
3281 if (shift > 0)
3282 return expand_shift (LSHIFT_EXPR, mode, op0,
3283 shift, target, unsignedp);
3284 else
3285 goto skip_synth;
3287 else
3288 goto skip_synth;
3290 /* We used to test optimize here, on the grounds that it's better to
3291 produce a smaller program when -O is not used. But this causes
3292 such a terrible slowdown sometimes that it seems better to always
3293 use synth_mult. */
3295 /* Special case powers of two. */
3296 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3297 && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3298 return expand_shift (LSHIFT_EXPR, mode, op0,
3299 floor_log2 (coeff), target, unsignedp);
3301 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3303 /* Attempt to handle multiplication of DImode values by negative
3304 coefficients, by performing the multiplication by a positive
3305 multiplier and then inverting the result. */
3306 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3308 /* Its safe to use -coeff even for INT_MIN, as the
3309 result is interpreted as an unsigned coefficient.
3310 Exclude cost of op0 from max_cost to match the cost
3311 calculation of the synth_mult. */
3312 coeff = -(unsigned HOST_WIDE_INT) coeff;
3313 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3314 mode, speed)
3315 - neg_cost (speed, mode));
3316 if (max_cost <= 0)
3317 goto skip_synth;
3319 /* Special case powers of two. */
3320 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3322 rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3323 floor_log2 (coeff), target, unsignedp);
3324 return expand_unop (mode, neg_optab, temp, target, 0);
3327 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3328 max_cost))
3330 rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3331 &algorithm, variant);
3332 return expand_unop (mode, neg_optab, temp, target, 0);
3334 goto skip_synth;
3337 /* Exclude cost of op0 from max_cost to match the cost
3338 calculation of the synth_mult. */
3339 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3340 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3341 return expand_mult_const (mode, op0, coeff, target,
3342 &algorithm, variant);
3344 skip_synth:
3346 /* Expand x*2.0 as x+x. */
3347 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3348 && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
3350 op0 = force_reg (GET_MODE (op0), op0);
3351 return expand_binop (mode, add_optab, op0, op0,
3352 target, unsignedp, OPTAB_LIB_WIDEN);
3355 /* This used to use umul_optab if unsigned, but for non-widening multiply
3356 there is no difference between signed and unsigned. */
3357 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3358 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3359 gcc_assert (op0);
3360 return op0;
3363 /* Return a cost estimate for multiplying a register by the given
3364 COEFFicient in the given MODE and SPEED. */
3367 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3369 int max_cost;
3370 struct algorithm algorithm;
3371 enum mult_variant variant;
3373 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3374 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3375 mode, speed);
3376 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3377 return algorithm.cost.cost;
3378 else
3379 return max_cost;
3382 /* Perform a widening multiplication and return an rtx for the result.
3383 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3384 TARGET is a suggestion for where to store the result (an rtx).
3385 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3386 or smul_widen_optab.
3388 We check specially for a constant integer as OP1, comparing the
3389 cost of a widening multiply against the cost of a sequence of shifts
3390 and adds. */
3393 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3394 int unsignedp, optab this_optab)
3396 bool speed = optimize_insn_for_speed_p ();
3397 rtx cop1;
3399 if (CONST_INT_P (op1)
3400 && GET_MODE (op0) != VOIDmode
3401 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3402 this_optab == umul_widen_optab))
3403 && CONST_INT_P (cop1)
3404 && (INTVAL (cop1) >= 0
3405 || HWI_COMPUTABLE_MODE_P (mode)))
3407 HOST_WIDE_INT coeff = INTVAL (cop1);
3408 int max_cost;
3409 enum mult_variant variant;
3410 struct algorithm algorithm;
3412 if (coeff == 0)
3413 return CONST0_RTX (mode);
3415 /* Special case powers of two. */
3416 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3418 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3419 return expand_shift (LSHIFT_EXPR, mode, op0,
3420 floor_log2 (coeff), target, unsignedp);
3423 /* Exclude cost of op0 from max_cost to match the cost
3424 calculation of the synth_mult. */
3425 max_cost = mul_widen_cost (speed, mode);
3426 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3427 max_cost))
3429 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3430 return expand_mult_const (mode, op0, coeff, target,
3431 &algorithm, variant);
3434 return expand_binop (mode, this_optab, op0, op1, target,
3435 unsignedp, OPTAB_LIB_WIDEN);
3438 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3439 replace division by D, and put the least significant N bits of the result
3440 in *MULTIPLIER_PTR and return the most significant bit.
3442 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3443 needed precision is in PRECISION (should be <= N).
3445 PRECISION should be as small as possible so this function can choose
3446 multiplier more freely.
3448 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3449 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3451 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3452 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
3454 unsigned HOST_WIDE_INT
3455 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3456 unsigned HOST_WIDE_INT *multiplier_ptr,
3457 int *post_shift_ptr, int *lgup_ptr)
3459 int lgup, post_shift;
3460 int pow, pow2;
3462 /* lgup = ceil(log2(divisor)); */
3463 lgup = ceil_log2 (d);
3465 gcc_assert (lgup <= n);
3467 pow = n + lgup;
3468 pow2 = n + lgup - precision;
3470 /* mlow = 2^(N + lgup)/d */
3471 wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3472 wide_int mlow = wi::udiv_trunc (val, d);
3474 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3475 val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3476 wide_int mhigh = wi::udiv_trunc (val, d);
3478 /* If precision == N, then mlow, mhigh exceed 2^N
3479 (but they do not exceed 2^(N+1)). */
3481 /* Reduce to lowest terms. */
3482 for (post_shift = lgup; post_shift > 0; post_shift--)
3484 unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3485 HOST_BITS_PER_WIDE_INT);
3486 unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3487 HOST_BITS_PER_WIDE_INT);
3488 if (ml_lo >= mh_lo)
3489 break;
3491 mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3492 mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3495 *post_shift_ptr = post_shift;
3496 *lgup_ptr = lgup;
3497 if (n < HOST_BITS_PER_WIDE_INT)
3499 unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;
3500 *multiplier_ptr = mhigh.to_uhwi () & mask;
3501 return mhigh.to_uhwi () >= mask;
3503 else
3505 *multiplier_ptr = mhigh.to_uhwi ();
3506 return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3510 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3511 congruent to 1 (mod 2**N). */
3513 static unsigned HOST_WIDE_INT
3514 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3516 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3518 /* The algorithm notes that the choice y = x satisfies
3519 x*y == 1 mod 2^3, since x is assumed odd.
3520 Each iteration doubles the number of bits of significance in y. */
3522 unsigned HOST_WIDE_INT mask;
3523 unsigned HOST_WIDE_INT y = x;
3524 int nbit = 3;
3526 mask = (n == HOST_BITS_PER_WIDE_INT
3527 ? HOST_WIDE_INT_M1U
3528 : (HOST_WIDE_INT_1U << n) - 1);
3530 while (nbit < n)
3532 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3533 nbit *= 2;
3535 return y;
3538 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3539 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3540 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3541 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3542 become signed.
3544 The result is put in TARGET if that is convenient.
3546 MODE is the mode of operation. */
3549 expand_mult_highpart_adjust (machine_mode mode, rtx adj_operand, rtx op0,
3550 rtx op1, rtx target, int unsignedp)
3552 rtx tem;
3553 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3555 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3556 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3557 tem = expand_and (mode, tem, op1, NULL_RTX);
3558 adj_operand
3559 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3560 adj_operand);
3562 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3563 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3564 tem = expand_and (mode, tem, op0, NULL_RTX);
3565 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3566 target);
3568 return target;
3571 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3573 static rtx
3574 extract_high_half (machine_mode mode, rtx op)
3576 machine_mode wider_mode;
3578 if (mode == word_mode)
3579 return gen_highpart (mode, op);
3581 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3583 wider_mode = GET_MODE_WIDER_MODE (mode);
3584 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3585 GET_MODE_BITSIZE (mode), 0, 1);
3586 return convert_modes (mode, wider_mode, op, 0);
3589 /* Like expmed_mult_highpart, but only consider using a multiplication
3590 optab. OP1 is an rtx for the constant operand. */
3592 static rtx
3593 expmed_mult_highpart_optab (machine_mode mode, rtx op0, rtx op1,
3594 rtx target, int unsignedp, int max_cost)
3596 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3597 machine_mode wider_mode;
3598 optab moptab;
3599 rtx tem;
3600 int size;
3601 bool speed = optimize_insn_for_speed_p ();
3603 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3605 wider_mode = GET_MODE_WIDER_MODE (mode);
3606 size = GET_MODE_BITSIZE (mode);
3608 /* Firstly, try using a multiplication insn that only generates the needed
3609 high part of the product, and in the sign flavor of unsignedp. */
3610 if (mul_highpart_cost (speed, mode) < max_cost)
3612 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3613 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3614 unsignedp, OPTAB_DIRECT);
3615 if (tem)
3616 return tem;
3619 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3620 Need to adjust the result after the multiplication. */
3621 if (size - 1 < BITS_PER_WORD
3622 && (mul_highpart_cost (speed, mode)
3623 + 2 * shift_cost (speed, mode, size-1)
3624 + 4 * add_cost (speed, mode) < max_cost))
3626 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3627 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3628 unsignedp, OPTAB_DIRECT);
3629 if (tem)
3630 /* We used the wrong signedness. Adjust the result. */
3631 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3632 tem, unsignedp);
3635 /* Try widening multiplication. */
3636 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3637 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3638 && mul_widen_cost (speed, wider_mode) < max_cost)
3640 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3641 unsignedp, OPTAB_WIDEN);
3642 if (tem)
3643 return extract_high_half (mode, tem);
3646 /* Try widening the mode and perform a non-widening multiplication. */
3647 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3648 && size - 1 < BITS_PER_WORD
3649 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3650 < max_cost))
3652 rtx_insn *insns;
3653 rtx wop0, wop1;
3655 /* We need to widen the operands, for example to ensure the
3656 constant multiplier is correctly sign or zero extended.
3657 Use a sequence to clean-up any instructions emitted by
3658 the conversions if things don't work out. */
3659 start_sequence ();
3660 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3661 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3662 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3663 unsignedp, OPTAB_WIDEN);
3664 insns = get_insns ();
3665 end_sequence ();
3667 if (tem)
3669 emit_insn (insns);
3670 return extract_high_half (mode, tem);
3674 /* Try widening multiplication of opposite signedness, and adjust. */
3675 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3676 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3677 && size - 1 < BITS_PER_WORD
3678 && (mul_widen_cost (speed, wider_mode)
3679 + 2 * shift_cost (speed, mode, size-1)
3680 + 4 * add_cost (speed, mode) < max_cost))
3682 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3683 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3684 if (tem != 0)
3686 tem = extract_high_half (mode, tem);
3687 /* We used the wrong signedness. Adjust the result. */
3688 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3689 target, unsignedp);
3693 return 0;
3696 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3697 putting the high half of the result in TARGET if that is convenient,
3698 and return where the result is. If the operation can not be performed,
3699 0 is returned.
3701 MODE is the mode of operation and result.
3703 UNSIGNEDP nonzero means unsigned multiply.
3705 MAX_COST is the total allowed cost for the expanded RTL. */
3707 static rtx
3708 expmed_mult_highpart (machine_mode mode, rtx op0, rtx op1,
3709 rtx target, int unsignedp, int max_cost)
3711 machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3712 unsigned HOST_WIDE_INT cnst1;
3713 int extra_cost;
3714 bool sign_adjust = false;
3715 enum mult_variant variant;
3716 struct algorithm alg;
3717 rtx tem;
3718 bool speed = optimize_insn_for_speed_p ();
3720 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3721 /* We can't support modes wider than HOST_BITS_PER_INT. */
3722 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3724 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3726 /* We can't optimize modes wider than BITS_PER_WORD.
3727 ??? We might be able to perform double-word arithmetic if
3728 mode == word_mode, however all the cost calculations in
3729 synth_mult etc. assume single-word operations. */
3730 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3731 return expmed_mult_highpart_optab (mode, op0, op1, target,
3732 unsignedp, max_cost);
3734 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3736 /* Check whether we try to multiply by a negative constant. */
3737 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3739 sign_adjust = true;
3740 extra_cost += add_cost (speed, mode);
3743 /* See whether shift/add multiplication is cheap enough. */
3744 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3745 max_cost - extra_cost))
3747 /* See whether the specialized multiplication optabs are
3748 cheaper than the shift/add version. */
3749 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3750 alg.cost.cost + extra_cost);
3751 if (tem)
3752 return tem;
3754 tem = convert_to_mode (wider_mode, op0, unsignedp);
3755 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3756 tem = extract_high_half (mode, tem);
3758 /* Adjust result for signedness. */
3759 if (sign_adjust)
3760 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3762 return tem;
3764 return expmed_mult_highpart_optab (mode, op0, op1, target,
3765 unsignedp, max_cost);
3769 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3771 static rtx
3772 expand_smod_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3774 rtx result, temp, shift;
3775 rtx_code_label *label;
3776 int logd;
3777 int prec = GET_MODE_PRECISION (mode);
3779 logd = floor_log2 (d);
3780 result = gen_reg_rtx (mode);
3782 /* Avoid conditional branches when they're expensive. */
3783 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3784 && optimize_insn_for_speed_p ())
3786 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3787 mode, 0, -1);
3788 if (signmask)
3790 HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
3791 signmask = force_reg (mode, signmask);
3792 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3794 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3795 which instruction sequence to use. If logical right shifts
3796 are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3797 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
3799 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3800 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3801 || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
3802 > COSTS_N_INSNS (2)))
3804 temp = expand_binop (mode, xor_optab, op0, signmask,
3805 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3806 temp = expand_binop (mode, sub_optab, temp, signmask,
3807 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3808 temp = expand_binop (mode, and_optab, temp,
3809 gen_int_mode (masklow, mode),
3810 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3811 temp = expand_binop (mode, xor_optab, temp, signmask,
3812 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3813 temp = expand_binop (mode, sub_optab, temp, signmask,
3814 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3816 else
3818 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3819 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3820 signmask = force_reg (mode, signmask);
3822 temp = expand_binop (mode, add_optab, op0, signmask,
3823 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3824 temp = expand_binop (mode, and_optab, temp,
3825 gen_int_mode (masklow, mode),
3826 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3827 temp = expand_binop (mode, sub_optab, temp, signmask,
3828 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3830 return temp;
3834 /* Mask contains the mode's signbit and the significant bits of the
3835 modulus. By including the signbit in the operation, many targets
3836 can avoid an explicit compare operation in the following comparison
3837 against zero. */
3838 wide_int mask = wi::mask (logd, false, prec);
3839 mask = wi::set_bit (mask, prec - 1);
3841 temp = expand_binop (mode, and_optab, op0,
3842 immed_wide_int_const (mask, mode),
3843 result, 1, OPTAB_LIB_WIDEN);
3844 if (temp != result)
3845 emit_move_insn (result, temp);
3847 label = gen_label_rtx ();
3848 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3850 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3851 0, OPTAB_LIB_WIDEN);
3853 mask = wi::mask (logd, true, prec);
3854 temp = expand_binop (mode, ior_optab, temp,
3855 immed_wide_int_const (mask, mode),
3856 result, 1, OPTAB_LIB_WIDEN);
3857 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3858 0, OPTAB_LIB_WIDEN);
3859 if (temp != result)
3860 emit_move_insn (result, temp);
3861 emit_label (label);
3862 return result;
3865 /* Expand signed division of OP0 by a power of two D in mode MODE.
3866 This routine is only called for positive values of D. */
3868 static rtx
3869 expand_sdiv_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3871 rtx temp;
3872 rtx_code_label *label;
3873 int logd;
3875 logd = floor_log2 (d);
3877 if (d == 2
3878 && BRANCH_COST (optimize_insn_for_speed_p (),
3879 false) >= 1)
3881 temp = gen_reg_rtx (mode);
3882 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3883 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3884 0, OPTAB_LIB_WIDEN);
3885 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3888 if (HAVE_conditional_move
3889 && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
3891 rtx temp2;
3893 start_sequence ();
3894 temp2 = copy_to_mode_reg (mode, op0);
3895 temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
3896 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3897 temp = force_reg (mode, temp);
3899 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3900 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3901 mode, temp, temp2, mode, 0);
3902 if (temp2)
3904 rtx_insn *seq = get_insns ();
3905 end_sequence ();
3906 emit_insn (seq);
3907 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3909 end_sequence ();
3912 if (BRANCH_COST (optimize_insn_for_speed_p (),
3913 false) >= 2)
3915 int ushift = GET_MODE_BITSIZE (mode) - logd;
3917 temp = gen_reg_rtx (mode);
3918 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3919 if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
3920 || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3921 > COSTS_N_INSNS (1))
3922 temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
3923 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3924 else
3925 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3926 ushift, NULL_RTX, 1);
3927 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3928 0, OPTAB_LIB_WIDEN);
3929 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3932 label = gen_label_rtx ();
3933 temp = copy_to_mode_reg (mode, op0);
3934 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3935 expand_inc (temp, gen_int_mode (d - 1, mode));
3936 emit_label (label);
3937 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3940 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3941 if that is convenient, and returning where the result is.
3942 You may request either the quotient or the remainder as the result;
3943 specify REM_FLAG nonzero to get the remainder.
3945 CODE is the expression code for which kind of division this is;
3946 it controls how rounding is done. MODE is the machine mode to use.
3947 UNSIGNEDP nonzero means do unsigned division. */
3949 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3950 and then correct it by or'ing in missing high bits
3951 if result of ANDI is nonzero.
3952 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3953 This could optimize to a bfexts instruction.
3954 But C doesn't use these operations, so their optimizations are
3955 left for later. */
3956 /* ??? For modulo, we don't actually need the highpart of the first product,
3957 the low part will do nicely. And for small divisors, the second multiply
3958 can also be a low-part only multiply or even be completely left out.
3959 E.g. to calculate the remainder of a division by 3 with a 32 bit
3960 multiply, multiply with 0x55555556 and extract the upper two bits;
3961 the result is exact for inputs up to 0x1fffffff.
3962 The input range can be reduced by using cross-sum rules.
3963 For odd divisors >= 3, the following table gives right shift counts
3964 so that if a number is shifted by an integer multiple of the given
3965 amount, the remainder stays the same:
3966 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3967 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3968 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3969 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3970 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3972 Cross-sum rules for even numbers can be derived by leaving as many bits
3973 to the right alone as the divisor has zeros to the right.
3974 E.g. if x is an unsigned 32 bit number:
3975 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3979 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
3980 rtx op0, rtx op1, rtx target, int unsignedp)
3982 machine_mode compute_mode;
3983 rtx tquotient;
3984 rtx quotient = 0, remainder = 0;
3985 rtx_insn *last;
3986 int size;
3987 rtx_insn *insn;
3988 optab optab1, optab2;
3989 int op1_is_constant, op1_is_pow2 = 0;
3990 int max_cost, extra_cost;
3991 static HOST_WIDE_INT last_div_const = 0;
3992 bool speed = optimize_insn_for_speed_p ();
3994 op1_is_constant = CONST_INT_P (op1);
3995 if (op1_is_constant)
3997 unsigned HOST_WIDE_INT ext_op1 = UINTVAL (op1);
3998 if (unsignedp)
3999 ext_op1 &= GET_MODE_MASK (mode);
4000 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
4001 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
4005 This is the structure of expand_divmod:
4007 First comes code to fix up the operands so we can perform the operations
4008 correctly and efficiently.
4010 Second comes a switch statement with code specific for each rounding mode.
4011 For some special operands this code emits all RTL for the desired
4012 operation, for other cases, it generates only a quotient and stores it in
4013 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
4014 to indicate that it has not done anything.
4016 Last comes code that finishes the operation. If QUOTIENT is set and
4017 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
4018 QUOTIENT is not set, it is computed using trunc rounding.
4020 We try to generate special code for division and remainder when OP1 is a
4021 constant. If |OP1| = 2**n we can use shifts and some other fast
4022 operations. For other values of OP1, we compute a carefully selected
4023 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4024 by m.
4026 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4027 half of the product. Different strategies for generating the product are
4028 implemented in expmed_mult_highpart.
4030 If what we actually want is the remainder, we generate that by another
4031 by-constant multiplication and a subtraction. */
4033 /* We shouldn't be called with OP1 == const1_rtx, but some of the
4034 code below will malfunction if we are, so check here and handle
4035 the special case if so. */
4036 if (op1 == const1_rtx)
4037 return rem_flag ? const0_rtx : op0;
4039 /* When dividing by -1, we could get an overflow.
4040 negv_optab can handle overflows. */
4041 if (! unsignedp && op1 == constm1_rtx)
4043 if (rem_flag)
4044 return const0_rtx;
4045 return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4046 ? negv_optab : neg_optab, op0, target, 0);
4049 if (target
4050 /* Don't use the function value register as a target
4051 since we have to read it as well as write it,
4052 and function-inlining gets confused by this. */
4053 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4054 /* Don't clobber an operand while doing a multi-step calculation. */
4055 || ((rem_flag || op1_is_constant)
4056 && (reg_mentioned_p (target, op0)
4057 || (MEM_P (op0) && MEM_P (target))))
4058 || reg_mentioned_p (target, op1)
4059 || (MEM_P (op1) && MEM_P (target))))
4060 target = 0;
4062 /* Get the mode in which to perform this computation. Normally it will
4063 be MODE, but sometimes we can't do the desired operation in MODE.
4064 If so, pick a wider mode in which we can do the operation. Convert
4065 to that mode at the start to avoid repeated conversions.
4067 First see what operations we need. These depend on the expression
4068 we are evaluating. (We assume that divxx3 insns exist under the
4069 same conditions that modxx3 insns and that these insns don't normally
4070 fail. If these assumptions are not correct, we may generate less
4071 efficient code in some cases.)
4073 Then see if we find a mode in which we can open-code that operation
4074 (either a division, modulus, or shift). Finally, check for the smallest
4075 mode for which we can do the operation with a library call. */
4077 /* We might want to refine this now that we have division-by-constant
4078 optimization. Since expmed_mult_highpart tries so many variants, it is
4079 not straightforward to generalize this. Maybe we should make an array
4080 of possible modes in init_expmed? Save this for GCC 2.7. */
4082 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
4083 ? (unsignedp ? lshr_optab : ashr_optab)
4084 : (unsignedp ? udiv_optab : sdiv_optab));
4085 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
4086 ? optab1
4087 : (unsignedp ? udivmod_optab : sdivmod_optab));
4089 for (compute_mode = mode; compute_mode != VOIDmode;
4090 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4091 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4092 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4093 break;
4095 if (compute_mode == VOIDmode)
4096 for (compute_mode = mode; compute_mode != VOIDmode;
4097 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4098 if (optab_libfunc (optab1, compute_mode)
4099 || optab_libfunc (optab2, compute_mode))
4100 break;
4102 /* If we still couldn't find a mode, use MODE, but expand_binop will
4103 probably die. */
4104 if (compute_mode == VOIDmode)
4105 compute_mode = mode;
4107 if (target && GET_MODE (target) == compute_mode)
4108 tquotient = target;
4109 else
4110 tquotient = gen_reg_rtx (compute_mode);
4112 size = GET_MODE_BITSIZE (compute_mode);
4113 #if 0
4114 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4115 (mode), and thereby get better code when OP1 is a constant. Do that
4116 later. It will require going over all usages of SIZE below. */
4117 size = GET_MODE_BITSIZE (mode);
4118 #endif
4120 /* Only deduct something for a REM if the last divide done was
4121 for a different constant. Then set the constant of the last
4122 divide. */
4123 max_cost = (unsignedp
4124 ? udiv_cost (speed, compute_mode)
4125 : sdiv_cost (speed, compute_mode));
4126 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4127 && INTVAL (op1) == last_div_const))
4128 max_cost -= (mul_cost (speed, compute_mode)
4129 + add_cost (speed, compute_mode));
4131 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4133 /* Now convert to the best mode to use. */
4134 if (compute_mode != mode)
4136 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4137 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4139 /* convert_modes may have placed op1 into a register, so we
4140 must recompute the following. */
4141 op1_is_constant = CONST_INT_P (op1);
4142 op1_is_pow2 = (op1_is_constant
4143 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4144 || (! unsignedp
4145 && EXACT_POWER_OF_2_OR_ZERO_P (-UINTVAL (op1))))));
4148 /* If one of the operands is a volatile MEM, copy it into a register. */
4150 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4151 op0 = force_reg (compute_mode, op0);
4152 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4153 op1 = force_reg (compute_mode, op1);
4155 /* If we need the remainder or if OP1 is constant, we need to
4156 put OP0 in a register in case it has any queued subexpressions. */
4157 if (rem_flag || op1_is_constant)
4158 op0 = force_reg (compute_mode, op0);
4160 last = get_last_insn ();
4162 /* Promote floor rounding to trunc rounding for unsigned operations. */
4163 if (unsignedp)
4165 if (code == FLOOR_DIV_EXPR)
4166 code = TRUNC_DIV_EXPR;
4167 if (code == FLOOR_MOD_EXPR)
4168 code = TRUNC_MOD_EXPR;
4169 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4170 code = TRUNC_DIV_EXPR;
4173 if (op1 != const0_rtx)
4174 switch (code)
4176 case TRUNC_MOD_EXPR:
4177 case TRUNC_DIV_EXPR:
4178 if (op1_is_constant)
4180 if (unsignedp)
4182 unsigned HOST_WIDE_INT mh, ml;
4183 int pre_shift, post_shift;
4184 int dummy;
4185 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4186 & GET_MODE_MASK (compute_mode));
4188 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4190 pre_shift = floor_log2 (d);
4191 if (rem_flag)
4193 unsigned HOST_WIDE_INT mask
4194 = (HOST_WIDE_INT_1U << pre_shift) - 1;
4195 remainder
4196 = expand_binop (compute_mode, and_optab, op0,
4197 gen_int_mode (mask, compute_mode),
4198 remainder, 1,
4199 OPTAB_LIB_WIDEN);
4200 if (remainder)
4201 return gen_lowpart (mode, remainder);
4203 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4204 pre_shift, tquotient, 1);
4206 else if (size <= HOST_BITS_PER_WIDE_INT)
4208 if (d >= (HOST_WIDE_INT_1U << (size - 1)))
4210 /* Most significant bit of divisor is set; emit an scc
4211 insn. */
4212 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4213 compute_mode, 1, 1);
4215 else
4217 /* Find a suitable multiplier and right shift count
4218 instead of multiplying with D. */
4220 mh = choose_multiplier (d, size, size,
4221 &ml, &post_shift, &dummy);
4223 /* If the suggested multiplier is more than SIZE bits,
4224 we can do better for even divisors, using an
4225 initial right shift. */
4226 if (mh != 0 && (d & 1) == 0)
4228 pre_shift = ctz_or_zero (d);
4229 mh = choose_multiplier (d >> pre_shift, size,
4230 size - pre_shift,
4231 &ml, &post_shift, &dummy);
4232 gcc_assert (!mh);
4234 else
4235 pre_shift = 0;
4237 if (mh != 0)
4239 rtx t1, t2, t3, t4;
4241 if (post_shift - 1 >= BITS_PER_WORD)
4242 goto fail1;
4244 extra_cost
4245 = (shift_cost (speed, compute_mode, post_shift - 1)
4246 + shift_cost (speed, compute_mode, 1)
4247 + 2 * add_cost (speed, compute_mode));
4248 t1 = expmed_mult_highpart
4249 (compute_mode, op0,
4250 gen_int_mode (ml, compute_mode),
4251 NULL_RTX, 1, max_cost - extra_cost);
4252 if (t1 == 0)
4253 goto fail1;
4254 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4255 op0, t1),
4256 NULL_RTX);
4257 t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4258 t2, 1, NULL_RTX, 1);
4259 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4260 t1, t3),
4261 NULL_RTX);
4262 quotient = expand_shift
4263 (RSHIFT_EXPR, compute_mode, t4,
4264 post_shift - 1, tquotient, 1);
4266 else
4268 rtx t1, t2;
4270 if (pre_shift >= BITS_PER_WORD
4271 || post_shift >= BITS_PER_WORD)
4272 goto fail1;
4274 t1 = expand_shift
4275 (RSHIFT_EXPR, compute_mode, op0,
4276 pre_shift, NULL_RTX, 1);
4277 extra_cost
4278 = (shift_cost (speed, compute_mode, pre_shift)
4279 + shift_cost (speed, compute_mode, post_shift));
4280 t2 = expmed_mult_highpart
4281 (compute_mode, t1,
4282 gen_int_mode (ml, compute_mode),
4283 NULL_RTX, 1, max_cost - extra_cost);
4284 if (t2 == 0)
4285 goto fail1;
4286 quotient = expand_shift
4287 (RSHIFT_EXPR, compute_mode, t2,
4288 post_shift, tquotient, 1);
4292 else /* Too wide mode to use tricky code */
4293 break;
4295 insn = get_last_insn ();
4296 if (insn != last)
4297 set_dst_reg_note (insn, REG_EQUAL,
4298 gen_rtx_UDIV (compute_mode, op0, op1),
4299 quotient);
4301 else /* TRUNC_DIV, signed */
4303 unsigned HOST_WIDE_INT ml;
4304 int lgup, post_shift;
4305 rtx mlr;
4306 HOST_WIDE_INT d = INTVAL (op1);
4307 unsigned HOST_WIDE_INT abs_d;
4309 /* Since d might be INT_MIN, we have to cast to
4310 unsigned HOST_WIDE_INT before negating to avoid
4311 undefined signed overflow. */
4312 abs_d = (d >= 0
4313 ? (unsigned HOST_WIDE_INT) d
4314 : - (unsigned HOST_WIDE_INT) d);
4316 /* n rem d = n rem -d */
4317 if (rem_flag && d < 0)
4319 d = abs_d;
4320 op1 = gen_int_mode (abs_d, compute_mode);
4323 if (d == 1)
4324 quotient = op0;
4325 else if (d == -1)
4326 quotient = expand_unop (compute_mode, neg_optab, op0,
4327 tquotient, 0);
4328 else if (HOST_BITS_PER_WIDE_INT >= size
4329 && abs_d == HOST_WIDE_INT_1U << (size - 1))
4331 /* This case is not handled correctly below. */
4332 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4333 compute_mode, 1, 1);
4334 if (quotient == 0)
4335 goto fail1;
4337 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4338 && (rem_flag
4339 ? smod_pow2_cheap (speed, compute_mode)
4340 : sdiv_pow2_cheap (speed, compute_mode))
4341 /* We assume that cheap metric is true if the
4342 optab has an expander for this mode. */
4343 && ((optab_handler ((rem_flag ? smod_optab
4344 : sdiv_optab),
4345 compute_mode)
4346 != CODE_FOR_nothing)
4347 || (optab_handler (sdivmod_optab,
4348 compute_mode)
4349 != CODE_FOR_nothing)))
4351 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4353 if (rem_flag)
4355 remainder = expand_smod_pow2 (compute_mode, op0, d);
4356 if (remainder)
4357 return gen_lowpart (mode, remainder);
4360 if (sdiv_pow2_cheap (speed, compute_mode)
4361 && ((optab_handler (sdiv_optab, compute_mode)
4362 != CODE_FOR_nothing)
4363 || (optab_handler (sdivmod_optab, compute_mode)
4364 != CODE_FOR_nothing)))
4365 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4366 compute_mode, op0,
4367 gen_int_mode (abs_d,
4368 compute_mode),
4369 NULL_RTX, 0);
4370 else
4371 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4373 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4374 negate the quotient. */
4375 if (d < 0)
4377 insn = get_last_insn ();
4378 if (insn != last
4379 && abs_d < (HOST_WIDE_INT_1U
4380 << (HOST_BITS_PER_WIDE_INT - 1)))
4381 set_dst_reg_note (insn, REG_EQUAL,
4382 gen_rtx_DIV (compute_mode, op0,
4383 gen_int_mode
4384 (abs_d,
4385 compute_mode)),
4386 quotient);
4388 quotient = expand_unop (compute_mode, neg_optab,
4389 quotient, quotient, 0);
4392 else if (size <= HOST_BITS_PER_WIDE_INT)
4394 choose_multiplier (abs_d, size, size - 1,
4395 &ml, &post_shift, &lgup);
4396 if (ml < HOST_WIDE_INT_1U << (size - 1))
4398 rtx t1, t2, t3;
4400 if (post_shift >= BITS_PER_WORD
4401 || size - 1 >= BITS_PER_WORD)
4402 goto fail1;
4404 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4405 + shift_cost (speed, compute_mode, size - 1)
4406 + add_cost (speed, compute_mode));
4407 t1 = expmed_mult_highpart
4408 (compute_mode, op0, gen_int_mode (ml, compute_mode),
4409 NULL_RTX, 0, max_cost - extra_cost);
4410 if (t1 == 0)
4411 goto fail1;
4412 t2 = expand_shift
4413 (RSHIFT_EXPR, compute_mode, t1,
4414 post_shift, NULL_RTX, 0);
4415 t3 = expand_shift
4416 (RSHIFT_EXPR, compute_mode, op0,
4417 size - 1, NULL_RTX, 0);
4418 if (d < 0)
4419 quotient
4420 = force_operand (gen_rtx_MINUS (compute_mode,
4421 t3, t2),
4422 tquotient);
4423 else
4424 quotient
4425 = force_operand (gen_rtx_MINUS (compute_mode,
4426 t2, t3),
4427 tquotient);
4429 else
4431 rtx t1, t2, t3, t4;
4433 if (post_shift >= BITS_PER_WORD
4434 || size - 1 >= BITS_PER_WORD)
4435 goto fail1;
4437 ml |= HOST_WIDE_INT_M1U << (size - 1);
4438 mlr = gen_int_mode (ml, compute_mode);
4439 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4440 + shift_cost (speed, compute_mode, size - 1)
4441 + 2 * add_cost (speed, compute_mode));
4442 t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4443 NULL_RTX, 0,
4444 max_cost - extra_cost);
4445 if (t1 == 0)
4446 goto fail1;
4447 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4448 t1, op0),
4449 NULL_RTX);
4450 t3 = expand_shift
4451 (RSHIFT_EXPR, compute_mode, t2,
4452 post_shift, NULL_RTX, 0);
4453 t4 = expand_shift
4454 (RSHIFT_EXPR, compute_mode, op0,
4455 size - 1, NULL_RTX, 0);
4456 if (d < 0)
4457 quotient
4458 = force_operand (gen_rtx_MINUS (compute_mode,
4459 t4, t3),
4460 tquotient);
4461 else
4462 quotient
4463 = force_operand (gen_rtx_MINUS (compute_mode,
4464 t3, t4),
4465 tquotient);
4468 else /* Too wide mode to use tricky code */
4469 break;
4471 insn = get_last_insn ();
4472 if (insn != last)
4473 set_dst_reg_note (insn, REG_EQUAL,
4474 gen_rtx_DIV (compute_mode, op0, op1),
4475 quotient);
4477 break;
4479 fail1:
4480 delete_insns_since (last);
4481 break;
4483 case FLOOR_DIV_EXPR:
4484 case FLOOR_MOD_EXPR:
4485 /* We will come here only for signed operations. */
4486 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4488 unsigned HOST_WIDE_INT mh, ml;
4489 int pre_shift, lgup, post_shift;
4490 HOST_WIDE_INT d = INTVAL (op1);
4492 if (d > 0)
4494 /* We could just as easily deal with negative constants here,
4495 but it does not seem worth the trouble for GCC 2.6. */
4496 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4498 pre_shift = floor_log2 (d);
4499 if (rem_flag)
4501 unsigned HOST_WIDE_INT mask
4502 = (HOST_WIDE_INT_1U << pre_shift) - 1;
4503 remainder = expand_binop
4504 (compute_mode, and_optab, op0,
4505 gen_int_mode (mask, compute_mode),
4506 remainder, 0, OPTAB_LIB_WIDEN);
4507 if (remainder)
4508 return gen_lowpart (mode, remainder);
4510 quotient = expand_shift
4511 (RSHIFT_EXPR, compute_mode, op0,
4512 pre_shift, tquotient, 0);
4514 else
4516 rtx t1, t2, t3, t4;
4518 mh = choose_multiplier (d, size, size - 1,
4519 &ml, &post_shift, &lgup);
4520 gcc_assert (!mh);
4522 if (post_shift < BITS_PER_WORD
4523 && size - 1 < BITS_PER_WORD)
4525 t1 = expand_shift
4526 (RSHIFT_EXPR, compute_mode, op0,
4527 size - 1, NULL_RTX, 0);
4528 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4529 NULL_RTX, 0, OPTAB_WIDEN);
4530 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4531 + shift_cost (speed, compute_mode, size - 1)
4532 + 2 * add_cost (speed, compute_mode));
4533 t3 = expmed_mult_highpart
4534 (compute_mode, t2, gen_int_mode (ml, compute_mode),
4535 NULL_RTX, 1, max_cost - extra_cost);
4536 if (t3 != 0)
4538 t4 = expand_shift
4539 (RSHIFT_EXPR, compute_mode, t3,
4540 post_shift, NULL_RTX, 1);
4541 quotient = expand_binop (compute_mode, xor_optab,
4542 t4, t1, tquotient, 0,
4543 OPTAB_WIDEN);
4548 else
4550 rtx nsign, t1, t2, t3, t4;
4551 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4552 op0, constm1_rtx), NULL_RTX);
4553 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4554 0, OPTAB_WIDEN);
4555 nsign = expand_shift
4556 (RSHIFT_EXPR, compute_mode, t2,
4557 size - 1, NULL_RTX, 0);
4558 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4559 NULL_RTX);
4560 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4561 NULL_RTX, 0);
4562 if (t4)
4564 rtx t5;
4565 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4566 NULL_RTX, 0);
4567 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4568 t4, t5),
4569 tquotient);
4574 if (quotient != 0)
4575 break;
4576 delete_insns_since (last);
4578 /* Try using an instruction that produces both the quotient and
4579 remainder, using truncation. We can easily compensate the quotient
4580 or remainder to get floor rounding, once we have the remainder.
4581 Notice that we compute also the final remainder value here,
4582 and return the result right away. */
4583 if (target == 0 || GET_MODE (target) != compute_mode)
4584 target = gen_reg_rtx (compute_mode);
4586 if (rem_flag)
4588 remainder
4589 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4590 quotient = gen_reg_rtx (compute_mode);
4592 else
4594 quotient
4595 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4596 remainder = gen_reg_rtx (compute_mode);
4599 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4600 quotient, remainder, 0))
4602 /* This could be computed with a branch-less sequence.
4603 Save that for later. */
4604 rtx tem;
4605 rtx_code_label *label = gen_label_rtx ();
4606 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4607 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4608 NULL_RTX, 0, OPTAB_WIDEN);
4609 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4610 expand_dec (quotient, const1_rtx);
4611 expand_inc (remainder, op1);
4612 emit_label (label);
4613 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4616 /* No luck with division elimination or divmod. Have to do it
4617 by conditionally adjusting op0 *and* the result. */
4619 rtx_code_label *label1, *label2, *label3, *label4, *label5;
4620 rtx adjusted_op0;
4621 rtx tem;
4623 quotient = gen_reg_rtx (compute_mode);
4624 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4625 label1 = gen_label_rtx ();
4626 label2 = gen_label_rtx ();
4627 label3 = gen_label_rtx ();
4628 label4 = gen_label_rtx ();
4629 label5 = gen_label_rtx ();
4630 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4631 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4632 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4633 quotient, 0, OPTAB_LIB_WIDEN);
4634 if (tem != quotient)
4635 emit_move_insn (quotient, tem);
4636 emit_jump_insn (targetm.gen_jump (label5));
4637 emit_barrier ();
4638 emit_label (label1);
4639 expand_inc (adjusted_op0, const1_rtx);
4640 emit_jump_insn (targetm.gen_jump (label4));
4641 emit_barrier ();
4642 emit_label (label2);
4643 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4644 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4645 quotient, 0, OPTAB_LIB_WIDEN);
4646 if (tem != quotient)
4647 emit_move_insn (quotient, tem);
4648 emit_jump_insn (targetm.gen_jump (label5));
4649 emit_barrier ();
4650 emit_label (label3);
4651 expand_dec (adjusted_op0, const1_rtx);
4652 emit_label (label4);
4653 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4654 quotient, 0, OPTAB_LIB_WIDEN);
4655 if (tem != quotient)
4656 emit_move_insn (quotient, tem);
4657 expand_dec (quotient, const1_rtx);
4658 emit_label (label5);
4660 break;
4662 case CEIL_DIV_EXPR:
4663 case CEIL_MOD_EXPR:
4664 if (unsignedp)
4666 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4668 rtx t1, t2, t3;
4669 unsigned HOST_WIDE_INT d = INTVAL (op1);
4670 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4671 floor_log2 (d), tquotient, 1);
4672 t2 = expand_binop (compute_mode, and_optab, op0,
4673 gen_int_mode (d - 1, compute_mode),
4674 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4675 t3 = gen_reg_rtx (compute_mode);
4676 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4677 compute_mode, 1, 1);
4678 if (t3 == 0)
4680 rtx_code_label *lab;
4681 lab = gen_label_rtx ();
4682 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4683 expand_inc (t1, const1_rtx);
4684 emit_label (lab);
4685 quotient = t1;
4687 else
4688 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4689 t1, t3),
4690 tquotient);
4691 break;
4694 /* Try using an instruction that produces both the quotient and
4695 remainder, using truncation. We can easily compensate the
4696 quotient or remainder to get ceiling rounding, once we have the
4697 remainder. Notice that we compute also the final remainder
4698 value here, and return the result right away. */
4699 if (target == 0 || GET_MODE (target) != compute_mode)
4700 target = gen_reg_rtx (compute_mode);
4702 if (rem_flag)
4704 remainder = (REG_P (target)
4705 ? target : gen_reg_rtx (compute_mode));
4706 quotient = gen_reg_rtx (compute_mode);
4708 else
4710 quotient = (REG_P (target)
4711 ? target : gen_reg_rtx (compute_mode));
4712 remainder = gen_reg_rtx (compute_mode);
4715 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4716 remainder, 1))
4718 /* This could be computed with a branch-less sequence.
4719 Save that for later. */
4720 rtx_code_label *label = gen_label_rtx ();
4721 do_cmp_and_jump (remainder, const0_rtx, EQ,
4722 compute_mode, label);
4723 expand_inc (quotient, const1_rtx);
4724 expand_dec (remainder, op1);
4725 emit_label (label);
4726 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4729 /* No luck with division elimination or divmod. Have to do it
4730 by conditionally adjusting op0 *and* the result. */
4732 rtx_code_label *label1, *label2;
4733 rtx adjusted_op0, tem;
4735 quotient = gen_reg_rtx (compute_mode);
4736 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4737 label1 = gen_label_rtx ();
4738 label2 = gen_label_rtx ();
4739 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4740 compute_mode, label1);
4741 emit_move_insn (quotient, const0_rtx);
4742 emit_jump_insn (targetm.gen_jump (label2));
4743 emit_barrier ();
4744 emit_label (label1);
4745 expand_dec (adjusted_op0, const1_rtx);
4746 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4747 quotient, 1, OPTAB_LIB_WIDEN);
4748 if (tem != quotient)
4749 emit_move_insn (quotient, tem);
4750 expand_inc (quotient, const1_rtx);
4751 emit_label (label2);
4754 else /* signed */
4756 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4757 && INTVAL (op1) >= 0)
4759 /* This is extremely similar to the code for the unsigned case
4760 above. For 2.7 we should merge these variants, but for
4761 2.6.1 I don't want to touch the code for unsigned since that
4762 get used in C. The signed case will only be used by other
4763 languages (Ada). */
4765 rtx t1, t2, t3;
4766 unsigned HOST_WIDE_INT d = INTVAL (op1);
4767 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4768 floor_log2 (d), tquotient, 0);
4769 t2 = expand_binop (compute_mode, and_optab, op0,
4770 gen_int_mode (d - 1, compute_mode),
4771 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4772 t3 = gen_reg_rtx (compute_mode);
4773 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4774 compute_mode, 1, 1);
4775 if (t3 == 0)
4777 rtx_code_label *lab;
4778 lab = gen_label_rtx ();
4779 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4780 expand_inc (t1, const1_rtx);
4781 emit_label (lab);
4782 quotient = t1;
4784 else
4785 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4786 t1, t3),
4787 tquotient);
4788 break;
4791 /* Try using an instruction that produces both the quotient and
4792 remainder, using truncation. We can easily compensate the
4793 quotient or remainder to get ceiling rounding, once we have the
4794 remainder. Notice that we compute also the final remainder
4795 value here, and return the result right away. */
4796 if (target == 0 || GET_MODE (target) != compute_mode)
4797 target = gen_reg_rtx (compute_mode);
4798 if (rem_flag)
4800 remainder= (REG_P (target)
4801 ? target : gen_reg_rtx (compute_mode));
4802 quotient = gen_reg_rtx (compute_mode);
4804 else
4806 quotient = (REG_P (target)
4807 ? target : gen_reg_rtx (compute_mode));
4808 remainder = gen_reg_rtx (compute_mode);
4811 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4812 remainder, 0))
4814 /* This could be computed with a branch-less sequence.
4815 Save that for later. */
4816 rtx tem;
4817 rtx_code_label *label = gen_label_rtx ();
4818 do_cmp_and_jump (remainder, const0_rtx, EQ,
4819 compute_mode, label);
4820 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4821 NULL_RTX, 0, OPTAB_WIDEN);
4822 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4823 expand_inc (quotient, const1_rtx);
4824 expand_dec (remainder, op1);
4825 emit_label (label);
4826 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4829 /* No luck with division elimination or divmod. Have to do it
4830 by conditionally adjusting op0 *and* the result. */
4832 rtx_code_label *label1, *label2, *label3, *label4, *label5;
4833 rtx adjusted_op0;
4834 rtx tem;
4836 quotient = gen_reg_rtx (compute_mode);
4837 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4838 label1 = gen_label_rtx ();
4839 label2 = gen_label_rtx ();
4840 label3 = gen_label_rtx ();
4841 label4 = gen_label_rtx ();
4842 label5 = gen_label_rtx ();
4843 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4844 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4845 compute_mode, label1);
4846 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4847 quotient, 0, OPTAB_LIB_WIDEN);
4848 if (tem != quotient)
4849 emit_move_insn (quotient, tem);
4850 emit_jump_insn (targetm.gen_jump (label5));
4851 emit_barrier ();
4852 emit_label (label1);
4853 expand_dec (adjusted_op0, const1_rtx);
4854 emit_jump_insn (targetm.gen_jump (label4));
4855 emit_barrier ();
4856 emit_label (label2);
4857 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4858 compute_mode, label3);
4859 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4860 quotient, 0, OPTAB_LIB_WIDEN);
4861 if (tem != quotient)
4862 emit_move_insn (quotient, tem);
4863 emit_jump_insn (targetm.gen_jump (label5));
4864 emit_barrier ();
4865 emit_label (label3);
4866 expand_inc (adjusted_op0, const1_rtx);
4867 emit_label (label4);
4868 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4869 quotient, 0, OPTAB_LIB_WIDEN);
4870 if (tem != quotient)
4871 emit_move_insn (quotient, tem);
4872 expand_inc (quotient, const1_rtx);
4873 emit_label (label5);
4876 break;
4878 case EXACT_DIV_EXPR:
4879 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4881 HOST_WIDE_INT d = INTVAL (op1);
4882 unsigned HOST_WIDE_INT ml;
/* NOTE(review): this span is the tail of a division/modulus expander
   (expand_divmod); the function header, earlier cases, and the
   declarations of op0/op1, quotient/remainder, rem_flag, unsignedp,
   size, compute_mode, optab2 etc. are above this excerpt.  Comments
   here describe only what is visible.  */
4883 int pre_shift;
4884 rtx t1;
/* Exact division by an even divisor D: shift out the known-zero low
   bits, then multiply by the multiplicative inverse of the odd part
   of D modulo 2**size.  */
4886 pre_shift = ctz_or_zero (d);
4887 ml = invert_mod2n (d >> pre_shift, size);
4888 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4889 pre_shift, NULL_RTX, unsignedp);
4890 quotient = expand_mult (compute_mode, t1,
4891 gen_int_mode (ml, compute_mode),
4892 NULL_RTX, 1);
/* Record that this shift/multiply sequence computes op0 / op1, so
   later RTL passes can CSE it against a real division.  */
4894 insn = get_last_insn ();
4895 set_dst_reg_note (insn, REG_EQUAL,
4896 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4897 compute_mode, op0, op1),
4898 quotient);
4900 break;
/* Round-to-nearest division/modulus: compute a truncating divmod,
   then adjust quotient and remainder when the remainder exceeds half
   the divisor in magnitude.  */
4902 case ROUND_DIV_EXPR:
4903 case ROUND_MOD_EXPR:
4904 if (unsignedp)
4906 rtx tem;
4907 rtx_code_label *label;
4908 label = gen_label_rtx ();
4909 quotient = gen_reg_rtx (compute_mode);
4910 remainder = gen_reg_rtx (compute_mode);
/* Prefer a single divmod insn; otherwise divide and multiply-subtract
   to recover the remainder.  */
4911 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4913 rtx tem;
4914 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4915 quotient, 1, OPTAB_LIB_WIDEN);
4916 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4917 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4918 remainder, 1, OPTAB_LIB_WIDEN);
/* If remainder <= (op1 - 1) >> 1, i.e. remainder <= floor(op1/2),
   truncation already rounded to nearest; otherwise bump the quotient
   and pull op1 back out of the remainder.  */
4920 tem = plus_constant (compute_mode, op1, -1);
4921 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4922 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4923 expand_inc (quotient, const1_rtx);
4924 expand_dec (remainder, op1);
4925 emit_label (label);
4927 else
4929 rtx abs_rem, abs_op1, tem, mask;
4930 rtx_code_label *label;
4931 label = gen_label_rtx ();
4932 quotient = gen_reg_rtx (compute_mode);
4933 remainder = gen_reg_rtx (compute_mode);
4934 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4936 rtx tem;
4937 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4938 quotient, 0, OPTAB_LIB_WIDEN);
4939 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4940 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4941 remainder, 0, OPTAB_LIB_WIDEN);
/* Signed case: adjust only when 2*|remainder| >= |op1|.  MASK below is
   0 or -1 depending on whether op0 and op1 have the same sign, so the
   xor/sub pairs conditionally negate the +1 adjustment and op1.  */
4943 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4944 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4945 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4946 1, NULL_RTX, 1);
4947 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4948 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4949 NULL_RTX, 0, OPTAB_WIDEN);
4950 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4951 size - 1, NULL_RTX, 0);
4952 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4953 NULL_RTX, 0, OPTAB_WIDEN);
4954 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4955 NULL_RTX, 0, OPTAB_WIDEN);
4956 expand_inc (quotient, tem);
4957 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4958 NULL_RTX, 0, OPTAB_WIDEN);
4959 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4960 NULL_RTX, 0, OPTAB_WIDEN);
4961 expand_dec (remainder, tem);
4962 emit_label (label);
4964 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4966 default:
4967 gcc_unreachable ();
/* None of the special cases produced the quotient: fall back to
   generic strategies — direct mod/div insns, divmod insns, then
   library calls.  */
4970 if (quotient == 0)
4972 if (target && GET_MODE (target) != compute_mode)
4973 target = 0;
4975 if (rem_flag)
4977 /* Try to produce the remainder without producing the quotient.
4978 If we seem to have a divmod pattern that does not require widening,
4979 don't try widening here. We should really have a WIDEN argument
4980 to expand_twoval_binop, since what we'd really like to do here is
4981 1) try a mod insn in compute_mode
4982 2) try a divmod insn in compute_mode
4983 3) try a div insn in compute_mode and multiply-subtract to get
4984 remainder
4985 4) try the same things with widening allowed. */
4986 remainder
4987 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4988 op0, op1, target,
4989 unsignedp,
4990 ((optab_handler (optab2, compute_mode)
4991 != CODE_FOR_nothing)
4992 ? OPTAB_DIRECT : OPTAB_WIDEN));
4993 if (remainder == 0)
4995 /* No luck there. Can we do remainder and divide at once
4996 without a library call? */
4997 remainder = gen_reg_rtx (compute_mode);
4998 if (! expand_twoval_binop ((unsignedp
4999 ? udivmod_optab
5000 : sdivmod_optab),
5001 op0, op1,
5002 NULL_RTX, remainder, unsignedp))
5003 remainder = 0;
5006 if (remainder)
5007 return gen_lowpart (mode, remainder);
5010 /* Produce the quotient. Try a quotient insn, but not a library call.
5011 If we have a divmod in this mode, use it in preference to widening
5012 the div (for this test we assume it will not fail). Note that optab2
5013 is set to the one of the two optabs that the call below will use. */
5014 quotient
5015 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5016 op0, op1, rem_flag ? NULL_RTX : target,
5017 unsignedp,
5018 ((optab_handler (optab2, compute_mode)
5019 != CODE_FOR_nothing)
5020 ? OPTAB_DIRECT : OPTAB_WIDEN));
5022 if (quotient == 0)
5024 /* No luck there. Try a quotient-and-remainder insn,
5025 keeping the quotient alone. */
5026 quotient = gen_reg_rtx (compute_mode);
5027 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5028 op0, op1,
5029 quotient, NULL_RTX, unsignedp))
5031 quotient = 0;
5032 if (! rem_flag)
5033 /* Still no luck. If we are not computing the remainder,
5034 use a library call for the quotient. */
5035 quotient = sign_expand_binop (compute_mode,
5036 udiv_optab, sdiv_optab,
5037 op0, op1, target,
5038 unsignedp, OPTAB_LIB_WIDEN);
/* The caller wants the remainder; derive it from the quotient we have
   (or, failing that, via a library mod / divmod call).  */
5043 if (rem_flag)
5045 if (target && GET_MODE (target) != compute_mode)
5046 target = 0;
5048 if (quotient == 0)
5050 /* No divide instruction either. Use library for remainder. */
5051 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5052 op0, op1, target,
5053 unsignedp, OPTAB_LIB_WIDEN);
5054 /* No remainder function. Try a quotient-and-remainder
5055 function, keeping the remainder. */
5056 if (!remainder)
5058 remainder = gen_reg_rtx (compute_mode);
5059 if (!expand_twoval_binop_libfunc
5060 (unsignedp ? udivmod_optab : sdivmod_optab,
5061 op0, op1,
5062 NULL_RTX, remainder,
5063 unsignedp ? UMOD : MOD))
5064 remainder = NULL_RTX;
5067 else
5069 /* We divided. Now finish doing X - Y * (X / Y). */
5070 remainder = expand_mult (compute_mode, quotient, op1,
5071 NULL_RTX, unsignedp);
5072 remainder = expand_binop (compute_mode, sub_optab, op0,
5073 remainder, target, unsignedp,
5074 OPTAB_LIB_WIDEN);
5078 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5081 /* Return a tree node with data type TYPE, describing the value of X.
5082 Usually this is an VAR_DECL, if there is no obvious better choice.
5083 X may be an expression, however we only support those expressions
5084 generated by loop.c. */
5086 tree
5087 make_tree (tree type, rtx x)
5089 tree t;
5091 switch (GET_CODE (x))
5093 case CONST_INT:
5094 case CONST_WIDE_INT:
5095 t = wide_int_to_tree (type, rtx_mode_t (x, TYPE_MODE (type)));
5096 return t;
/* A VOIDmode CONST_DOUBLE (on !TARGET_SUPPORTS_WIDE_INT targets) holds
   a two-HOST_WIDE_INT integer; otherwise it is a real constant.  */
5098 case CONST_DOUBLE:
5099 STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5100 if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5101 t = wide_int_to_tree (type,
5102 wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5103 HOST_BITS_PER_WIDE_INT * 2))
5104 else
5105 t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5107 return t;
5109 case CONST_VECTOR:
5111 int units = CONST_VECTOR_NUNITS (x);
5112 tree itype = TREE_TYPE (type);
5113 tree *elts;
5114 int i;
5116 /* Build a tree with vector elements. */
5117 elts = XALLOCAVEC (tree, units);
5118 for (i = units - 1; i >= 0; --i)
5120 rtx elt = CONST_VECTOR_ELT (x, i);
5121 elts[i] = make_tree (itype, elt);
5124 return build_vector (type, elts);
5127 case PLUS:
5128 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5129 make_tree (type, XEXP (x, 1)));
5131 case MINUS:
5132 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5133 make_tree (type, XEXP (x, 1)));
5135 case NEG:
5136 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5138 case MULT:
5139 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5140 make_tree (type, XEXP (x, 1)));
5142 case ASHIFT:
5143 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5144 make_tree (type, XEXP (x, 1)));
/* RSHIFT_EXPR's signedness follows its operand type, so a logical
   shift must be built in the unsigned variant of TYPE ...  */
5146 case LSHIFTRT:
5147 t = unsigned_type_for (type);
5148 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5149 make_tree (t, XEXP (x, 0)),
5150 make_tree (type, XEXP (x, 1))));
/* ... and an arithmetic shift in the signed variant.  */
5152 case ASHIFTRT:
5153 t = signed_type_for (type);
5154 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5155 make_tree (t, XEXP (x, 0)),
5156 make_tree (type, XEXP (x, 1))));
/* Likewise DIV/UDIV select the signed/unsigned variant of TYPE
   (leaving REAL_TYPE alone for DIV).  */
5158 case DIV:
5159 if (TREE_CODE (type) != REAL_TYPE)
5160 t = signed_type_for (type);
5161 else
5162 t = type;
5164 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5165 make_tree (t, XEXP (x, 0)),
5166 make_tree (t, XEXP (x, 1))));
5167 case UDIV:
5168 t = unsigned_type_for (type);
5169 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5170 make_tree (t, XEXP (x, 0)),
5171 make_tree (t, XEXP (x, 1))));
5173 case SIGN_EXTEND:
5174 case ZERO_EXTEND:
5175 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5176 GET_CODE (x) == ZERO_EXTEND);
5177 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5179 case CONST:
5180 return make_tree (type, XEXP (x, 0));
/* A SYMBOL_REF with a known decl becomes &decl; otherwise fall
   through to the generic VAR_DECL wrapper below.  */
5182 case SYMBOL_REF:
5183 t = SYMBOL_REF_DECL (x);
5184 if (t)
5185 return fold_convert (type, build_fold_addr_expr (t));
5186 /* fall through. */
/* Anything else: wrap X in an anonymous VAR_DECL of TYPE.  */
5188 default:
5189 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5191 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5192 address mode to pointer mode. */
5193 if (POINTER_TYPE_P (type))
5194 x = convert_memory_address_addr_space
5195 (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5197 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5198 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5199 t->decl_with_rtl.rtl = x;
5201 return t;
5205 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5206 and returning TARGET.
5208 If TARGET is 0, a pseudo-register or constant is returned. */
5211 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5213 rtx tem = 0;
/* If both operands are constants (VOIDmode), try to fold the AND at
   compile time before emitting any insns.  */
5215 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5216 tem = simplify_binary_operation (AND, mode, op0, op1);
5217 if (tem == 0)
5218 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
/* Copy the result into TARGET if the expander put it elsewhere.  */
5220 if (target == 0)
5221 target = tem;
5222 else if (tem != target)
5223 emit_move_insn (target, tem);
5224 return target;
5227 /* Helper function for emit_store_flag. */
/* Emit the cstore pattern ICODE for comparison CODE on X and Y, place
   the (possibly normalized) flag value in TARGET, and return it.
   Returns NULL_RTX (after deleting any partially emitted insns) if the
   pattern cannot be used for these operands.  NORMALIZEP has the same
   meaning as for emit_store_flag.  */
5229 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5230 machine_mode mode, machine_mode compare_mode,
5231 int unsignedp, rtx x, rtx y, int normalizep,
5232 machine_mode target_mode)
5234 struct expand_operand ops[4];
5235 rtx op0, comparison, subtarget;
5236 rtx_insn *last;
5237 machine_mode result_mode = targetm.cstore_mode (icode);
/* Legitimize both comparison operands for the pattern; bail out (and
   discard any insns emitted while doing so) if either fails.  */
5239 last = get_last_insn ();
5240 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5241 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5242 if (!x || !y)
5244 delete_insns_since (last);
5245 return NULL_RTX;
5248 if (target_mode == VOIDmode)
5249 target_mode = result_mode;
5250 if (!target)
5251 target = gen_reg_rtx (target_mode);
5253 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5255 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5256 create_fixed_operand (&ops[1], comparison);
5257 create_fixed_operand (&ops[2], x);
5258 create_fixed_operand (&ops[3], y);
5259 if (!maybe_expand_insn (icode, 4, ops))
5261 delete_insns_since (last);
5262 return NULL_RTX;
5264 subtarget = ops[0].value;
5266 /* If we are converting to a wider mode, first convert to
5267 TARGET_MODE, then normalize. This produces better combining
5268 opportunities on machines that have a SIGN_EXTRACT when we are
5269 testing a single bit. This mostly benefits the 68k.
5271 If STORE_FLAG_VALUE does not have the sign bit set when
5272 interpreted in MODE, we can do this conversion as unsigned, which
5273 is usually more efficient. */
5274 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5276 convert_move (target, subtarget,
5277 val_signbit_known_clear_p (result_mode,
5278 STORE_FLAG_VALUE));
5279 op0 = target;
5280 result_mode = target_mode;
5282 else
5283 op0 = subtarget;
5285 /* If we want to keep subexpressions around, don't reuse our last
5286 target. */
5287 if (optimize)
5288 subtarget = 0;
5290 /* Now normalize to the proper value in MODE. Sometimes we don't
5291 have to do anything. */
5292 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5294 /* STORE_FLAG_VALUE might be the most negative number, so write
5295 the comparison this way to avoid a compiler-time warning. */
5296 else if (- normalizep == STORE_FLAG_VALUE)
5297 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5299 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5300 it hard to use a value of just the sign bit due to ANSI integer
5301 constant typing rules. */
5302 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5303 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5304 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5305 normalizep == 1);
5306 else
5308 gcc_assert (STORE_FLAG_VALUE & 1);
/* STORE_FLAG_VALUE is odd: mask down to bit 0, then negate for a
   0/-1 result if NORMALIZEP is -1.  */
5310 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5311 if (normalizep == -1)
5312 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5315 /* If we were converting to a smaller mode, do the conversion now. */
5316 if (target_mode != result_mode)
5318 convert_move (target, op0, 0);
5319 return target;
5321 else
5322 return op0;
5326 /* A subroutine of emit_store_flag only including "tricks" that do not
5327 need a recursive call. These are kept separate to avoid infinite
5328 loops. */
5330 static rtx
5331 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5332 machine_mode mode, int unsignedp, int normalizep,
5333 machine_mode target_mode)
5335 rtx subtarget;
5336 enum insn_code icode;
5337 machine_mode compare_mode;
5338 enum mode_class mclass;
5339 enum rtx_code scode;
/* SCODE is CODE with its operands swapped, used below for the
   reversed floating-point cstore attempt.  */
5341 if (unsignedp)
5342 code = unsigned_condition (code);
5343 scode = swap_condition (code);
5345 /* If one operand is constant, make it the second one. Only do this
5346 if the other operand is not constant as well. */
5348 if (swap_commutative_operands_p (op0, op1))
5350 std::swap (op0, op1);
5351 code = swap_condition (code);
5354 if (mode == VOIDmode)
5355 mode = GET_MODE (op0);
5357 /* For some comparisons with 1 and -1, we can convert this to
5358 comparisons with zero. This will often produce more opportunities for
5359 store-flag insns. */
5361 switch (code)
5363 case LT:
5364 if (op1 == const1_rtx)
5365 op1 = const0_rtx, code = LE;
5366 break;
5367 case LE:
5368 if (op1 == constm1_rtx)
5369 op1 = const0_rtx, code = LT;
5370 break;
5371 case GE:
5372 if (op1 == const1_rtx)
5373 op1 = const0_rtx, code = GT;
5374 break;
5375 case GT:
5376 if (op1 == constm1_rtx)
5377 op1 = const0_rtx, code = GE;
5378 break;
5379 case GEU:
5380 if (op1 == const1_rtx)
5381 op1 = const0_rtx, code = NE;
5382 break;
5383 case LTU:
5384 if (op1 == const1_rtx)
5385 op1 = const0_rtx, code = EQ;
5386 break;
5387 default:
5388 break;
5391 /* If we are comparing a double-word integer with zero or -1, we can
5392 convert the comparison into one involving a single word. */
5393 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5394 && GET_MODE_CLASS (mode) == MODE_INT
5395 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5397 rtx tem;
5398 if ((code == EQ || code == NE)
5399 && (op1 == const0_rtx || op1 == constm1_rtx))
5401 rtx op00, op01;
5403 /* Do a logical OR or AND of the two words and compare the
5404 result. */
5405 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5406 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5407 tem = expand_binop (word_mode,
5408 op1 == const0_rtx ? ior_optab : and_optab,
5409 op00, op01, NULL_RTX, unsignedp,
5410 OPTAB_DIRECT);
5412 if (tem != 0)
5413 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5414 unsignedp, normalizep);
5416 else if ((code == LT || code == GE) && op1 == const0_rtx)
5418 rtx op0h;
5420 /* If testing the sign bit, can just test on high word. */
5421 op0h = simplify_gen_subreg (word_mode, op0, mode,
5422 subreg_highpart_offset (word_mode,
5423 mode));
5424 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5425 unsignedp, normalizep);
5427 else
5428 tem = NULL_RTX;
5430 if (tem)
5432 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5433 return tem;
5434 if (!target)
5435 target = gen_reg_rtx (target_mode);
/* Widen the word-mode flag value to TARGET_MODE; sign-extend only
   when the flag value could have its sign bit set.  */
5437 convert_move (target, tem,
5438 !val_signbit_known_set_p (word_mode,
5439 (normalizep ? normalizep
5440 : STORE_FLAG_VALUE)));
5441 return target;
5445 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5446 complement of A (for GE) and shifting the sign bit to the low bit. */
5447 if (op1 == const0_rtx && (code == LT || code == GE)
5448 && GET_MODE_CLASS (mode) == MODE_INT
5449 && (normalizep || STORE_FLAG_VALUE == 1
5450 || val_signbit_p (mode, STORE_FLAG_VALUE)))
5452 subtarget = target;
5454 if (!target)
5455 target_mode = mode;
5457 /* If the result is to be wider than OP0, it is best to convert it
5458 first. If it is to be narrower, it is *incorrect* to convert it
5459 first. */
5460 else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5462 op0 = convert_modes (target_mode, mode, op0, 0);
5463 mode = target_mode;
5466 if (target_mode != mode)
5467 subtarget = 0;
5469 if (code == GE)
5470 op0 = expand_unop (mode, one_cmpl_optab, op0,
5471 ((STORE_FLAG_VALUE == 1 || normalizep)
5472 ? 0 : subtarget), 0);
5474 if (STORE_FLAG_VALUE == 1 || normalizep)
5475 /* If we are supposed to produce a 0/1 value, we want to do
5476 a logical shift from the sign bit to the low-order bit; for
5477 a -1/0 value, we do an arithmetic shift. */
5478 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5479 GET_MODE_BITSIZE (mode) - 1,
5480 subtarget, normalizep != -1);
5482 if (mode != target_mode)
5483 op0 = convert_modes (target_mode, mode, op0, 0);
5485 return op0;
/* Finally, try a cstore pattern in MODE itself or any wider mode of
   the same class.  */
5488 mclass = GET_MODE_CLASS (mode);
5489 for (compare_mode = mode; compare_mode != VOIDmode;
5490 compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5492 machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5493 icode = optab_handler (cstore_optab, optab_mode);
5494 if (icode != CODE_FOR_nothing)
5496 do_pending_stack_adjust ();
5497 rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5498 unsignedp, op0, op1, normalizep, target_mode);
5499 if (tem)
5500 return tem;
/* For floating point, also try the same comparison with the
   operands (and condition) swapped.  */
5502 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5504 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5505 unsignedp, op1, op0, normalizep, target_mode);
5506 if (tem)
5507 return tem;
5509 break;
5513 return 0;
5516 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5517 and storing in TARGET. Normally return TARGET.
5518 Return 0 if that cannot be done.
5520 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5521 it is VOIDmode, they cannot both be CONST_INT.
5523 UNSIGNEDP is for the case where we have to widen the operands
5524 to perform the operation. It says to use zero-extension.
5526 NORMALIZEP is 1 if we should convert the result to be either zero
5527 or one. Normalize is -1 if we should convert the result to be
5528 either zero or -1. If NORMALIZEP is zero, the result will be left
5529 "raw" out of the scc insn. */
5532 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5533 machine_mode mode, int unsignedp, int normalizep)
5535 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5536 enum rtx_code rcode;
5537 rtx subtarget;
5538 rtx tem, trueval;
5539 rtx_insn *last;
5541 /* If we compare constants, we shouldn't use a store-flag operation,
5542 but a constant load. We can get there via the vanilla route that
5543 usually generates a compare-branch sequence, but will in this case
5544 fold the comparison to a constant, and thus elide the branch. */
5545 if (CONSTANT_P (op0) && CONSTANT_P (op1))
5546 return NULL_RTX;
/* First try the non-recursive tricks.  */
5548 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5549 target_mode);
5550 if (tem)
5551 return tem;
5553 /* If we reached here, we can't do this with a scc insn, however there
5554 are some comparisons that can be done in other ways. Don't do any
5555 of these cases if branches are very cheap. */
5556 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5557 return 0;
5559 /* See what we need to return. We can only return a 1, -1, or the
5560 sign bit. */
5562 if (normalizep == 0)
5564 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5565 normalizep = STORE_FLAG_VALUE;
5567 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5569 else
5570 return 0;
5573 last = get_last_insn ();
5575 /* If optimizing, use different pseudo registers for each insn, instead
5576 of reusing the same pseudo. This leads to better CSE, but slows
5577 down the compiler, since there are more pseudos */
5578 subtarget = (!optimize
5579 && (target_mode == mode)) ? target : NULL_RTX;
5580 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5582 /* For floating-point comparisons, try the reverse comparison or try
5583 changing the "orderedness" of the comparison. */
5584 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5586 enum rtx_code first_code;
5587 bool and_them;
5589 rcode = reverse_condition_maybe_unordered (code);
5590 if (can_compare_p (rcode, mode, ccp_store_flag)
5591 && (code == ORDERED || code == UNORDERED
5592 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5593 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5595 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5596 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5598 /* For the reverse comparison, use either an addition or a XOR. */
5599 if (want_add
5600 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5601 optimize_insn_for_speed_p ()) == 0)
5603 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5604 STORE_FLAG_VALUE, target_mode);
5605 if (tem)
5606 return expand_binop (target_mode, add_optab, tem,
5607 gen_int_mode (normalizep, target_mode),
5608 target, 0, OPTAB_WIDEN);
5610 else if (!want_add
5611 && rtx_cost (trueval, mode, XOR, 1,
5612 optimize_insn_for_speed_p ()) == 0)
5614 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5615 normalizep, target_mode);
5616 if (tem)
5617 return expand_binop (target_mode, xor_optab, tem, trueval,
5618 target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5622 delete_insns_since (last);
5624 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5625 if (code == ORDERED || code == UNORDERED)
5626 return 0;
/* Split the comparison into an ordered/unordered test plus a signed
   comparison; AND_THEM says how the two halves combine.  */
5628 and_them = split_comparison (code, mode, &first_code, &code);
5630 /* If there are no NaNs, the first comparison should always fall through.
5631 Effectively change the comparison to the other one. */
5632 if (!HONOR_NANS (mode))
5634 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5635 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5636 target_mode);
5639 if (!HAVE_conditional_move)
5640 return 0;
5642 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5643 conditional move. */
5644 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5645 normalizep, target_mode);
5646 if (tem == 0)
5647 return 0;
5649 if (and_them)
5650 tem = emit_conditional_move (target, code, op0, op1, mode,
5651 tem, const0_rtx, GET_MODE (tem), 0);
5652 else
5653 tem = emit_conditional_move (target, code, op0, op1, mode,
5654 trueval, tem, GET_MODE (tem), 0);
5656 if (tem == 0)
5657 delete_insns_since (last);
5658 return tem;
5661 /* The remaining tricks only apply to integer comparisons. */
5663 if (GET_MODE_CLASS (mode) != MODE_INT)
5664 return 0;
5666 /* If this is an equality comparison of integers, we can try to exclusive-or
5667 (or subtract) the two operands and use a recursive call to try the
5668 comparison with zero. Don't do any of these cases if branches are
5669 very cheap. */
5671 if ((code == EQ || code == NE) && op1 != const0_rtx)
5673 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5674 OPTAB_WIDEN);
5676 if (tem == 0)
5677 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5678 OPTAB_WIDEN);
5679 if (tem != 0)
5680 tem = emit_store_flag (target, code, tem, const0_rtx,
5681 mode, unsignedp, normalizep);
5682 if (tem != 0)
5683 return tem;
5685 delete_insns_since (last);
5688 /* For integer comparisons, try the reverse comparison. However, for
5689 small X and if we'd have anyway to extend, implementing "X != 0"
5690 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5691 rcode = reverse_condition (code);
5692 if (can_compare_p (rcode, mode, ccp_store_flag)
5693 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5694 && code == NE
5695 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5696 && op1 == const0_rtx))
5698 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5699 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5701 /* Again, for the reverse comparison, use either an addition or a XOR. */
5702 if (want_add
5703 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5704 optimize_insn_for_speed_p ()) == 0)
5706 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5707 STORE_FLAG_VALUE, target_mode);
5708 if (tem != 0)
5709 tem = expand_binop (target_mode, add_optab, tem,
5710 gen_int_mode (normalizep, target_mode),
5711 target, 0, OPTAB_WIDEN);
5713 else if (!want_add
5714 && rtx_cost (trueval, mode, XOR, 1,
5715 optimize_insn_for_speed_p ()) == 0)
5717 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5718 normalizep, target_mode);
5719 if (tem != 0)
5720 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5721 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5724 if (tem != 0)
5725 return tem;
5726 delete_insns_since (last);
5729 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5730 the constant zero. Reject all other comparisons at this point. Only
5731 do LE and GT if branches are expensive since they are expensive on
5732 2-operand machines. */
5734 if (op1 != const0_rtx
5735 || (code != EQ && code != NE
5736 && (BRANCH_COST (optimize_insn_for_speed_p (),
5737 false) <= 1 || (code != LE && code != GT))))
5738 return 0;
5740 /* Try to put the result of the comparison in the sign bit. Assume we can't
5741 do the necessary operation below. */
5743 tem = 0;
5745 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5746 the sign bit set. */
5748 if (code == LE)
5750 /* This is destructive, so SUBTARGET can't be OP0. */
5751 if (rtx_equal_p (subtarget, op0))
5752 subtarget = 0;
5754 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5755 OPTAB_WIDEN);
5756 if (tem)
5757 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5758 OPTAB_WIDEN);
5761 /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5762 number of bits in the mode of OP0, minus one. */
5764 if (code == GT)
5766 if (rtx_equal_p (subtarget, op0))
5767 subtarget = 0;
5769 tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
5770 GET_MODE_BITSIZE (mode) - 1,
5771 subtarget, 0);
5772 if (tem)
5773 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5774 OPTAB_WIDEN);
5777 if (code == EQ || code == NE)
5779 /* For EQ or NE, one way to do the comparison is to apply an operation
5780 that converts the operand into a positive number if it is nonzero
5781 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5782 for NE we negate. This puts the result in the sign bit. Then we
5783 normalize with a shift, if needed.
5785 Two operations that can do the above actions are ABS and FFS, so try
5786 them. If that doesn't work, and MODE is smaller than a full word,
5787 we can use zero-extension to the wider mode (an unsigned conversion)
5788 as the operation. */
5790 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5791 that is compensated by the subsequent overflow when subtracting
5792 one / negating. */
5794 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5795 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5796 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5797 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5798 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5800 tem = convert_modes (word_mode, mode, op0, 1);
5801 mode = word_mode;
5804 if (tem != 0)
5806 if (code == EQ)
5807 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5808 0, OPTAB_WIDEN);
5809 else
5810 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5813 /* If we couldn't do it that way, for NE we can "or" the two's complement
5814 of the value with itself. For EQ, we take the one's complement of
5815 that "or", which is an extra insn, so we only handle EQ if branches
5816 are expensive. */
5818 if (tem == 0
5819 && (code == NE
5820 || BRANCH_COST (optimize_insn_for_speed_p (),
5821 false) > 1))
5823 if (rtx_equal_p (subtarget, op0))
5824 subtarget = 0;
5826 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5827 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5828 OPTAB_WIDEN);
5830 if (tem && code == EQ)
5831 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
/* The result is now in the sign bit of TEM; shift it down to bit 0
   (logical shift for 0/1, arithmetic for 0/-1).  */
5835 if (tem && normalizep)
5836 tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem,
5837 GET_MODE_BITSIZE (mode) - 1,
5838 subtarget, normalizep == 1);
5840 if (tem)
5842 if (!target)
5844 else if (GET_MODE (tem) != target_mode)
5846 convert_move (target, tem, 0);
5847 tem = target;
5849 else if (!subtarget)
5851 emit_move_insn (target, tem);
5852 tem = target;
5855 else
5856 delete_insns_since (last);
5858 return tem;
5861 /* Like emit_store_flag, but always succeeds. */
/* Falls back to an explicit move/compare/branch/move sequence when no
   store-flag expansion is available.  */
5864 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5865 machine_mode mode, int unsignedp, int normalizep)
5867 rtx tem;
5868 rtx_code_label *label;
5869 rtx trueval, falseval;
5871 /* First see if emit_store_flag can do the job. */
5872 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5873 if (tem != 0)
5874 return tem;
5876 if (!target)
5877 target = gen_reg_rtx (word_mode);
5879 /* If this failed, we have to do this with set/compare/jump/set code.
5880 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
5881 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5882 if (code == NE
5883 && GET_MODE_CLASS (mode) == MODE_INT
5884 && REG_P (target)
5885 && op0 == target
5886 && op1 == const0_rtx)
5888 label = gen_label_rtx ();
5889 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
5890 NULL_RTX, NULL, label, -1);
5891 emit_move_insn (target, trueval);
5892 emit_label (label);
5893 return target;
/* TARGET must be a register not referenced by the operands, since we
   store into it before the comparison is evaluated.  */
5896 if (!REG_P (target)
5897 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5898 target = gen_reg_rtx (GET_MODE (target));
5900 /* Jump in the right direction if the target cannot implement CODE
5901 but can jump on its reverse condition. */
5902 falseval = const0_rtx;
5903 if (! can_compare_p (code, mode, ccp_jump)
5904 && (! FLOAT_MODE_P (mode)
5905 || code == ORDERED || code == UNORDERED
5906 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5907 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5909 enum rtx_code rcode;
5910 if (FLOAT_MODE_P (mode))
5911 rcode = reverse_condition_maybe_unordered (code);
5912 else
5913 rcode = reverse_condition (code);
5915 /* Canonicalize to UNORDERED for the libcall. */
5916 if (can_compare_p (rcode, mode, ccp_jump)
5917 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5919 falseval = trueval;
5920 trueval = const0_rtx;
5921 code = rcode;
/* Set TARGET to the "true" value, branch around the "false" store
   when the (possibly reversed) comparison holds.  */
5925 emit_move_insn (target, trueval);
5926 label = gen_label_rtx ();
5927 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
5928 label, -1);
5930 emit_move_insn (target, falseval);
5931 emit_label (label);
5933 return target;
5936 /* Perform possibly multi-word comparison and conditional jump to LABEL
5937 if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE. This is
5938 now a thin wrapper around do_compare_rtx_and_jump. */
5940 static void
5941 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
5942 rtx_code_label *label)
/* The unsigned RTL comparison codes imply an unsigned compare.  */
5944 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5945 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
5946 NULL, label, -1);