[19/77] Add a smallest_int_mode_for_size helper function
[official-gcc.git] / gcc / expmed.c
1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987-2017 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "predict.h"
30 #include "memmodel.h"
31 #include "tm_p.h"
32 #include "expmed.h"
33 #include "optabs.h"
34 #include "emit-rtl.h"
35 #include "diagnostic-core.h"
36 #include "fold-const.h"
37 #include "stor-layout.h"
38 #include "dojump.h"
39 #include "explow.h"
40 #include "expr.h"
41 #include "langhooks.h"
43 struct target_expmed default_target_expmed;
44 #if SWITCHABLE_TARGET
45 struct target_expmed *this_target_expmed = &default_target_expmed;
46 #endif
48 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
49 unsigned HOST_WIDE_INT,
50 unsigned HOST_WIDE_INT,
51 unsigned HOST_WIDE_INT,
52 rtx, bool);
53 static void store_fixed_bit_field_1 (rtx, unsigned HOST_WIDE_INT,
54 unsigned HOST_WIDE_INT,
55 rtx, bool);
56 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
57 unsigned HOST_WIDE_INT,
58 unsigned HOST_WIDE_INT,
59 unsigned HOST_WIDE_INT,
60 rtx, bool);
61 static rtx extract_fixed_bit_field (machine_mode, rtx,
62 unsigned HOST_WIDE_INT,
63 unsigned HOST_WIDE_INT, rtx, int, bool);
64 static rtx extract_fixed_bit_field_1 (machine_mode, rtx,
65 unsigned HOST_WIDE_INT,
66 unsigned HOST_WIDE_INT, rtx, int, bool);
67 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
68 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
69 unsigned HOST_WIDE_INT, int, bool);
70 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
71 static rtx expand_smod_pow2 (machine_mode, rtx, HOST_WIDE_INT);
72 static rtx expand_sdiv_pow2 (machine_mode, rtx, HOST_WIDE_INT);
74 /* Return a constant integer mask value of mode MODE with BITSIZE ones
75 followed by BITPOS zeros, or the complement of that if COMPLEMENT.
76 The mask is truncated if necessary to the width of mode MODE. The
77 mask is zero-extended if BITSIZE+BITPOS is too small for MODE. */
79 static inline rtx
80 mask_rtx (machine_mode mode, int bitpos, int bitsize, bool complement)
82 return immed_wide_int_const
83 (wi::shifted_mask (bitpos, bitsize, complement,
84 GET_MODE_PRECISION (mode)), mode);
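/* For illustration (assuming a 32-bit SImode): mask_rtx (SImode, 3, 4, false)
   builds the constant 0x78, i.e. four one bits starting at bit 3, and
   mask_rtx (SImode, 3, 4, true) builds its SImode complement 0xffffff87.  */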
87 /* Test whether a value is zero or a power of two. */
88 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
89 (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
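/* For instance, 8 and 0 satisfy the test while 12 does not, because
   12 & 11 == 8; the macro relies on x & (x - 1) clearing the lowest set
   bit, which leaves zero exactly for powers of two and for zero itself.  */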
91 struct init_expmed_rtl
93 rtx reg;
94 rtx plus;
95 rtx neg;
96 rtx mult;
97 rtx sdiv;
98 rtx udiv;
99 rtx sdiv_32;
100 rtx smod_32;
101 rtx wide_mult;
102 rtx wide_lshr;
103 rtx wide_trunc;
104 rtx shift;
105 rtx shift_mult;
106 rtx shift_add;
107 rtx shift_sub0;
108 rtx shift_sub1;
109 rtx zext;
110 rtx trunc;
112 rtx pow2[MAX_BITS_PER_WORD];
113 rtx cint[MAX_BITS_PER_WORD];
116 static void
117 init_expmed_one_conv (struct init_expmed_rtl *all, machine_mode to_mode,
118 machine_mode from_mode, bool speed)
120 int to_size, from_size;
121 rtx which;
123 to_size = GET_MODE_PRECISION (to_mode);
124 from_size = GET_MODE_PRECISION (from_mode);
126 /* Most partial integers have a precision less than the "full"
127 integer mode required for their storage.  If one does not,
128 reduce its bit size by one for the comparison purposes
129 here. */
130 if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
131 && pow2p_hwi (to_size))
132 to_size --;
133 if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
134 && pow2p_hwi (from_size))
135 from_size --;
137 /* Assume cost of zero-extend and sign-extend is the same. */
138 which = (to_size < from_size ? all->trunc : all->zext);
140 PUT_MODE (all->reg, from_mode);
141 set_convert_cost (to_mode, from_mode, speed,
142 set_src_cost (which, to_mode, speed));
145 static void
146 init_expmed_one_mode (struct init_expmed_rtl *all,
147 machine_mode mode, int speed)
149 int m, n, mode_bitsize;
150 machine_mode mode_from;
152 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
154 PUT_MODE (all->reg, mode);
155 PUT_MODE (all->plus, mode);
156 PUT_MODE (all->neg, mode);
157 PUT_MODE (all->mult, mode);
158 PUT_MODE (all->sdiv, mode);
159 PUT_MODE (all->udiv, mode);
160 PUT_MODE (all->sdiv_32, mode);
161 PUT_MODE (all->smod_32, mode);
162 PUT_MODE (all->wide_trunc, mode);
163 PUT_MODE (all->shift, mode);
164 PUT_MODE (all->shift_mult, mode);
165 PUT_MODE (all->shift_add, mode);
166 PUT_MODE (all->shift_sub0, mode);
167 PUT_MODE (all->shift_sub1, mode);
168 PUT_MODE (all->zext, mode);
169 PUT_MODE (all->trunc, mode);
171 set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
172 set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
173 set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
174 set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
175 set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
177 set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
178 <= 2 * add_cost (speed, mode)));
179 set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
180 <= 4 * add_cost (speed, mode)));
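/* Put differently, a signed division by 32 is treated as cheap when its
   rtx cost is at most two additions, and the corresponding modulus when it
   is at most four; presumably these thresholds approximate the cost of the
   shift-based expansions in expand_sdiv_pow2 and expand_smod_pow2.  */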
182 set_shift_cost (speed, mode, 0, 0);
184 int cost = add_cost (speed, mode);
185 set_shiftadd_cost (speed, mode, 0, cost);
186 set_shiftsub0_cost (speed, mode, 0, cost);
187 set_shiftsub1_cost (speed, mode, 0, cost);
190 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
191 for (m = 1; m < n; m++)
193 XEXP (all->shift, 1) = all->cint[m];
194 XEXP (all->shift_mult, 1) = all->pow2[m];
196 set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
197 set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
198 speed));
199 set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
200 speed));
201 set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
202 speed));
205 if (SCALAR_INT_MODE_P (mode))
207 for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
208 mode_from = (machine_mode)(mode_from + 1))
209 init_expmed_one_conv (all, mode, mode_from, speed);
211 machine_mode wider_mode;
212 if (GET_MODE_CLASS (mode) == MODE_INT
213 && GET_MODE_WIDER_MODE (mode).exists (&wider_mode))
215 PUT_MODE (all->zext, wider_mode);
216 PUT_MODE (all->wide_mult, wider_mode);
217 PUT_MODE (all->wide_lshr, wider_mode);
218 XEXP (all->wide_lshr, 1) = GEN_INT (mode_bitsize);
220 set_mul_widen_cost (speed, wider_mode,
221 set_src_cost (all->wide_mult, wider_mode, speed));
222 set_mul_highpart_cost (speed, mode,
223 set_src_cost (all->wide_trunc, mode, speed));
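/* The nested rtxes above form
   (truncate:MODE (lshiftrt:WIDER (mult:WIDER (zext X) (zext Y)) MODE_BITSIZE)),
   i.e. a widening multiply of which only the high half is kept, which is
   why the cost of WIDE_TRUNC is recorded as the mul_highpart cost.  */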
228 void
229 init_expmed (void)
231 struct init_expmed_rtl all;
232 machine_mode mode = QImode;
233 int m, speed;
235 memset (&all, 0, sizeof all);
236 for (m = 1; m < MAX_BITS_PER_WORD; m++)
238 all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
239 all.cint[m] = GEN_INT (m);
242 /* Avoid using hard regs in ways which may be unsupported. */
243 all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
244 all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
245 all.neg = gen_rtx_NEG (mode, all.reg);
246 all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
247 all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
248 all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
249 all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
250 all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
251 all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
252 all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
253 all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
254 all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
255 all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
256 all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
257 all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
258 all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
259 all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
260 all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
262 for (speed = 0; speed < 2; speed++)
264 crtl->maybe_hot_insn_p = speed;
265 set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
267 for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
268 mode = (machine_mode)(mode + 1))
269 init_expmed_one_mode (&all, mode, speed);
271 if (MIN_MODE_PARTIAL_INT != VOIDmode)
272 for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
273 mode = (machine_mode)(mode + 1))
274 init_expmed_one_mode (&all, mode, speed);
276 if (MIN_MODE_VECTOR_INT != VOIDmode)
277 for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
278 mode = (machine_mode)(mode + 1))
279 init_expmed_one_mode (&all, mode, speed);
282 if (alg_hash_used_p ())
284 struct alg_hash_entry *p = alg_hash_entry_ptr (0);
285 memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
287 else
288 set_alg_hash_used_p (true);
289 default_rtl_profile ();
291 ggc_free (all.trunc);
292 ggc_free (all.shift_sub1);
293 ggc_free (all.shift_sub0);
294 ggc_free (all.shift_add);
295 ggc_free (all.shift_mult);
296 ggc_free (all.shift);
297 ggc_free (all.wide_trunc);
298 ggc_free (all.wide_lshr);
299 ggc_free (all.wide_mult);
300 ggc_free (all.zext);
301 ggc_free (all.smod_32);
302 ggc_free (all.sdiv_32);
303 ggc_free (all.udiv);
304 ggc_free (all.sdiv);
305 ggc_free (all.mult);
306 ggc_free (all.neg);
307 ggc_free (all.plus);
308 ggc_free (all.reg);
311 /* Return an rtx representing minus the value of X.
312 MODE is the intended mode of the result,
313 useful if X is a CONST_INT. */
316 negate_rtx (machine_mode mode, rtx x)
318 rtx result = simplify_unary_operation (NEG, mode, x, mode);
320 if (result == 0)
321 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
323 return result;
326 /* Whether reverse storage order is supported on the target. */
327 static int reverse_storage_order_supported = -1;
329 /* Check whether reverse storage order is supported on the target. */
331 static void
332 check_reverse_storage_order_support (void)
334 if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
336 reverse_storage_order_supported = 0;
337 sorry ("reverse scalar storage order");
339 else
340 reverse_storage_order_supported = 1;
343 /* Whether reverse FP storage order is supported on the target. */
344 static int reverse_float_storage_order_supported = -1;
346 /* Check whether reverse FP storage order is supported on the target. */
348 static void
349 check_reverse_float_storage_order_support (void)
351 if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
353 reverse_float_storage_order_supported = 0;
354 sorry ("reverse floating-point scalar storage order");
356 else
357 reverse_float_storage_order_supported = 1;
360 /* Return an rtx representing the value of X with reverse storage order.
361 MODE is the intended mode of the result,
362 useful if X is a CONST_INT. */
365 flip_storage_order (machine_mode mode, rtx x)
367 scalar_int_mode int_mode;
368 rtx result;
370 if (mode == QImode)
371 return x;
373 if (COMPLEX_MODE_P (mode))
375 rtx real = read_complex_part (x, false);
376 rtx imag = read_complex_part (x, true);
378 real = flip_storage_order (GET_MODE_INNER (mode), real);
379 imag = flip_storage_order (GET_MODE_INNER (mode), imag);
381 return gen_rtx_CONCAT (mode, real, imag);
384 if (__builtin_expect (reverse_storage_order_supported < 0, 0))
385 check_reverse_storage_order_support ();
387 if (!is_a <scalar_int_mode> (mode, &int_mode))
389 if (FLOAT_MODE_P (mode)
390 && __builtin_expect (reverse_float_storage_order_supported < 0, 0))
391 check_reverse_float_storage_order_support ();
393 if (!int_mode_for_size (GET_MODE_PRECISION (mode), 0).exists (&int_mode))
395 sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
396 return x;
398 x = gen_lowpart (int_mode, x);
401 result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
402 if (result == 0)
403 result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
405 if (int_mode != mode)
406 result = gen_lowpart (mode, result);
408 return result;
411 /* Adjust bitfield memory MEM so that it points to the first unit of mode
412 MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
413 If MODE is BLKmode, return a reference to every byte in the bitfield.
414 Set *NEW_BITNUM to the bit position of the field within the new memory. */
416 static rtx
417 narrow_bit_field_mem (rtx mem, machine_mode mode,
418 unsigned HOST_WIDE_INT bitsize,
419 unsigned HOST_WIDE_INT bitnum,
420 unsigned HOST_WIDE_INT *new_bitnum)
422 if (mode == BLKmode)
424 *new_bitnum = bitnum % BITS_PER_UNIT;
425 HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
426 HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
427 / BITS_PER_UNIT);
428 return adjust_bitfield_address_size (mem, mode, offset, size);
430 else
432 unsigned int unit = GET_MODE_BITSIZE (mode);
433 *new_bitnum = bitnum % unit;
434 HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
435 return adjust_bitfield_address (mem, mode, offset);
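/* Worked example, assuming 8-bit units: for BITSIZE == 5 and BITNUM == 37,
   narrowing to HImode sets *NEW_BITNUM to 37 % 16 == 5 and advances the
   address by (37 - 5) / 8 == 4 bytes, while the BLKmode path sets
   *NEW_BITNUM to 37 % 8 == 5 with offset 37 / 8 == 4 and a size of
   (5 + 5 + 7) / 8 == 2 bytes.  */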
439 /* The caller wants to perform insertion or extraction PATTERN on a
440 bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
441 BITREGION_START and BITREGION_END are as for store_bit_field
442 and FIELDMODE is the natural mode of the field.
444 Search for a mode that is compatible with the memory access
445 restrictions and (where applicable) with a register insertion or
446 extraction. Return the new memory on success, storing the adjusted
447 bit position in *NEW_BITNUM. Return null otherwise. */
449 static rtx
450 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
451 rtx op0, HOST_WIDE_INT bitsize,
452 HOST_WIDE_INT bitnum,
453 unsigned HOST_WIDE_INT bitregion_start,
454 unsigned HOST_WIDE_INT bitregion_end,
455 machine_mode fieldmode,
456 unsigned HOST_WIDE_INT *new_bitnum)
458 bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
459 bitregion_end, MEM_ALIGN (op0),
460 MEM_VOLATILE_P (op0));
461 machine_mode best_mode;
462 if (iter.next_mode (&best_mode))
464 /* We can use a memory in BEST_MODE. See whether this is true for
465 any wider modes. All other things being equal, we prefer to
466 use the widest mode possible because it tends to expose more
467 CSE opportunities. */
468 if (!iter.prefer_smaller_modes ())
470 /* Limit the search to the mode required by the corresponding
471 register insertion or extraction instruction, if any. */
472 machine_mode limit_mode = word_mode;
473 extraction_insn insn;
474 if (get_best_reg_extraction_insn (&insn, pattern,
475 GET_MODE_BITSIZE (best_mode),
476 fieldmode))
477 limit_mode = insn.field_mode;
479 machine_mode wider_mode;
480 while (iter.next_mode (&wider_mode)
481 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
482 best_mode = wider_mode;
484 return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
485 new_bitnum);
487 return NULL_RTX;
490 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
491 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
492 offset is then BITNUM / BITS_PER_UNIT. */
494 static bool
495 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
496 unsigned HOST_WIDE_INT bitsize,
497 machine_mode struct_mode)
499 if (BYTES_BIG_ENDIAN)
500 return (bitnum % BITS_PER_UNIT == 0
501 && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
502 || (bitnum + bitsize) % BITS_PER_WORD == 0));
503 else
504 return bitnum % BITS_PER_WORD == 0;
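/* For example, assuming 8-bit bytes and 32-bit words: on a little-endian
   target any field starting at BITNUM 0, 32, 64, ... is a lowpart, whereas
   on a big-endian target a 16-bit field at BITNUM 16 of an SImode structure
   qualifies because it ends at the mode boundary, giving a subreg byte
   offset of 16 / 8 == 2.  */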
507 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
508 containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
509 Return false if the access would touch memory outside the range
510 BITREGION_START to BITREGION_END for conformance to the C++ memory
511 model. */
513 static bool
514 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
515 unsigned HOST_WIDE_INT bitnum,
516 machine_mode fieldmode,
517 unsigned HOST_WIDE_INT bitregion_start,
518 unsigned HOST_WIDE_INT bitregion_end)
520 unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
522 /* -fstrict-volatile-bitfields must be enabled and we must have a
523 volatile MEM. */
524 if (!MEM_P (op0)
525 || !MEM_VOLATILE_P (op0)
526 || flag_strict_volatile_bitfields <= 0)
527 return false;
529 /* Non-integral modes likely only happen with packed structures.
530 Punt. */
531 if (!SCALAR_INT_MODE_P (fieldmode))
532 return false;
534 /* The bit size must not be larger than the field mode, and
535 the field mode must not be larger than a word. */
536 if (bitsize > modesize || modesize > BITS_PER_WORD)
537 return false;
539 /* Check for cases of unaligned fields that must be split. */
540 if (bitnum % modesize + bitsize > modesize)
541 return false;
543 /* The memory must be sufficiently aligned for a MODESIZE access.
544 This condition guarantees that the memory access will not
545 touch anything after the end of the structure. */
546 if (MEM_ALIGN (op0) < modesize)
547 return false;
549 /* Check for cases where the C++ memory model applies. */
550 if (bitregion_end != 0
551 && (bitnum - bitnum % modesize < bitregion_start
552 || bitnum - bitnum % modesize + modesize - 1 > bitregion_end))
553 return false;
555 return true;
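/* Illustration, assuming 32-bit SImode and BITS_PER_WORD == 32: a volatile
   3-bit field at BITNUM 4 with SImode FIELDMODE qualifies when the MEM is at
   least 32-bit aligned and the SImode access stays inside the bit region,
   since 4 % 32 + 3 <= 32; the same field at BITNUM 30 would straddle a
   32-bit boundary and be rejected by the split check above.  */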
558 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
559 bit number BITNUM can be treated as a simple value of mode MODE. */
561 static bool
562 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
563 unsigned HOST_WIDE_INT bitnum, machine_mode mode)
565 return (MEM_P (op0)
566 && bitnum % BITS_PER_UNIT == 0
567 && bitsize == GET_MODE_BITSIZE (mode)
568 && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
569 || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
570 && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
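/* For instance, assuming 8-bit units, a 16-bit field starting on a byte
   boundary can be treated as a single HImode value when either unaligned
   HImode accesses are not slow for the MEM's alignment, or both BITNUM and
   the MEM itself satisfy HImode's alignment.  */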
573 /* Try to use instruction INSV to store VALUE into a field of OP0.
574 BITSIZE and BITNUM are as for store_bit_field. */
576 static bool
577 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
578 unsigned HOST_WIDE_INT bitsize,
579 unsigned HOST_WIDE_INT bitnum,
580 rtx value)
582 struct expand_operand ops[4];
583 rtx value1;
584 rtx xop0 = op0;
585 rtx_insn *last = get_last_insn ();
586 bool copy_back = false;
588 machine_mode op_mode = insv->field_mode;
589 unsigned int unit = GET_MODE_BITSIZE (op_mode);
590 if (bitsize == 0 || bitsize > unit)
591 return false;
593 if (MEM_P (xop0))
594 /* Get a reference to the first byte of the field. */
595 xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
596 &bitnum);
597 else
599 /* Convert from counting within OP0 to counting in OP_MODE. */
600 if (BYTES_BIG_ENDIAN)
601 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
603 /* If xop0 is a register, we need it in OP_MODE
604 to make it acceptable to the format of insv. */
605 if (GET_CODE (xop0) == SUBREG)
606 /* We can't just change the mode, because this might clobber op0,
607 and we will need the original value of op0 if insv fails. */
608 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
609 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
610 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
613 /* If the destination is a paradoxical subreg such that we need a
614 truncate to the inner mode, perform the insertion on a temporary and
615 truncate the result to the original destination. Note that we can't
616 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
617 X) 0)) is (reg:N X). */
618 if (GET_CODE (xop0) == SUBREG
619 && REG_P (SUBREG_REG (xop0))
620 && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
621 op_mode))
623 rtx tem = gen_reg_rtx (op_mode);
624 emit_move_insn (tem, xop0);
625 xop0 = tem;
626 copy_back = true;
629 /* There is a similar overflow check at the start of store_bit_field_1,
630 but it only handles the case where the field lies completely outside
631 the register.  The field can also lie partially within the register,
632 in which case BITSIZE needs to be adjusted for the partial overflow.
633 Without this fix, pr48335-2.c would be broken on big-endian targets
634 that have a bit-insert instruction, such as arm and aarch64.  */
636 if (bitsize + bitnum > unit && bitnum < unit)
638 warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
639 "destination object, data truncated into %wu-bit",
640 bitsize, unit - bitnum);
641 bitsize = unit - bitnum;
644 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
645 "backwards" from the size of the unit we are inserting into.
646 Otherwise, we count bits from the most significant on a
647 BYTES/BITS_BIG_ENDIAN machine. */
649 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
650 bitnum = unit - bitsize - bitnum;
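/* For example, when this renumbering applies and UNIT is 32, a 5-bit field
   at BITNUM 4 is passed to the insv pattern as BITNUM 32 - 5 - 4 == 23.  */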
652 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
653 value1 = value;
654 if (GET_MODE (value) != op_mode)
656 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
658 rtx tmp;
659 /* Optimization: Don't bother really extending VALUE
660 if it has all the bits we will actually use. However,
661 if we must narrow it, be sure we do it correctly. */
663 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
665 tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
666 if (! tmp)
667 tmp = simplify_gen_subreg (op_mode,
668 force_reg (GET_MODE (value),
669 value1),
670 GET_MODE (value), 0);
672 else
674 tmp = gen_lowpart_if_possible (op_mode, value1);
675 if (! tmp)
676 tmp = gen_lowpart (op_mode, force_reg (GET_MODE (value),
677 value1));
679 value1 = tmp;
681 else if (CONST_INT_P (value))
682 value1 = gen_int_mode (INTVAL (value), op_mode);
683 else
684 /* The parse phase is supposed to make VALUE's data type
685 match that of the component reference, which is a type
686 at least as wide as the field; so VALUE should have
687 a mode that corresponds to that type. */
688 gcc_assert (CONSTANT_P (value));
691 create_fixed_operand (&ops[0], xop0);
692 create_integer_operand (&ops[1], bitsize);
693 create_integer_operand (&ops[2], bitnum);
694 create_input_operand (&ops[3], value1, op_mode);
695 if (maybe_expand_insn (insv->icode, 4, ops))
697 if (copy_back)
698 convert_move (op0, xop0, true);
699 return true;
701 delete_insns_since (last);
702 return false;
705 /* A subroutine of store_bit_field, with the same arguments. Return true
706 if the operation could be implemented.
708 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
709 no other way of implementing the operation. If FALLBACK_P is false,
710 return false instead. */
712 static bool
713 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
714 unsigned HOST_WIDE_INT bitnum,
715 unsigned HOST_WIDE_INT bitregion_start,
716 unsigned HOST_WIDE_INT bitregion_end,
717 machine_mode fieldmode,
718 rtx value, bool reverse, bool fallback_p)
720 rtx op0 = str_rtx;
721 rtx orig_value;
723 while (GET_CODE (op0) == SUBREG)
725 /* The following line once was done only if WORDS_BIG_ENDIAN,
726 but I think that is a mistake. WORDS_BIG_ENDIAN is
727 meaningful at a much higher level; when structures are copied
728 between memory and regs, the higher-numbered regs
729 always get higher addresses. */
730 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
731 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
732 int byte_offset = 0;
734 /* Paradoxical subregs need special handling on big-endian machines. */
735 if (paradoxical_subreg_p (op0))
737 int difference = inner_mode_size - outer_mode_size;
739 if (WORDS_BIG_ENDIAN)
740 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
741 if (BYTES_BIG_ENDIAN)
742 byte_offset += difference % UNITS_PER_WORD;
744 else
745 byte_offset = SUBREG_BYTE (op0);
747 bitnum += byte_offset * BITS_PER_UNIT;
748 op0 = SUBREG_REG (op0);
751 /* No action is needed if the target is a register and if the field
752 lies completely outside that register. This can occur if the source
753 code contains an out-of-bounds access to a small array. */
754 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
755 return true;
757 /* Use vec_set patterns for inserting parts of vectors whenever
758 available. */
759 if (VECTOR_MODE_P (GET_MODE (op0))
760 && !MEM_P (op0)
761 && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
762 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
763 && bitsize == GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
764 && !(bitnum % GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
766 struct expand_operand ops[3];
767 machine_mode outermode = GET_MODE (op0);
768 machine_mode innermode = GET_MODE_INNER (outermode);
769 enum insn_code icode = optab_handler (vec_set_optab, outermode);
770 int pos = bitnum / GET_MODE_BITSIZE (innermode);
772 create_fixed_operand (&ops[0], op0);
773 create_input_operand (&ops[1], value, innermode);
774 create_integer_operand (&ops[2], pos);
775 if (maybe_expand_insn (icode, 3, ops))
776 return true;
779 /* If the target is a register, overwriting the entire object, or storing
780 a full-word or multi-word field can be done with just a SUBREG. */
781 if (!MEM_P (op0)
782 && bitsize == GET_MODE_BITSIZE (fieldmode)
783 && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
784 || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
786 /* Use the subreg machinery either to narrow OP0 to the required
787 words or to cope with mode punning between equal-sized modes.
788 In the latter case, use subreg on the rhs side, not lhs. */
789 rtx sub;
791 if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
793 sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
794 if (sub)
796 if (reverse)
797 sub = flip_storage_order (GET_MODE (op0), sub);
798 emit_move_insn (op0, sub);
799 return true;
802 else
804 sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
805 bitnum / BITS_PER_UNIT);
806 if (sub)
808 if (reverse)
809 value = flip_storage_order (fieldmode, value);
810 emit_move_insn (sub, value);
811 return true;
816 /* If the target is memory, storing any naturally aligned field can be
817 done with a simple store. For targets that support fast unaligned
818 memory, any naturally sized, unit aligned field can be done directly. */
819 if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
821 op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
822 if (reverse)
823 value = flip_storage_order (fieldmode, value);
824 emit_move_insn (op0, value);
825 return true;
828 /* Make sure we are playing with integral modes. Pun with subregs
829 if we aren't. This must come after the entire register case above,
830 since that case is valid for any mode. The following cases are only
831 valid for integral modes. */
832 opt_scalar_int_mode opt_imode = int_mode_for_mode (GET_MODE (op0));
833 scalar_int_mode imode;
834 if (!opt_imode.exists (&imode) || imode != GET_MODE (op0))
836 if (MEM_P (op0))
837 op0 = adjust_bitfield_address_size (op0, opt_imode.else_blk (),
838 0, MEM_SIZE (op0));
839 else
840 op0 = gen_lowpart (opt_imode.require (), op0);
843 /* Storing an lsb-aligned field in a register
844 can be done with a movstrict instruction. */
846 if (!MEM_P (op0)
847 && !reverse
848 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
849 && bitsize == GET_MODE_BITSIZE (fieldmode)
850 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
852 struct expand_operand ops[2];
853 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
854 rtx arg0 = op0;
855 unsigned HOST_WIDE_INT subreg_off;
857 if (GET_CODE (arg0) == SUBREG)
859 /* Else we've got some float mode source being extracted into
860 a different float mode destination -- this combination of
861 subregs results in Severe Tire Damage. */
862 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
863 || GET_MODE_CLASS (fieldmode) == MODE_INT
864 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
865 arg0 = SUBREG_REG (arg0);
868 subreg_off = bitnum / BITS_PER_UNIT;
869 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
871 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
873 create_fixed_operand (&ops[0], arg0);
874 /* Shrink the source operand to FIELDMODE. */
875 create_convert_operand_to (&ops[1], value, fieldmode, false);
876 if (maybe_expand_insn (icode, 2, ops))
877 return true;
881 /* Handle fields bigger than a word. */
883 if (bitsize > BITS_PER_WORD)
885 /* Here we transfer the words of the field
886 in the order least significant first.
887 This is because the most significant word is the one which may
888 be less than full.
889 However, only do that if the value is not BLKmode. */
891 const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
892 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
893 unsigned int i;
894 rtx_insn *last;
896 /* This is the mode we must force value to, so that there will be enough
897 subwords to extract. Note that fieldmode will often (always?) be
898 VOIDmode, because that is what store_field uses to indicate that this
899 is a bit field, but passing VOIDmode to operand_subword_force
900 is not allowed. */
901 fieldmode = GET_MODE (value);
902 if (fieldmode == VOIDmode)
903 fieldmode = smallest_int_mode_for_size (nwords * BITS_PER_WORD);
905 last = get_last_insn ();
906 for (i = 0; i < nwords; i++)
908 /* If I is 0, use the low-order word in both field and target;
909 if I is 1, use the next to lowest word; and so on. */
910 unsigned int wordnum = (backwards
911 ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
912 - i - 1
913 : i);
914 unsigned int bit_offset = (backwards ^ reverse
915 ? MAX ((int) bitsize - ((int) i + 1)
916 * BITS_PER_WORD,
917 0)
918 : (int) i * BITS_PER_WORD);
919 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
920 unsigned HOST_WIDE_INT new_bitsize =
921 MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
923 /* If the remaining chunk doesn't have a full word size, we have
924 to make sure that on big-endian machines the higher-order
925 bits are used. */
926 if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
927 value_word = simplify_expand_binop (word_mode, lshr_optab,
928 value_word,
929 GEN_INT (BITS_PER_WORD
930 - new_bitsize),
931 NULL_RTX, true,
932 OPTAB_LIB_WIDEN);
934 if (!store_bit_field_1 (op0, new_bitsize,
935 bitnum + bit_offset,
936 bitregion_start, bitregion_end,
937 word_mode,
938 value_word, reverse, fallback_p))
940 delete_insns_since (last);
941 return false;
944 return true;
947 /* If VALUE has a floating-point or complex mode, access it as an
948 integer of the corresponding size. This can occur on a machine
949 with 64 bit registers that uses SFmode for float. It can also
950 occur for unaligned float or complex fields. */
951 orig_value = value;
952 if (GET_MODE (value) != VOIDmode
953 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
954 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
956 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)).require ());
957 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
960 /* If OP0 is a multi-word register, narrow it to the affected word.
961 If the region spans two words, defer to store_split_bit_field.
962 Don't do this if op0 is a single hard register wider than word
963 such as a float or vector register. */
964 if (!MEM_P (op0)
965 && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD
966 && (!REG_P (op0)
967 || !HARD_REGISTER_P (op0)
968 || HARD_REGNO_NREGS (REGNO (op0), GET_MODE (op0)) != 1))
970 if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
972 if (!fallback_p)
973 return false;
975 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
976 bitregion_end, value, reverse);
977 return true;
979 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
980 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
981 gcc_assert (op0);
982 bitnum %= BITS_PER_WORD;
985 /* From here on we can assume that the field to be stored in fits
986 within a word. If the destination is a register, it too fits
987 in a word. */
989 extraction_insn insv;
990 if (!MEM_P (op0)
991 && !reverse
992 && get_best_reg_extraction_insn (&insv, EP_insv,
993 GET_MODE_BITSIZE (GET_MODE (op0)),
994 fieldmode)
995 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
996 return true;
998 /* If OP0 is a memory, try copying it to a register and seeing if a
999 cheap register alternative is available. */
1000 if (MEM_P (op0) && !reverse)
1002 if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
1003 fieldmode)
1004 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
1005 return true;
1007 rtx_insn *last = get_last_insn ();
1009 /* Try loading part of OP0 into a register, inserting the bitfield
1010 into that, and then copying the result back to OP0. */
1011 unsigned HOST_WIDE_INT bitpos;
1012 rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1013 bitregion_start, bitregion_end,
1014 fieldmode, &bitpos);
1015 if (xop0)
1017 rtx tempreg = copy_to_reg (xop0);
1018 if (store_bit_field_1 (tempreg, bitsize, bitpos,
1019 bitregion_start, bitregion_end,
1020 fieldmode, orig_value, reverse, false))
1022 emit_move_insn (xop0, tempreg);
1023 return true;
1025 delete_insns_since (last);
1029 if (!fallback_p)
1030 return false;
1032 store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
1033 bitregion_end, value, reverse);
1034 return true;
1037 /* Generate code to store value from rtx VALUE
1038 into a bit-field within structure STR_RTX
1039 containing BITSIZE bits starting at bit BITNUM.
1041 BITREGION_START is bitpos of the first bitfield in this region.
1042 BITREGION_END is the bitpos of the ending bitfield in this region.
1043 These two fields are 0, if the C++ memory model does not apply,
1044 or we are not interested in keeping track of bitfield regions.
1046 FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1048 If REVERSE is true, the store is to be done in reverse order. */
1050 void
1051 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1052 unsigned HOST_WIDE_INT bitnum,
1053 unsigned HOST_WIDE_INT bitregion_start,
1054 unsigned HOST_WIDE_INT bitregion_end,
1055 machine_mode fieldmode,
1056 rtx value, bool reverse)
1058 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1059 if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode,
1060 bitregion_start, bitregion_end))
1062 /* Storing of a full word can be done with a simple store.
1063 We know here that the field can be accessed with one single
1064 instruction. For targets that support unaligned memory,
1065 an unaligned access may be necessary. */
1066 if (bitsize == GET_MODE_BITSIZE (fieldmode))
1068 str_rtx = adjust_bitfield_address (str_rtx, fieldmode,
1069 bitnum / BITS_PER_UNIT);
1070 if (reverse)
1071 value = flip_storage_order (fieldmode, value);
1072 gcc_assert (bitnum % BITS_PER_UNIT == 0);
1073 emit_move_insn (str_rtx, value);
1075 else
1077 rtx temp;
1079 str_rtx = narrow_bit_field_mem (str_rtx, fieldmode, bitsize, bitnum,
1080 &bitnum);
1081 gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (fieldmode));
1082 temp = copy_to_reg (str_rtx);
1083 if (!store_bit_field_1 (temp, bitsize, bitnum, 0, 0,
1084 fieldmode, value, reverse, true))
1085 gcc_unreachable ();
1087 emit_move_insn (str_rtx, temp);
1090 return;
1093 /* Under the C++0x memory model, we must not touch bits outside the
1094 bit region. Adjust the address to start at the beginning of the
1095 bit region. */
1096 if (MEM_P (str_rtx) && bitregion_start > 0)
1098 machine_mode bestmode;
1099 HOST_WIDE_INT offset, size;
1101 gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
1103 offset = bitregion_start / BITS_PER_UNIT;
1104 bitnum -= bitregion_start;
1105 size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
1106 bitregion_end -= bitregion_start;
1107 bitregion_start = 0;
1108 bestmode = get_best_mode (bitsize, bitnum,
1109 bitregion_start, bitregion_end,
1110 MEM_ALIGN (str_rtx), VOIDmode,
1111 MEM_VOLATILE_P (str_rtx));
1112 str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
1115 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1116 bitregion_start, bitregion_end,
1117 fieldmode, value, reverse, true))
1118 gcc_unreachable ();
1121 /* Use shifts and boolean operations to store VALUE into a bit field of
1122 width BITSIZE in OP0, starting at bit BITNUM.
1124 If REVERSE is true, the store is to be done in reverse order. */
1126 static void
1127 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1128 unsigned HOST_WIDE_INT bitnum,
1129 unsigned HOST_WIDE_INT bitregion_start,
1130 unsigned HOST_WIDE_INT bitregion_end,
1131 rtx value, bool reverse)
1133 /* There is a case not handled here:
1134 a structure with a known alignment of just a halfword
1135 and a field split across two aligned halfwords within the structure.
1136 Or likewise a structure with a known alignment of just a byte
1137 and a field split across two bytes.
1138 Such cases are not supposed to be able to occur. */
1140 if (MEM_P (op0))
1142 machine_mode mode = GET_MODE (op0);
1143 if (GET_MODE_BITSIZE (mode) == 0
1144 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
1145 mode = word_mode;
1146 mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1147 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
1149 if (mode == VOIDmode)
1151 /* The only way this should occur is if the field spans word
1152 boundaries. */
1153 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
1154 bitregion_end, value, reverse);
1155 return;
1158 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1161 store_fixed_bit_field_1 (op0, bitsize, bitnum, value, reverse);
1164 /* Helper function for store_fixed_bit_field, stores
1165 the bit field always using the MODE of OP0. */
1167 static void
1168 store_fixed_bit_field_1 (rtx op0, unsigned HOST_WIDE_INT bitsize,
1169 unsigned HOST_WIDE_INT bitnum,
1170 rtx value, bool reverse)
1172 machine_mode mode;
1173 rtx temp;
1174 int all_zero = 0;
1175 int all_one = 0;
1177 mode = GET_MODE (op0);
1178 gcc_assert (SCALAR_INT_MODE_P (mode));
1180 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1181 for invalid input, such as f5 from gcc.dg/pr48335-2.c. */
1183 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1184 /* BITNUM is the distance between our msb
1185 and that of the containing datum.
1186 Convert it to the distance from the lsb. */
1187 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1189 /* Now BITNUM is always the distance between our lsb
1190 and that of OP0. */
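/* For example, in a 32-bit MODE a 6-bit field whose msb-relative BITNUM
   was 10 is now addressed from the lsb as 32 - 6 - 10 == 16.  */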
1192 /* Shift VALUE left by BITNUM bits. If VALUE is not constant,
1193 we must first convert its mode to MODE. */
1195 if (CONST_INT_P (value))
1197 unsigned HOST_WIDE_INT v = UINTVAL (value);
1199 if (bitsize < HOST_BITS_PER_WIDE_INT)
1200 v &= (HOST_WIDE_INT_1U << bitsize) - 1;
1202 if (v == 0)
1203 all_zero = 1;
1204 else if ((bitsize < HOST_BITS_PER_WIDE_INT
1205 && v == (HOST_WIDE_INT_1U << bitsize) - 1)
1206 || (bitsize == HOST_BITS_PER_WIDE_INT
1207 && v == HOST_WIDE_INT_M1U))
1208 all_one = 1;
1210 value = lshift_value (mode, v, bitnum);
1212 else
1214 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1215 && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1217 if (GET_MODE (value) != mode)
1218 value = convert_to_mode (mode, value, 1);
1220 if (must_and)
1221 value = expand_binop (mode, and_optab, value,
1222 mask_rtx (mode, 0, bitsize, 0),
1223 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1224 if (bitnum > 0)
1225 value = expand_shift (LSHIFT_EXPR, mode, value,
1226 bitnum, NULL_RTX, 1);
1229 if (reverse)
1230 value = flip_storage_order (mode, value);
1232 /* Now clear the chosen bits in OP0,
1233 except that if VALUE is -1 we need not bother. */
1234 /* We keep the intermediates in registers to allow CSE to combine
1235 consecutive bitfield assignments. */
1237 temp = force_reg (mode, op0);
1239 if (! all_one)
1241 rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
1242 if (reverse)
1243 mask = flip_storage_order (mode, mask);
1244 temp = expand_binop (mode, and_optab, temp, mask,
1245 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1246 temp = force_reg (mode, temp);
1249 /* Now logical-or VALUE into OP0, unless it is zero. */
1251 if (! all_zero)
1253 temp = expand_binop (mode, ior_optab, temp, value,
1254 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1255 temp = force_reg (mode, temp);
1258 if (op0 != temp)
1260 op0 = copy_rtx (op0);
1261 emit_move_insn (op0, temp);
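/* A minimal C-level sketch of the clear-then-or sequence above, with purely
   illustrative numbers (a 3-bit field holding the value 5 at bit 8 of a
   32-bit word); the real code emits the equivalent AND/IOR rtl instead.  */
#if 0
static unsigned int
store_fixed_bit_field_example (unsigned int op0)
{
  unsigned int mask = 0x7u << 8;	/* bits 8..10 of the word */
  return (op0 & ~mask) | (5u << 8);	/* clear the field, then insert 5 */
}
#endif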
1265 /* Store a bit field that is split across multiple accessible memory objects.
1267 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1268 BITSIZE is the field width; BITPOS the position of its first bit
1269 (within the word).
1270 VALUE is the value to store.
1272 If REVERSE is true, the store is to be done in reverse order.
1274 This does not yet handle fields wider than BITS_PER_WORD. */
1276 static void
1277 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1278 unsigned HOST_WIDE_INT bitpos,
1279 unsigned HOST_WIDE_INT bitregion_start,
1280 unsigned HOST_WIDE_INT bitregion_end,
1281 rtx value, bool reverse)
1283 unsigned int unit, total_bits, bitsdone = 0;
1285 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1286 much at a time. */
1287 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1288 unit = BITS_PER_WORD;
1289 else
1290 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1292 /* If OP0 is a memory with a mode, then UNIT must not be larger than
1293 OP0's mode as well. Otherwise, store_fixed_bit_field will call us
1294 again, and we will mutually recurse forever. */
1295 if (MEM_P (op0) && GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1296 unit = MIN (unit, GET_MODE_BITSIZE (GET_MODE (op0)));
1298 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1299 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1300 that VALUE might be a floating-point constant. */
1301 if (CONSTANT_P (value) && !CONST_INT_P (value))
1303 rtx word = gen_lowpart_common (word_mode, value);
1305 if (word && (value != word))
1306 value = word;
1307 else
1308 value = gen_lowpart_common (word_mode,
1309 force_reg (GET_MODE (value) != VOIDmode
1310 ? GET_MODE (value)
1311 : word_mode, value));
1314 total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1316 while (bitsdone < bitsize)
1318 unsigned HOST_WIDE_INT thissize;
1319 unsigned HOST_WIDE_INT thispos;
1320 unsigned HOST_WIDE_INT offset;
1321 rtx part, word;
1323 offset = (bitpos + bitsdone) / unit;
1324 thispos = (bitpos + bitsdone) % unit;
1326 /* When the region of bytes we can touch is restricted, decrease
1327 UNIT close to the end of the region as needed. If op0 is a REG
1328 or SUBREG of REG, don't do this, as there can't be data races
1329 on a register and we can expand shorter code in some cases. */
1330 if (bitregion_end
1331 && unit > BITS_PER_UNIT
1332 && bitpos + bitsdone - thispos + unit > bitregion_end + 1
1333 && !REG_P (op0)
1334 && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1336 unit = unit / 2;
1337 continue;
1340 /* THISSIZE must not overrun a word boundary. Otherwise,
1341 store_fixed_bit_field will call us again, and we will mutually
1342 recurse forever. */
1343 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1344 thissize = MIN (thissize, unit - thispos);
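/* For instance, storing a 10-bit field at BITPOS 30 with UNIT == 32 is done
   in two pieces: first THISSIZE = MIN (10, 32 - 30) == 2 bits at THISPOS 30,
   then the remaining 8 bits at THISPOS 0 of the following unit.  */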
1346 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1348 /* Fetch successively less significant portions. */
1349 if (CONST_INT_P (value))
1350 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1351 >> (bitsize - bitsdone - thissize))
1352 & ((HOST_WIDE_INT_1 << thissize) - 1));
1353 /* Likewise, but the source is little-endian. */
1354 else if (reverse)
1355 part = extract_fixed_bit_field (word_mode, value, thissize,
1356 bitsize - bitsdone - thissize,
1357 NULL_RTX, 1, false);
1358 else
1360 int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1361 /* The args are chosen so that the last part includes the
1362 lsb. Give extract_bit_field the value it needs (with
1363 endianness compensation) to fetch the piece we want. */
1364 part = extract_fixed_bit_field (word_mode, value, thissize,
1365 total_bits - bitsize + bitsdone,
1366 NULL_RTX, 1, false);
1369 else
1371 /* Fetch successively more significant portions. */
1372 if (CONST_INT_P (value))
1373 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1374 >> bitsdone)
1375 & ((HOST_WIDE_INT_1 << thissize) - 1));
1376 /* Likewise, but the source is big-endian. */
1377 else if (reverse)
1378 part = extract_fixed_bit_field (word_mode, value, thissize,
1379 total_bits - bitsdone - thissize,
1380 NULL_RTX, 1, false);
1381 else
1382 part = extract_fixed_bit_field (word_mode, value, thissize,
1383 bitsdone, NULL_RTX, 1, false);
1386 /* If OP0 is a register, then handle OFFSET here. */
1387 if (SUBREG_P (op0) || REG_P (op0))
1389 machine_mode op0_mode = GET_MODE (op0);
1390 if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1391 word = offset ? const0_rtx : op0;
1392 else
1393 word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
1394 GET_MODE (op0));
1395 offset &= BITS_PER_WORD / unit - 1;
1397 else
1398 word = op0;
1400 /* OFFSET is in UNITs, and UNIT is in bits. If WORD is const0_rtx,
1401 it is just an out-of-bounds access. Ignore it. */
1402 if (word != const0_rtx)
1403 store_fixed_bit_field (word, thissize, offset * unit + thispos,
1404 bitregion_start, bitregion_end, part,
1405 reverse);
1406 bitsdone += thissize;
1410 /* A subroutine of extract_bit_field_1 that converts return value X
1411 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1412 to extract_bit_field. */
1414 static rtx
1415 convert_extracted_bit_field (rtx x, machine_mode mode,
1416 machine_mode tmode, bool unsignedp)
1418 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1419 return x;
1421 /* If the x mode is not a scalar integral, first convert to the
1422 integer mode of that size and then access it as a floating-point
1423 value via a SUBREG. */
1424 if (!SCALAR_INT_MODE_P (tmode))
1426 scalar_int_mode int_mode = int_mode_for_mode (tmode).require ();
1427 x = convert_to_mode (int_mode, x, unsignedp);
1428 x = force_reg (int_mode, x);
1429 return gen_lowpart (tmode, x);
1432 return convert_to_mode (tmode, x, unsignedp);
1435 /* Try to use an ext(z)v pattern to extract a field from OP0.
1436 Return the extracted value on success, otherwise return null.
1437 EXT_MODE is the mode of the extraction and the other arguments
1438 are as for extract_bit_field. */
1440 static rtx
1441 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1442 unsigned HOST_WIDE_INT bitsize,
1443 unsigned HOST_WIDE_INT bitnum,
1444 int unsignedp, rtx target,
1445 machine_mode mode, machine_mode tmode)
1447 struct expand_operand ops[4];
1448 rtx spec_target = target;
1449 rtx spec_target_subreg = 0;
1450 machine_mode ext_mode = extv->field_mode;
1451 unsigned unit = GET_MODE_BITSIZE (ext_mode);
1453 if (bitsize == 0 || unit < bitsize)
1454 return NULL_RTX;
1456 if (MEM_P (op0))
1457 /* Get a reference to the first byte of the field. */
1458 op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1459 &bitnum);
1460 else
1462 /* Convert from counting within OP0 to counting in EXT_MODE. */
1463 if (BYTES_BIG_ENDIAN)
1464 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1466 /* If op0 is a register, we need it in EXT_MODE to make it
1467 acceptable to the format of ext(z)v. */
1468 if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1469 return NULL_RTX;
1470 if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1471 op0 = gen_lowpart_SUBREG (ext_mode, op0);
1474 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1475 "backwards" from the size of the unit we are extracting from.
1476 Otherwise, we count bits from the most significant on a
1477 BYTES/BITS_BIG_ENDIAN machine. */
1479 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1480 bitnum = unit - bitsize - bitnum;
1482 if (target == 0)
1483 target = spec_target = gen_reg_rtx (tmode);
1485 if (GET_MODE (target) != ext_mode)
1487 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1488 between the mode of the extraction (word_mode) and the target
1489 mode. Instead, create a temporary and use convert_move to set
1490 the target. */
1491 if (REG_P (target)
1492 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1494 target = gen_lowpart (ext_mode, target);
1495 if (GET_MODE_PRECISION (ext_mode)
1496 > GET_MODE_PRECISION (GET_MODE (spec_target)))
1497 spec_target_subreg = target;
1499 else
1500 target = gen_reg_rtx (ext_mode);
1503 create_output_operand (&ops[0], target, ext_mode);
1504 create_fixed_operand (&ops[1], op0);
1505 create_integer_operand (&ops[2], bitsize);
1506 create_integer_operand (&ops[3], bitnum);
1507 if (maybe_expand_insn (extv->icode, 4, ops))
1509 target = ops[0].value;
1510 if (target == spec_target)
1511 return target;
1512 if (target == spec_target_subreg)
1513 return spec_target;
1514 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1516 return NULL_RTX;
1519 /* A subroutine of extract_bit_field, with the same arguments.
1520 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1521 if we can find no other means of implementing the operation.
1522 If FALLBACK_P is false, return NULL instead. */
1524 static rtx
1525 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1526 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1527 machine_mode mode, machine_mode tmode,
1528 bool reverse, bool fallback_p, rtx *alt_rtl)
1530 rtx op0 = str_rtx;
1531 machine_mode mode1;
1533 if (tmode == VOIDmode)
1534 tmode = mode;
1536 while (GET_CODE (op0) == SUBREG)
1538 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1539 op0 = SUBREG_REG (op0);
1542 /* If we have an out-of-bounds access to a register, just return an
1543 uninitialized register of the required mode. This can occur if the
1544 source code contains an out-of-bounds access to a small array. */
1545 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1546 return gen_reg_rtx (tmode);
1548 if (REG_P (op0)
1549 && mode == GET_MODE (op0)
1550 && bitnum == 0
1551 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1553 if (reverse)
1554 op0 = flip_storage_order (mode, op0);
1555 /* We're trying to extract a full register from itself. */
1556 return op0;
1559 /* First try to check for vector from vector extractions. */
1560 if (VECTOR_MODE_P (GET_MODE (op0))
1561 && !MEM_P (op0)
1562 && VECTOR_MODE_P (tmode)
1563 && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (tmode))
1565 machine_mode new_mode = GET_MODE (op0);
1566 if (GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode))
1568 new_mode = mode_for_vector (GET_MODE_INNER (tmode),
1569 GET_MODE_BITSIZE (GET_MODE (op0))
1570 / GET_MODE_UNIT_BITSIZE (tmode));
1571 if (!VECTOR_MODE_P (new_mode)
1572 || GET_MODE_SIZE (new_mode) != GET_MODE_SIZE (GET_MODE (op0))
1573 || GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode)
1574 || !targetm.vector_mode_supported_p (new_mode))
1575 new_mode = VOIDmode;
1577 if (new_mode != VOIDmode
1578 && (convert_optab_handler (vec_extract_optab, new_mode, tmode)
1579 != CODE_FOR_nothing)
1580 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (tmode)
1581 == bitnum / GET_MODE_BITSIZE (tmode)))
1583 struct expand_operand ops[3];
1584 machine_mode outermode = new_mode;
1585 machine_mode innermode = tmode;
1586 enum insn_code icode
1587 = convert_optab_handler (vec_extract_optab, outermode, innermode);
1588 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1590 if (new_mode != GET_MODE (op0))
1591 op0 = gen_lowpart (new_mode, op0);
1592 create_output_operand (&ops[0], target, innermode);
1593 ops[0].target = 1;
1594 create_input_operand (&ops[1], op0, outermode);
1595 create_integer_operand (&ops[2], pos);
1596 if (maybe_expand_insn (icode, 3, ops))
1598 if (alt_rtl && ops[0].target)
1599 *alt_rtl = target;
1600 target = ops[0].value;
1601 if (GET_MODE (target) != mode)
1602 return gen_lowpart (tmode, target);
1603 return target;
1608 /* See if we can get a better vector mode before extracting. */
1609 if (VECTOR_MODE_P (GET_MODE (op0))
1610 && !MEM_P (op0)
1611 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1613 machine_mode new_mode;
1615 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1616 new_mode = MIN_MODE_VECTOR_FLOAT;
1617 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1618 new_mode = MIN_MODE_VECTOR_FRACT;
1619 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1620 new_mode = MIN_MODE_VECTOR_UFRACT;
1621 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1622 new_mode = MIN_MODE_VECTOR_ACCUM;
1623 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1624 new_mode = MIN_MODE_VECTOR_UACCUM;
1625 else
1626 new_mode = MIN_MODE_VECTOR_INT;
1628 FOR_EACH_MODE_FROM (new_mode, new_mode)
1629 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1630 && GET_MODE_UNIT_SIZE (new_mode) == GET_MODE_SIZE (tmode)
1631 && targetm.vector_mode_supported_p (new_mode))
1632 break;
1633 if (new_mode != VOIDmode)
1634 op0 = gen_lowpart (new_mode, op0);
1637 /* Use vec_extract patterns for extracting parts of vectors whenever
1638 available. */
1639 if (VECTOR_MODE_P (GET_MODE (op0))
1640 && !MEM_P (op0)
1641 && (convert_optab_handler (vec_extract_optab, GET_MODE (op0),
1642 GET_MODE_INNER (GET_MODE (op0)))
1643 != CODE_FOR_nothing)
1644 && ((bitnum + bitsize - 1) / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
1645 == bitnum / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
1647 struct expand_operand ops[3];
1648 machine_mode outermode = GET_MODE (op0);
1649 machine_mode innermode = GET_MODE_INNER (outermode);
1650 enum insn_code icode
1651 = convert_optab_handler (vec_extract_optab, outermode, innermode);
1652 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1654 create_output_operand (&ops[0], target, innermode);
1655 ops[0].target = 1;
1656 create_input_operand (&ops[1], op0, outermode);
1657 create_integer_operand (&ops[2], pos);
1658 if (maybe_expand_insn (icode, 3, ops))
1660 if (alt_rtl && ops[0].target)
1661 *alt_rtl = target;
1662 target = ops[0].value;
1663 if (GET_MODE (target) != mode)
1664 return gen_lowpart (tmode, target);
1665 return target;
1669 /* Make sure we are playing with integral modes. Pun with subregs
1670 if we aren't. */
1671 opt_scalar_int_mode opt_imode = int_mode_for_mode (GET_MODE (op0));
1672 scalar_int_mode imode;
1673 if (!opt_imode.exists (&imode) || imode != GET_MODE (op0))
1675 if (MEM_P (op0))
1676 op0 = adjust_bitfield_address_size (op0, opt_imode.else_blk (),
1677 0, MEM_SIZE (op0));
1678 else if (opt_imode.exists (&imode))
1680 op0 = gen_lowpart (imode, op0);
1682 /* If we got a SUBREG, force it into a register since we
1683 aren't going to be able to do another SUBREG on it. */
1684 if (GET_CODE (op0) == SUBREG)
1685 op0 = force_reg (imode, op0);
1687 else
1689 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1690 rtx mem = assign_stack_temp (GET_MODE (op0), size);
1691 emit_move_insn (mem, op0);
1692 op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1696 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1697 If that's wrong, the solution is to test for it and set TARGET to 0
1698 if needed. */
1700 /* Get the mode of the field to use for atomic access or subreg
1701 conversion. */
1702 mode1 = mode;
1703 if (SCALAR_INT_MODE_P (tmode))
1705 machine_mode try_mode = mode_for_size (bitsize,
1706 GET_MODE_CLASS (tmode), 0);
1707 if (try_mode != BLKmode)
1708 mode1 = try_mode;
1710 gcc_assert (mode1 != BLKmode);
1712 /* Extraction of a full MODE1 value can be done with a subreg as long
1713 as the least significant bit of the value is the least significant
1714 bit of either OP0 or a word of OP0. */
1715 if (!MEM_P (op0)
1716 && !reverse
1717 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1718 && bitsize == GET_MODE_BITSIZE (mode1)
1719 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1721 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1722 bitnum / BITS_PER_UNIT);
1723 if (sub)
1724 return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1727 /* Extraction of a full MODE1 value can be done with a load as long as
1728 the field is on a byte boundary and is sufficiently aligned. */
1729 if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1731 op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1732 if (reverse)
1733 op0 = flip_storage_order (mode1, op0);
1734 return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1737 /* Handle fields bigger than a word. */
1739 if (bitsize > BITS_PER_WORD)
1741 /* Here we transfer the words of the field
1742 in the order least significant first.
1743 This is because the most significant word is the one which may
1744 be less than full. */
1746 const bool backwards = WORDS_BIG_ENDIAN;
1747 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1748 unsigned int i;
1749 rtx_insn *last;
1751 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1752 target = gen_reg_rtx (mode);
1754 /* In case we're about to clobber a base register or something
1755 (see gcc.c-torture/execute/20040625-1.c). */
1756 if (reg_mentioned_p (target, str_rtx))
1757 target = gen_reg_rtx (mode);
1759 /* Indicate for flow that the entire target reg is being set. */
1760 emit_clobber (target);
1762 last = get_last_insn ();
1763 for (i = 0; i < nwords; i++)
1765 /* If I is 0, use the low-order word in both field and target;
1766 if I is 1, use the next to lowest word; and so on. */
1767 /* Word number in TARGET to use. */
1768 unsigned int wordnum
1769 = (backwards
1770 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1771 : i);
1772 /* Offset from start of field in OP0. */
1773 unsigned int bit_offset = (backwards ^ reverse
1774 ? MAX ((int) bitsize - ((int) i + 1)
1775 * BITS_PER_WORD,
1776 0)
1777 : (int) i * BITS_PER_WORD);
1778 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1779 rtx result_part
1780 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1781 bitsize - i * BITS_PER_WORD),
1782 bitnum + bit_offset, 1, target_part,
1783 mode, word_mode, reverse, fallback_p, NULL);
1785 gcc_assert (target_part);
1786 if (!result_part)
1788 delete_insns_since (last);
1789 return NULL;
1792 if (result_part != target_part)
1793 emit_move_insn (target_part, result_part);
1796 if (unsignedp)
1798 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1799 need to be zero'd out. */
1800 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1802 unsigned int i, total_words;
1804 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1805 for (i = nwords; i < total_words; i++)
1806 emit_move_insn
1807 (operand_subword (target,
1808 backwards ? total_words - i - 1 : i,
1809 1, VOIDmode),
1810 const0_rtx);
1812 return target;
1815 /* Signed bit field: sign-extend with two arithmetic shifts. */
1816 target = expand_shift (LSHIFT_EXPR, mode, target,
1817 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1818 return expand_shift (RSHIFT_EXPR, mode, target,
1819 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1822 /* If OP0 is a multi-word register, narrow it to the affected word.
1823 If the region spans two words, defer to extract_split_bit_field. */
1824 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1826 if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1828 if (!fallback_p)
1829 return NULL_RTX;
1830 target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp,
1831 reverse);
1832 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1834 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1835 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1836 bitnum %= BITS_PER_WORD;
1839 /* From here on we know the desired field is smaller than a word.
1840 If OP0 is a register, it too fits within a word. */
1841 enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1842 extraction_insn extv;
1843 if (!MEM_P (op0)
1844 && !reverse
1845 /* ??? We could limit the structure size to the part of OP0 that
1846 contains the field, with appropriate checks for endianness
1847 and TRULY_NOOP_TRUNCATION. */
1848 && get_best_reg_extraction_insn (&extv, pattern,
1849 GET_MODE_BITSIZE (GET_MODE (op0)),
1850 tmode))
1852 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1853 unsignedp, target, mode,
1854 tmode);
1855 if (result)
1856 return result;
1859 /* If OP0 is a memory, try copying it to a register and seeing if a
1860 cheap register alternative is available. */
1861 if (MEM_P (op0) && !reverse)
1863 if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1864 tmode))
1866 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1867 bitnum, unsignedp,
1868 target, mode,
1869 tmode);
1870 if (result)
1871 return result;
1874 rtx_insn *last = get_last_insn ();
1876 /* Try loading part of OP0 into a register and extracting the
1877 bitfield from that. */
1878 unsigned HOST_WIDE_INT bitpos;
1879 rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1880 0, 0, tmode, &bitpos);
1881 if (xop0)
1883 xop0 = copy_to_reg (xop0);
1884 rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1885 unsignedp, target,
1886 mode, tmode, reverse, false, NULL);
1887 if (result)
1888 return result;
1889 delete_insns_since (last);
1893 if (!fallback_p)
1894 return NULL;
1896 /* Find a correspondingly-sized integer field, so we can apply
1897 shifts and masks to it. */
1898 scalar_int_mode int_mode;
1899 if (!int_mode_for_mode (tmode).exists (&int_mode))
1900 /* If this fails, we should probably push op0 out to memory and then
1901 do a load. */
1902 int_mode = int_mode_for_mode (mode).require ();
1904 target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum, target,
1905 unsignedp, reverse);
1907 /* Complex values must be reversed piecewise, so we need to undo the global
1908 reversal, convert to the complex mode and reverse again. */
1909 if (reverse && COMPLEX_MODE_P (tmode))
1911 target = flip_storage_order (int_mode, target);
1912 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1913 target = flip_storage_order (tmode, target);
1915 else
1916 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1918 return target;
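/* Purely illustrative, standalone sketch (plain C, not rtl; the helper
   name is made up) of the "sign-extend with two shifts" idiom used by
   the signed case above: shift the BITSIZE-bit field's msb up to the
   word's msb, then shift back down arithmetically so the sign bit is
   replicated.  Assumes a two's-complement target, as the rtl expansion
   above does.  */

static long
sketch_sign_extend_low_bits (unsigned long val, int bitsize)
{
  int shift = (int) (sizeof (unsigned long) * 8) - bitsize;
  return ((long) (val << shift)) >> shift;
}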
1921 /* Generate code to extract a bit-field from STR_RTX
1922 containing BITSIZE bits, starting at BITNUM,
1923 and put it in TARGET if possible (if TARGET is nonzero).
1924 Regardless of TARGET, we return the rtx for where the value is placed.
1926 STR_RTX is the structure containing the byte (a REG or MEM).
1927 UNSIGNEDP is nonzero if this is an unsigned bit field.
1928 MODE is the natural mode of the field value once extracted.
1929 TMODE is the mode the caller would like the value to have;
1930 but the value may be returned with type MODE instead.
1932 If REVERSE is true, the extraction is to be done in reverse order.
1934 If a TARGET is specified and we can store in it at no extra cost,
1935 we do so, and return TARGET.
1936 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1937 if they are equally easy. */
1939 rtx
1940 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1941 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1942 machine_mode mode, machine_mode tmode, bool reverse,
1943 rtx *alt_rtl)
1945 machine_mode mode1;
1947 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1948 if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1949 mode1 = GET_MODE (str_rtx);
1950 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1951 mode1 = GET_MODE (target);
1952 else
1953 mode1 = tmode;
1955 if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, mode1, 0, 0))
1957 /* Extraction of a full MODE1 value can be done with a simple load.
1958 We know here that the field can be accessed with one single
1959 instruction. For targets that support unaligned memory,
1960 an unaligned access may be necessary. */
1961 if (bitsize == GET_MODE_BITSIZE (mode1))
1963 rtx result = adjust_bitfield_address (str_rtx, mode1,
1964 bitnum / BITS_PER_UNIT);
1965 if (reverse)
1966 result = flip_storage_order (mode1, result);
1967 gcc_assert (bitnum % BITS_PER_UNIT == 0);
1968 return convert_extracted_bit_field (result, mode, tmode, unsignedp);
1971 str_rtx = narrow_bit_field_mem (str_rtx, mode1, bitsize, bitnum,
1972 &bitnum);
1973 gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (mode1));
1974 str_rtx = copy_to_reg (str_rtx);
1977 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1978 target, mode, tmode, reverse, true, alt_rtl);
1981 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1982 from bit BITNUM of OP0.
1984 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1985 If REVERSE is true, the extraction is to be done in reverse order.
1987 If TARGET is nonzero, attempts to store the value there
1988 and return TARGET, but this is not guaranteed.
1989 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
1991 static rtx
1992 extract_fixed_bit_field (machine_mode tmode, rtx op0,
1993 unsigned HOST_WIDE_INT bitsize,
1994 unsigned HOST_WIDE_INT bitnum, rtx target,
1995 int unsignedp, bool reverse)
1997 if (MEM_P (op0))
1999 machine_mode mode
2000 = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), word_mode,
2001 MEM_VOLATILE_P (op0));
2003 if (mode == VOIDmode)
2004 /* The only way this should occur is if the field spans word
2005 boundaries. */
2006 return extract_split_bit_field (op0, bitsize, bitnum, unsignedp,
2007 reverse);
2009 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
2012 return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
2013 target, unsignedp, reverse);
2016 /* Helper function for extract_fixed_bit_field, extracts
2017 the bit field always using the MODE of OP0. */
2019 static rtx
2020 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0,
2021 unsigned HOST_WIDE_INT bitsize,
2022 unsigned HOST_WIDE_INT bitnum, rtx target,
2023 int unsignedp, bool reverse)
2025 machine_mode mode = GET_MODE (op0);
2026 gcc_assert (SCALAR_INT_MODE_P (mode));
2028 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
2029 for invalid input, such as extract equivalent of f5 from
2030 gcc.dg/pr48335-2.c. */
2032 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2033 /* BITNUM is the distance between our msb and that of OP0.
2034 Convert it to the distance from the lsb. */
2035 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
2037 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
2038 We have reduced the big-endian case to the little-endian case. */
2039 if (reverse)
2040 op0 = flip_storage_order (mode, op0);
2042 if (unsignedp)
2044 if (bitnum)
2046 /* If the field does not already start at the lsb,
2047 shift it so it does. */
2048 /* Maybe propagate the target for the shift. */
2049 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2050 if (tmode != mode)
2051 subtarget = 0;
2052 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
2054 /* Convert the value to the desired mode. */
2055 if (mode != tmode)
2056 op0 = convert_to_mode (tmode, op0, 1);
2058 /* Unless the msb of the field used to be the msb when we shifted,
2059 mask out the upper bits. */
2061 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
2062 return expand_binop (GET_MODE (op0), and_optab, op0,
2063 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
2064 target, 1, OPTAB_LIB_WIDEN);
2065 return op0;
2068 /* To extract a signed bit-field, first shift its msb to the msb of the word,
2069 then arithmetic-shift its lsb to the lsb of the word. */
2070 op0 = force_reg (mode, op0);
2072 /* Find the narrowest integer mode that contains the field. */
2074 FOR_EACH_MODE_IN_CLASS (mode, MODE_INT)
2075 if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
2077 op0 = convert_to_mode (mode, op0, 0);
2078 break;
2081 if (mode != tmode)
2082 target = 0;
2084 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2086 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2087 /* Maybe propagate the target for the shift. */
2088 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2089 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2092 return expand_shift (RSHIFT_EXPR, mode, op0,
2093 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
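/* Purely illustrative, standalone sketch (plain C, not rtl; the helper
   name is made up) of what the unsigned path above computes: shift the
   field down so it starts at the lsb, then mask off everything above
   BITSIZE bits.  BITNUM is the lsb-relative bit position, as in the
   little-endian case above.  */

static unsigned long
sketch_extract_unsigned_field (unsigned long word, int bitnum, int bitsize)
{
  unsigned long mask
    = (bitsize >= (int) (sizeof (unsigned long) * 8)
       ? ~0UL : (1UL << bitsize) - 1);
  return (word >> bitnum) & mask;
}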
2096 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2097 VALUE << BITPOS. */
2099 static rtx
2100 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2101 int bitpos)
2103 return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2106 /* Extract a bit field that is split across two words
2107 and return an RTX for the result.
2109 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2110 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2111 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2113 If REVERSE is true, the extraction is to be done in reverse order. */
2115 static rtx
2116 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
2117 unsigned HOST_WIDE_INT bitpos, int unsignedp,
2118 bool reverse)
2120 unsigned int unit;
2121 unsigned int bitsdone = 0;
2122 rtx result = NULL_RTX;
2123 int first = 1;
2125 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
2126 much at a time. */
2127 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2128 unit = BITS_PER_WORD;
2129 else
2130 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2132 while (bitsdone < bitsize)
2134 unsigned HOST_WIDE_INT thissize;
2135 rtx part, word;
2136 unsigned HOST_WIDE_INT thispos;
2137 unsigned HOST_WIDE_INT offset;
2139 offset = (bitpos + bitsdone) / unit;
2140 thispos = (bitpos + bitsdone) % unit;
2142 /* THISSIZE must not overrun a word boundary. Otherwise,
2143 extract_fixed_bit_field will call us again, and we will mutually
2144 recurse forever. */
2145 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2146 thissize = MIN (thissize, unit - thispos);
2148 /* If OP0 is a register, then handle OFFSET here. */
2149 if (SUBREG_P (op0) || REG_P (op0))
2151 word = operand_subword_force (op0, offset, GET_MODE (op0));
2152 offset = 0;
2154 else
2155 word = op0;
2157 /* Extract the parts in bit-counting order,
2158 whose meaning is determined by BYTES_PER_UNIT.
2159 OFFSET is in UNITs, and UNIT is in bits. */
2160 part = extract_fixed_bit_field (word_mode, word, thissize,
2161 offset * unit + thispos, 0, 1, reverse);
2162 bitsdone += thissize;
2164 /* Shift this part into place for the result. */
2165 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2167 if (bitsize != bitsdone)
2168 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2169 bitsize - bitsdone, 0, 1);
2171 else
2173 if (bitsdone != thissize)
2174 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2175 bitsdone - thissize, 0, 1);
2178 if (first)
2179 result = part;
2180 else
2181 /* Combine the parts with bitwise or. This works
2182 because we extracted each part as an unsigned bit field. */
2183 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2184 OPTAB_LIB_WIDEN);
2186 first = 0;
2189 /* Unsigned bit field: we are done. */
2190 if (unsignedp)
2191 return result;
2192 /* Signed bit field: sign-extend with two arithmetic shifts. */
2193 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2194 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2195 return expand_shift (RSHIFT_EXPR, word_mode, result,
2196 BITS_PER_WORD - bitsize, NULL_RTX, 0);
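/* Purely illustrative, standalone sketch (plain C, not rtl; the helper
   name is made up) of the strategy above for a field that crosses one
   32-bit word boundary: extract an unsigned part from each word and
   combine them with inclusive or.  BITPOS counts from bit 0 of WORDS[0]
   using little-endian bit numbering; BITSIZE is at most 64 and the field
   is assumed to span no more than two words.  */

static unsigned long long
sketch_extract_split (const unsigned int *words, unsigned int bitpos,
		      unsigned int bitsize)
{
  const unsigned int *p = words + bitpos / 32;
  unsigned int lsb = bitpos % 32;
  unsigned int lowbits = 32 - lsb;	/* Bits available in the first word.  */
  unsigned long long mask
    = bitsize < 64 ? (1ULL << bitsize) - 1 : ~0ULL;
  unsigned long long part = p[0] >> lsb;

  if (bitsize > lowbits)
    /* Fetch the remaining bits from the next word and or them in,
       just as the loop above does one UNIT at a time.  */
    part |= (unsigned long long) p[1] << lowbits;
  return part & mask;
}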
2199 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2200 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2201 MODE, fill the upper bits with zeros. Fail if the layout of either
2202 mode is unknown (as for CC modes) or if the extraction would involve
2203 unprofitable mode punning. Return the value on success, otherwise
2204 return null.
2206 This is different from gen_lowpart* in these respects:
2208 - the returned value must always be considered an rvalue
2210 - when MODE is wider than SRC_MODE, the extraction involves
2211 a zero extension
2213 - when MODE is smaller than SRC_MODE, the extraction involves
2214 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2216 In other words, this routine performs a computation, whereas the
2217 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2218 operations. */
2220 rtx
2221 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2223 machine_mode int_mode, src_int_mode;
2225 if (mode == src_mode)
2226 return src;
2228 if (CONSTANT_P (src))
2230 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2231 fails, it will happily create (subreg (symbol_ref)) or similar
2232 invalid SUBREGs. */
2233 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2234 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2235 if (ret)
2236 return ret;
2238 if (GET_MODE (src) == VOIDmode
2239 || !validate_subreg (mode, src_mode, src, byte))
2240 return NULL_RTX;
2242 src = force_reg (GET_MODE (src), src);
2243 return gen_rtx_SUBREG (mode, src, byte);
2246 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2247 return NULL_RTX;
2249 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2250 && MODES_TIEABLE_P (mode, src_mode))
2252 rtx x = gen_lowpart_common (mode, src);
2253 if (x)
2254 return x;
2257 if (!int_mode_for_mode (src_mode).exists (&src_int_mode)
2258 || !int_mode_for_mode (mode).exists (&int_mode))
2259 return NULL_RTX;
2261 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2262 return NULL_RTX;
2263 if (!MODES_TIEABLE_P (int_mode, mode))
2264 return NULL_RTX;
2266 src = gen_lowpart (src_int_mode, src);
2267 src = convert_modes (int_mode, src_int_mode, src, true);
2268 src = gen_lowpart (mode, src);
2269 return src;
2272 /* Add INC into TARGET. */
2274 void
2275 expand_inc (rtx target, rtx inc)
2277 rtx value = expand_binop (GET_MODE (target), add_optab,
2278 target, inc,
2279 target, 0, OPTAB_LIB_WIDEN);
2280 if (value != target)
2281 emit_move_insn (target, value);
2284 /* Subtract DEC from TARGET. */
2286 void
2287 expand_dec (rtx target, rtx dec)
2289 rtx value = expand_binop (GET_MODE (target), sub_optab,
2290 target, dec,
2291 target, 0, OPTAB_LIB_WIDEN);
2292 if (value != target)
2293 emit_move_insn (target, value);
2296 /* Output a shift instruction for expression code CODE,
2297 with SHIFTED being the rtx for the value to shift,
2298 and AMOUNT the rtx for the amount to shift by.
2299 Store the result in the rtx TARGET, if that is convenient.
2300 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2301 Return the rtx for where the value is.
2302 If that cannot be done, abort the compilation unless MAY_FAIL is true,
2303 in which case 0 is returned. */
2305 static rtx
2306 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2307 rtx amount, rtx target, int unsignedp, bool may_fail = false)
2309 rtx op1, temp = 0;
2310 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2311 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2312 optab lshift_optab = ashl_optab;
2313 optab rshift_arith_optab = ashr_optab;
2314 optab rshift_uns_optab = lshr_optab;
2315 optab lrotate_optab = rotl_optab;
2316 optab rrotate_optab = rotr_optab;
2317 machine_mode op1_mode;
2318 machine_mode scalar_mode = mode;
2319 int attempt;
2320 bool speed = optimize_insn_for_speed_p ();
2322 if (VECTOR_MODE_P (mode))
2323 scalar_mode = GET_MODE_INNER (mode);
2324 op1 = amount;
2325 op1_mode = GET_MODE (op1);
2327 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2328 shift amount is a vector, use the vector/vector shift patterns. */
2329 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2331 lshift_optab = vashl_optab;
2332 rshift_arith_optab = vashr_optab;
2333 rshift_uns_optab = vlshr_optab;
2334 lrotate_optab = vrotl_optab;
2335 rrotate_optab = vrotr_optab;
2338 /* We previously detected shift counts computed by NEGATE_EXPR
2339 and shifted in the other direction, but that does not work
2340 on all machines. */
2342 if (SHIFT_COUNT_TRUNCATED)
2344 if (CONST_INT_P (op1)
2345 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2346 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2347 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2348 % GET_MODE_BITSIZE (scalar_mode));
2349 else if (GET_CODE (op1) == SUBREG
2350 && subreg_lowpart_p (op1)
2351 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2352 && SCALAR_INT_MODE_P (GET_MODE (op1)))
2353 op1 = SUBREG_REG (op1);
2356 /* Canonicalize rotates by constant amount. If op1 is bitsize / 2,
2357 prefer left rotation; if op1 is from bitsize / 2 + 1 to
2358 bitsize - 1, use the other direction of rotate with an amount of
2359 1 .. bitsize / 2 - 1 instead. */
2360 if (rotate
2361 && CONST_INT_P (op1)
2362 && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2363 GET_MODE_BITSIZE (scalar_mode) - 1))
2365 op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2366 left = !left;
2367 code = left ? LROTATE_EXPR : RROTATE_EXPR;
2370 /* Rotation of 16-bit values by 8 bits is effectively equivalent to a bswaphi.
2371 Note that this is not the case for bigger values. For instance, a rotation
2372 of 0x01020304 by 16 bits gives 0x03040102, which is different from
2373 0x04030201 (bswapsi). */
2374 if (rotate
2375 && CONST_INT_P (op1)
2376 && INTVAL (op1) == BITS_PER_UNIT
2377 && GET_MODE_SIZE (scalar_mode) == 2
2378 && optab_handler (bswap_optab, HImode) != CODE_FOR_nothing)
2379 return expand_unop (HImode, bswap_optab, shifted, NULL_RTX,
2380 unsignedp);
2382 if (op1 == const0_rtx)
2383 return shifted;
2385 /* Check whether it's cheaper to implement a left shift by a constant
2386 bit count as a sequence of additions. */
2387 if (code == LSHIFT_EXPR
2388 && CONST_INT_P (op1)
2389 && INTVAL (op1) > 0
2390 && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2391 && INTVAL (op1) < MAX_BITS_PER_WORD
2392 && (shift_cost (speed, mode, INTVAL (op1))
2393 > INTVAL (op1) * add_cost (speed, mode))
2394 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2396 int i;
2397 for (i = 0; i < INTVAL (op1); i++)
2399 temp = force_reg (mode, shifted);
2400 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2401 unsignedp, OPTAB_LIB_WIDEN);
2403 return shifted;
2406 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2408 enum optab_methods methods;
2410 if (attempt == 0)
2411 methods = OPTAB_DIRECT;
2412 else if (attempt == 1)
2413 methods = OPTAB_WIDEN;
2414 else
2415 methods = OPTAB_LIB_WIDEN;
2417 if (rotate)
2419 /* Widening does not work for rotation. */
2420 if (methods == OPTAB_WIDEN)
2421 continue;
2422 else if (methods == OPTAB_LIB_WIDEN)
2424 /* If we have been unable to open-code this by a rotation,
2425 do it as the IOR of two shifts. I.e., to rotate A
2426 by N bits, compute
2427 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2428 where C is the bitsize of A.
2430 It is theoretically possible that the target machine might
2431 not be able to perform either shift and hence we would
2432 be making two libcalls rather than just the one for the
2433 shift (similarly if IOR could not be done). We will allow
2434 this extremely unlikely lossage to avoid complicating the
2435 code below. */
2437 rtx subtarget = target == shifted ? 0 : target;
2438 rtx new_amount, other_amount;
2439 rtx temp1;
2441 new_amount = op1;
2442 if (op1 == const0_rtx)
2443 return shifted;
2444 else if (CONST_INT_P (op1))
2445 other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode)
2446 - INTVAL (op1));
2447 else
2449 other_amount
2450 = simplify_gen_unary (NEG, GET_MODE (op1),
2451 op1, GET_MODE (op1));
2452 HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2453 other_amount
2454 = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2455 gen_int_mode (mask, GET_MODE (op1)));
2458 shifted = force_reg (mode, shifted);
2460 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2461 mode, shifted, new_amount, 0, 1);
2462 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2463 mode, shifted, other_amount,
2464 subtarget, 1);
2465 return expand_binop (mode, ior_optab, temp, temp1, target,
2466 unsignedp, methods);
2469 temp = expand_binop (mode,
2470 left ? lrotate_optab : rrotate_optab,
2471 shifted, op1, target, unsignedp, methods);
2473 else if (unsignedp)
2474 temp = expand_binop (mode,
2475 left ? lshift_optab : rshift_uns_optab,
2476 shifted, op1, target, unsignedp, methods);
2478 /* Do arithmetic shifts.
2479 Also, if we are going to widen the operand, we can just as well
2480 use an arithmetic right-shift instead of a logical one. */
2481 if (temp == 0 && ! rotate
2482 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2484 enum optab_methods methods1 = methods;
2486 /* If trying to widen a log shift to an arithmetic shift,
2487 don't accept an arithmetic shift of the same size. */
2488 if (unsignedp)
2489 methods1 = OPTAB_MUST_WIDEN;
2491 /* Arithmetic shift */
2493 temp = expand_binop (mode,
2494 left ? lshift_optab : rshift_arith_optab,
2495 shifted, op1, target, unsignedp, methods1);
2498 /* We used to try extzv here for logical right shifts, but that was
2499 only useful for one machine, the VAX, and caused poor code
2500 generation there for lshrdi3, so the code was deleted and a
2501 define_expand for lshrsi3 was added to vax.md. */
2504 gcc_assert (temp != NULL_RTX || may_fail);
2505 return temp;
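/* Purely illustrative, standalone sketch (plain C; the helper name is
   made up) of the rotate-as-two-shifts fallback described in the
   OPTAB_LIB_WIDEN case above: rotating an unsigned C-bit value A left
   by N is (A << N) | (A >> ((-N) & (C - 1))), and the (-N) & (C - 1)
   form keeps the right-shift count well defined even for N == 0.  */

static unsigned int
sketch_rotate_left (unsigned int a, unsigned int n)
{
  const unsigned int c = sizeof (unsigned int) * 8;
  n &= c - 1;
  return (a << n) | (a >> ((0 - n) & (c - 1)));
}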
2508 /* Output a shift instruction for expression code CODE,
2509 with SHIFTED being the rtx for the value to shift,
2510 and AMOUNT the amount to shift by.
2511 Store the result in the rtx TARGET, if that is convenient.
2512 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2513 Return the rtx for where the value is. */
2515 rtx
2516 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2517 int amount, rtx target, int unsignedp)
2519 return expand_shift_1 (code, mode,
2520 shifted, GEN_INT (amount), target, unsignedp);
2523 /* Likewise, but return 0 if that cannot be done. */
2525 static rtx
2526 maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2527 int amount, rtx target, int unsignedp)
2529 return expand_shift_1 (code, mode,
2530 shifted, GEN_INT (amount), target, unsignedp, true);
2533 /* Output a shift instruction for expression code CODE,
2534 with SHIFTED being the rtx for the value to shift,
2535 and AMOUNT the tree for the amount to shift by.
2536 Store the result in the rtx TARGET, if that is convenient.
2537 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2538 Return the rtx for where the value is. */
2540 rtx
2541 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2542 tree amount, rtx target, int unsignedp)
2544 return expand_shift_1 (code, mode,
2545 shifted, expand_normal (amount), target, unsignedp);
2549 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2550 const struct mult_cost *, machine_mode mode);
2551 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2552 const struct algorithm *, enum mult_variant);
2553 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2554 static rtx extract_high_half (machine_mode, rtx);
2555 static rtx expmed_mult_highpart (machine_mode, rtx, rtx, rtx, int, int);
2556 static rtx expmed_mult_highpart_optab (machine_mode, rtx, rtx, rtx,
2557 int, int);
2558 /* Compute and return the best algorithm for multiplying by T.
2559 The algorithm must cost less than cost_limit.
2560 If retval.cost >= COST_LIMIT, no algorithm was found and all
2561 other fields of the returned struct are undefined.
2562 MODE is the machine mode of the multiplication. */
2564 static void
2565 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2566 const struct mult_cost *cost_limit, machine_mode mode)
2568 int m;
2569 struct algorithm *alg_in, *best_alg;
2570 struct mult_cost best_cost;
2571 struct mult_cost new_limit;
2572 int op_cost, op_latency;
2573 unsigned HOST_WIDE_INT orig_t = t;
2574 unsigned HOST_WIDE_INT q;
2575 int maxm, hash_index;
2576 bool cache_hit = false;
2577 enum alg_code cache_alg = alg_zero;
2578 bool speed = optimize_insn_for_speed_p ();
2579 machine_mode imode;
2580 struct alg_hash_entry *entry_ptr;
2582 /* Indicate that no algorithm has been found yet. If none is found,
2583 this value will be returned to indicate failure. */
2584 alg_out->cost.cost = cost_limit->cost + 1;
2585 alg_out->cost.latency = cost_limit->latency + 1;
2587 if (cost_limit->cost < 0
2588 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2589 return;
2591 /* Be prepared for vector modes. */
2592 imode = GET_MODE_INNER (mode);
2594 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2596 /* Restrict the bits of "t" to the multiplication's mode. */
2597 t &= GET_MODE_MASK (imode);
2599 /* t == 1 can be done in zero cost. */
2600 if (t == 1)
2602 alg_out->ops = 1;
2603 alg_out->cost.cost = 0;
2604 alg_out->cost.latency = 0;
2605 alg_out->op[0] = alg_m;
2606 return;
2609 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2610 fail now. */
2611 if (t == 0)
2613 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2614 return;
2615 else
2617 alg_out->ops = 1;
2618 alg_out->cost.cost = zero_cost (speed);
2619 alg_out->cost.latency = zero_cost (speed);
2620 alg_out->op[0] = alg_zero;
2621 return;
2625 /* We'll be needing a couple extra algorithm structures now. */
2627 alg_in = XALLOCA (struct algorithm);
2628 best_alg = XALLOCA (struct algorithm);
2629 best_cost = *cost_limit;
2631 /* Compute the hash index. */
2632 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2634 /* See if we already know what to do for T. */
2635 entry_ptr = alg_hash_entry_ptr (hash_index);
2636 if (entry_ptr->t == t
2637 && entry_ptr->mode == mode
2638 && entry_ptr->speed == speed
2639 && entry_ptr->alg != alg_unknown)
2641 cache_alg = entry_ptr->alg;
2643 if (cache_alg == alg_impossible)
2645 /* The cache tells us that it's impossible to synthesize
2646 multiplication by T within entry_ptr->cost. */
2647 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2648 /* COST_LIMIT is at least as restrictive as the one
2649 recorded in the hash table, in which case we have no
2650 hope of synthesizing a multiplication. Just
2651 return. */
2652 return;
2654 /* If we get here, COST_LIMIT is less restrictive than the
2655 one recorded in the hash table, so we may be able to
2656 synthesize a multiplication. Proceed as if we didn't
2657 have the cache entry. */
2659 else
2661 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2662 /* The cached algorithm shows that this multiplication
2663 requires more cost than COST_LIMIT. Just return. This
2664 way, we don't clobber this cache entry with
2665 alg_impossible but retain useful information. */
2666 return;
2668 cache_hit = true;
2670 switch (cache_alg)
2672 case alg_shift:
2673 goto do_alg_shift;
2675 case alg_add_t_m2:
2676 case alg_sub_t_m2:
2677 goto do_alg_addsub_t_m2;
2679 case alg_add_factor:
2680 case alg_sub_factor:
2681 goto do_alg_addsub_factor;
2683 case alg_add_t2_m:
2684 goto do_alg_add_t2_m;
2686 case alg_sub_t2_m:
2687 goto do_alg_sub_t2_m;
2689 default:
2690 gcc_unreachable ();
2695 /* If we have a group of zero bits at the low-order part of T, try
2696 multiplying by the remaining bits and then doing a shift. */
2698 if ((t & 1) == 0)
2700 do_alg_shift:
2701 m = ctz_or_zero (t); /* m = number of low zero bits */
2702 if (m < maxm)
2704 q = t >> m;
2705 /* The function expand_shift will choose between a shift and
2706 a sequence of additions, so the observed cost is given as
2707 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2708 op_cost = m * add_cost (speed, mode);
2709 if (shift_cost (speed, mode, m) < op_cost)
2710 op_cost = shift_cost (speed, mode, m);
2711 new_limit.cost = best_cost.cost - op_cost;
2712 new_limit.latency = best_cost.latency - op_cost;
2713 synth_mult (alg_in, q, &new_limit, mode);
2715 alg_in->cost.cost += op_cost;
2716 alg_in->cost.latency += op_cost;
2717 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2719 best_cost = alg_in->cost;
2720 std::swap (alg_in, best_alg);
2721 best_alg->log[best_alg->ops] = m;
2722 best_alg->op[best_alg->ops] = alg_shift;
2725 /* See if treating ORIG_T as a signed number yields a better
2726 sequence. Try this sequence only for a negative ORIG_T
2727 as it would be useless for a non-negative ORIG_T. */
2728 if ((HOST_WIDE_INT) orig_t < 0)
2730 /* Shift ORIG_T as follows because a right shift of a
2731 negative-valued signed type is implementation
2732 defined. */
2733 q = ~(~orig_t >> m);
2734 /* The function expand_shift will choose between a shift
2735 and a sequence of additions, so the observed cost is
2736 given as MIN (m * add_cost(speed, mode),
2737 shift_cost(speed, mode, m)). */
2738 op_cost = m * add_cost (speed, mode);
2739 if (shift_cost (speed, mode, m) < op_cost)
2740 op_cost = shift_cost (speed, mode, m);
2741 new_limit.cost = best_cost.cost - op_cost;
2742 new_limit.latency = best_cost.latency - op_cost;
2743 synth_mult (alg_in, q, &new_limit, mode);
2745 alg_in->cost.cost += op_cost;
2746 alg_in->cost.latency += op_cost;
2747 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2749 best_cost = alg_in->cost;
2750 std::swap (alg_in, best_alg);
2751 best_alg->log[best_alg->ops] = m;
2752 best_alg->op[best_alg->ops] = alg_shift;
2756 if (cache_hit)
2757 goto done;
2760 /* If we have an odd number, add or subtract one. */
2761 if ((t & 1) != 0)
2763 unsigned HOST_WIDE_INT w;
2765 do_alg_addsub_t_m2:
2766 for (w = 1; (w & t) != 0; w <<= 1)
2768 /* If T was -1, then W will be zero after the loop. This is another
2769 case where T ends with ...111. Handling this as (T + 1) followed
2770 by a subtract of 1 produces slightly better code and results in
2771 much faster algorithm selection than treating it like the ...0111
2772 case below. */
2773 if (w == 0
2774 || (w > 2
2775 /* Reject the case where t is 3.
2776 Thus we prefer addition in that case. */
2777 && t != 3))
2779 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2781 op_cost = add_cost (speed, mode);
2782 new_limit.cost = best_cost.cost - op_cost;
2783 new_limit.latency = best_cost.latency - op_cost;
2784 synth_mult (alg_in, t + 1, &new_limit, mode);
2786 alg_in->cost.cost += op_cost;
2787 alg_in->cost.latency += op_cost;
2788 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2790 best_cost = alg_in->cost;
2791 std::swap (alg_in, best_alg);
2792 best_alg->log[best_alg->ops] = 0;
2793 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2796 else
2798 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2800 op_cost = add_cost (speed, mode);
2801 new_limit.cost = best_cost.cost - op_cost;
2802 new_limit.latency = best_cost.latency - op_cost;
2803 synth_mult (alg_in, t - 1, &new_limit, mode);
2805 alg_in->cost.cost += op_cost;
2806 alg_in->cost.latency += op_cost;
2807 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2809 best_cost = alg_in->cost;
2810 std::swap (alg_in, best_alg);
2811 best_alg->log[best_alg->ops] = 0;
2812 best_alg->op[best_alg->ops] = alg_add_t_m2;
2816 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2817 quickly with a - a * n for some appropriate constant n. */
2818 m = exact_log2 (-orig_t + 1);
2819 if (m >= 0 && m < maxm)
2821 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2822 /* If the target has a cheap shift-and-subtract insn, use
2823 it in preference to a shift insn followed by a sub insn.
2824 Assume that the shift-and-sub is "atomic" with a latency
2825 equal to its cost; otherwise assume that on superscalar
2826 hardware the shift may be executed concurrently with the
2827 earlier steps in the algorithm. */
2828 if (shiftsub1_cost (speed, mode, m) <= op_cost)
2830 op_cost = shiftsub1_cost (speed, mode, m);
2831 op_latency = op_cost;
2833 else
2834 op_latency = add_cost (speed, mode);
2836 new_limit.cost = best_cost.cost - op_cost;
2837 new_limit.latency = best_cost.latency - op_latency;
2838 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2839 &new_limit, mode);
2841 alg_in->cost.cost += op_cost;
2842 alg_in->cost.latency += op_latency;
2843 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2845 best_cost = alg_in->cost;
2846 std::swap (alg_in, best_alg);
2847 best_alg->log[best_alg->ops] = m;
2848 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2852 if (cache_hit)
2853 goto done;
2856 /* Look for factors of t of the form
2857 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2858 If we find such a factor, we can multiply by t using an algorithm that
2859 multiplies by q, shift the result by m and add/subtract it to itself.
2861 We search for large factors first and loop down, even if large factors
2862 are less probable than small; if we find a large factor we will find a
2863 good sequence quickly, and therefore be able to prune (by decreasing
2864 COST_LIMIT) the search. */
2866 do_alg_addsub_factor:
2867 for (m = floor_log2 (t - 1); m >= 2; m--)
2869 unsigned HOST_WIDE_INT d;
2871 d = (HOST_WIDE_INT_1U << m) + 1;
2872 if (t % d == 0 && t > d && m < maxm
2873 && (!cache_hit || cache_alg == alg_add_factor))
2875 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2876 if (shiftadd_cost (speed, mode, m) <= op_cost)
2877 op_cost = shiftadd_cost (speed, mode, m);
2879 op_latency = op_cost;
2882 new_limit.cost = best_cost.cost - op_cost;
2883 new_limit.latency = best_cost.latency - op_latency;
2884 synth_mult (alg_in, t / d, &new_limit, mode);
2886 alg_in->cost.cost += op_cost;
2887 alg_in->cost.latency += op_latency;
2888 if (alg_in->cost.latency < op_cost)
2889 alg_in->cost.latency = op_cost;
2890 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2892 best_cost = alg_in->cost;
2893 std::swap (alg_in, best_alg);
2894 best_alg->log[best_alg->ops] = m;
2895 best_alg->op[best_alg->ops] = alg_add_factor;
2897 /* Other factors will have been taken care of in the recursion. */
2898 break;
2901 d = (HOST_WIDE_INT_1U << m) - 1;
2902 if (t % d == 0 && t > d && m < maxm
2903 && (!cache_hit || cache_alg == alg_sub_factor))
2905 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2906 if (shiftsub0_cost (speed, mode, m) <= op_cost)
2907 op_cost = shiftsub0_cost (speed, mode, m);
2909 op_latency = op_cost;
2911 new_limit.cost = best_cost.cost - op_cost;
2912 new_limit.latency = best_cost.latency - op_latency;
2913 synth_mult (alg_in, t / d, &new_limit, mode);
2915 alg_in->cost.cost += op_cost;
2916 alg_in->cost.latency += op_latency;
2917 if (alg_in->cost.latency < op_cost)
2918 alg_in->cost.latency = op_cost;
2919 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2921 best_cost = alg_in->cost;
2922 std::swap (alg_in, best_alg);
2923 best_alg->log[best_alg->ops] = m;
2924 best_alg->op[best_alg->ops] = alg_sub_factor;
2926 break;
2929 if (cache_hit)
2930 goto done;
2932 /* Try shift-and-add (load effective address) instructions,
2933 i.e. do a*3, a*5, a*9. */
2934 if ((t & 1) != 0)
2936 do_alg_add_t2_m:
2937 q = t - 1;
2938 m = ctz_hwi (q);
2939 if (q && m < maxm)
2941 op_cost = shiftadd_cost (speed, mode, m);
2942 new_limit.cost = best_cost.cost - op_cost;
2943 new_limit.latency = best_cost.latency - op_cost;
2944 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2946 alg_in->cost.cost += op_cost;
2947 alg_in->cost.latency += op_cost;
2948 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2950 best_cost = alg_in->cost;
2951 std::swap (alg_in, best_alg);
2952 best_alg->log[best_alg->ops] = m;
2953 best_alg->op[best_alg->ops] = alg_add_t2_m;
2956 if (cache_hit)
2957 goto done;
2959 do_alg_sub_t2_m:
2960 q = t + 1;
2961 m = ctz_hwi (q);
2962 if (q && m < maxm)
2964 op_cost = shiftsub0_cost (speed, mode, m);
2965 new_limit.cost = best_cost.cost - op_cost;
2966 new_limit.latency = best_cost.latency - op_cost;
2967 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2969 alg_in->cost.cost += op_cost;
2970 alg_in->cost.latency += op_cost;
2971 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2973 best_cost = alg_in->cost;
2974 std::swap (alg_in, best_alg);
2975 best_alg->log[best_alg->ops] = m;
2976 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2979 if (cache_hit)
2980 goto done;
2983 done:
2984 /* If best_cost has not decreased, we have not found any algorithm. */
2985 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2987 /* We failed to find an algorithm. Record alg_impossible for
2988 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2989 we are asked to find an algorithm for T within the same or
2990 lower COST_LIMIT, we can immediately return to the
2991 caller. */
2992 entry_ptr->t = t;
2993 entry_ptr->mode = mode;
2994 entry_ptr->speed = speed;
2995 entry_ptr->alg = alg_impossible;
2996 entry_ptr->cost = *cost_limit;
2997 return;
3000 /* Cache the result. */
3001 if (!cache_hit)
3003 entry_ptr->t = t;
3004 entry_ptr->mode = mode;
3005 entry_ptr->speed = speed;
3006 entry_ptr->alg = best_alg->op[best_alg->ops];
3007 entry_ptr->cost.cost = best_cost.cost;
3008 entry_ptr->cost.latency = best_cost.latency;
3011 /* If we are getting too long a sequence for `struct algorithm'
3012 to record, make this search fail. */
3013 if (best_alg->ops == MAX_BITS_PER_WORD)
3014 return;
3016 /* Copy the algorithm from temporary space to the space at alg_out.
3017 We avoid using structure assignment because the majority of
3018 best_alg is normally undefined, and this is a critical function. */
3019 alg_out->ops = best_alg->ops + 1;
3020 alg_out->cost = best_cost;
3021 memcpy (alg_out->op, best_alg->op,
3022 alg_out->ops * sizeof *alg_out->op);
3023 memcpy (alg_out->log, best_alg->log,
3024 alg_out->ops * sizeof *alg_out->log);
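/* Purely illustrative, standalone sketch (plain C; the helper name is
   made up) of the kind of sequence synth_mult looks for.
   45 = 5 * 9 = (2^2 + 1) * (2^3 + 1), so two alg_add_factor steps
   compute x * 45 with two shifts and two adds instead of a multiply
   instruction.  */

static unsigned long
sketch_mult_by_45 (unsigned long x)
{
  unsigned long t = x + (x << 2);	/* t = 5 * x	(alg_add_factor, log = 2)  */
  return t + (t << 3);			/* 45 * x	(alg_add_factor, log = 3)  */
}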
3027 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
3028 Try three variations:
3030 - a shift/add sequence based on VAL itself
3031 - a shift/add sequence based on -VAL, followed by a negation
3032 - a shift/add sequence based on VAL - 1, followed by an addition.
3034 Return true if the cheapest of these costs less than MULT_COST,
3035 describing the algorithm in *ALG and final fixup in *VARIANT. */
3037 bool
3038 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
3039 struct algorithm *alg, enum mult_variant *variant,
3040 int mult_cost)
3042 struct algorithm alg2;
3043 struct mult_cost limit;
3044 int op_cost;
3045 bool speed = optimize_insn_for_speed_p ();
3047 /* Fail quickly for impossible bounds. */
3048 if (mult_cost < 0)
3049 return false;
3051 /* Ensure that mult_cost provides a reasonable upper bound.
3052 Any constant multiplication can be performed with less
3053 than 2 * bits additions. */
3054 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
3055 if (mult_cost > op_cost)
3056 mult_cost = op_cost;
3058 *variant = basic_variant;
3059 limit.cost = mult_cost;
3060 limit.latency = mult_cost;
3061 synth_mult (alg, val, &limit, mode);
3063 /* This works only if the inverted value actually fits in an
3064 `unsigned int' */
3065 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
3067 op_cost = neg_cost (speed, mode);
3068 if (MULT_COST_LESS (&alg->cost, mult_cost))
3070 limit.cost = alg->cost.cost - op_cost;
3071 limit.latency = alg->cost.latency - op_cost;
3073 else
3075 limit.cost = mult_cost - op_cost;
3076 limit.latency = mult_cost - op_cost;
3079 synth_mult (&alg2, -val, &limit, mode);
3080 alg2.cost.cost += op_cost;
3081 alg2.cost.latency += op_cost;
3082 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3083 *alg = alg2, *variant = negate_variant;
3086 /* This proves very useful for division-by-constant. */
3087 op_cost = add_cost (speed, mode);
3088 if (MULT_COST_LESS (&alg->cost, mult_cost))
3090 limit.cost = alg->cost.cost - op_cost;
3091 limit.latency = alg->cost.latency - op_cost;
3093 else
3095 limit.cost = mult_cost - op_cost;
3096 limit.latency = mult_cost - op_cost;
3099 synth_mult (&alg2, val - 1, &limit, mode);
3100 alg2.cost.cost += op_cost;
3101 alg2.cost.latency += op_cost;
3102 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3103 *alg = alg2, *variant = add_variant;
3105 return MULT_COST_LESS (&alg->cost, mult_cost);
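/* Purely illustrative, standalone sketch (plain C; the helper name is
   made up) of the second variant tried above: a multiplication by a
   negative constant can be synthesized from the sequence for its
   absolute value followed by a single negation, e.g. x * -7 as
   -((x << 3) - x).  The arithmetic is done on an unsigned type so the
   values wrap modulo the word width, just as synth_mult's unsigned
   computation does.  */

static unsigned long
sketch_mult_by_minus_7 (unsigned long x)
{
  unsigned long t = (x << 3) - x;	/* 7 * x: one shift, one subtract.  */
  return 0 - t;				/* negate_variant fixup: -7 * x.  */
}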
3108 /* A subroutine of expand_mult, used for constant multiplications.
3109 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3110 convenient. Use the shift/add sequence described by ALG and apply
3111 the final fixup specified by VARIANT. */
3113 static rtx
3114 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
3115 rtx target, const struct algorithm *alg,
3116 enum mult_variant variant)
3118 unsigned HOST_WIDE_INT val_so_far;
3119 rtx_insn *insn;
3120 rtx accum, tem;
3121 int opno;
3122 machine_mode nmode;
3124 /* Avoid referencing memory over and over and invalid sharing
3125 on SUBREGs. */
3126 op0 = force_reg (mode, op0);
3128 /* ACCUM starts out either as OP0 or as a zero, depending on
3129 the first operation. */
3131 if (alg->op[0] == alg_zero)
3133 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3134 val_so_far = 0;
3136 else if (alg->op[0] == alg_m)
3138 accum = copy_to_mode_reg (mode, op0);
3139 val_so_far = 1;
3141 else
3142 gcc_unreachable ();
3144 for (opno = 1; opno < alg->ops; opno++)
3146 int log = alg->log[opno];
3147 rtx shift_subtarget = optimize ? 0 : accum;
3148 rtx add_target
3149 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3150 && !optimize)
3151 ? target : 0;
3152 rtx accum_target = optimize ? 0 : accum;
3153 rtx accum_inner;
3155 switch (alg->op[opno])
3157 case alg_shift:
3158 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3159 /* REG_EQUAL note will be attached to the following insn. */
3160 emit_move_insn (accum, tem);
3161 val_so_far <<= log;
3162 break;
3164 case alg_add_t_m2:
3165 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3166 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3167 add_target ? add_target : accum_target);
3168 val_so_far += HOST_WIDE_INT_1U << log;
3169 break;
3171 case alg_sub_t_m2:
3172 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3173 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3174 add_target ? add_target : accum_target);
3175 val_so_far -= HOST_WIDE_INT_1U << log;
3176 break;
3178 case alg_add_t2_m:
3179 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3180 log, shift_subtarget, 0);
3181 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3182 add_target ? add_target : accum_target);
3183 val_so_far = (val_so_far << log) + 1;
3184 break;
3186 case alg_sub_t2_m:
3187 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3188 log, shift_subtarget, 0);
3189 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3190 add_target ? add_target : accum_target);
3191 val_so_far = (val_so_far << log) - 1;
3192 break;
3194 case alg_add_factor:
3195 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3196 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3197 add_target ? add_target : accum_target);
3198 val_so_far += val_so_far << log;
3199 break;
3201 case alg_sub_factor:
3202 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3203 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3204 (add_target
3205 ? add_target : (optimize ? 0 : tem)));
3206 val_so_far = (val_so_far << log) - val_so_far;
3207 break;
3209 default:
3210 gcc_unreachable ();
3213 if (SCALAR_INT_MODE_P (mode))
3215 /* Write a REG_EQUAL note on the last insn so that we can cse
3216 multiplication sequences. Note that if ACCUM is a SUBREG,
3217 we've set the inner register and must properly indicate that. */
3218 tem = op0, nmode = mode;
3219 accum_inner = accum;
3220 if (GET_CODE (accum) == SUBREG)
3222 accum_inner = SUBREG_REG (accum);
3223 nmode = GET_MODE (accum_inner);
3224 tem = gen_lowpart (nmode, op0);
3227 insn = get_last_insn ();
3228 set_dst_reg_note (insn, REG_EQUAL,
3229 gen_rtx_MULT (nmode, tem,
3230 gen_int_mode (val_so_far, nmode)),
3231 accum_inner);
3235 if (variant == negate_variant)
3237 val_so_far = -val_so_far;
3238 accum = expand_unop (mode, neg_optab, accum, target, 0);
3240 else if (variant == add_variant)
3242 val_so_far = val_so_far + 1;
3243 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3246 /* Compare only the bits of val and val_so_far that are significant
3247 in the result mode, to avoid sign-/zero-extension confusion. */
3248 nmode = GET_MODE_INNER (mode);
3249 val &= GET_MODE_MASK (nmode);
3250 val_so_far &= GET_MODE_MASK (nmode);
3251 gcc_assert (val == (HOST_WIDE_INT) val_so_far);
3253 return accum;
3256 /* Perform a multiplication and return an rtx for the result.
3257 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3258 TARGET is a suggestion for where to store the result (an rtx).
3260 We check specially for a constant integer as OP1.
3261 If you want this check for OP0 as well, then before calling
3262 you should swap the two operands if OP0 would be constant. */
3264 rtx
3265 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3266 int unsignedp)
3268 enum mult_variant variant;
3269 struct algorithm algorithm;
3270 rtx scalar_op1;
3271 int max_cost;
3272 bool speed = optimize_insn_for_speed_p ();
3273 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3275 if (CONSTANT_P (op0))
3276 std::swap (op0, op1);
3278 /* For vectors, there are several simplifications that can be made if
3279 all elements of the vector constant are identical. */
3280 scalar_op1 = unwrap_const_vec_duplicate (op1);
3282 if (INTEGRAL_MODE_P (mode))
3284 rtx fake_reg;
3285 HOST_WIDE_INT coeff;
3286 bool is_neg;
3287 int mode_bitsize;
3289 if (op1 == CONST0_RTX (mode))
3290 return op1;
3291 if (op1 == CONST1_RTX (mode))
3292 return op0;
3293 if (op1 == CONSTM1_RTX (mode))
3294 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3295 op0, target, 0);
3297 if (do_trapv)
3298 goto skip_synth;
3300 /* If mode is an integer vector mode, check whether the backend supports
3301 a vector lshift (by scalar or vector) at all. If not, we can't use
3302 a synthesized multiply. */
3303 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3304 && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3305 && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3306 goto skip_synth;
3308 /* These are the operations that are potentially turned into
3309 a sequence of shifts and additions. */
3310 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3312 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3313 less than or equal in size to `unsigned int' this doesn't matter.
3314 If the mode is larger than `unsigned int', then synth_mult works
3315 only if the constant value exactly fits in an `unsigned int' without
3316 any truncation. This means that multiplying by negative values does
3317 not work; results are off by 2^32 on a 32 bit machine. */
3318 if (CONST_INT_P (scalar_op1))
3320 coeff = INTVAL (scalar_op1);
3321 is_neg = coeff < 0;
3323 #if TARGET_SUPPORTS_WIDE_INT
3324 else if (CONST_WIDE_INT_P (scalar_op1))
3325 #else
3326 else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3327 #endif
3329 int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode));
3330 /* Perfect power of 2 (other than 1, which is handled above). */
3331 if (shift > 0)
3332 return expand_shift (LSHIFT_EXPR, mode, op0,
3333 shift, target, unsignedp);
3334 else
3335 goto skip_synth;
3337 else
3338 goto skip_synth;
3340 /* We used to test optimize here, on the grounds that it's better to
3341 produce a smaller program when -O is not used. But this causes
3342 such a terrible slowdown sometimes that it seems better to always
3343 use synth_mult. */
3345 /* Special case powers of two. */
3346 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3347 && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3348 return expand_shift (LSHIFT_EXPR, mode, op0,
3349 floor_log2 (coeff), target, unsignedp);
3351 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3353 /* Attempt to handle multiplication of DImode values by negative
3354 coefficients, by performing the multiplication by a positive
3355 multiplier and then negating the result. */
3356 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3358 /* It's safe to use -coeff even for INT_MIN, as the
3359 result is interpreted as an unsigned coefficient.
3360 Exclude cost of op0 from max_cost to match the cost
3361 calculation of the synth_mult. */
3362 coeff = -(unsigned HOST_WIDE_INT) coeff;
3363 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3364 mode, speed)
3365 - neg_cost (speed, mode));
3366 if (max_cost <= 0)
3367 goto skip_synth;
3369 /* Special case powers of two. */
3370 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3372 rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3373 floor_log2 (coeff), target, unsignedp);
3374 return expand_unop (mode, neg_optab, temp, target, 0);
3377 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3378 max_cost))
3380 rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3381 &algorithm, variant);
3382 return expand_unop (mode, neg_optab, temp, target, 0);
3384 goto skip_synth;
3387 /* Exclude cost of op0 from max_cost to match the cost
3388 calculation of the synth_mult. */
3389 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3390 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3391 return expand_mult_const (mode, op0, coeff, target,
3392 &algorithm, variant);
3394 skip_synth:
3396 /* Expand x*2.0 as x+x. */
3397 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3398 && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
3400 op0 = force_reg (GET_MODE (op0), op0);
3401 return expand_binop (mode, add_optab, op0, op0,
3402 target, unsignedp, OPTAB_LIB_WIDEN);
3405 /* This used to use umul_optab if unsigned, but for non-widening multiply
3406 there is no difference between signed and unsigned. */
3407 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3408 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3409 gcc_assert (op0);
3410 return op0;
3413 /* Return a cost estimate for multiplying a register by the given
3414 COEFFicient in the given MODE and SPEED. */
3416 int
3417 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3419 int max_cost;
3420 struct algorithm algorithm;
3421 enum mult_variant variant;
3423 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3424 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3425 mode, speed);
3426 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3427 return algorithm.cost.cost;
3428 else
3429 return max_cost;
3432 /* Perform a widening multiplication and return an rtx for the result.
3433 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3434 TARGET is a suggestion for where to store the result (an rtx).
3435 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3436 or smul_widen_optab.
3438 We check specially for a constant integer as OP1, comparing the
3439 cost of a widening multiply against the cost of a sequence of shifts
3440 and adds. */
3442 rtx
3443 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3444 int unsignedp, optab this_optab)
3446 bool speed = optimize_insn_for_speed_p ();
3447 rtx cop1;
3449 if (CONST_INT_P (op1)
3450 && GET_MODE (op0) != VOIDmode
3451 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3452 this_optab == umul_widen_optab))
3453 && CONST_INT_P (cop1)
3454 && (INTVAL (cop1) >= 0
3455 || HWI_COMPUTABLE_MODE_P (mode)))
3457 HOST_WIDE_INT coeff = INTVAL (cop1);
3458 int max_cost;
3459 enum mult_variant variant;
3460 struct algorithm algorithm;
3462 if (coeff == 0)
3463 return CONST0_RTX (mode);
3465 /* Special case powers of two. */
3466 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3468 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3469 return expand_shift (LSHIFT_EXPR, mode, op0,
3470 floor_log2 (coeff), target, unsignedp);
3473 /* Exclude cost of op0 from max_cost to match the cost
3474 calculation of the synth_mult. */
3475 max_cost = mul_widen_cost (speed, mode);
3476 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3477 max_cost))
3479 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3480 return expand_mult_const (mode, op0, coeff, target,
3481 &algorithm, variant);
3484 return expand_binop (mode, this_optab, op0, op1, target,
3485 unsignedp, OPTAB_LIB_WIDEN);
3488 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3489 replace division by D, and put the least significant N bits of the result
3490 in *MULTIPLIER_PTR and return the most significant bit.
3492 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3493 needed precision is in PRECISION (should be <= N).
3495 PRECISION should be as small as possible so this function can choose
3496 the multiplier more freely.
3498 The rounded-up logarithm of D is placed in *LGUP_PTR. A shift count that
3499 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3501 Using this function, x/D will be equal to (x * m) >> (N + *POST_SHIFT_PTR),
3502 where m is the full N + 1 bit multiplier. */
3504 unsigned HOST_WIDE_INT
3505 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3506 unsigned HOST_WIDE_INT *multiplier_ptr,
3507 int *post_shift_ptr, int *lgup_ptr)
3509 int lgup, post_shift;
3510 int pow, pow2;
3512 /* lgup = ceil(log2(divisor)); */
3513 lgup = ceil_log2 (d);
3515 gcc_assert (lgup <= n);
3517 pow = n + lgup;
3518 pow2 = n + lgup - precision;
3520 /* mlow = 2^(N + lgup)/d */
3521 wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3522 wide_int mlow = wi::udiv_trunc (val, d);
3524 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3525 val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3526 wide_int mhigh = wi::udiv_trunc (val, d);
3528 /* If precision == N, then mlow, mhigh exceed 2^N
3529 (but they do not exceed 2^(N+1)). */
3531 /* Reduce to lowest terms. */
3532 for (post_shift = lgup; post_shift > 0; post_shift--)
3534 unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3535 HOST_BITS_PER_WIDE_INT);
3536 unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3537 HOST_BITS_PER_WIDE_INT);
3538 if (ml_lo >= mh_lo)
3539 break;
3541 mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3542 mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3545 *post_shift_ptr = post_shift;
3546 *lgup_ptr = lgup;
3547 if (n < HOST_BITS_PER_WIDE_INT)
3549 unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;
3550 *multiplier_ptr = mhigh.to_uhwi () & mask;
3551 return mhigh.to_uhwi () >= mask;
3553 else
3555 *multiplier_ptr = mhigh.to_uhwi ();
3556 return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
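/* Worked example (illustrative): for D = 3 with N = PRECISION = 32 we get
   lgup = 2, mlow = 2^34 / 3 = 0x155555555 and mhigh = (2^34 + 4) / 3
   = 0x155555556; one reduction step leaves mhigh = 0xAAAAAAAB with
   POST_SHIFT = 1 and a clear carry bit, so an unsigned 32-bit division by
   3 needs only a high-part multiply and one shift.  The helper below
   (its name is made up, and a 64-bit HOST_WIDE_INT is assumed) spells out
   that resulting sequence on the host.  */

static inline unsigned int
example_udiv3_by_high_multiply (unsigned int x)
{
  unsigned HOST_WIDE_INT high
    = ((unsigned HOST_WIDE_INT) x * 0xAAAAAAAB) >> 32;
  return (unsigned int) (high >> 1);	/* == x / 3 for every 32-bit x.  */
}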
3560 /* Compute the inverse of X mod 2**N, i.e., find Y such that X * Y is
3561 congruent to 1 (mod 2**N). */
3563 static unsigned HOST_WIDE_INT
3564 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3566 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3568 /* The algorithm notes that the choice y = x satisfies
3569 x*y == 1 mod 2^3, since x is assumed odd.
3570 Each iteration doubles the number of bits of significance in y. */
3572 unsigned HOST_WIDE_INT mask;
3573 unsigned HOST_WIDE_INT y = x;
3574 int nbit = 3;
3576 mask = (n == HOST_BITS_PER_WIDE_INT
3577 ? HOST_WIDE_INT_M1U
3578 : (HOST_WIDE_INT_1U << n) - 1);
3580 while (nbit < n)
3582 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3583 nbit *= 2;
3585 return y;
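/* For example (illustrative): invert_mod2n (3, 32) == 0xAAAAAAAB, because
   3 * 0xAAAAAAAB == 2^33 + 1 == 1 (mod 2^32); each Newton step above
   doubles the number of correct low-order bits.  The EXACT_DIV_EXPR case
   of expand_divmod below uses such inverses so that an exact division by
   an odd constant becomes a single low-part multiplication, as in this
   made-up helper, which is valid only when X is a multiple of 3.  */

static inline unsigned int
example_exact_udiv3 (unsigned int x)
{
  return x * 0xAAAAAAAB;	/* == x / 3 whenever x % 3 == 0.  */
}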
3588 /* Emit code to adjust ADJ_OPERAND after a multiplication of OP0 and OP1
3589 in the wrong signedness flavor. ADJ_OPERAND is already the high half of the
3590 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3591 to become unsigned; if UNSIGNEDP is zero, adjust the unsigned product to
3592 become signed.
3594 The result is put in TARGET if that is convenient.
3596 MODE is the mode of operation. */
3599 expand_mult_highpart_adjust (machine_mode mode, rtx adj_operand, rtx op0,
3600 rtx op1, rtx target, int unsignedp)
3602 rtx tem;
3603 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3605 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3606 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3607 tem = expand_and (mode, tem, op1, NULL_RTX);
3608 adj_operand
3609 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3610 adj_operand);
3612 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3613 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3614 tem = expand_and (mode, tem, op0, NULL_RTX);
3615 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3616 target);
3618 return target;
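/* Illustratively, for N-bit operands X and Y with sign bits sx and sy
   (writing umulh/smulh for the unsigned and signed high-part products),
     umulh (X, Y) == smulh (X, Y) + (sx ? Y : 0) + (sy ? X : 0)  (mod 2^N),
   and the same identity read backwards turns an unsigned high part into a
   signed one.  The two AND-of-arithmetic-shift terms above compute exactly
   those conditional addends without branches.  */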
3621 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3623 static rtx
3624 extract_high_half (machine_mode mode, rtx op)
3626 machine_mode wider_mode;
3628 if (mode == word_mode)
3629 return gen_highpart (mode, op);
3631 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3633 wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3634 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3635 GET_MODE_BITSIZE (mode), 0, 1);
3636 return convert_modes (mode, wider_mode, op, 0);
3639 /* Like expmed_mult_highpart, but only consider using a multiplication
3640 optab. OP1 is an rtx for the constant operand. */
3642 static rtx
3643 expmed_mult_highpart_optab (machine_mode mode, rtx op0, rtx op1,
3644 rtx target, int unsignedp, int max_cost)
3646 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3647 machine_mode wider_mode;
3648 optab moptab;
3649 rtx tem;
3650 int size;
3651 bool speed = optimize_insn_for_speed_p ();
3653 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3655 wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3656 size = GET_MODE_BITSIZE (mode);
3658 /* Firstly, try using a multiplication insn that only generates the needed
3659 high part of the product, and in the sign flavor of unsignedp. */
3660 if (mul_highpart_cost (speed, mode) < max_cost)
3662 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3663 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3664 unsignedp, OPTAB_DIRECT);
3665 if (tem)
3666 return tem;
3669 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3670 Need to adjust the result after the multiplication. */
3671 if (size - 1 < BITS_PER_WORD
3672 && (mul_highpart_cost (speed, mode)
3673 + 2 * shift_cost (speed, mode, size-1)
3674 + 4 * add_cost (speed, mode) < max_cost))
3676 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3677 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3678 unsignedp, OPTAB_DIRECT);
3679 if (tem)
3680 /* We used the wrong signedness. Adjust the result. */
3681 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3682 tem, unsignedp);
3685 /* Try widening multiplication. */
3686 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3687 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3688 && mul_widen_cost (speed, wider_mode) < max_cost)
3690 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3691 unsignedp, OPTAB_WIDEN);
3692 if (tem)
3693 return extract_high_half (mode, tem);
3696 /* Try widening the mode and perform a non-widening multiplication. */
3697 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3698 && size - 1 < BITS_PER_WORD
3699 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3700 < max_cost))
3702 rtx_insn *insns;
3703 rtx wop0, wop1;
3705 /* We need to widen the operands, for example to ensure the
3706 constant multiplier is correctly sign or zero extended.
3707 Use a sequence to clean up any instructions emitted by
3708 the conversions if things don't work out. */
3709 start_sequence ();
3710 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3711 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3712 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3713 unsignedp, OPTAB_WIDEN);
3714 insns = get_insns ();
3715 end_sequence ();
3717 if (tem)
3719 emit_insn (insns);
3720 return extract_high_half (mode, tem);
3724 /* Try widening multiplication of opposite signedness, and adjust. */
3725 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3726 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3727 && size - 1 < BITS_PER_WORD
3728 && (mul_widen_cost (speed, wider_mode)
3729 + 2 * shift_cost (speed, mode, size-1)
3730 + 4 * add_cost (speed, mode) < max_cost))
3732 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3733 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3734 if (tem != 0)
3736 tem = extract_high_half (mode, tem);
3737 /* We used the wrong signedness. Adjust the result. */
3738 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3739 target, unsignedp);
3743 return 0;
3746 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3747 putting the high half of the result in TARGET if that is convenient,
3748 and return where the result is. If the operation cannot be performed,
3749 0 is returned.
3751 MODE is the mode of operation and result.
3753 UNSIGNEDP nonzero means unsigned multiply.
3755 MAX_COST is the total allowed cost for the expanded RTL. */
3757 static rtx
3758 expmed_mult_highpart (machine_mode mode, rtx op0, rtx op1,
3759 rtx target, int unsignedp, int max_cost)
3761 machine_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3762 unsigned HOST_WIDE_INT cnst1;
3763 int extra_cost;
3764 bool sign_adjust = false;
3765 enum mult_variant variant;
3766 struct algorithm alg;
3767 rtx tem;
3768 bool speed = optimize_insn_for_speed_p ();
3770 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3771 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3772 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3774 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3776 /* We can't optimize modes wider than BITS_PER_WORD.
3777 ??? We might be able to perform double-word arithmetic if
3778 mode == word_mode, however all the cost calculations in
3779 synth_mult etc. assume single-word operations. */
3780 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3781 return expmed_mult_highpart_optab (mode, op0, op1, target,
3782 unsignedp, max_cost);
3784 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3786 /* Check whether we try to multiply by a negative constant. */
3787 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3789 sign_adjust = true;
3790 extra_cost += add_cost (speed, mode);
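/* The wider shift/add multiplication below uses CNST1, the unsigned
   interpretation of the constant; for a negative signed constant c we have
   CNST1 == c + 2^BITSIZE, so the extracted high half comes out too large
   by exactly OP0.  The MINUS adjustment at the end of the function undoes
   that, which is why one extra addition is charged here.  */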
3793 /* See whether shift/add multiplication is cheap enough. */
3794 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3795 max_cost - extra_cost))
3797 /* See whether the specialized multiplication optabs are
3798 cheaper than the shift/add version. */
3799 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3800 alg.cost.cost + extra_cost);
3801 if (tem)
3802 return tem;
3804 tem = convert_to_mode (wider_mode, op0, unsignedp);
3805 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3806 tem = extract_high_half (mode, tem);
3808 /* Adjust result for signedness. */
3809 if (sign_adjust)
3810 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3812 return tem;
3814 return expmed_mult_highpart_optab (mode, op0, op1, target,
3815 unsignedp, max_cost);
3819 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3821 static rtx
3822 expand_smod_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3824 rtx result, temp, shift;
3825 rtx_code_label *label;
3826 int logd;
3827 int prec = GET_MODE_PRECISION (mode);
3829 logd = floor_log2 (d);
3830 result = gen_reg_rtx (mode);
3832 /* Avoid conditional branches when they're expensive. */
3833 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3834 && optimize_insn_for_speed_p ())
3836 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3837 mode, 0, -1);
3838 if (signmask)
3840 HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
3841 signmask = force_reg (mode, signmask);
3842 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3844 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3845 which instruction sequence to use. If logical right shifts
3846 are expensive then use 2 XORs, 2 SUBs and an AND; otherwise
3847 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
3849 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3850 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3851 || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
3852 > COSTS_N_INSNS (2)))
3854 temp = expand_binop (mode, xor_optab, op0, signmask,
3855 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3856 temp = expand_binop (mode, sub_optab, temp, signmask,
3857 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3858 temp = expand_binop (mode, and_optab, temp,
3859 gen_int_mode (masklow, mode),
3860 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3861 temp = expand_binop (mode, xor_optab, temp, signmask,
3862 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3863 temp = expand_binop (mode, sub_optab, temp, signmask,
3864 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3866 else
3868 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3869 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3870 signmask = force_reg (mode, signmask);
3872 temp = expand_binop (mode, add_optab, op0, signmask,
3873 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3874 temp = expand_binop (mode, and_optab, temp,
3875 gen_int_mode (masklow, mode),
3876 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3877 temp = expand_binop (mode, sub_optab, temp, signmask,
3878 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3880 return temp;
3884 /* Mask contains the mode's signbit and the significant bits of the
3885 modulus. By including the signbit in the operation, many targets
3886 can avoid an explicit compare operation in the following comparison
3887 against zero. */
3888 wide_int mask = wi::mask (logd, false, prec);
3889 mask = wi::set_bit (mask, prec - 1);
3891 temp = expand_binop (mode, and_optab, op0,
3892 immed_wide_int_const (mask, mode),
3893 result, 1, OPTAB_LIB_WIDEN);
3894 if (temp != result)
3895 emit_move_insn (result, temp);
3897 label = gen_label_rtx ();
3898 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3900 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3901 0, OPTAB_LIB_WIDEN);
3903 mask = wi::mask (logd, true, prec);
3904 temp = expand_binop (mode, ior_optab, temp,
3905 immed_wide_int_const (mask, mode),
3906 result, 1, OPTAB_LIB_WIDEN);
3907 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3908 0, OPTAB_LIB_WIDEN);
3909 if (temp != result)
3910 emit_move_insn (result, temp);
3911 emit_label (label);
3912 return result;
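/* Branch-free sketch of the sequence above for D == 4 and a 32-bit mode.
   Illustrative only: the helper name is made up and the code assumes the
   usual arithmetic behaviour of >> on negative host integers.  */

static inline int
example_smod4_branchless (int x)
{
  /* S is 3 for negative X and 0 otherwise, i.e. the logically shifted
     sign mask used by the LSHIFTRT variant.  */
  unsigned int s = (unsigned int) (x >> 31) >> (32 - 2);
  /* ((X + S) & 3) - S == X % 4 with truncating (C) semantics.  */
  return (int) (((unsigned int) x + s) & 3) - (int) s;
}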
3915 /* Expand signed division of OP0 by a power of two D in mode MODE.
3916 This routine is only called for positive values of D. */
3918 static rtx
3919 expand_sdiv_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3921 rtx temp;
3922 rtx_code_label *label;
3923 int logd;
3925 logd = floor_log2 (d);
3927 if (d == 2
3928 && BRANCH_COST (optimize_insn_for_speed_p (),
3929 false) >= 1)
3931 temp = gen_reg_rtx (mode);
3932 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3933 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3934 0, OPTAB_LIB_WIDEN);
3935 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3938 if (HAVE_conditional_move
3939 && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
3941 rtx temp2;
3943 start_sequence ();
3944 temp2 = copy_to_mode_reg (mode, op0);
3945 temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
3946 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3947 temp = force_reg (mode, temp);
3949 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3950 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3951 mode, temp, temp2, mode, 0);
3952 if (temp2)
3954 rtx_insn *seq = get_insns ();
3955 end_sequence ();
3956 emit_insn (seq);
3957 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3959 end_sequence ();
3962 if (BRANCH_COST (optimize_insn_for_speed_p (),
3963 false) >= 2)
3965 int ushift = GET_MODE_BITSIZE (mode) - logd;
3967 temp = gen_reg_rtx (mode);
3968 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3969 if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
3970 || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3971 > COSTS_N_INSNS (1))
3972 temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
3973 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3974 else
3975 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3976 ushift, NULL_RTX, 1);
3977 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3978 0, OPTAB_LIB_WIDEN);
3979 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3982 label = gen_label_rtx ();
3983 temp = copy_to_mode_reg (mode, op0);
3984 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3985 expand_inc (temp, gen_int_mode (d - 1, mode));
3986 emit_label (label);
3987 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
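/* Branch-free sketch of the store-flag variant above for D == 4 and a
   32-bit mode.  Illustrative only: the helper name is made up and an
   arithmetic >> of negative host integers is assumed.  */

static inline int
example_sdiv4_branchless (int x)
{
  /* Bias negative values by D - 1 == 3 so that the final arithmetic shift
     truncates toward zero rather than toward minus infinity.  */
  int bias = (x >> 31) & 3;
  return (int) ((unsigned int) x + (unsigned int) bias) >> 2;
}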
3990 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3991 if that is convenient, and returning where the result is.
3992 You may request either the quotient or the remainder as the result;
3993 specify REM_FLAG nonzero to get the remainder.
3995 CODE is the expression code for which kind of division this is;
3996 it controls how rounding is done. MODE is the machine mode to use.
3997 UNSIGNEDP nonzero means do unsigned division. */
3999 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
4000 and then correct it by or'ing in missing high bits
4001 if result of ANDI is nonzero.
4002 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
4003 This could optimize to a bfexts instruction.
4004 But C doesn't use these operations, so their optimizations are
4005 left for later. */
4006 /* ??? For modulo, we don't actually need the highpart of the first product,
4007 the low part will do nicely. And for small divisors, the second multiply
4008 can also be a low-part only multiply or even be completely left out.
4009 E.g. to calculate the remainder of a division by 3 with a 32 bit
4010 multiply, multiply with 0x55555556 and extract the upper two bits;
4011 the result is exact for inputs up to 0x1fffffff.
4012 The input range can be reduced by using cross-sum rules.
4013 For odd divisors >= 3, the following table gives right shift counts
4014 so that if a number is shifted by an integer multiple of the given
4015 amount, the remainder stays the same:
4016 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
4017 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
4018 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
4019 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
4020 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
4022 Cross-sum rules for even numbers can be derived by leaving as many bits
4023 to the right alone as the divisor has zeros to the right.
4024 E.g. if x is an unsigned 32 bit number:
4025 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
4029 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
4030 rtx op0, rtx op1, rtx target, int unsignedp)
4032 machine_mode compute_mode;
4033 rtx tquotient;
4034 rtx quotient = 0, remainder = 0;
4035 rtx_insn *last;
4036 int size;
4037 rtx_insn *insn;
4038 optab optab1, optab2;
4039 int op1_is_constant, op1_is_pow2 = 0;
4040 int max_cost, extra_cost;
4041 static HOST_WIDE_INT last_div_const = 0;
4042 bool speed = optimize_insn_for_speed_p ();
4044 op1_is_constant = CONST_INT_P (op1);
4045 if (op1_is_constant)
4047 wide_int ext_op1 = rtx_mode_t (op1, mode);
4048 op1_is_pow2 = (wi::popcount (ext_op1) == 1
4049 || (! unsignedp
4050 && wi::popcount (wi::neg (ext_op1)) == 1));
4054 /* This is the structure of expand_divmod:
4056 First comes code to fix up the operands so we can perform the operations
4057 correctly and efficiently.
4059 Second comes a switch statement with code specific for each rounding mode.
4060 For some special operands this code emits all RTL for the desired
4061 operation, for other cases, it generates only a quotient and stores it in
4062 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
4063 to indicate that it has not done anything.
4065 Last comes code that finishes the operation. If QUOTIENT is set and
4066 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
4067 QUOTIENT is not set, it is computed using trunc rounding.
4069 We try to generate special code for division and remainder when OP1 is a
4070 constant. If |OP1| = 2**n we can use shifts and some other fast
4071 operations. For other values of OP1, we compute a carefully selected
4072 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4073 by m.
4075 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4076 half of the product. Different strategies for generating the product are
4077 implemented in expmed_mult_highpart.
4079 If what we actually want is the remainder, we generate that by another
4080 by-constant multiplication and a subtraction. */
4082 /* We shouldn't be called with OP1 == const1_rtx, but some of the
4083 code below will malfunction if we are, so check here and handle
4084 the special case if so. */
4085 if (op1 == const1_rtx)
4086 return rem_flag ? const0_rtx : op0;
4088 /* When dividing by -1, we could get an overflow.
4089 negv_optab can handle overflows. */
4090 if (! unsignedp && op1 == constm1_rtx)
4092 if (rem_flag)
4093 return const0_rtx;
4094 return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4095 ? negv_optab : neg_optab, op0, target, 0);
4098 if (target
4099 /* Don't use the function value register as a target
4100 since we have to read it as well as write it,
4101 and function-inlining gets confused by this. */
4102 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4103 /* Don't clobber an operand while doing a multi-step calculation. */
4104 || ((rem_flag || op1_is_constant)
4105 && (reg_mentioned_p (target, op0)
4106 || (MEM_P (op0) && MEM_P (target))))
4107 || reg_mentioned_p (target, op1)
4108 || (MEM_P (op1) && MEM_P (target))))
4109 target = 0;
4111 /* Get the mode in which to perform this computation. Normally it will
4112 be MODE, but sometimes we can't do the desired operation in MODE.
4113 If so, pick a wider mode in which we can do the operation. Convert
4114 to that mode at the start to avoid repeated conversions.
4116 First see what operations we need. These depend on the expression
4117 we are evaluating. (We assume that divxx3 insns exist under the
4118 same conditions as modxx3 insns do, and that these insns don't normally
4119 fail. If these assumptions are not correct, we may generate less
4120 efficient code in some cases.)
4122 Then see if we find a mode in which we can open-code that operation
4123 (either a division, modulus, or shift). Finally, check for the smallest
4124 mode for which we can do the operation with a library call. */
4126 /* We might want to refine this now that we have division-by-constant
4127 optimization. Since expmed_mult_highpart tries so many variants, it is
4128 not straightforward to generalize this. Maybe we should make an array
4129 of possible modes in init_expmed? Save this for GCC 2.7. */
4131 optab1 = (op1_is_pow2
4132 ? (unsignedp ? lshr_optab : ashr_optab)
4133 : (unsignedp ? udiv_optab : sdiv_optab));
4134 optab2 = (op1_is_pow2 ? optab1
4135 : (unsignedp ? udivmod_optab : sdivmod_optab));
4137 FOR_EACH_MODE_FROM (compute_mode, mode)
4138 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4139 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4140 break;
4142 if (compute_mode == VOIDmode)
4143 FOR_EACH_MODE_FROM (compute_mode, mode)
4144 if (optab_libfunc (optab1, compute_mode)
4145 || optab_libfunc (optab2, compute_mode))
4146 break;
4148 /* If we still couldn't find a mode, use MODE, but expand_binop will
4149 probably die. */
4150 if (compute_mode == VOIDmode)
4151 compute_mode = mode;
4153 if (target && GET_MODE (target) == compute_mode)
4154 tquotient = target;
4155 else
4156 tquotient = gen_reg_rtx (compute_mode);
4158 size = GET_MODE_BITSIZE (compute_mode);
4159 #if 0
4160 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4161 (mode), and thereby get better code when OP1 is a constant. Do that
4162 later. It will require going over all usages of SIZE below. */
4163 size = GET_MODE_BITSIZE (mode);
4164 #endif
4166 /* Only deduct something for a REM if the last divide done was
4167 for a different constant. Then set the constant of the last
4168 divide. */
4169 max_cost = (unsignedp
4170 ? udiv_cost (speed, compute_mode)
4171 : sdiv_cost (speed, compute_mode));
4172 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4173 && INTVAL (op1) == last_div_const))
4174 max_cost -= (mul_cost (speed, compute_mode)
4175 + add_cost (speed, compute_mode));
4177 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4179 /* Now convert to the best mode to use. */
4180 if (compute_mode != mode)
4182 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4183 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4185 /* convert_modes may have placed op1 into a register, so we
4186 must recompute the following. */
4187 op1_is_constant = CONST_INT_P (op1);
4188 if (op1_is_constant)
4190 wide_int ext_op1 = rtx_mode_t (op1, compute_mode);
4191 op1_is_pow2 = (wi::popcount (ext_op1) == 1
4192 || (! unsignedp
4193 && wi::popcount (wi::neg (ext_op1)) == 1));
4195 else
4196 op1_is_pow2 = 0;
4199 /* If one of the operands is a volatile MEM, copy it into a register. */
4201 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4202 op0 = force_reg (compute_mode, op0);
4203 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4204 op1 = force_reg (compute_mode, op1);
4206 /* If we need the remainder or if OP1 is constant, we need to
4207 put OP0 in a register in case it has any queued subexpressions. */
4208 if (rem_flag || op1_is_constant)
4209 op0 = force_reg (compute_mode, op0);
4211 last = get_last_insn ();
4213 /* Promote floor rounding to trunc rounding for unsigned operations. */
4214 if (unsignedp)
4216 if (code == FLOOR_DIV_EXPR)
4217 code = TRUNC_DIV_EXPR;
4218 if (code == FLOOR_MOD_EXPR)
4219 code = TRUNC_MOD_EXPR;
4220 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4221 code = TRUNC_DIV_EXPR;
4224 if (op1 != const0_rtx)
4225 switch (code)
4227 case TRUNC_MOD_EXPR:
4228 case TRUNC_DIV_EXPR:
4229 if (op1_is_constant)
4231 if (unsignedp)
4233 unsigned HOST_WIDE_INT mh, ml;
4234 int pre_shift, post_shift;
4235 int dummy;
4236 wide_int wd = rtx_mode_t (op1, compute_mode);
4237 unsigned HOST_WIDE_INT d = wd.to_uhwi ();
4239 if (wi::popcount (wd) == 1)
4241 pre_shift = floor_log2 (d);
4242 if (rem_flag)
4244 unsigned HOST_WIDE_INT mask
4245 = (HOST_WIDE_INT_1U << pre_shift) - 1;
4246 remainder
4247 = expand_binop (compute_mode, and_optab, op0,
4248 gen_int_mode (mask, compute_mode),
4249 remainder, 1,
4250 OPTAB_LIB_WIDEN);
4251 if (remainder)
4252 return gen_lowpart (mode, remainder);
4254 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4255 pre_shift, tquotient, 1);
4257 else if (size <= HOST_BITS_PER_WIDE_INT)
4259 if (d >= (HOST_WIDE_INT_1U << (size - 1)))
4261 /* Most significant bit of divisor is set; emit an scc
4262 insn. */
4263 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4264 compute_mode, 1, 1);
4266 else
4268 /* Find a suitable multiplier and right shift count
4269 instead of multiplying with D. */
4271 mh = choose_multiplier (d, size, size,
4272 &ml, &post_shift, &dummy);
4274 /* If the suggested multiplier is more than SIZE bits,
4275 we can do better for even divisors, using an
4276 initial right shift. */
4277 if (mh != 0 && (d & 1) == 0)
4279 pre_shift = ctz_or_zero (d);
4280 mh = choose_multiplier (d >> pre_shift, size,
4281 size - pre_shift,
4282 &ml, &post_shift, &dummy);
4283 gcc_assert (!mh);
4285 else
4286 pre_shift = 0;
4288 if (mh != 0)
4290 rtx t1, t2, t3, t4;
4292 if (post_shift - 1 >= BITS_PER_WORD)
4293 goto fail1;
4295 extra_cost
4296 = (shift_cost (speed, compute_mode, post_shift - 1)
4297 + shift_cost (speed, compute_mode, 1)
4298 + 2 * add_cost (speed, compute_mode));
4299 t1 = expmed_mult_highpart
4300 (compute_mode, op0,
4301 gen_int_mode (ml, compute_mode),
4302 NULL_RTX, 1, max_cost - extra_cost);
4303 if (t1 == 0)
4304 goto fail1;
4305 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4306 op0, t1),
4307 NULL_RTX);
4308 t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4309 t2, 1, NULL_RTX, 1);
4310 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4311 t1, t3),
4312 NULL_RTX);
4313 quotient = expand_shift
4314 (RSHIFT_EXPR, compute_mode, t4,
4315 post_shift - 1, tquotient, 1);
4317 else
4319 rtx t1, t2;
4321 if (pre_shift >= BITS_PER_WORD
4322 || post_shift >= BITS_PER_WORD)
4323 goto fail1;
4325 t1 = expand_shift
4326 (RSHIFT_EXPR, compute_mode, op0,
4327 pre_shift, NULL_RTX, 1);
4328 extra_cost
4329 = (shift_cost (speed, compute_mode, pre_shift)
4330 + shift_cost (speed, compute_mode, post_shift));
4331 t2 = expmed_mult_highpart
4332 (compute_mode, t1,
4333 gen_int_mode (ml, compute_mode),
4334 NULL_RTX, 1, max_cost - extra_cost);
4335 if (t2 == 0)
4336 goto fail1;
4337 quotient = expand_shift
4338 (RSHIFT_EXPR, compute_mode, t2,
4339 post_shift, tquotient, 1);
4343 else /* Too wide mode to use tricky code */
4344 break;
4346 insn = get_last_insn ();
4347 if (insn != last)
4348 set_dst_reg_note (insn, REG_EQUAL,
4349 gen_rtx_UDIV (compute_mode, op0, op1),
4350 quotient);
4352 else /* TRUNC_DIV, signed */
4354 unsigned HOST_WIDE_INT ml;
4355 int lgup, post_shift;
4356 rtx mlr;
4357 HOST_WIDE_INT d = INTVAL (op1);
4358 unsigned HOST_WIDE_INT abs_d;
4360 /* Since d might be INT_MIN, we have to cast to
4361 unsigned HOST_WIDE_INT before negating to avoid
4362 undefined signed overflow. */
4363 abs_d = (d >= 0
4364 ? (unsigned HOST_WIDE_INT) d
4365 : - (unsigned HOST_WIDE_INT) d);
4367 /* n rem d = n rem -d */
4368 if (rem_flag && d < 0)
4370 d = abs_d;
4371 op1 = gen_int_mode (abs_d, compute_mode);
4374 if (d == 1)
4375 quotient = op0;
4376 else if (d == -1)
4377 quotient = expand_unop (compute_mode, neg_optab, op0,
4378 tquotient, 0);
4379 else if (size <= HOST_BITS_PER_WIDE_INT
4380 && abs_d == HOST_WIDE_INT_1U << (size - 1))
4382 /* This case is not handled correctly below. */
4383 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4384 compute_mode, 1, 1);
4385 if (quotient == 0)
4386 goto fail1;
4388 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4389 && (size <= HOST_BITS_PER_WIDE_INT || d >= 0)
4390 && (rem_flag
4391 ? smod_pow2_cheap (speed, compute_mode)
4392 : sdiv_pow2_cheap (speed, compute_mode))
4393 /* We assume that the cheap metric is true if the
4394 optab has an expander for this mode. */
4395 && ((optab_handler ((rem_flag ? smod_optab
4396 : sdiv_optab),
4397 compute_mode)
4398 != CODE_FOR_nothing)
4399 || (optab_handler (sdivmod_optab,
4400 compute_mode)
4401 != CODE_FOR_nothing)))
4403 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d)
4404 && (size <= HOST_BITS_PER_WIDE_INT
4405 || abs_d != (unsigned HOST_WIDE_INT) d))
4407 if (rem_flag)
4409 remainder = expand_smod_pow2 (compute_mode, op0, d);
4410 if (remainder)
4411 return gen_lowpart (mode, remainder);
4414 if (sdiv_pow2_cheap (speed, compute_mode)
4415 && ((optab_handler (sdiv_optab, compute_mode)
4416 != CODE_FOR_nothing)
4417 || (optab_handler (sdivmod_optab, compute_mode)
4418 != CODE_FOR_nothing)))
4419 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4420 compute_mode, op0,
4421 gen_int_mode (abs_d,
4422 compute_mode),
4423 NULL_RTX, 0);
4424 else
4425 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4427 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4428 negate the quotient. */
4429 if (d < 0)
4431 insn = get_last_insn ();
4432 if (insn != last
4433 && abs_d < (HOST_WIDE_INT_1U
4434 << (HOST_BITS_PER_WIDE_INT - 1)))
4435 set_dst_reg_note (insn, REG_EQUAL,
4436 gen_rtx_DIV (compute_mode, op0,
4437 gen_int_mode
4438 (abs_d,
4439 compute_mode)),
4440 quotient);
4442 quotient = expand_unop (compute_mode, neg_optab,
4443 quotient, quotient, 0);
4446 else if (size <= HOST_BITS_PER_WIDE_INT)
4448 choose_multiplier (abs_d, size, size - 1,
4449 &ml, &post_shift, &lgup);
4450 if (ml < HOST_WIDE_INT_1U << (size - 1))
4452 rtx t1, t2, t3;
4454 if (post_shift >= BITS_PER_WORD
4455 || size - 1 >= BITS_PER_WORD)
4456 goto fail1;
4458 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4459 + shift_cost (speed, compute_mode, size - 1)
4460 + add_cost (speed, compute_mode));
4461 t1 = expmed_mult_highpart
4462 (compute_mode, op0, gen_int_mode (ml, compute_mode),
4463 NULL_RTX, 0, max_cost - extra_cost);
4464 if (t1 == 0)
4465 goto fail1;
4466 t2 = expand_shift
4467 (RSHIFT_EXPR, compute_mode, t1,
4468 post_shift, NULL_RTX, 0);
4469 t3 = expand_shift
4470 (RSHIFT_EXPR, compute_mode, op0,
4471 size - 1, NULL_RTX, 0);
4472 if (d < 0)
4473 quotient
4474 = force_operand (gen_rtx_MINUS (compute_mode,
4475 t3, t2),
4476 tquotient);
4477 else
4478 quotient
4479 = force_operand (gen_rtx_MINUS (compute_mode,
4480 t2, t3),
4481 tquotient);
4483 else
4485 rtx t1, t2, t3, t4;
4487 if (post_shift >= BITS_PER_WORD
4488 || size - 1 >= BITS_PER_WORD)
4489 goto fail1;
4491 ml |= HOST_WIDE_INT_M1U << (size - 1);
4492 mlr = gen_int_mode (ml, compute_mode);
4493 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4494 + shift_cost (speed, compute_mode, size - 1)
4495 + 2 * add_cost (speed, compute_mode));
4496 t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4497 NULL_RTX, 0,
4498 max_cost - extra_cost);
4499 if (t1 == 0)
4500 goto fail1;
4501 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4502 t1, op0),
4503 NULL_RTX);
4504 t3 = expand_shift
4505 (RSHIFT_EXPR, compute_mode, t2,
4506 post_shift, NULL_RTX, 0);
4507 t4 = expand_shift
4508 (RSHIFT_EXPR, compute_mode, op0,
4509 size - 1, NULL_RTX, 0);
4510 if (d < 0)
4511 quotient
4512 = force_operand (gen_rtx_MINUS (compute_mode,
4513 t4, t3),
4514 tquotient);
4515 else
4516 quotient
4517 = force_operand (gen_rtx_MINUS (compute_mode,
4518 t3, t4),
4519 tquotient);
4522 else /* Too wide mode to use tricky code */
4523 break;
4525 insn = get_last_insn ();
4526 if (insn != last)
4527 set_dst_reg_note (insn, REG_EQUAL,
4528 gen_rtx_DIV (compute_mode, op0, op1),
4529 quotient);
4531 break;
4533 fail1:
4534 delete_insns_since (last);
4535 break;
4537 case FLOOR_DIV_EXPR:
4538 case FLOOR_MOD_EXPR:
4539 /* We will come here only for signed operations. */
4540 if (op1_is_constant && size <= HOST_BITS_PER_WIDE_INT)
4542 unsigned HOST_WIDE_INT mh, ml;
4543 int pre_shift, lgup, post_shift;
4544 HOST_WIDE_INT d = INTVAL (op1);
4546 if (d > 0)
4548 /* We could just as easily deal with negative constants here,
4549 but it does not seem worth the trouble for GCC 2.6. */
4550 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4552 pre_shift = floor_log2 (d);
4553 if (rem_flag)
4555 unsigned HOST_WIDE_INT mask
4556 = (HOST_WIDE_INT_1U << pre_shift) - 1;
4557 remainder = expand_binop
4558 (compute_mode, and_optab, op0,
4559 gen_int_mode (mask, compute_mode),
4560 remainder, 0, OPTAB_LIB_WIDEN);
4561 if (remainder)
4562 return gen_lowpart (mode, remainder);
4564 quotient = expand_shift
4565 (RSHIFT_EXPR, compute_mode, op0,
4566 pre_shift, tquotient, 0);
4568 else
4570 rtx t1, t2, t3, t4;
4572 mh = choose_multiplier (d, size, size - 1,
4573 &ml, &post_shift, &lgup);
4574 gcc_assert (!mh);
4576 if (post_shift < BITS_PER_WORD
4577 && size - 1 < BITS_PER_WORD)
4579 t1 = expand_shift
4580 (RSHIFT_EXPR, compute_mode, op0,
4581 size - 1, NULL_RTX, 0);
4582 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4583 NULL_RTX, 0, OPTAB_WIDEN);
4584 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4585 + shift_cost (speed, compute_mode, size - 1)
4586 + 2 * add_cost (speed, compute_mode));
4587 t3 = expmed_mult_highpart
4588 (compute_mode, t2, gen_int_mode (ml, compute_mode),
4589 NULL_RTX, 1, max_cost - extra_cost);
4590 if (t3 != 0)
4592 t4 = expand_shift
4593 (RSHIFT_EXPR, compute_mode, t3,
4594 post_shift, NULL_RTX, 1);
4595 quotient = expand_binop (compute_mode, xor_optab,
4596 t4, t1, tquotient, 0,
4597 OPTAB_WIDEN);
4602 else
4604 rtx nsign, t1, t2, t3, t4;
4605 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4606 op0, constm1_rtx), NULL_RTX);
4607 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4608 0, OPTAB_WIDEN);
4609 nsign = expand_shift (RSHIFT_EXPR, compute_mode, t2,
4610 size - 1, NULL_RTX, 0);
4611 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4612 NULL_RTX);
4613 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4614 NULL_RTX, 0);
4615 if (t4)
4617 rtx t5;
4618 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4619 NULL_RTX, 0);
4620 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4621 t4, t5),
4622 tquotient);
4627 if (quotient != 0)
4628 break;
4629 delete_insns_since (last);
4631 /* Try using an instruction that produces both the quotient and
4632 remainder, using truncation. We can easily compensate the quotient
4633 or remainder to get floor rounding, once we have the remainder.
4634 Notice that we also compute the final remainder value here,
4635 and return the result right away. */
4636 if (target == 0 || GET_MODE (target) != compute_mode)
4637 target = gen_reg_rtx (compute_mode);
4639 if (rem_flag)
4641 remainder
4642 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4643 quotient = gen_reg_rtx (compute_mode);
4645 else
4647 quotient
4648 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4649 remainder = gen_reg_rtx (compute_mode);
4652 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4653 quotient, remainder, 0))
4655 /* This could be computed with a branch-less sequence.
4656 Save that for later. */
4657 rtx tem;
4658 rtx_code_label *label = gen_label_rtx ();
4659 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4660 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4661 NULL_RTX, 0, OPTAB_WIDEN);
4662 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4663 expand_dec (quotient, const1_rtx);
4664 expand_inc (remainder, op1);
4665 emit_label (label);
4666 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4669 /* No luck with division elimination or divmod. Have to do it
4670 by conditionally adjusting op0 *and* the result. */
4672 rtx_code_label *label1, *label2, *label3, *label4, *label5;
4673 rtx adjusted_op0;
4674 rtx tem;
4676 quotient = gen_reg_rtx (compute_mode);
4677 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4678 label1 = gen_label_rtx ();
4679 label2 = gen_label_rtx ();
4680 label3 = gen_label_rtx ();
4681 label4 = gen_label_rtx ();
4682 label5 = gen_label_rtx ();
4683 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4684 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4685 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4686 quotient, 0, OPTAB_LIB_WIDEN);
4687 if (tem != quotient)
4688 emit_move_insn (quotient, tem);
4689 emit_jump_insn (targetm.gen_jump (label5));
4690 emit_barrier ();
4691 emit_label (label1);
4692 expand_inc (adjusted_op0, const1_rtx);
4693 emit_jump_insn (targetm.gen_jump (label4));
4694 emit_barrier ();
4695 emit_label (label2);
4696 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4697 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4698 quotient, 0, OPTAB_LIB_WIDEN);
4699 if (tem != quotient)
4700 emit_move_insn (quotient, tem);
4701 emit_jump_insn (targetm.gen_jump (label5));
4702 emit_barrier ();
4703 emit_label (label3);
4704 expand_dec (adjusted_op0, const1_rtx);
4705 emit_label (label4);
4706 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4707 quotient, 0, OPTAB_LIB_WIDEN);
4708 if (tem != quotient)
4709 emit_move_insn (quotient, tem);
4710 expand_dec (quotient, const1_rtx);
4711 emit_label (label5);
4713 break;
4715 case CEIL_DIV_EXPR:
4716 case CEIL_MOD_EXPR:
4717 if (unsignedp)
4719 if (op1_is_constant
4720 && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4721 && (size <= HOST_BITS_PER_WIDE_INT
4722 || INTVAL (op1) >= 0))
4724 rtx t1, t2, t3;
4725 unsigned HOST_WIDE_INT d = INTVAL (op1);
4726 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4727 floor_log2 (d), tquotient, 1);
4728 t2 = expand_binop (compute_mode, and_optab, op0,
4729 gen_int_mode (d - 1, compute_mode),
4730 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4731 t3 = gen_reg_rtx (compute_mode);
4732 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4733 compute_mode, 1, 1);
4734 if (t3 == 0)
4736 rtx_code_label *lab;
4737 lab = gen_label_rtx ();
4738 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4739 expand_inc (t1, const1_rtx);
4740 emit_label (lab);
4741 quotient = t1;
4743 else
4744 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4745 t1, t3),
4746 tquotient);
4747 break;
4750 /* Try using an instruction that produces both the quotient and
4751 remainder, using truncation. We can easily compensate the
4752 quotient or remainder to get ceiling rounding, once we have the
4753 remainder. Notice that we also compute the final remainder
4754 value here, and return the result right away. */
4755 if (target == 0 || GET_MODE (target) != compute_mode)
4756 target = gen_reg_rtx (compute_mode);
4758 if (rem_flag)
4760 remainder = (REG_P (target)
4761 ? target : gen_reg_rtx (compute_mode));
4762 quotient = gen_reg_rtx (compute_mode);
4764 else
4766 quotient = (REG_P (target)
4767 ? target : gen_reg_rtx (compute_mode));
4768 remainder = gen_reg_rtx (compute_mode);
4771 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4772 remainder, 1))
4774 /* This could be computed with a branch-less sequence.
4775 Save that for later. */
4776 rtx_code_label *label = gen_label_rtx ();
4777 do_cmp_and_jump (remainder, const0_rtx, EQ,
4778 compute_mode, label);
4779 expand_inc (quotient, const1_rtx);
4780 expand_dec (remainder, op1);
4781 emit_label (label);
4782 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4785 /* No luck with division elimination or divmod. Have to do it
4786 by conditionally adjusting op0 *and* the result. */
4788 rtx_code_label *label1, *label2;
4789 rtx adjusted_op0, tem;
4791 quotient = gen_reg_rtx (compute_mode);
4792 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4793 label1 = gen_label_rtx ();
4794 label2 = gen_label_rtx ();
4795 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4796 compute_mode, label1);
4797 emit_move_insn (quotient, const0_rtx);
4798 emit_jump_insn (targetm.gen_jump (label2));
4799 emit_barrier ();
4800 emit_label (label1);
4801 expand_dec (adjusted_op0, const1_rtx);
4802 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4803 quotient, 1, OPTAB_LIB_WIDEN);
4804 if (tem != quotient)
4805 emit_move_insn (quotient, tem);
4806 expand_inc (quotient, const1_rtx);
4807 emit_label (label2);
4810 else /* signed */
4812 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4813 && INTVAL (op1) >= 0)
4815 /* This is extremely similar to the code for the unsigned case
4816 above. For 2.7 we should merge these variants, but for
4817 2.6.1 I don't want to touch the code for unsigned since that
4818 gets used in C. The signed case will only be used by other
4819 languages (Ada). */
4821 rtx t1, t2, t3;
4822 unsigned HOST_WIDE_INT d = INTVAL (op1);
4823 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4824 floor_log2 (d), tquotient, 0);
4825 t2 = expand_binop (compute_mode, and_optab, op0,
4826 gen_int_mode (d - 1, compute_mode),
4827 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4828 t3 = gen_reg_rtx (compute_mode);
4829 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4830 compute_mode, 1, 1);
4831 if (t3 == 0)
4833 rtx_code_label *lab;
4834 lab = gen_label_rtx ();
4835 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4836 expand_inc (t1, const1_rtx);
4837 emit_label (lab);
4838 quotient = t1;
4840 else
4841 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4842 t1, t3),
4843 tquotient);
4844 break;
4847 /* Try using an instruction that produces both the quotient and
4848 remainder, using truncation. We can easily compensate the
4849 quotient or remainder to get ceiling rounding, once we have the
4850 remainder. Notice that we also compute the final remainder
4851 value here, and return the result right away. */
4852 if (target == 0 || GET_MODE (target) != compute_mode)
4853 target = gen_reg_rtx (compute_mode);
4854 if (rem_flag)
4856 remainder = (REG_P (target)
4857 ? target : gen_reg_rtx (compute_mode));
4858 quotient = gen_reg_rtx (compute_mode);
4860 else
4862 quotient = (REG_P (target)
4863 ? target : gen_reg_rtx (compute_mode));
4864 remainder = gen_reg_rtx (compute_mode);
4867 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4868 remainder, 0))
4870 /* This could be computed with a branch-less sequence.
4871 Save that for later. */
4872 rtx tem;
4873 rtx_code_label *label = gen_label_rtx ();
4874 do_cmp_and_jump (remainder, const0_rtx, EQ,
4875 compute_mode, label);
4876 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4877 NULL_RTX, 0, OPTAB_WIDEN);
4878 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4879 expand_inc (quotient, const1_rtx);
4880 expand_dec (remainder, op1);
4881 emit_label (label);
4882 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4885 /* No luck with division elimination or divmod. Have to do it
4886 by conditionally adjusting op0 *and* the result. */
4888 rtx_code_label *label1, *label2, *label3, *label4, *label5;
4889 rtx adjusted_op0;
4890 rtx tem;
4892 quotient = gen_reg_rtx (compute_mode);
4893 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4894 label1 = gen_label_rtx ();
4895 label2 = gen_label_rtx ();
4896 label3 = gen_label_rtx ();
4897 label4 = gen_label_rtx ();
4898 label5 = gen_label_rtx ();
4899 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4900 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4901 compute_mode, label1);
4902 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4903 quotient, 0, OPTAB_LIB_WIDEN);
4904 if (tem != quotient)
4905 emit_move_insn (quotient, tem);
4906 emit_jump_insn (targetm.gen_jump (label5));
4907 emit_barrier ();
4908 emit_label (label1);
4909 expand_dec (adjusted_op0, const1_rtx);
4910 emit_jump_insn (targetm.gen_jump (label4));
4911 emit_barrier ();
4912 emit_label (label2);
4913 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4914 compute_mode, label3);
4915 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4916 quotient, 0, OPTAB_LIB_WIDEN);
4917 if (tem != quotient)
4918 emit_move_insn (quotient, tem);
4919 emit_jump_insn (targetm.gen_jump (label5));
4920 emit_barrier ();
4921 emit_label (label3);
4922 expand_inc (adjusted_op0, const1_rtx);
4923 emit_label (label4);
4924 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4925 quotient, 0, OPTAB_LIB_WIDEN);
4926 if (tem != quotient)
4927 emit_move_insn (quotient, tem);
4928 expand_inc (quotient, const1_rtx);
4929 emit_label (label5);
4932 break;
4934 case EXACT_DIV_EXPR:
4935 if (op1_is_constant && size <= HOST_BITS_PER_WIDE_INT)
4937 HOST_WIDE_INT d = INTVAL (op1);
4938 unsigned HOST_WIDE_INT ml;
4939 int pre_shift;
4940 rtx t1;
4942 pre_shift = ctz_or_zero (d);
4943 ml = invert_mod2n (d >> pre_shift, size);
4944 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4945 pre_shift, NULL_RTX, unsignedp);
4946 quotient = expand_mult (compute_mode, t1,
4947 gen_int_mode (ml, compute_mode),
4948 NULL_RTX, 1);
4950 insn = get_last_insn ();
4951 set_dst_reg_note (insn, REG_EQUAL,
4952 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4953 compute_mode, op0, op1),
4954 quotient);
4956 break;
4958 case ROUND_DIV_EXPR:
4959 case ROUND_MOD_EXPR:
4960 if (unsignedp)
4962 rtx tem;
4963 rtx_code_label *label;
4964 label = gen_label_rtx ();
4965 quotient = gen_reg_rtx (compute_mode);
4966 remainder = gen_reg_rtx (compute_mode);
4967 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4969 rtx tem;
4970 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4971 quotient, 1, OPTAB_LIB_WIDEN);
4972 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4973 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4974 remainder, 1, OPTAB_LIB_WIDEN);
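/* Round the quotient to nearest: when the remainder is greater than
   (op1 - 1) / 2, i.e. at least half the divisor with ties rounding up,
   bump the quotient and take op1 back out of the remainder; the LEU
   branch below skips that adjustment otherwise.  */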
4976 tem = plus_constant (compute_mode, op1, -1);
4977 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4978 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4979 expand_inc (quotient, const1_rtx);
4980 expand_dec (remainder, op1);
4981 emit_label (label);
4983 else
4985 rtx abs_rem, abs_op1, tem, mask;
4986 rtx_code_label *label;
4987 label = gen_label_rtx ();
4988 quotient = gen_reg_rtx (compute_mode);
4989 remainder = gen_reg_rtx (compute_mode);
4990 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4992 rtx tem;
4993 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4994 quotient, 0, OPTAB_LIB_WIDEN);
4995 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4996 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4997 remainder, 0, OPTAB_LIB_WIDEN);
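/* Round to nearest, halves away from zero: adjust only when
   2 * abs (remainder) >= abs (op1).  MASK below is 0 or -1 according to
   the sign of op0 ^ op1, so the two xor/sub pairs turn it into +1 or -1
   and into op1 or -op1, moving the quotient and remainder in the proper
   direction without any further branches.  */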
4999 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
5000 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
5001 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
5002 1, NULL_RTX, 1);
5003 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
5004 tem = expand_binop (compute_mode, xor_optab, op0, op1,
5005 NULL_RTX, 0, OPTAB_WIDEN);
5006 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
5007 size - 1, NULL_RTX, 0);
5008 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
5009 NULL_RTX, 0, OPTAB_WIDEN);
5010 tem = expand_binop (compute_mode, sub_optab, tem, mask,
5011 NULL_RTX, 0, OPTAB_WIDEN);
5012 expand_inc (quotient, tem);
5013 tem = expand_binop (compute_mode, xor_optab, mask, op1,
5014 NULL_RTX, 0, OPTAB_WIDEN);
5015 tem = expand_binop (compute_mode, sub_optab, tem, mask,
5016 NULL_RTX, 0, OPTAB_WIDEN);
5017 expand_dec (remainder, tem);
5018 emit_label (label);
5020 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5022 default:
5023 gcc_unreachable ();
5026 if (quotient == 0)
5028 if (target && GET_MODE (target) != compute_mode)
5029 target = 0;
5031 if (rem_flag)
5033 /* Try to produce the remainder without producing the quotient.
5034 If we seem to have a divmod pattern that does not require widening,
5035 don't try widening here. We should really have a WIDEN argument
5036 to expand_twoval_binop, since what we'd really like to do here is
5037 1) try a mod insn in compute_mode
5038 2) try a divmod insn in compute_mode
5039 3) try a div insn in compute_mode and multiply-subtract to get
5040 remainder
5041 4) try the same things with widening allowed. */
5042 remainder
5043 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5044 op0, op1, target,
5045 unsignedp,
5046 ((optab_handler (optab2, compute_mode)
5047 != CODE_FOR_nothing)
5048 ? OPTAB_DIRECT : OPTAB_WIDEN));
5049 if (remainder == 0)
5051 /* No luck there. Can we do remainder and divide at once
5052 without a library call? */
5053 remainder = gen_reg_rtx (compute_mode);
5054 if (! expand_twoval_binop ((unsignedp
5055 ? udivmod_optab
5056 : sdivmod_optab),
5057 op0, op1,
5058 NULL_RTX, remainder, unsignedp))
5059 remainder = 0;
5062 if (remainder)
5063 return gen_lowpart (mode, remainder);
5066 /* Produce the quotient. Try a quotient insn, but not a library call.
5067 If we have a divmod in this mode, use it in preference to widening
5068 the div (for this test we assume it will not fail). Note that optab2
5069 is set to the one of the two optabs that the call below will use. */
5070 quotient
5071 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5072 op0, op1, rem_flag ? NULL_RTX : target,
5073 unsignedp,
5074 ((optab_handler (optab2, compute_mode)
5075 != CODE_FOR_nothing)
5076 ? OPTAB_DIRECT : OPTAB_WIDEN));
5078 if (quotient == 0)
5080 /* No luck there. Try a quotient-and-remainder insn,
5081 keeping the quotient alone. */
5082 quotient = gen_reg_rtx (compute_mode);
5083 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5084 op0, op1,
5085 quotient, NULL_RTX, unsignedp))
5087 quotient = 0;
5088 if (! rem_flag)
5089 /* Still no luck. If we are not computing the remainder,
5090 use a library call for the quotient. */
5091 quotient = sign_expand_binop (compute_mode,
5092 udiv_optab, sdiv_optab,
5093 op0, op1, target,
5094 unsignedp, OPTAB_LIB_WIDEN);
5099 if (rem_flag)
5101 if (target && GET_MODE (target) != compute_mode)
5102 target = 0;
5104 if (quotient == 0)
5106 /* No divide instruction either. Use library for remainder. */
5107 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5108 op0, op1, target,
5109 unsignedp, OPTAB_LIB_WIDEN);
5110 /* No remainder function. Try a quotient-and-remainder
5111 function, keeping the remainder. */
5112 if (!remainder)
5114 remainder = gen_reg_rtx (compute_mode);
5115 if (!expand_twoval_binop_libfunc
5116 (unsignedp ? udivmod_optab : sdivmod_optab,
5117 op0, op1,
5118 NULL_RTX, remainder,
5119 unsignedp ? UMOD : MOD))
5120 remainder = NULL_RTX;
5123 else
5125 /* We divided. Now finish doing X - Y * (X / Y). */
5126 remainder = expand_mult (compute_mode, quotient, op1,
5127 NULL_RTX, unsignedp);
5128 remainder = expand_binop (compute_mode, sub_optab, op0,
5129 remainder, target, unsignedp,
5130 OPTAB_LIB_WIDEN);
5134 return gen_lowpart (mode, rem_flag ? remainder : quotient);
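/* Illustrative host-side sketch (the helper name is made up and a 64-bit
   HOST_WIDE_INT is assumed): for an unsigned TRUNC_DIV_EXPR by 7,
   choose_multiplier yields ml = 0x24924925 with a set carry bit (mh != 0)
   and post_shift = 3, so the add-and-shift fixup path above is used.
   Spelled out on 32-bit host values:  */

static inline unsigned int
example_udiv7_by_high_multiply (unsigned int x)
{
  unsigned int t1
    = (unsigned int) (((unsigned HOST_WIDE_INT) x * 0x24924925) >> 32);
  unsigned int t2 = (x - t1) >> 1;
  return (t1 + t2) >> 2;	/* >> (post_shift - 1); == x / 7 for 32-bit x.  */
}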
5137 /* Return a tree node with data type TYPE, describing the value of X.
5138 Usually this is a VAR_DECL, if there is no obvious better choice.
5139 X may be an expression; however, we only support those expressions
5140 generated by loop.c. */
5142 tree
5143 make_tree (tree type, rtx x)
5145 tree t;
5147 switch (GET_CODE (x))
5149 case CONST_INT:
5150 case CONST_WIDE_INT:
5151 t = wide_int_to_tree (type, rtx_mode_t (x, TYPE_MODE (type)));
5152 return t;
5154 case CONST_DOUBLE:
5155 STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5156 if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5157 t = wide_int_to_tree (type,
5158 wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5159 HOST_BITS_PER_WIDE_INT * 2));
5160 else
5161 t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5163 return t;
5165 case CONST_VECTOR:
5167 int units = CONST_VECTOR_NUNITS (x);
5168 tree itype = TREE_TYPE (type);
5169 tree *elts;
5170 int i;
5172 /* Build a tree with vector elements. */
5173 elts = XALLOCAVEC (tree, units);
5174 for (i = units - 1; i >= 0; --i)
5176 rtx elt = CONST_VECTOR_ELT (x, i);
5177 elts[i] = make_tree (itype, elt);
5180 return build_vector (type, elts);
5183 case PLUS:
5184 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5185 make_tree (type, XEXP (x, 1)));
5187 case MINUS:
5188 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5189 make_tree (type, XEXP (x, 1)));
5191 case NEG:
5192 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5194 case MULT:
5195 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5196 make_tree (type, XEXP (x, 1)));
5198 case ASHIFT:
5199 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5200 make_tree (type, XEXP (x, 1)));
5202 case LSHIFTRT:
5203 t = unsigned_type_for (type);
5204 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5205 make_tree (t, XEXP (x, 0)),
5206 make_tree (type, XEXP (x, 1))));
5208 case ASHIFTRT:
5209 t = signed_type_for (type);
5210 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5211 make_tree (t, XEXP (x, 0)),
5212 make_tree (type, XEXP (x, 1))));
5214 case DIV:
5215 if (TREE_CODE (type) != REAL_TYPE)
5216 t = signed_type_for (type);
5217 else
5218 t = type;
5220 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5221 make_tree (t, XEXP (x, 0)),
5222 make_tree (t, XEXP (x, 1))));
5223 case UDIV:
5224 t = unsigned_type_for (type);
5225 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5226 make_tree (t, XEXP (x, 0)),
5227 make_tree (t, XEXP (x, 1))));
5229 case SIGN_EXTEND:
5230 case ZERO_EXTEND:
5231 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5232 GET_CODE (x) == ZERO_EXTEND);
5233 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5235 case CONST:
5236 return make_tree (type, XEXP (x, 0));
5238 case SYMBOL_REF:
5239 t = SYMBOL_REF_DECL (x);
5240 if (t)
5241 return fold_convert (type, build_fold_addr_expr (t));
5242 /* fall through. */
5244 default:
5245 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5247 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5248 address mode to pointer mode. */
5249 if (POINTER_TYPE_P (type))
5250 x = convert_memory_address_addr_space
5251 (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5253 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5254 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5255 t->decl_with_rtl.rtl = x;
5257 return t;
5261 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5262 and returning TARGET.
5264 If TARGET is 0, a pseudo-register or constant is returned. */
5267 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5269 rtx tem = 0;
5271 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5272 tem = simplify_binary_operation (AND, mode, op0, op1);
5273 if (tem == 0)
5274 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5276 if (target == 0)
5277 target = tem;
5278 else if (tem != target)
5279 emit_move_insn (target, tem);
5280 return target;
5283 /* Helper function for emit_store_flag. */
5284 static rtx
5285 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5286 machine_mode mode, machine_mode compare_mode,
5287 int unsignedp, rtx x, rtx y, int normalizep,
5288 machine_mode target_mode)
5290 struct expand_operand ops[4];
5291 rtx op0, comparison, subtarget;
5292 rtx_insn *last;
5293 machine_mode result_mode = targetm.cstore_mode (icode);
5295 last = get_last_insn ();
5296 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5297 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5298 if (!x || !y)
5300 delete_insns_since (last);
5301 return NULL_RTX;
5304 if (target_mode == VOIDmode)
5305 target_mode = result_mode;
5306 if (!target)
5307 target = gen_reg_rtx (target_mode);
5309 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5311 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5312 create_fixed_operand (&ops[1], comparison);
5313 create_fixed_operand (&ops[2], x);
5314 create_fixed_operand (&ops[3], y);
5315 if (!maybe_expand_insn (icode, 4, ops))
5317 delete_insns_since (last);
5318 return NULL_RTX;
5320 subtarget = ops[0].value;
5322 /* If we are converting to a wider mode, first convert to
5323 TARGET_MODE, then normalize. This produces better combining
5324 opportunities on machines that have a SIGN_EXTRACT when we are
5325 testing a single bit. This mostly benefits the 68k.
5327 If STORE_FLAG_VALUE does not have the sign bit set when
5328 interpreted in MODE, we can do this conversion as unsigned, which
5329 is usually more efficient. */
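/* For instance, if the cstore pattern produces its result in, say, QImode
   and STORE_FLAG_VALUE is 1, the sign bit of the result is known to be
   clear, so the widening conversion can be done as a cheaper
   zero-extension; a target whose STORE_FLAG_VALUE is -1 needs a
   sign-extending conversion instead.  */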
5330 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5332 convert_move (target, subtarget,
5333 val_signbit_known_clear_p (result_mode,
5334 STORE_FLAG_VALUE));
5335 op0 = target;
5336 result_mode = target_mode;
5338 else
5339 op0 = subtarget;
5341 /* If we want to keep subexpressions around, don't reuse our last
5342 target. */
5343 if (optimize)
5344 subtarget = 0;
5346 /* Now normalize to the proper value in MODE. Sometimes we don't
5347 have to do anything. */
5348 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5349 ;
5350 /* STORE_FLAG_VALUE might be the most negative number, so write
5351 the comparison this way to avoid a compile-time warning. */
5352 else if (- normalizep == STORE_FLAG_VALUE)
5353 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5355 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5356 it hard to use a value of just the sign bit due to ANSI integer
5357 constant typing rules. */
5358 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5359 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5360 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5361 normalizep == 1);
5362 else
5364 gcc_assert (STORE_FLAG_VALUE & 1);
5366 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5367 if (normalizep == -1)
5368 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5371 /* If we were converting to a smaller mode, do the conversion now. */
5372 if (target_mode != result_mode)
5374 convert_move (target, op0, 0);
5375 return target;
5377 else
5378 return op0;
5382 /* A subroutine of emit_store_flag only including "tricks" that do not
5383 need a recursive call. These are kept separate to avoid infinite
5384 loops. */
5386 static rtx
5387 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5388 machine_mode mode, int unsignedp, int normalizep,
5389 machine_mode target_mode)
5391 rtx subtarget;
5392 enum insn_code icode;
5393 machine_mode compare_mode;
5394 enum mode_class mclass;
5395 enum rtx_code scode;
5397 if (unsignedp)
5398 code = unsigned_condition (code);
5399 scode = swap_condition (code);
5401 /* If one operand is constant, make it the second one. Only do this
5402 if the other operand is not constant as well. */
5404 if (swap_commutative_operands_p (op0, op1))
5406 std::swap (op0, op1);
5407 code = swap_condition (code);
5410 if (mode == VOIDmode)
5411 mode = GET_MODE (op0);
5413 /* For some comparisons with 1 and -1, we can convert this to
5414 comparisons with zero. This will often produce more opportunities for
5415 store-flag insns. */
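/* For example, "x < 1" is the same condition as "x <= 0", and the
   unsigned "x >= 1" is the same as "x != 0", so the rewritten comparison
   can use a plain test against zero.  */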
5417 switch (code)
5419 case LT:
5420 if (op1 == const1_rtx)
5421 op1 = const0_rtx, code = LE;
5422 break;
5423 case LE:
5424 if (op1 == constm1_rtx)
5425 op1 = const0_rtx, code = LT;
5426 break;
5427 case GE:
5428 if (op1 == const1_rtx)
5429 op1 = const0_rtx, code = GT;
5430 break;
5431 case GT:
5432 if (op1 == constm1_rtx)
5433 op1 = const0_rtx, code = GE;
5434 break;
5435 case GEU:
5436 if (op1 == const1_rtx)
5437 op1 = const0_rtx, code = NE;
5438 break;
5439 case LTU:
5440 if (op1 == const1_rtx)
5441 op1 = const0_rtx, code = EQ;
5442 break;
5443 default:
5444 break;
5447 /* If we are comparing a double-word integer with zero or -1, we can
5448 convert the comparison into one involving a single word. */
5449 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5450 && GET_MODE_CLASS (mode) == MODE_INT
5451 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5453 rtx tem;
5454 if ((code == EQ || code == NE)
5455 && (op1 == const0_rtx || op1 == constm1_rtx))
5457 rtx op00, op01;
5459 /* Do a logical OR or AND of the two words and compare the
5460 result. */
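/* That is, for a double-word value split into words HI and LO,
   (HI == 0 && LO == 0) is equivalent to (HI | LO) == 0 and
   (HI == -1 && LO == -1) is equivalent to (HI & LO) == -1, so a single
   word-mode comparison against OP1 suffices.  */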
5461 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5462 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5463 tem = expand_binop (word_mode,
5464 op1 == const0_rtx ? ior_optab : and_optab,
5465 op00, op01, NULL_RTX, unsignedp,
5466 OPTAB_DIRECT);
5468 if (tem != 0)
5469 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5470 unsignedp, normalizep);
5472 else if ((code == LT || code == GE) && op1 == const0_rtx)
5474 rtx op0h;
5476 /* If testing the sign bit, can just test on high word. */
5477 op0h = simplify_gen_subreg (word_mode, op0, mode,
5478 subreg_highpart_offset (word_mode,
5479 mode));
5480 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5481 unsignedp, normalizep);
5483 else
5484 tem = NULL_RTX;
5486 if (tem)
5488 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5489 return tem;
5490 if (!target)
5491 target = gen_reg_rtx (target_mode);
5493 convert_move (target, tem,
5494 !val_signbit_known_set_p (word_mode,
5495 (normalizep ? normalizep
5496 : STORE_FLAG_VALUE)));
5497 return target;
5501 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5502 complement of A (for GE) and shifting the sign bit to the low bit. */
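/* The trick is that "A < 0" is exactly the sign bit of A, and "A >= 0"
   is exactly the sign bit of ~A, so either test reduces to moving the
   sign bit down to bit 0 with the shift emitted below.  */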
5503 if (op1 == const0_rtx && (code == LT || code == GE)
5504 && GET_MODE_CLASS (mode) == MODE_INT
5505 && (normalizep || STORE_FLAG_VALUE == 1
5506 || val_signbit_p (mode, STORE_FLAG_VALUE)))
5508 subtarget = target;
5510 if (!target)
5511 target_mode = mode;
5513 /* If the result is to be wider than OP0, it is best to convert it
5514 first. If it is to be narrower, it is *incorrect* to convert it
5515 first. */
5516 else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5518 op0 = convert_modes (target_mode, mode, op0, 0);
5519 mode = target_mode;
5522 if (target_mode != mode)
5523 subtarget = 0;
5525 if (code == GE)
5526 op0 = expand_unop (mode, one_cmpl_optab, op0,
5527 ((STORE_FLAG_VALUE == 1 || normalizep)
5528 ? 0 : subtarget), 0);
5530 if (STORE_FLAG_VALUE == 1 || normalizep)
5531 /* If we are supposed to produce a 0/1 value, we want to do
5532 a logical shift from the sign bit to the low-order bit; for
5533 a -1/0 value, we do an arithmetic shift. */
5534 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5535 GET_MODE_BITSIZE (mode) - 1,
5536 subtarget, normalizep != -1);
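/* E.g. in an 8-bit mode, shifting 0x80 right by 7 yields 1 with a
   logical shift and -1 (all ones) with an arithmetic shift, matching
   the requested 0/1 or 0/-1 encoding.  */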
5538 if (mode != target_mode)
5539 op0 = convert_modes (target_mode, mode, op0, 0);
5541 return op0;
5544 mclass = GET_MODE_CLASS (mode);
5545 FOR_EACH_MODE_FROM (compare_mode, mode)
5547 machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5548 icode = optab_handler (cstore_optab, optab_mode);
5549 if (icode != CODE_FOR_nothing)
5551 do_pending_stack_adjust ();
5552 rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5553 unsignedp, op0, op1, normalizep, target_mode);
5554 if (tem)
5555 return tem;
5557 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5559 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5560 unsignedp, op1, op0, normalizep, target_mode);
5561 if (tem)
5562 return tem;
5564 break;
5568 return 0;
5571 /* Subroutine of emit_store_flag that handles cases in which the operands
5572 are scalar integers. SUBTARGET is the target to use for temporary
5573 operations and TRUEVAL is the value to store when the condition is
5574 true. All other arguments are as for emit_store_flag. */
5576 static rtx
5577 emit_store_flag_int (rtx target, rtx subtarget, enum rtx_code code, rtx op0,
5578 rtx op1, machine_mode mode, int unsignedp,
5579 int normalizep, rtx trueval)
5581 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5582 rtx_insn *last = get_last_insn ();
5583 rtx tem;
5585 /* If this is an equality comparison of integers, we can try to exclusive-or
5586 (or subtract) the two operands and use a recursive call to try the
5587 comparison with zero. Don't do any of these cases if branches are
5588 very cheap. */
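/* This relies on the identities "x == y" <-> "(x ^ y) == 0" and, in
   wrapping arithmetic, "x == y" <-> "(x - y) == 0", so the recursive
   call only has to handle a comparison against zero.  */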
5590 if ((code == EQ || code == NE) && op1 != const0_rtx)
5592 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5593 OPTAB_WIDEN);
5595 if (tem == 0)
5596 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5597 OPTAB_WIDEN);
5598 if (tem != 0)
5599 tem = emit_store_flag (target, code, tem, const0_rtx,
5600 mode, unsignedp, normalizep);
5601 if (tem != 0)
5602 return tem;
5604 delete_insns_since (last);
5607 /* For integer comparisons, try the reverse comparison. However, for
5608 small X, and if we'd have to extend anyway, implementing "X != 0"
5609 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
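/* For instance, if X is a zero-extended unsigned char in a 32-bit
   register, (int) X is never negative, so -(int) X is negative exactly
   when X is nonzero; the arithmetic shift by 31 then produces -1 for
   "X != 0" and 0 otherwise, with no separate inversion step.  */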
5610 rtx_code rcode = reverse_condition (code);
5611 if (can_compare_p (rcode, mode, ccp_store_flag)
5612 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5613 && code == NE
5614 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5615 && op1 == const0_rtx))
5617 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5618 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5620 /* Again, for the reverse comparison, use either an addition or a XOR. */
5621 if (want_add
5622 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5623 optimize_insn_for_speed_p ()) == 0)
5625 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5626 STORE_FLAG_VALUE, target_mode);
5627 if (tem != 0)
5628 tem = expand_binop (target_mode, add_optab, tem,
5629 gen_int_mode (normalizep, target_mode),
5630 target, 0, OPTAB_WIDEN);
5632 else if (!want_add
5633 && rtx_cost (trueval, mode, XOR, 1,
5634 optimize_insn_for_speed_p ()) == 0)
5636 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5637 normalizep, target_mode);
5638 if (tem != 0)
5639 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5640 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5643 if (tem != 0)
5644 return tem;
5645 delete_insns_since (last);
5648 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5649 the constant zero. Reject all other comparisons at this point. Only
5650 do LE and GT if branches are expensive since they are expensive on
5651 2-operand machines. */
5653 if (op1 != const0_rtx
5654 || (code != EQ && code != NE
5655 && (BRANCH_COST (optimize_insn_for_speed_p (),
5656 false) <= 1 || (code != LE && code != GT))))
5657 return 0;
5659 /* Try to put the result of the comparison in the sign bit. Assume we can't
5660 do the necessary operation below. */
5662 tem = 0;
5664 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5665 the sign bit set. */
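/* Worked examples in an 8-bit mode: A = 3 gives 3 | 2 = 3 (sign bit
   clear, so A <= 0 is false); A = 0 gives 0 | 0xff = 0xff (sign bit
   set); any negative A already has the sign bit set, which the OR
   preserves.  */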
5667 if (code == LE)
5669 /* This is destructive, so SUBTARGET can't be OP0. */
5670 if (rtx_equal_p (subtarget, op0))
5671 subtarget = 0;
5673 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5674 OPTAB_WIDEN);
5675 if (tem)
5676 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5677 OPTAB_WIDEN);
5680 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5681 number of bits in the mode of OP0, minus one. */
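/* Worked examples in an 8-bit mode, where BITS is 7: A = 3 gives
   (3 >> 7) - 3 = -3 (sign bit set, so A > 0 is true); A = 0 gives
   0 - 0 = 0 (clear); A = -5 gives (-1) - (-5) = 4 (clear).  */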
5683 if (code == GT)
5685 if (rtx_equal_p (subtarget, op0))
5686 subtarget = 0;
5688 tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
5689 GET_MODE_BITSIZE (mode) - 1,
5690 subtarget, 0);
5691 if (tem)
5692 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5693 OPTAB_WIDEN);
5696 if (code == EQ || code == NE)
5698 /* For EQ or NE, one way to do the comparison is to apply an operation
5699 that converts the operand into a positive number if it is nonzero
5700 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5701 for NE we negate. This puts the result in the sign bit. Then we
5702 normalize with a shift, if needed.
5704 Two operations that can do the above actions are ABS and FFS, so try
5705 them. If that doesn't work, and MODE is smaller than a full word,
5706 we can use zero-extension to the wider mode (an unsigned conversion)
5707 as the operation. */
5709 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5710 that is compensated by the subsequent overflow when subtracting
5711 one / negating. */
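/* Worked example for EQ: abs (A) is at least 1 for any nonzero A other
   than INT_MIN, so abs (A) - 1 is nonnegative, while abs (0) - 1 wraps
   to -1; the sign bit is therefore set exactly when A == 0.  For NE,
   -abs (A) is negative exactly when A != 0.  The INT_MIN case works out
   because abs (INT_MIN) is INT_MIN, and INT_MIN - 1 overflows to
   INT_MAX (nonnegative) while -INT_MIN stays INT_MIN (negative).  */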
5713 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5714 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5715 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5716 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5717 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5719 tem = convert_modes (word_mode, mode, op0, 1);
5720 mode = word_mode;
5723 if (tem != 0)
5725 if (code == EQ)
5726 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5727 0, OPTAB_WIDEN);
5728 else
5729 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5732 /* If we couldn't do it that way, for NE we can "or" the two's complement
5733 of the value with itself. For EQ, we take the one's complement of
5734 that "or", which is an extra insn, so we only handle EQ if branches
5735 are expensive. */
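/* That is, (-A) | A has its sign bit set exactly when A is nonzero:
   for positive A the -A term is negative, for negative A the A term is,
   and for A == 0 the result is 0.  (A == INT_MIN still works, since
   -INT_MIN is INT_MIN.)  The one's complement then inverts the bit for
   the EQ case.  */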
5737 if (tem == 0
5738 && (code == NE
5739 || BRANCH_COST (optimize_insn_for_speed_p (),
5740 false) > 1))
5742 if (rtx_equal_p (subtarget, op0))
5743 subtarget = 0;
5745 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5746 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5747 OPTAB_WIDEN);
5749 if (tem && code == EQ)
5750 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5754 if (tem && normalizep)
5755 tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem,
5756 GET_MODE_BITSIZE (mode) - 1,
5757 subtarget, normalizep == 1);
5759 if (tem)
5761 if (!target)
5762 ;
5763 else if (GET_MODE (tem) != target_mode)
5765 convert_move (target, tem, 0);
5766 tem = target;
5768 else if (!subtarget)
5770 emit_move_insn (target, tem);
5771 tem = target;
5774 else
5775 delete_insns_since (last);
5777 return tem;
5780 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5781 and storing in TARGET. Normally return TARGET.
5782 Return 0 if that cannot be done.
5784 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5785 it is VOIDmode, they cannot both be CONST_INT.
5787 UNSIGNEDP is for the case where we have to widen the operands
5788 to perform the operation. It says to use zero-extension.
5790 NORMALIZEP is 1 if we should convert the result to be either zero
5791 or one. NORMALIZEP is -1 if we should convert the result to be
5792 either zero or -1. If NORMALIZEP is zero, the result will be left
5793 "raw" out of the scc insn. */
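/* For example, a caller wanting a 0/1 flag for "x == 0" in SImode might
   write (illustrative only):

     rtx flag = emit_store_flag (NULL_RTX, EQ, x, const0_rtx,
                                 SImode, 0, 1);

   and must be prepared to fall back to a compare-and-branch sequence
   when the result is 0.  */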
5795 rtx
5796 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5797 machine_mode mode, int unsignedp, int normalizep)
5799 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5800 enum rtx_code rcode;
5801 rtx subtarget;
5802 rtx tem, trueval;
5803 rtx_insn *last;
5805 /* If we compare constants, we shouldn't use a store-flag operation,
5806 but a constant load. We can get there via the vanilla route that
5807 usually generates a compare-branch sequence, but will in this case
5808 fold the comparison to a constant, and thus elide the branch. */
5809 if (CONSTANT_P (op0) && CONSTANT_P (op1))
5810 return NULL_RTX;
5812 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5813 target_mode);
5814 if (tem)
5815 return tem;
5817 /* If we reached here, we can't do this with a scc insn, however there
5818 are some comparisons that can be done in other ways. Don't do any
5819 of these cases if branches are very cheap. */
5820 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5821 return 0;
5823 /* See what we need to return. We can only return a 1, -1, or the
5824 sign bit. */
5826 if (normalizep == 0)
5828 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5829 normalizep = STORE_FLAG_VALUE;
5831 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5832 ;
5833 else
5834 return 0;
5837 last = get_last_insn ();
5839 /* If optimizing, use different pseudo registers for each insn, instead
5840 of reusing the same pseudo. This leads to better CSE, but slows
5841 down the compiler, since there are more pseudos. */
5842 subtarget = (!optimize
5843 && (target_mode == mode)) ? target : NULL_RTX;
5844 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5846 /* For floating-point comparisons, try the reverse comparison or try
5847 changing the "orderedness" of the comparison. */
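/* For example, when NaNs need not be honored, "a LTGT b" is simply
   "a != b", and an unsupported comparison can be computed through its
   reverse and then flipped with a cheap XOR or ADD, as done below.  */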
5848 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5850 enum rtx_code first_code;
5851 bool and_them;
5853 rcode = reverse_condition_maybe_unordered (code);
5854 if (can_compare_p (rcode, mode, ccp_store_flag)
5855 && (code == ORDERED || code == UNORDERED
5856 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5857 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5859 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5860 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5862 /* For the reverse comparison, use either an addition or a XOR. */
5863 if (want_add
5864 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5865 optimize_insn_for_speed_p ()) == 0)
5867 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5868 STORE_FLAG_VALUE, target_mode);
5869 if (tem)
5870 return expand_binop (target_mode, add_optab, tem,
5871 gen_int_mode (normalizep, target_mode),
5872 target, 0, OPTAB_WIDEN);
5874 else if (!want_add
5875 && rtx_cost (trueval, mode, XOR, 1,
5876 optimize_insn_for_speed_p ()) == 0)
5878 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5879 normalizep, target_mode);
5880 if (tem)
5881 return expand_binop (target_mode, xor_optab, tem, trueval,
5882 target, INTVAL (trueval) >= 0,
5883 OPTAB_WIDEN);
5887 delete_insns_since (last);
5889 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5890 if (code == ORDERED || code == UNORDERED)
5891 return 0;
5893 and_them = split_comparison (code, mode, &first_code, &code);
5895 /* If there are no NaNs, the first comparison should always fall through.
5896 Effectively change the comparison to the other one. */
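/* For example, an ordered "a < b" can be split into "ordered (a, b)"
   ANDed with an unordered-or-less test; when NaNs cannot occur the
   ordered half is vacuously true, so only the other half needs to be
   emitted.  */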
5897 if (!HONOR_NANS (mode))
5899 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5900 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5901 target_mode);
5904 if (!HAVE_conditional_move)
5905 return 0;
5907 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5908 conditional move. */
5909 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5910 normalizep, target_mode);
5911 if (tem == 0)
5912 return 0;
5914 if (and_them)
5915 tem = emit_conditional_move (target, code, op0, op1, mode,
5916 tem, const0_rtx, GET_MODE (tem), 0);
5917 else
5918 tem = emit_conditional_move (target, code, op0, op1, mode,
5919 trueval, tem, GET_MODE (tem), 0);
5921 if (tem == 0)
5922 delete_insns_since (last);
5923 return tem;
5926 /* The remaining tricks only apply to integer comparisons. */
5928 if (GET_MODE_CLASS (mode) == MODE_INT)
5929 return emit_store_flag_int (target, subtarget, code, op0, op1, mode,
5930 unsignedp, normalizep, trueval);
5932 return 0;
5935 /* Like emit_store_flag, but always succeeds. */
5937 rtx
5938 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5939 machine_mode mode, int unsignedp, int normalizep)
5941 rtx tem;
5942 rtx_code_label *label;
5943 rtx trueval, falseval;
5945 /* First see if emit_store_flag can do the job. */
5946 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5947 if (tem != 0)
5948 return tem;
5950 if (!target)
5951 target = gen_reg_rtx (word_mode);
5953 /* If this failed, we have to do this with set/compare/jump/set code.
5954 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
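/* When OP0 is already the target register, the emitted sequence is
   conceptually (illustrative only):

       if (target == 0) goto done;
       target = TRUEVAL;
     done:

   which leaves the zero in place and overwrites nonzero values with
   TRUEVAL.  */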
5955 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5956 if (code == NE
5957 && GET_MODE_CLASS (mode) == MODE_INT
5958 && REG_P (target)
5959 && op0 == target
5960 && op1 == const0_rtx)
5962 label = gen_label_rtx ();
5963 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
5964 NULL_RTX, NULL, label,
5965 profile_probability::uninitialized ());
5966 emit_move_insn (target, trueval);
5967 emit_label (label);
5968 return target;
5971 if (!REG_P (target)
5972 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5973 target = gen_reg_rtx (GET_MODE (target));
5975 /* Jump in the right direction if the target cannot implement CODE
5976 but can jump on its reverse condition. */
5977 falseval = const0_rtx;
5978 if (! can_compare_p (code, mode, ccp_jump)
5979 && (! FLOAT_MODE_P (mode)
5980 || code == ORDERED || code == UNORDERED
5981 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5982 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5984 enum rtx_code rcode;
5985 if (FLOAT_MODE_P (mode))
5986 rcode = reverse_condition_maybe_unordered (code);
5987 else
5988 rcode = reverse_condition (code);
5990 /* Canonicalize to UNORDERED for the libcall. */
5991 if (can_compare_p (rcode, mode, ccp_jump)
5992 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5994 falseval = trueval;
5995 trueval = const0_rtx;
5996 code = rcode;
6000 emit_move_insn (target, trueval);
6001 label = gen_label_rtx ();
6002 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
6003 label, profile_probability::uninitialized ());
6005 emit_move_insn (target, falseval);
6006 emit_label (label);
6008 return target;
6011 /* Perform possibly multi-word comparison and conditional jump to LABEL
6012 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
6013 now a thin wrapper around do_compare_rtx_and_jump. */
6015 static void
6016 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
6017 rtx_code_label *label)
6019 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
6020 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
6021 NULL, label, profile_probability::uninitialized ());