PR82045: Avoid passing machine modes through "..."
[official-gcc.git] / gcc / expmed.c
blob ff9eb5b2c71dcd005bd5186891d4b2b52548911f
1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987-2017 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "predict.h"
30 #include "memmodel.h"
31 #include "tm_p.h"
32 #include "expmed.h"
33 #include "optabs.h"
34 #include "emit-rtl.h"
35 #include "diagnostic-core.h"
36 #include "fold-const.h"
37 #include "stor-layout.h"
38 #include "dojump.h"
39 #include "explow.h"
40 #include "expr.h"
41 #include "langhooks.h"
43 struct target_expmed default_target_expmed;
44 #if SWITCHABLE_TARGET
45 struct target_expmed *this_target_expmed = &default_target_expmed;
46 #endif
48 static void store_fixed_bit_field (rtx, opt_scalar_int_mode,
49 unsigned HOST_WIDE_INT,
50 unsigned HOST_WIDE_INT,
51 unsigned HOST_WIDE_INT,
52 unsigned HOST_WIDE_INT,
53 rtx, scalar_int_mode, bool);
54 static void store_fixed_bit_field_1 (rtx, scalar_int_mode,
55 unsigned HOST_WIDE_INT,
56 unsigned HOST_WIDE_INT,
57 rtx, scalar_int_mode, bool);
58 static void store_split_bit_field (rtx, opt_scalar_int_mode,
59 unsigned HOST_WIDE_INT,
60 unsigned HOST_WIDE_INT,
61 unsigned HOST_WIDE_INT,
62 unsigned HOST_WIDE_INT,
63 rtx, scalar_int_mode, bool);
64 static rtx extract_fixed_bit_field (machine_mode, rtx, opt_scalar_int_mode,
65 unsigned HOST_WIDE_INT,
66 unsigned HOST_WIDE_INT, rtx, int, bool);
67 static rtx extract_fixed_bit_field_1 (machine_mode, rtx, scalar_int_mode,
68 unsigned HOST_WIDE_INT,
69 unsigned HOST_WIDE_INT, rtx, int, bool);
70 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
71 static rtx extract_split_bit_field (rtx, opt_scalar_int_mode,
72 unsigned HOST_WIDE_INT,
73 unsigned HOST_WIDE_INT, int, bool);
74 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
75 static rtx expand_smod_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
76 static rtx expand_sdiv_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
78 /* Return a constant integer mask value of mode MODE with BITSIZE ones
79 followed by BITPOS zeros, or the complement of that if COMPLEMENT.
80 The mask is truncated if necessary to the width of mode MODE. The
81 mask is zero-extended if BITSIZE+BITPOS is too small for MODE. */
83 static inline rtx
84 mask_rtx (scalar_int_mode mode, int bitpos, int bitsize, bool complement)
86 return immed_wide_int_const
87 (wi::shifted_mask (bitpos, bitsize, complement,
88 GET_MODE_PRECISION (mode)), mode);
91 /* Test whether a value is zero or a power of two. */
92 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
93 (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
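/* As a worked example of the test above, x = 8 (binary 1000) gives
   8 & 7 == 0, so the test holds; x = 12 (binary 1100) gives 12 & 11 == 8,
   so it does not.  x = 0 also passes, hence the "OR_ZERO" in the name.  */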
95 struct init_expmed_rtl
97 rtx reg;
98 rtx plus;
99 rtx neg;
100 rtx mult;
101 rtx sdiv;
102 rtx udiv;
103 rtx sdiv_32;
104 rtx smod_32;
105 rtx wide_mult;
106 rtx wide_lshr;
107 rtx wide_trunc;
108 rtx shift;
109 rtx shift_mult;
110 rtx shift_add;
111 rtx shift_sub0;
112 rtx shift_sub1;
113 rtx zext;
114 rtx trunc;
116 rtx pow2[MAX_BITS_PER_WORD];
117 rtx cint[MAX_BITS_PER_WORD];
120 static void
121 init_expmed_one_conv (struct init_expmed_rtl *all, scalar_int_mode to_mode,
122 scalar_int_mode from_mode, bool speed)
124 int to_size, from_size;
125 rtx which;
127 to_size = GET_MODE_PRECISION (to_mode);
128 from_size = GET_MODE_PRECISION (from_mode);
130 /* Most partial integers have a precision less than the "full"
131 integer they require for storage. If one doesn't, reduce its
132 bit size by one here, for comparison purposes only, so that it
133 still counts as narrower than the full mode. */
134 if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
135 && pow2p_hwi (to_size))
136 to_size --;
137 if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
138 && pow2p_hwi (from_size))
139 from_size --;
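/* As an illustration, a target-defined partial-integer mode whose
   precision is exactly 32 is treated as 31 bits here, so converting it
   to a full 32-bit mode is still costed as an extension rather than as
   a truncation.  */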
141 /* Assume cost of zero-extend and sign-extend is the same. */
142 which = (to_size < from_size ? all->trunc : all->zext);
144 PUT_MODE (all->reg, from_mode);
145 set_convert_cost (to_mode, from_mode, speed,
146 set_src_cost (which, to_mode, speed));
149 static void
150 init_expmed_one_mode (struct init_expmed_rtl *all,
151 machine_mode mode, int speed)
153 int m, n, mode_bitsize;
154 machine_mode mode_from;
156 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
158 PUT_MODE (all->reg, mode);
159 PUT_MODE (all->plus, mode);
160 PUT_MODE (all->neg, mode);
161 PUT_MODE (all->mult, mode);
162 PUT_MODE (all->sdiv, mode);
163 PUT_MODE (all->udiv, mode);
164 PUT_MODE (all->sdiv_32, mode);
165 PUT_MODE (all->smod_32, mode);
166 PUT_MODE (all->wide_trunc, mode);
167 PUT_MODE (all->shift, mode);
168 PUT_MODE (all->shift_mult, mode);
169 PUT_MODE (all->shift_add, mode);
170 PUT_MODE (all->shift_sub0, mode);
171 PUT_MODE (all->shift_sub1, mode);
172 PUT_MODE (all->zext, mode);
173 PUT_MODE (all->trunc, mode);
175 set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
176 set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
177 set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
178 set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
179 set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
181 set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
182 <= 2 * add_cost (speed, mode)));
183 set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
184 <= 4 * add_cost (speed, mode)));
186 set_shift_cost (speed, mode, 0, 0);
188 int cost = add_cost (speed, mode);
189 set_shiftadd_cost (speed, mode, 0, cost);
190 set_shiftsub0_cost (speed, mode, 0, cost);
191 set_shiftsub1_cost (speed, mode, 0, cost);
194 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
195 for (m = 1; m < n; m++)
197 XEXP (all->shift, 1) = all->cint[m];
198 XEXP (all->shift_mult, 1) = all->pow2[m];
200 set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
201 set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
202 speed));
203 set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
204 speed));
205 set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
206 speed));
209 scalar_int_mode int_mode_to;
210 if (is_a <scalar_int_mode> (mode, &int_mode_to))
212 for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
213 mode_from = (machine_mode)(mode_from + 1))
214 init_expmed_one_conv (all, int_mode_to,
215 as_a <scalar_int_mode> (mode_from), speed);
217 scalar_int_mode wider_mode;
218 if (GET_MODE_CLASS (int_mode_to) == MODE_INT
219 && GET_MODE_WIDER_MODE (int_mode_to).exists (&wider_mode))
221 PUT_MODE (all->zext, wider_mode);
222 PUT_MODE (all->wide_mult, wider_mode);
223 PUT_MODE (all->wide_lshr, wider_mode);
224 XEXP (all->wide_lshr, 1) = GEN_INT (mode_bitsize);
226 set_mul_widen_cost (speed, wider_mode,
227 set_src_cost (all->wide_mult, wider_mode, speed));
228 set_mul_highpart_cost (speed, int_mode_to,
229 set_src_cost (all->wide_trunc,
230 int_mode_to, speed));
235 void
236 init_expmed (void)
238 struct init_expmed_rtl all;
239 machine_mode mode = QImode;
240 int m, speed;
242 memset (&all, 0, sizeof all);
243 for (m = 1; m < MAX_BITS_PER_WORD; m++)
245 all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
246 all.cint[m] = GEN_INT (m);
249 /* Avoid using hard regs in ways which may be unsupported. */
250 all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
251 all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
252 all.neg = gen_rtx_NEG (mode, all.reg);
253 all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
254 all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
255 all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
256 all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
257 all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
258 all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
259 all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
260 all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
261 all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
262 all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
263 all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
264 all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
265 all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
266 all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
267 all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
269 for (speed = 0; speed < 2; speed++)
271 crtl->maybe_hot_insn_p = speed;
272 set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
274 for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
275 mode = (machine_mode)(mode + 1))
276 init_expmed_one_mode (&all, mode, speed);
278 if (MIN_MODE_PARTIAL_INT != VOIDmode)
279 for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
280 mode = (machine_mode)(mode + 1))
281 init_expmed_one_mode (&all, mode, speed);
283 if (MIN_MODE_VECTOR_INT != VOIDmode)
284 for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
285 mode = (machine_mode)(mode + 1))
286 init_expmed_one_mode (&all, mode, speed);
289 if (alg_hash_used_p ())
291 struct alg_hash_entry *p = alg_hash_entry_ptr (0);
292 memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
294 else
295 set_alg_hash_used_p (true);
296 default_rtl_profile ();
298 ggc_free (all.trunc);
299 ggc_free (all.shift_sub1);
300 ggc_free (all.shift_sub0);
301 ggc_free (all.shift_add);
302 ggc_free (all.shift_mult);
303 ggc_free (all.shift);
304 ggc_free (all.wide_trunc);
305 ggc_free (all.wide_lshr);
306 ggc_free (all.wide_mult);
307 ggc_free (all.zext);
308 ggc_free (all.smod_32);
309 ggc_free (all.sdiv_32);
310 ggc_free (all.udiv);
311 ggc_free (all.sdiv);
312 ggc_free (all.mult);
313 ggc_free (all.neg);
314 ggc_free (all.plus);
315 ggc_free (all.reg);
318 /* Return an rtx representing minus the value of X.
319 MODE is the intended mode of the result,
320 useful if X is a CONST_INT. */
322 rtx
323 negate_rtx (machine_mode mode, rtx x)
325 rtx result = simplify_unary_operation (NEG, mode, x, mode);
327 if (result == 0)
328 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
330 return result;
333 /* Whether reverse storage order is supported on the target. */
334 static int reverse_storage_order_supported = -1;
336 /* Check whether reverse storage order is supported on the target. */
338 static void
339 check_reverse_storage_order_support (void)
341 if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
343 reverse_storage_order_supported = 0;
344 sorry ("reverse scalar storage order");
346 else
347 reverse_storage_order_supported = 1;
350 /* Whether reverse FP storage order is supported on the target. */
351 static int reverse_float_storage_order_supported = -1;
353 /* Check whether reverse FP storage order is supported on the target. */
355 static void
356 check_reverse_float_storage_order_support (void)
358 if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
360 reverse_float_storage_order_supported = 0;
361 sorry ("reverse floating-point scalar storage order");
363 else
364 reverse_float_storage_order_supported = 1;
367 /* Return an rtx representing value of X with reverse storage order.
368 MODE is the intended mode of the result,
369 useful if X is a CONST_INT. */
371 rtx
372 flip_storage_order (machine_mode mode, rtx x)
374 scalar_int_mode int_mode;
375 rtx result;
377 if (mode == QImode)
378 return x;
380 if (COMPLEX_MODE_P (mode))
382 rtx real = read_complex_part (x, false);
383 rtx imag = read_complex_part (x, true);
385 real = flip_storage_order (GET_MODE_INNER (mode), real);
386 imag = flip_storage_order (GET_MODE_INNER (mode), imag);
388 return gen_rtx_CONCAT (mode, real, imag);
391 if (__builtin_expect (reverse_storage_order_supported < 0, 0))
392 check_reverse_storage_order_support ();
394 if (!is_a <scalar_int_mode> (mode, &int_mode))
396 if (FLOAT_MODE_P (mode)
397 && __builtin_expect (reverse_float_storage_order_supported < 0, 0))
398 check_reverse_float_storage_order_support ();
400 if (!int_mode_for_size (GET_MODE_PRECISION (mode), 0).exists (&int_mode))
402 sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
403 return x;
405 x = gen_lowpart (int_mode, x);
408 result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
409 if (result == 0)
410 result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
412 if (int_mode != mode)
413 result = gen_lowpart (mode, result);
415 return result;
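/* As a worked example, flipping the storage order of a 32-bit integer
   holding 0x12345678 produces 0x78563412, i.e. a byte swap; non-integral
   modes are punned to an integer mode of the same precision first.  */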
418 /* If MODE is set, adjust bitfield memory MEM so that it points to the
419 first unit of mode MODE that contains a bitfield of size BITSIZE at
420 bit position BITNUM. If MODE is not set, return a BLKmode reference
421 to every byte in the bitfield. Set *NEW_BITNUM to the bit position
422 of the field within the new memory. */
424 static rtx
425 narrow_bit_field_mem (rtx mem, opt_scalar_int_mode mode,
426 unsigned HOST_WIDE_INT bitsize,
427 unsigned HOST_WIDE_INT bitnum,
428 unsigned HOST_WIDE_INT *new_bitnum)
430 scalar_int_mode imode;
431 if (mode.exists (&imode))
433 unsigned int unit = GET_MODE_BITSIZE (imode);
434 *new_bitnum = bitnum % unit;
435 HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
436 return adjust_bitfield_address (mem, imode, offset);
438 else
440 *new_bitnum = bitnum % BITS_PER_UNIT;
441 HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
442 HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
443 / BITS_PER_UNIT);
444 return adjust_bitfield_address_size (mem, BLKmode, offset, size);
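/* As a worked example, assuming BITS_PER_UNIT == 8 and a 32-bit MODE,
   a field at BITNUM == 37 gives *NEW_BITNUM == 5 and a byte offset of 4;
   in the BLKmode case the same BITNUM with BITSIZE == 12 gives
   *NEW_BITNUM == 5, offset 4 and a 3-byte reference.  */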
448 /* The caller wants to perform insertion or extraction PATTERN on a
449 bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
450 BITREGION_START and BITREGION_END are as for store_bit_field
451 and FIELDMODE is the natural mode of the field.
453 Search for a mode that is compatible with the memory access
454 restrictions and (where applicable) with a register insertion or
455 extraction. Return the new memory on success, storing the adjusted
456 bit position in *NEW_BITNUM. Return null otherwise. */
458 static rtx
459 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
460 rtx op0, HOST_WIDE_INT bitsize,
461 HOST_WIDE_INT bitnum,
462 unsigned HOST_WIDE_INT bitregion_start,
463 unsigned HOST_WIDE_INT bitregion_end,
464 machine_mode fieldmode,
465 unsigned HOST_WIDE_INT *new_bitnum)
467 bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
468 bitregion_end, MEM_ALIGN (op0),
469 MEM_VOLATILE_P (op0));
470 scalar_int_mode best_mode;
471 if (iter.next_mode (&best_mode))
473 /* We can use a memory in BEST_MODE. See whether this is true for
474 any wider modes. All other things being equal, we prefer to
475 use the widest mode possible because it tends to expose more
476 CSE opportunities. */
477 if (!iter.prefer_smaller_modes ())
479 /* Limit the search to the mode required by the corresponding
480 register insertion or extraction instruction, if any. */
481 scalar_int_mode limit_mode = word_mode;
482 extraction_insn insn;
483 if (get_best_reg_extraction_insn (&insn, pattern,
484 GET_MODE_BITSIZE (best_mode),
485 fieldmode))
486 limit_mode = insn.field_mode;
488 scalar_int_mode wider_mode;
489 while (iter.next_mode (&wider_mode)
490 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
491 best_mode = wider_mode;
493 return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
494 new_bitnum);
496 return NULL_RTX;
499 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
500 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
501 offset is then BITNUM / BITS_PER_UNIT. */
503 static bool
504 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
505 unsigned HOST_WIDE_INT bitsize,
506 machine_mode struct_mode)
508 if (BYTES_BIG_ENDIAN)
509 return (bitnum % BITS_PER_UNIT == 0
510 && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
511 || (bitnum + bitsize) % BITS_PER_WORD == 0));
512 else
513 return bitnum % BITS_PER_WORD == 0;
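/* As an example, in a 32-bit STRUCT_MODE with BITS_PER_WORD == 32, an
   8-bit field is a lowpart at BITNUM == 0 on little-endian targets and
   at BITNUM == 24 on big-endian targets.  */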
516 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
517 containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
518 Return false if the access would touch memory outside the range
519 BITREGION_START to BITREGION_END for conformance to the C++ memory
520 model. */
522 static bool
523 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
524 unsigned HOST_WIDE_INT bitnum,
525 scalar_int_mode fieldmode,
526 unsigned HOST_WIDE_INT bitregion_start,
527 unsigned HOST_WIDE_INT bitregion_end)
529 unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
531 /* -fstrict-volatile-bitfields must be enabled and we must have a
532 volatile MEM. */
533 if (!MEM_P (op0)
534 || !MEM_VOLATILE_P (op0)
535 || flag_strict_volatile_bitfields <= 0)
536 return false;
538 /* The bit size must not be larger than the field mode, and
539 the field mode must not be larger than a word. */
540 if (bitsize > modesize || modesize > BITS_PER_WORD)
541 return false;
543 /* Check for cases of unaligned fields that must be split. */
544 if (bitnum % modesize + bitsize > modesize)
545 return false;
547 /* The memory must be sufficiently aligned for a MODESIZE access.
548 This condition guarantees that the memory access will not
549 touch anything after the end of the structure. */
550 if (MEM_ALIGN (op0) < modesize)
551 return false;
553 /* Check for cases where the C++ memory model applies. */
554 if (bitregion_end != 0
555 && (bitnum - bitnum % modesize < bitregion_start
556 || bitnum - bitnum % modesize + modesize - 1 > bitregion_end))
557 return false;
559 return true;
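/* As an example of the memory model check above, with a 32-bit FIELDMODE
   an access at BITNUM == 40 covers bits 32 to 63; if BITREGION_START is
   48, the access would touch bits below the region, so we return false.  */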
562 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
563 bit number BITNUM can be treated as a simple value of mode MODE. */
565 static bool
566 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
567 unsigned HOST_WIDE_INT bitnum, machine_mode mode)
569 return (MEM_P (op0)
570 && bitnum % BITS_PER_UNIT == 0
571 && bitsize == GET_MODE_BITSIZE (mode)
572 && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
573 || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
574 && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
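/* As an example, a 32-bit field starting at BITNUM == 64 in a MEM whose
   alignment is at least 32 bits satisfies these checks and can be read
   or written with a single move in MODE.  */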
577 /* Try to use instruction INSV to store VALUE into a field of OP0.
578 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is a
579 BLKmode MEM. VALUE_MODE is the mode of VALUE. BITSIZE and BITNUM
580 are as for store_bit_field. */
582 static bool
583 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
584 opt_scalar_int_mode op0_mode,
585 unsigned HOST_WIDE_INT bitsize,
586 unsigned HOST_WIDE_INT bitnum,
587 rtx value, scalar_int_mode value_mode)
589 struct expand_operand ops[4];
590 rtx value1;
591 rtx xop0 = op0;
592 rtx_insn *last = get_last_insn ();
593 bool copy_back = false;
595 scalar_int_mode op_mode = insv->field_mode;
596 unsigned int unit = GET_MODE_BITSIZE (op_mode);
597 if (bitsize == 0 || bitsize > unit)
598 return false;
600 if (MEM_P (xop0))
601 /* Get a reference to the first byte of the field. */
602 xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
603 &bitnum);
604 else
606 /* Convert from counting within OP0 to counting in OP_MODE. */
607 if (BYTES_BIG_ENDIAN)
608 bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
610 /* If xop0 is a register, we need it in OP_MODE
611 to make it acceptable to the format of insv. */
612 if (GET_CODE (xop0) == SUBREG)
613 /* We can't just change the mode, because this might clobber op0,
614 and we will need the original value of op0 if insv fails. */
615 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
616 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
617 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
620 /* If the destination is a paradoxical subreg such that we need a
621 truncate to the inner mode, perform the insertion on a temporary and
622 truncate the result to the original destination. Note that we can't
623 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
624 X) 0)) is (reg:N X). */
625 if (GET_CODE (xop0) == SUBREG
626 && REG_P (SUBREG_REG (xop0))
627 && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
628 op_mode))
630 rtx tem = gen_reg_rtx (op_mode);
631 emit_move_insn (tem, xop0);
632 xop0 = tem;
633 copy_back = true;
636 /* There is a similar overflow check at the start of store_bit_field_1,
637 but that only handles the case where the field lies completely
638 outside the register. Here the field may lie partially in the
639 register, so we need to adjust BITSIZE for this partial overflow.
640 Without this fix, pr48335-2.c would be broken on big-endian
641 targets that support bit-insert instructions, such as arm and
642 aarch64. */
643 if (bitsize + bitnum > unit && bitnum < unit)
645 warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
646 "destination object, data truncated into %wu-bit",
647 bitsize, unit - bitnum);
648 bitsize = unit - bitnum;
651 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
652 "backwards" from the size of the unit we are inserting into.
653 Otherwise, we count bits from the most significant on a
654 BYTES/BITS_BIG_ENDIAN machine. */
656 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
657 bitnum = unit - bitsize - bitnum;
659 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
660 value1 = value;
661 if (value_mode != op_mode)
663 if (GET_MODE_BITSIZE (value_mode) >= bitsize)
665 rtx tmp;
666 /* Optimization: Don't bother really extending VALUE
667 if it has all the bits we will actually use. However,
668 if we must narrow it, be sure we do it correctly. */
670 if (GET_MODE_SIZE (value_mode) < GET_MODE_SIZE (op_mode))
672 tmp = simplify_subreg (op_mode, value1, value_mode, 0);
673 if (! tmp)
674 tmp = simplify_gen_subreg (op_mode,
675 force_reg (value_mode, value1),
676 value_mode, 0);
678 else
680 tmp = gen_lowpart_if_possible (op_mode, value1);
681 if (! tmp)
682 tmp = gen_lowpart (op_mode, force_reg (value_mode, value1));
684 value1 = tmp;
686 else if (CONST_INT_P (value))
687 value1 = gen_int_mode (INTVAL (value), op_mode);
688 else
689 /* Parse phase is supposed to make VALUE's data type
690 match that of the component reference, which is a type
691 at least as wide as the field; so VALUE should have
692 a mode that corresponds to that type. */
693 gcc_assert (CONSTANT_P (value));
696 create_fixed_operand (&ops[0], xop0);
697 create_integer_operand (&ops[1], bitsize);
698 create_integer_operand (&ops[2], bitnum);
699 create_input_operand (&ops[3], value1, op_mode);
700 if (maybe_expand_insn (insv->icode, 4, ops))
702 if (copy_back)
703 convert_move (op0, xop0, true);
704 return true;
706 delete_insns_since (last);
707 return false;
710 /* A subroutine of store_bit_field, with the same arguments. Return true
711 if the operation could be implemented.
713 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
714 no other way of implementing the operation. If FALLBACK_P is false,
715 return false instead. */
717 static bool
718 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
719 unsigned HOST_WIDE_INT bitnum,
720 unsigned HOST_WIDE_INT bitregion_start,
721 unsigned HOST_WIDE_INT bitregion_end,
722 machine_mode fieldmode,
723 rtx value, bool reverse, bool fallback_p)
725 rtx op0 = str_rtx;
726 rtx orig_value;
728 while (GET_CODE (op0) == SUBREG)
730 /* The following line once was done only if WORDS_BIG_ENDIAN,
731 but I think that is a mistake. WORDS_BIG_ENDIAN is
732 meaningful at a much higher level; when structures are copied
733 between memory and regs, the higher-numbered regs
734 always get higher addresses. */
735 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
736 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
737 int byte_offset = 0;
739 /* Paradoxical subregs need special handling on big-endian machines. */
740 if (paradoxical_subreg_p (op0))
742 int difference = inner_mode_size - outer_mode_size;
744 if (WORDS_BIG_ENDIAN)
745 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
746 if (BYTES_BIG_ENDIAN)
747 byte_offset += difference % UNITS_PER_WORD;
749 else
750 byte_offset = SUBREG_BYTE (op0);
752 bitnum += byte_offset * BITS_PER_UNIT;
753 op0 = SUBREG_REG (op0);
756 /* No action is needed if the target is a register and if the field
757 lies completely outside that register. This can occur if the source
758 code contains an out-of-bounds access to a small array. */
759 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
760 return true;
762 /* Use vec_set patterns for inserting parts of vectors whenever
763 available. */
764 machine_mode outermode = GET_MODE (op0);
765 scalar_mode innermode = GET_MODE_INNER (outermode);
766 if (VECTOR_MODE_P (outermode)
767 && !MEM_P (op0)
768 && optab_handler (vec_set_optab, outermode) != CODE_FOR_nothing
769 && fieldmode == innermode
770 && bitsize == GET_MODE_BITSIZE (innermode)
771 && !(bitnum % GET_MODE_BITSIZE (innermode)))
773 struct expand_operand ops[3];
774 enum insn_code icode = optab_handler (vec_set_optab, outermode);
775 int pos = bitnum / GET_MODE_BITSIZE (innermode);
777 create_fixed_operand (&ops[0], op0);
778 create_input_operand (&ops[1], value, innermode);
779 create_integer_operand (&ops[2], pos);
780 if (maybe_expand_insn (icode, 3, ops))
781 return true;
784 /* If the target is a register, overwriting the entire object, or storing
785 a full-word or multi-word field can be done with just a SUBREG. */
786 if (!MEM_P (op0)
787 && bitsize == GET_MODE_BITSIZE (fieldmode)
788 && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
789 || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
791 /* Use the subreg machinery either to narrow OP0 to the required
792 words or to cope with mode punning between equal-sized modes.
793 In the latter case, use subreg on the rhs side, not lhs. */
794 rtx sub;
796 if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
798 sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
799 if (sub)
801 if (reverse)
802 sub = flip_storage_order (GET_MODE (op0), sub);
803 emit_move_insn (op0, sub);
804 return true;
807 else
809 sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
810 bitnum / BITS_PER_UNIT);
811 if (sub)
813 if (reverse)
814 value = flip_storage_order (fieldmode, value);
815 emit_move_insn (sub, value);
816 return true;
821 /* If the target is memory, storing any naturally aligned field can be
822 done with a simple store. For targets that support fast unaligned
823 memory, any naturally sized, unit aligned field can be done directly. */
824 if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
826 op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
827 if (reverse)
828 value = flip_storage_order (fieldmode, value);
829 emit_move_insn (op0, value);
830 return true;
833 /* Make sure we are playing with integral modes. Pun with subregs
834 if we aren't. This must come after the entire register case above,
835 since that case is valid for any mode. The following cases are only
836 valid for integral modes. */
837 opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
838 scalar_int_mode imode;
839 if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
841 if (MEM_P (op0))
842 op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
843 0, MEM_SIZE (op0));
844 else
845 op0 = gen_lowpart (op0_mode.require (), op0);
848 /* Storing an lsb-aligned field in a register
849 can be done with a movstrict instruction. */
851 if (!MEM_P (op0)
852 && !reverse
853 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
854 && bitsize == GET_MODE_BITSIZE (fieldmode)
855 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
857 struct expand_operand ops[2];
858 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
859 rtx arg0 = op0;
860 unsigned HOST_WIDE_INT subreg_off;
862 if (GET_CODE (arg0) == SUBREG)
864 /* Else we've got some float mode source being extracted into
865 a different float mode destination -- this combination of
866 subregs results in Severe Tire Damage. */
867 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
868 || GET_MODE_CLASS (fieldmode) == MODE_INT
869 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
870 arg0 = SUBREG_REG (arg0);
873 subreg_off = bitnum / BITS_PER_UNIT;
874 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
876 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
878 create_fixed_operand (&ops[0], arg0);
879 /* Shrink the source operand to FIELDMODE. */
880 create_convert_operand_to (&ops[1], value, fieldmode, false);
881 if (maybe_expand_insn (icode, 2, ops))
882 return true;
886 /* Handle fields bigger than a word. */
888 if (bitsize > BITS_PER_WORD)
890 /* Here we transfer the words of the field
891 in the order least significant first.
892 This is because the most significant word is the one which may
893 be less than full.
894 However, only do that if the value is not BLKmode. */
896 const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
897 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
898 unsigned int i;
899 rtx_insn *last;
901 /* This is the mode we must force value to, so that there will be enough
902 subwords to extract. Note that fieldmode will often (always?) be
903 VOIDmode, because that is what store_field uses to indicate that this
904 is a bit field, but passing VOIDmode to operand_subword_force
905 is not allowed. */
906 fieldmode = GET_MODE (value);
907 if (fieldmode == VOIDmode)
908 fieldmode = smallest_int_mode_for_size (nwords * BITS_PER_WORD);
910 last = get_last_insn ();
911 for (i = 0; i < nwords; i++)
913 /* If I is 0, use the low-order word in both field and target;
914 if I is 1, use the next to lowest word; and so on. */
915 unsigned int wordnum = (backwards
916 ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
917 - i - 1
918 : i);
919 unsigned int bit_offset = (backwards ^ reverse
920 ? MAX ((int) bitsize - ((int) i + 1)
921 * BITS_PER_WORD,
922 0)
923 : (int) i * BITS_PER_WORD);
924 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
925 unsigned HOST_WIDE_INT new_bitsize =
926 MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
928 /* If the remaining chunk doesn't have full wordsize we have
929 to make sure that for big-endian machines the higher order
930 bits are used. */
931 if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
932 value_word = simplify_expand_binop (word_mode, lshr_optab,
933 value_word,
934 GEN_INT (BITS_PER_WORD
935 - new_bitsize),
936 NULL_RTX, true,
937 OPTAB_LIB_WIDEN);
939 if (!store_bit_field_1 (op0, new_bitsize,
940 bitnum + bit_offset,
941 bitregion_start, bitregion_end,
942 word_mode,
943 value_word, reverse, fallback_p))
945 delete_insns_since (last);
946 return false;
949 return true;
952 /* If VALUE has a floating-point or complex mode, access it as an
953 integer of the corresponding size. This can occur on a machine
954 with 64 bit registers that uses SFmode for float. It can also
955 occur for unaligned float or complex fields. */
956 orig_value = value;
957 scalar_int_mode value_mode;
958 if (GET_MODE (value) == VOIDmode)
959 /* By this point we've dealt with values that are bigger than a word,
960 so word_mode is a conservatively correct choice. */
961 value_mode = word_mode;
962 else if (!is_a <scalar_int_mode> (GET_MODE (value), &value_mode))
964 value_mode = int_mode_for_mode (GET_MODE (value)).require ();
965 value = gen_reg_rtx (value_mode);
966 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
969 /* If OP0 is a multi-word register, narrow it to the affected word.
970 If the region spans two words, defer to store_split_bit_field.
971 Don't do this if op0 is a single hard register wider than word
972 such as a float or vector register. */
973 if (!MEM_P (op0)
974 && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD
975 && (!REG_P (op0)
976 || !HARD_REGISTER_P (op0)
977 || HARD_REGNO_NREGS (REGNO (op0), op0_mode.require ()) != 1))
979 if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
981 if (!fallback_p)
982 return false;
984 store_split_bit_field (op0, op0_mode, bitsize, bitnum,
985 bitregion_start, bitregion_end,
986 value, value_mode, reverse);
987 return true;
989 op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
990 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
991 gcc_assert (op0);
992 op0_mode = word_mode;
993 bitnum %= BITS_PER_WORD;
996 /* From here on we can assume that the field to be stored in fits
997 within a word. If the destination is a register, it too fits
998 in a word. */
1000 extraction_insn insv;
1001 if (!MEM_P (op0)
1002 && !reverse
1003 && get_best_reg_extraction_insn (&insv, EP_insv,
1004 GET_MODE_BITSIZE (op0_mode.require ()),
1005 fieldmode)
1006 && store_bit_field_using_insv (&insv, op0, op0_mode,
1007 bitsize, bitnum, value, value_mode))
1008 return true;
1010 /* If OP0 is a memory, try copying it to a register and seeing if a
1011 cheap register alternative is available. */
1012 if (MEM_P (op0) && !reverse)
1014 if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
1015 fieldmode)
1016 && store_bit_field_using_insv (&insv, op0, op0_mode,
1017 bitsize, bitnum, value, value_mode))
1018 return true;
1020 rtx_insn *last = get_last_insn ();
1022 /* Try loading part of OP0 into a register, inserting the bitfield
1023 into that, and then copying the result back to OP0. */
1024 unsigned HOST_WIDE_INT bitpos;
1025 rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1026 bitregion_start, bitregion_end,
1027 fieldmode, &bitpos);
1028 if (xop0)
1030 rtx tempreg = copy_to_reg (xop0);
1031 if (store_bit_field_1 (tempreg, bitsize, bitpos,
1032 bitregion_start, bitregion_end,
1033 fieldmode, orig_value, reverse, false))
1035 emit_move_insn (xop0, tempreg);
1036 return true;
1038 delete_insns_since (last);
1042 if (!fallback_p)
1043 return false;
1045 store_fixed_bit_field (op0, op0_mode, bitsize, bitnum, bitregion_start,
1046 bitregion_end, value, value_mode, reverse);
1047 return true;
1050 /* Generate code to store value from rtx VALUE
1051 into a bit-field within structure STR_RTX
1052 containing BITSIZE bits starting at bit BITNUM.
1054 BITREGION_START is bitpos of the first bitfield in this region.
1055 BITREGION_END is the bitpos of the ending bitfield in this region.
1056 These two fields are 0 if the C++ memory model does not apply,
1057 or we are not interested in keeping track of bitfield regions.
1059 FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1061 If REVERSE is true, the store is to be done in reverse order. */
1063 void
1064 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1065 unsigned HOST_WIDE_INT bitnum,
1066 unsigned HOST_WIDE_INT bitregion_start,
1067 unsigned HOST_WIDE_INT bitregion_end,
1068 machine_mode fieldmode,
1069 rtx value, bool reverse)
1071 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1072 scalar_int_mode int_mode;
1073 if (is_a <scalar_int_mode> (fieldmode, &int_mode)
1074 && strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, int_mode,
1075 bitregion_start, bitregion_end))
1077 /* Storing of a full word can be done with a simple store.
1078 We know here that the field can be accessed with one single
1079 instruction. For targets that support unaligned memory,
1080 an unaligned access may be necessary. */
1081 if (bitsize == GET_MODE_BITSIZE (int_mode))
1083 str_rtx = adjust_bitfield_address (str_rtx, int_mode,
1084 bitnum / BITS_PER_UNIT);
1085 if (reverse)
1086 value = flip_storage_order (int_mode, value);
1087 gcc_assert (bitnum % BITS_PER_UNIT == 0);
1088 emit_move_insn (str_rtx, value);
1090 else
1092 rtx temp;
1094 str_rtx = narrow_bit_field_mem (str_rtx, int_mode, bitsize, bitnum,
1095 &bitnum);
1096 gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (int_mode));
1097 temp = copy_to_reg (str_rtx);
1098 if (!store_bit_field_1 (temp, bitsize, bitnum, 0, 0,
1099 int_mode, value, reverse, true))
1100 gcc_unreachable ();
1102 emit_move_insn (str_rtx, temp);
1105 return;
1108 /* Under the C++0x memory model, we must not touch bits outside the
1109 bit region. Adjust the address to start at the beginning of the
1110 bit region. */
1111 if (MEM_P (str_rtx) && bitregion_start > 0)
1113 scalar_int_mode best_mode;
1114 machine_mode addr_mode = VOIDmode;
1115 HOST_WIDE_INT offset, size;
1117 gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
1119 offset = bitregion_start / BITS_PER_UNIT;
1120 bitnum -= bitregion_start;
1121 size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
1122 bitregion_end -= bitregion_start;
1123 bitregion_start = 0;
1124 if (get_best_mode (bitsize, bitnum,
1125 bitregion_start, bitregion_end,
1126 MEM_ALIGN (str_rtx), INT_MAX,
1127 MEM_VOLATILE_P (str_rtx), &best_mode))
1128 addr_mode = best_mode;
1129 str_rtx = adjust_bitfield_address_size (str_rtx, addr_mode,
1130 offset, size);
1133 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1134 bitregion_start, bitregion_end,
1135 fieldmode, value, reverse, true))
1136 gcc_unreachable ();
1139 /* Use shifts and boolean operations to store VALUE into a bit field of
1140 width BITSIZE in OP0, starting at bit BITNUM. If OP0_MODE is defined,
1141 it is the mode of OP0, otherwise OP0 is a BLKmode MEM. VALUE_MODE is
1142 the mode of VALUE.
1144 If REVERSE is true, the store is to be done in reverse order. */
1146 static void
1147 store_fixed_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1148 unsigned HOST_WIDE_INT bitsize,
1149 unsigned HOST_WIDE_INT bitnum,
1150 unsigned HOST_WIDE_INT bitregion_start,
1151 unsigned HOST_WIDE_INT bitregion_end,
1152 rtx value, scalar_int_mode value_mode, bool reverse)
1154 /* There is a case not handled here:
1155 a structure with a known alignment of just a halfword
1156 and a field split across two aligned halfwords within the structure.
1157 Or likewise a structure with a known alignment of just a byte
1158 and a field split across two bytes.
1159 Such cases are not supposed to be able to occur. */
1161 scalar_int_mode best_mode;
1162 if (MEM_P (op0))
1164 unsigned int max_bitsize = BITS_PER_WORD;
1165 scalar_int_mode imode;
1166 if (op0_mode.exists (&imode) && GET_MODE_BITSIZE (imode) < max_bitsize)
1167 max_bitsize = GET_MODE_BITSIZE (imode);
1169 if (!get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1170 MEM_ALIGN (op0), max_bitsize, MEM_VOLATILE_P (op0),
1171 &best_mode))
1173 /* The only way this should occur is if the field spans word
1174 boundaries. */
1175 store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1176 bitregion_start, bitregion_end,
1177 value, value_mode, reverse);
1178 return;
1181 op0 = narrow_bit_field_mem (op0, best_mode, bitsize, bitnum, &bitnum);
1183 else
1184 best_mode = op0_mode.require ();
1186 store_fixed_bit_field_1 (op0, best_mode, bitsize, bitnum,
1187 value, value_mode, reverse);
1190 /* Helper function for store_fixed_bit_field, stores
1191 the bit field always using MODE, which is the mode of OP0. The other
1192 arguments are as for store_fixed_bit_field. */
1194 static void
1195 store_fixed_bit_field_1 (rtx op0, scalar_int_mode mode,
1196 unsigned HOST_WIDE_INT bitsize,
1197 unsigned HOST_WIDE_INT bitnum,
1198 rtx value, scalar_int_mode value_mode, bool reverse)
1200 rtx temp;
1201 int all_zero = 0;
1202 int all_one = 0;
1204 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1205 for invalid input, such as f5 from gcc.dg/pr48335-2.c. */
1207 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1208 /* BITNUM is the distance between our msb
1209 and that of the containing datum.
1210 Convert it to the distance from the lsb. */
1211 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1213 /* Now BITNUM is always the distance between our lsb
1214 and that of OP0. */
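/* As a worked example, in a 32-bit MODE a field of BITSIZE 8 whose
   msb-relative BITNUM was 4 now has lsb-relative BITNUM 32 - 8 - 4 == 20.  */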
1216 /* Shift VALUE left by BITNUM bits. If VALUE is not constant,
1217 we must first convert its mode to MODE. */
1219 if (CONST_INT_P (value))
1221 unsigned HOST_WIDE_INT v = UINTVAL (value);
1223 if (bitsize < HOST_BITS_PER_WIDE_INT)
1224 v &= (HOST_WIDE_INT_1U << bitsize) - 1;
1226 if (v == 0)
1227 all_zero = 1;
1228 else if ((bitsize < HOST_BITS_PER_WIDE_INT
1229 && v == (HOST_WIDE_INT_1U << bitsize) - 1)
1230 || (bitsize == HOST_BITS_PER_WIDE_INT
1231 && v == HOST_WIDE_INT_M1U))
1232 all_one = 1;
1234 value = lshift_value (mode, v, bitnum);
1236 else
1238 int must_and = (GET_MODE_BITSIZE (value_mode) != bitsize
1239 && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1241 if (value_mode != mode)
1242 value = convert_to_mode (mode, value, 1);
1244 if (must_and)
1245 value = expand_binop (mode, and_optab, value,
1246 mask_rtx (mode, 0, bitsize, 0),
1247 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1248 if (bitnum > 0)
1249 value = expand_shift (LSHIFT_EXPR, mode, value,
1250 bitnum, NULL_RTX, 1);
1253 if (reverse)
1254 value = flip_storage_order (mode, value);
1256 /* Now clear the chosen bits in OP0,
1257 except that if VALUE is -1 we need not bother. */
1258 /* We keep the intermediates in registers to allow CSE to combine
1259 consecutive bitfield assignments. */
1261 temp = force_reg (mode, op0);
1263 if (! all_one)
1265 rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
1266 if (reverse)
1267 mask = flip_storage_order (mode, mask);
1268 temp = expand_binop (mode, and_optab, temp, mask,
1269 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1270 temp = force_reg (mode, temp);
1273 /* Now logical-or VALUE into OP0, unless it is zero. */
1275 if (! all_zero)
1277 temp = expand_binop (mode, ior_optab, temp, value,
1278 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1279 temp = force_reg (mode, temp);
1282 if (op0 != temp)
1284 op0 = copy_rtx (op0);
1285 emit_move_insn (op0, temp);
1289 /* Store a bit field that is split across multiple accessible memory objects.
1291 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1292 BITSIZE is the field width; BITPOS the position of its first bit
1293 (within the word).
1294 VALUE is the value to store, which has mode VALUE_MODE.
1295 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
1296 a BLKmode MEM.
1298 If REVERSE is true, the store is to be done in reverse order.
1300 This does not yet handle fields wider than BITS_PER_WORD. */
1302 static void
1303 store_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1304 unsigned HOST_WIDE_INT bitsize,
1305 unsigned HOST_WIDE_INT bitpos,
1306 unsigned HOST_WIDE_INT bitregion_start,
1307 unsigned HOST_WIDE_INT bitregion_end,
1308 rtx value, scalar_int_mode value_mode, bool reverse)
1310 unsigned int unit, total_bits, bitsdone = 0;
1312 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1313 much at a time. */
1314 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1315 unit = BITS_PER_WORD;
1316 else
1317 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1319 /* If OP0 is a memory with a mode, then UNIT must not be larger than
1320 OP0's mode as well. Otherwise, store_fixed_bit_field will call us
1321 again, and we will mutually recurse forever. */
1322 if (MEM_P (op0) && op0_mode.exists ())
1323 unit = MIN (unit, GET_MODE_BITSIZE (op0_mode.require ()));
1325 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1326 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1327 that VALUE might be a floating-point constant. */
1328 if (CONSTANT_P (value) && !CONST_INT_P (value))
1330 rtx word = gen_lowpart_common (word_mode, value);
1332 if (word && (value != word))
1333 value = word;
1334 else
1335 value = gen_lowpart_common (word_mode, force_reg (value_mode, value));
1336 value_mode = word_mode;
1339 total_bits = GET_MODE_BITSIZE (value_mode);
1341 while (bitsdone < bitsize)
1343 unsigned HOST_WIDE_INT thissize;
1344 unsigned HOST_WIDE_INT thispos;
1345 unsigned HOST_WIDE_INT offset;
1346 rtx part;
1348 offset = (bitpos + bitsdone) / unit;
1349 thispos = (bitpos + bitsdone) % unit;
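/* As a worked example, with UNIT == 8, BITPOS == 13 and BITSDONE == 0,
   the first piece lands in unit OFFSET == 1 at bit THISPOS == 5 within
   that unit.  */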
1351 /* When the region of bytes we can touch is restricted, decrease
1352 UNIT close to the end of the region as needed. If op0 is a REG
1353 or SUBREG of REG, don't do this, as there can't be data races
1354 on a register and we can expand shorter code in some cases. */
1355 if (bitregion_end
1356 && unit > BITS_PER_UNIT
1357 && bitpos + bitsdone - thispos + unit > bitregion_end + 1
1358 && !REG_P (op0)
1359 && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1361 unit = unit / 2;
1362 continue;
1365 /* THISSIZE must not overrun a word boundary. Otherwise,
1366 store_fixed_bit_field will call us again, and we will mutually
1367 recurse forever. */
1368 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1369 thissize = MIN (thissize, unit - thispos);
1371 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1373 /* Fetch successively less significant portions. */
1374 if (CONST_INT_P (value))
1375 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1376 >> (bitsize - bitsdone - thissize))
1377 & ((HOST_WIDE_INT_1 << thissize) - 1));
1378 /* Likewise, but the source is little-endian. */
1379 else if (reverse)
1380 part = extract_fixed_bit_field (word_mode, value, value_mode,
1381 thissize,
1382 bitsize - bitsdone - thissize,
1383 NULL_RTX, 1, false);
1384 else
1385 /* The args are chosen so that the last part includes the
1386 lsb. Give extract_bit_field the value it needs (with
1387 endianness compensation) to fetch the piece we want. */
1388 part = extract_fixed_bit_field (word_mode, value, value_mode,
1389 thissize,
1390 total_bits - bitsize + bitsdone,
1391 NULL_RTX, 1, false);
1393 else
1395 /* Fetch successively more significant portions. */
1396 if (CONST_INT_P (value))
1397 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1398 >> bitsdone)
1399 & ((HOST_WIDE_INT_1 << thissize) - 1));
1400 /* Likewise, but the source is big-endian. */
1401 else if (reverse)
1402 part = extract_fixed_bit_field (word_mode, value, value_mode,
1403 thissize,
1404 total_bits - bitsdone - thissize,
1405 NULL_RTX, 1, false);
1406 else
1407 part = extract_fixed_bit_field (word_mode, value, value_mode,
1408 thissize, bitsdone, NULL_RTX,
1409 1, false);
1412 /* If OP0 is a register, then handle OFFSET here. */
1413 rtx op0_piece = op0;
1414 opt_scalar_int_mode op0_piece_mode = op0_mode;
1415 if (SUBREG_P (op0) || REG_P (op0))
1417 scalar_int_mode imode;
1418 if (op0_mode.exists (&imode)
1419 && GET_MODE_SIZE (imode) < UNITS_PER_WORD)
1421 if (offset)
1422 op0_piece = const0_rtx;
1424 else
1426 op0_piece = operand_subword_force (op0,
1427 offset * unit / BITS_PER_WORD,
1428 GET_MODE (op0));
1429 op0_piece_mode = word_mode;
1431 offset &= BITS_PER_WORD / unit - 1;
1434 /* OFFSET is in UNITs, and UNIT is in bits. If OP0_PIECE is const0_rtx,
1435 it is just an out-of-bounds access. Ignore it. */
1436 if (op0_piece != const0_rtx)
1437 store_fixed_bit_field (op0_piece, op0_piece_mode, thissize,
1438 offset * unit + thispos, bitregion_start,
1439 bitregion_end, part, word_mode, reverse);
1440 bitsdone += thissize;
1444 /* A subroutine of extract_bit_field_1 that converts return value X
1445 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1446 to extract_bit_field. */
1448 static rtx
1449 convert_extracted_bit_field (rtx x, machine_mode mode,
1450 machine_mode tmode, bool unsignedp)
1452 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1453 return x;
1455 /* If the x mode is not a scalar integral, first convert to the
1456 integer mode of that size and then access it as a floating-point
1457 value via a SUBREG. */
1458 if (!SCALAR_INT_MODE_P (tmode))
1460 scalar_int_mode int_mode = int_mode_for_mode (tmode).require ();
1461 x = convert_to_mode (int_mode, x, unsignedp);
1462 x = force_reg (int_mode, x);
1463 return gen_lowpart (tmode, x);
1466 return convert_to_mode (tmode, x, unsignedp);
1469 /* Try to use an ext(z)v pattern to extract a field from OP0.
1470 Return the extracted value on success, otherwise return null.
1471 EXTV describes the extraction instruction to use. If OP0_MODE
1472 is defined, it is the mode of OP0, otherwise OP0 is a BLKmode MEM.
1473 The other arguments are as for extract_bit_field. */
1475 static rtx
1476 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1477 opt_scalar_int_mode op0_mode,
1478 unsigned HOST_WIDE_INT bitsize,
1479 unsigned HOST_WIDE_INT bitnum,
1480 int unsignedp, rtx target,
1481 machine_mode mode, machine_mode tmode)
1483 struct expand_operand ops[4];
1484 rtx spec_target = target;
1485 rtx spec_target_subreg = 0;
1486 scalar_int_mode ext_mode = extv->field_mode;
1487 unsigned unit = GET_MODE_BITSIZE (ext_mode);
1489 if (bitsize == 0 || unit < bitsize)
1490 return NULL_RTX;
1492 if (MEM_P (op0))
1493 /* Get a reference to the first byte of the field. */
1494 op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1495 &bitnum);
1496 else
1498 /* Convert from counting within OP0 to counting in EXT_MODE. */
1499 if (BYTES_BIG_ENDIAN)
1500 bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
1502 /* If op0 is a register, we need it in EXT_MODE to make it
1503 acceptable to the format of ext(z)v. */
1504 if (GET_CODE (op0) == SUBREG && op0_mode.require () != ext_mode)
1505 return NULL_RTX;
1506 if (REG_P (op0) && op0_mode.require () != ext_mode)
1507 op0 = gen_lowpart_SUBREG (ext_mode, op0);
1510 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1511 "backwards" from the size of the unit we are extracting from.
1512 Otherwise, we count bits from the most significant on a
1513 BYTES/BITS_BIG_ENDIAN machine. */
1515 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1516 bitnum = unit - bitsize - bitnum;
1518 if (target == 0)
1519 target = spec_target = gen_reg_rtx (tmode);
1521 if (GET_MODE (target) != ext_mode)
1523 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1524 between the mode of the extraction (word_mode) and the target
1525 mode. Instead, create a temporary and use convert_move to set
1526 the target. */
1527 if (REG_P (target)
1528 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1530 target = gen_lowpart (ext_mode, target);
1531 if (partial_subreg_p (GET_MODE (spec_target), ext_mode))
1532 spec_target_subreg = target;
1534 else
1535 target = gen_reg_rtx (ext_mode);
1538 create_output_operand (&ops[0], target, ext_mode);
1539 create_fixed_operand (&ops[1], op0);
1540 create_integer_operand (&ops[2], bitsize);
1541 create_integer_operand (&ops[3], bitnum);
1542 if (maybe_expand_insn (extv->icode, 4, ops))
1544 target = ops[0].value;
1545 if (target == spec_target)
1546 return target;
1547 if (target == spec_target_subreg)
1548 return spec_target;
1549 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1551 return NULL_RTX;
1554 /* A subroutine of extract_bit_field, with the same arguments.
1555 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1556 if we can find no other means of implementing the operation.
1557 If FALLBACK_P is false, return NULL instead. */
1559 static rtx
1560 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1561 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1562 machine_mode mode, machine_mode tmode,
1563 bool reverse, bool fallback_p, rtx *alt_rtl)
1565 rtx op0 = str_rtx;
1566 machine_mode mode1;
1568 if (tmode == VOIDmode)
1569 tmode = mode;
1571 while (GET_CODE (op0) == SUBREG)
1573 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1574 op0 = SUBREG_REG (op0);
1577 /* If we have an out-of-bounds access to a register, just return an
1578 uninitialized register of the required mode. This can occur if the
1579 source code contains an out-of-bounds access to a small array. */
1580 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1581 return gen_reg_rtx (tmode);
1583 if (REG_P (op0)
1584 && mode == GET_MODE (op0)
1585 && bitnum == 0
1586 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1588 if (reverse)
1589 op0 = flip_storage_order (mode, op0);
1590 /* We're trying to extract a full register from itself. */
1591 return op0;
1594 /* First try to check for vector from vector extractions. */
1595 if (VECTOR_MODE_P (GET_MODE (op0))
1596 && !MEM_P (op0)
1597 && VECTOR_MODE_P (tmode)
1598 && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (tmode))
1600 machine_mode new_mode = GET_MODE (op0);
1601 if (GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode))
1603 new_mode = mode_for_vector (GET_MODE_INNER (tmode),
1604 GET_MODE_BITSIZE (GET_MODE (op0))
1605 / GET_MODE_UNIT_BITSIZE (tmode));
1606 if (!VECTOR_MODE_P (new_mode)
1607 || GET_MODE_SIZE (new_mode) != GET_MODE_SIZE (GET_MODE (op0))
1608 || GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode)
1609 || !targetm.vector_mode_supported_p (new_mode))
1610 new_mode = VOIDmode;
1612 if (new_mode != VOIDmode
1613 && (convert_optab_handler (vec_extract_optab, new_mode, tmode)
1614 != CODE_FOR_nothing)
1615 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (tmode)
1616 == bitnum / GET_MODE_BITSIZE (tmode)))
1618 struct expand_operand ops[3];
1619 machine_mode outermode = new_mode;
1620 machine_mode innermode = tmode;
1621 enum insn_code icode
1622 = convert_optab_handler (vec_extract_optab, outermode, innermode);
1623 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1625 if (new_mode != GET_MODE (op0))
1626 op0 = gen_lowpart (new_mode, op0);
1627 create_output_operand (&ops[0], target, innermode);
1628 ops[0].target = 1;
1629 create_input_operand (&ops[1], op0, outermode);
1630 create_integer_operand (&ops[2], pos);
1631 if (maybe_expand_insn (icode, 3, ops))
1633 if (alt_rtl && ops[0].target)
1634 *alt_rtl = target;
1635 target = ops[0].value;
1636 if (GET_MODE (target) != mode)
1637 return gen_lowpart (tmode, target);
1638 return target;
1643 /* See if we can get a better vector mode before extracting. */
1644 if (VECTOR_MODE_P (GET_MODE (op0))
1645 && !MEM_P (op0)
1646 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1648 machine_mode new_mode;
1650 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1651 new_mode = MIN_MODE_VECTOR_FLOAT;
1652 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1653 new_mode = MIN_MODE_VECTOR_FRACT;
1654 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1655 new_mode = MIN_MODE_VECTOR_UFRACT;
1656 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1657 new_mode = MIN_MODE_VECTOR_ACCUM;
1658 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1659 new_mode = MIN_MODE_VECTOR_UACCUM;
1660 else
1661 new_mode = MIN_MODE_VECTOR_INT;
1663 FOR_EACH_MODE_FROM (new_mode, new_mode)
1664 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1665 && GET_MODE_UNIT_SIZE (new_mode) == GET_MODE_SIZE (tmode)
1666 && targetm.vector_mode_supported_p (new_mode))
1667 break;
1668 if (new_mode != VOIDmode)
1669 op0 = gen_lowpart (new_mode, op0);
1672 /* Use vec_extract patterns for extracting parts of vectors whenever
1673 available. */
1674 machine_mode outermode = GET_MODE (op0);
1675 scalar_mode innermode = GET_MODE_INNER (outermode);
1676 if (VECTOR_MODE_P (outermode)
1677 && !MEM_P (op0)
1678 && (convert_optab_handler (vec_extract_optab, outermode, innermode)
1679 != CODE_FOR_nothing)
1680 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (innermode)
1681 == bitnum / GET_MODE_BITSIZE (innermode)))
1683 struct expand_operand ops[3];
1684 enum insn_code icode
1685 = convert_optab_handler (vec_extract_optab, outermode, innermode);
1686 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1688 create_output_operand (&ops[0], target, innermode);
1689 ops[0].target = 1;
1690 create_input_operand (&ops[1], op0, outermode);
1691 create_integer_operand (&ops[2], pos);
1692 if (maybe_expand_insn (icode, 3, ops))
1694 if (alt_rtl && ops[0].target)
1695 *alt_rtl = target;
1696 target = ops[0].value;
1697 if (GET_MODE (target) != mode)
1698 return gen_lowpart (tmode, target);
1699 return target;
1703 /* Make sure we are playing with integral modes. Pun with subregs
1704 if we aren't. */
1705 opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
1706 scalar_int_mode imode;
1707 if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
1709 if (MEM_P (op0))
1710 op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
1711 0, MEM_SIZE (op0));
1712 else if (op0_mode.exists (&imode))
1714 op0 = gen_lowpart (imode, op0);
1716 /* If we got a SUBREG, force it into a register since we
1717 aren't going to be able to do another SUBREG on it. */
1718 if (GET_CODE (op0) == SUBREG)
1719 op0 = force_reg (imode, op0);
1721 else
1723 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1724 rtx mem = assign_stack_temp (GET_MODE (op0), size);
1725 emit_move_insn (mem, op0);
1726 op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1730 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1731 If that's wrong, the solution is to test for it and set TARGET to 0
1732 if needed. */
1734 /* Get the mode of the field to use for atomic access or subreg
1735 conversion. */
1736 mode1 = mode;
1737 if (SCALAR_INT_MODE_P (tmode))
1739 machine_mode try_mode = mode_for_size (bitsize,
1740 GET_MODE_CLASS (tmode), 0);
1741 if (try_mode != BLKmode)
1742 mode1 = try_mode;
1744 gcc_assert (mode1 != BLKmode);
1746 /* Extraction of a full MODE1 value can be done with a subreg as long
1747 as the least significant bit of the value is the least significant
1748 bit of either OP0 or a word of OP0. */
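/* Illustrative example (not from the original source): on a little-endian
   target the low 32 bits of a DImode pseudo can be taken directly as
   (subreg:SI (reg:DI ...) 0), with no shift or mask needed.  */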
1749 if (!MEM_P (op0)
1750 && !reverse
1751 && lowpart_bit_field_p (bitnum, bitsize, op0_mode.require ())
1752 && bitsize == GET_MODE_BITSIZE (mode1)
1753 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, op0_mode.require ()))
1755 rtx sub = simplify_gen_subreg (mode1, op0, op0_mode.require (),
1756 bitnum / BITS_PER_UNIT);
1757 if (sub)
1758 return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1761 /* Extraction of a full MODE1 value can be done with a load as long as
1762 the field is on a byte boundary and is sufficiently aligned. */
1763 if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1765 op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1766 if (reverse)
1767 op0 = flip_storage_order (mode1, op0);
1768 return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1771 /* Handle fields bigger than a word. */
1773 if (bitsize > BITS_PER_WORD)
1775 /* Here we transfer the words of the field
1776 in the order least significant first.
1777 This is because the most significant word is the one which may
1778 be less than full. */
1780 const bool backwards = WORDS_BIG_ENDIAN;
1781 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1782 unsigned int i;
1783 rtx_insn *last;
1785 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1786 target = gen_reg_rtx (mode);
1788 /* In case we're about to clobber a base register or something
1789 (see gcc.c-torture/execute/20040625-1.c). */
1790 if (reg_mentioned_p (target, str_rtx))
1791 target = gen_reg_rtx (mode);
1793 /* Indicate for flow that the entire target reg is being set. */
1794 emit_clobber (target);
1796 last = get_last_insn ();
1797 for (i = 0; i < nwords; i++)
1799 /* If I is 0, use the low-order word in both field and target;
1800 if I is 1, use the next to lowest word; and so on. */
1801 /* Word number in TARGET to use. */
1802 unsigned int wordnum
1803 = (backwards
1804 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1805 : i);
1806 /* Offset from start of field in OP0. */
1807 unsigned int bit_offset = (backwards ^ reverse
1808 ? MAX ((int) bitsize - ((int) i + 1)
1809 * BITS_PER_WORD,
1810 0)
1811 : (int) i * BITS_PER_WORD);
1812 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1813 rtx result_part
1814 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1815 bitsize - i * BITS_PER_WORD),
1816 bitnum + bit_offset, 1, target_part,
1817 mode, word_mode, reverse, fallback_p, NULL);
1819 gcc_assert (target_part);
1820 if (!result_part)
1822 delete_insns_since (last);
1823 return NULL;
1826 if (result_part != target_part)
1827 emit_move_insn (target_part, result_part);
1830 if (unsignedp)
1832 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1833 need to be zero'd out. */
1834 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1836 unsigned int i, total_words;
1838 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1839 for (i = nwords; i < total_words; i++)
1840 emit_move_insn
1841 (operand_subword (target,
1842 backwards ? total_words - i - 1 : i,
1843 1, VOIDmode),
1844 const0_rtx);
1846 return target;
1849 /* Signed bit field: sign-extend with two arithmetic shifts. */
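/* For instance (illustrative), a 48-bit signed field sitting in the low
   bits of a 64-bit TARGET is sign-extended by shifting left by 16 and
   then arithmetic-shifting right by 16.  */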
1850 target = expand_shift (LSHIFT_EXPR, mode, target,
1851 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1852 return expand_shift (RSHIFT_EXPR, mode, target,
1853 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1856 /* If OP0 is a multi-word register, narrow it to the affected word.
1857 If the region spans two words, defer to extract_split_bit_field. */
1858 if (!MEM_P (op0) && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD)
1860 if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1862 if (!fallback_p)
1863 return NULL_RTX;
1864 target = extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
1865 unsignedp, reverse);
1866 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1868 op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
1869 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1870 op0_mode = word_mode;
1871 bitnum %= BITS_PER_WORD;
1874 /* From here on we know the desired field is smaller than a word.
1875 If OP0 is a register, it too fits within a word. */
1876 enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1877 extraction_insn extv;
1878 if (!MEM_P (op0)
1879 && !reverse
1880 /* ??? We could limit the structure size to the part of OP0 that
1881 contains the field, with appropriate checks for endianness
1882 and TRULY_NOOP_TRUNCATION. */
1883 && get_best_reg_extraction_insn (&extv, pattern,
1884 GET_MODE_BITSIZE (op0_mode.require ()),
1885 tmode))
1887 rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
1888 bitsize, bitnum,
1889 unsignedp, target, mode,
1890 tmode);
1891 if (result)
1892 return result;
1895 /* If OP0 is a memory, try copying it to a register and seeing if a
1896 cheap register alternative is available. */
1897 if (MEM_P (op0) && !reverse)
1899 if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1900 tmode))
1902 rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
1903 bitsize, bitnum,
1904 unsignedp, target, mode,
1905 tmode);
1906 if (result)
1907 return result;
1910 rtx_insn *last = get_last_insn ();
1912 /* Try loading part of OP0 into a register and extracting the
1913 bitfield from that. */
1914 unsigned HOST_WIDE_INT bitpos;
1915 rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1916 0, 0, tmode, &bitpos);
1917 if (xop0)
1919 xop0 = copy_to_reg (xop0);
1920 rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1921 unsignedp, target,
1922 mode, tmode, reverse, false, NULL);
1923 if (result)
1924 return result;
1925 delete_insns_since (last);
1929 if (!fallback_p)
1930 return NULL;
1932 /* Find a correspondingly-sized integer field, so we can apply
1933 shifts and masks to it. */
1934 scalar_int_mode int_mode;
1935 if (!int_mode_for_mode (tmode).exists (&int_mode))
1936 /* If this fails, we should probably push op0 out to memory and then
1937 do a load. */
1938 int_mode = int_mode_for_mode (mode).require ();
1940 target = extract_fixed_bit_field (int_mode, op0, op0_mode, bitsize,
1941 bitnum, target, unsignedp, reverse);
1943 /* Complex values must be reversed piecewise, so we need to undo the global
1944 reversal, convert to the complex mode and reverse again. */
1945 if (reverse && COMPLEX_MODE_P (tmode))
1947 target = flip_storage_order (int_mode, target);
1948 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1949 target = flip_storage_order (tmode, target);
1951 else
1952 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1954 return target;
1957 /* Generate code to extract a byte-field from STR_RTX
1958 containing BITSIZE bits, starting at BITNUM,
1959 and put it in TARGET if possible (if TARGET is nonzero).
1960 Regardless of TARGET, we return the rtx for where the value is placed.
1962 STR_RTX is the structure containing the byte (a REG or MEM).
1963 UNSIGNEDP is nonzero if this is an unsigned bit field.
1964 MODE is the natural mode of the field value once extracted.
1965 TMODE is the mode the caller would like the value to have;
1966 but the value may be returned with type MODE instead.
1968 If REVERSE is true, the extraction is to be done in reverse order.
1970 If a TARGET is specified and we can store in it at no extra cost,
1971 we do so, and return TARGET.
1972 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1973 if they are equally easy. */
1975 rtx
1976 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1977 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1978 machine_mode mode, machine_mode tmode, bool reverse,
1979 rtx *alt_rtl)
1981 machine_mode mode1;
1983 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1984 if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1985 mode1 = GET_MODE (str_rtx);
1986 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1987 mode1 = GET_MODE (target);
1988 else
1989 mode1 = tmode;
1991 scalar_int_mode int_mode;
1992 if (is_a <scalar_int_mode> (mode1, &int_mode)
1993 && strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, int_mode, 0, 0))
1995 /* Extraction of a full INT_MODE value can be done with a simple load.
1996 We know here that the field can be accessed with one single
1997 instruction. For targets that support unaligned memory,
1998 an unaligned access may be necessary. */
1999 if (bitsize == GET_MODE_BITSIZE (int_mode))
2001 rtx result = adjust_bitfield_address (str_rtx, int_mode,
2002 bitnum / BITS_PER_UNIT);
2003 if (reverse)
2004 result = flip_storage_order (int_mode, result);
2005 gcc_assert (bitnum % BITS_PER_UNIT == 0);
2006 return convert_extracted_bit_field (result, mode, tmode, unsignedp);
2009 str_rtx = narrow_bit_field_mem (str_rtx, int_mode, bitsize, bitnum,
2010 &bitnum);
2011 gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (int_mode));
2012 str_rtx = copy_to_reg (str_rtx);
2015 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
2016 target, mode, tmode, reverse, true, alt_rtl);
2019 /* Use shifts and boolean operations to extract a field of BITSIZE bits
2020 from bit BITNUM of OP0. If OP0_MODE is defined, it is the mode of OP0,
2021 otherwise OP0 is a BLKmode MEM.
2023 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
2024 If REVERSE is true, the extraction is to be done in reverse order.
2026 If TARGET is nonzero, attempts to store the value there
2027 and return TARGET, but this is not guaranteed.
2028 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
2030 static rtx
2031 extract_fixed_bit_field (machine_mode tmode, rtx op0,
2032 opt_scalar_int_mode op0_mode,
2033 unsigned HOST_WIDE_INT bitsize,
2034 unsigned HOST_WIDE_INT bitnum, rtx target,
2035 int unsignedp, bool reverse)
2037 scalar_int_mode mode;
2038 if (MEM_P (op0))
2040 if (!get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
2041 BITS_PER_WORD, MEM_VOLATILE_P (op0), &mode))
2042 /* The only way this should occur is if the field spans word
2043 boundaries. */
2044 return extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
2045 unsignedp, reverse);
2047 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
2049 else
2050 mode = op0_mode.require ();
2052 return extract_fixed_bit_field_1 (tmode, op0, mode, bitsize, bitnum,
2053 target, unsignedp, reverse);
2056 /* Helper function for extract_fixed_bit_field, extracts
2057 the bit field always using MODE, which is the mode of OP0.
2058 The other arguments are as for extract_fixed_bit_field. */
2060 static rtx
2061 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0, scalar_int_mode mode,
2062 unsigned HOST_WIDE_INT bitsize,
2063 unsigned HOST_WIDE_INT bitnum, rtx target,
2064 int unsignedp, bool reverse)
2066 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
2067 for invalid input, such as extract equivalent of f5 from
2068 gcc.dg/pr48335-2.c. */
2070 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2071 /* BITNUM is the distance between our msb and that of OP0.
2072 Convert it to the distance from the lsb. */
2073 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
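/* E.g. (illustrative) with a 32-bit MODE, BYTES_BIG_ENDIAN and !REVERSE,
   a 3-bit field recorded at BITNUM 5 from the msb is at BITNUM
   32 - 3 - 5 == 24 from the lsb.  */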
2075 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
2076 We have reduced the big-endian case to the little-endian case. */
2077 if (reverse)
2078 op0 = flip_storage_order (mode, op0);
2080 if (unsignedp)
2082 if (bitnum)
2084 /* If the field does not already start at the lsb,
2085 shift it so it does. */
2086 /* Maybe propagate the target for the shift. */
2087 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2088 if (tmode != mode)
2089 subtarget = 0;
2090 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
2092 /* Convert the value to the desired mode. TMODE must also be a
2093 scalar integer for this conversion to make sense, since we
2094 shouldn't reinterpret the bits. */
2095 scalar_int_mode new_mode = as_a <scalar_int_mode> (tmode);
2096 if (mode != new_mode)
2097 op0 = convert_to_mode (new_mode, op0, 1);
2099 /* Unless the msb of the field used to be the msb when we shifted,
2100 mask out the upper bits. */
2102 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
2103 return expand_binop (new_mode, and_optab, op0,
2104 mask_rtx (new_mode, 0, bitsize, 0),
2105 target, 1, OPTAB_LIB_WIDEN);
2106 return op0;
2109 /* To extract a signed bit-field, first shift its msb to the msb of the word,
2110 then arithmetic-shift its lsb to the lsb of the word. */
2111 op0 = force_reg (mode, op0);
2113 /* Find the narrowest integer mode that contains the field. */
2115 opt_scalar_int_mode mode_iter;
2116 FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT)
2117 if (GET_MODE_BITSIZE (mode_iter.require ()) >= bitsize + bitnum)
2118 break;
2120 mode = mode_iter.require ();
2121 op0 = convert_to_mode (mode, op0, 0);
2123 if (mode != tmode)
2124 target = 0;
2126 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2128 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2129 /* Maybe propagate the target for the shift. */
2130 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2131 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2134 return expand_shift (RSHIFT_EXPR, mode, op0,
2135 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
2138 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2139 VALUE << BITPOS. */
2141 static rtx
2142 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2143 int bitpos)
2145 return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2148 /* Extract a bit field that is split across two words
2149 and return an RTX for the result.
2151 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2152 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2153 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2154 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
2155 a BLKmode MEM.
2157 If REVERSE is true, the extraction is to be done in reverse order. */
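/* Illustrative example (not from the original source): on a little-endian
   target with 32-bit words, a 16-bit field starting at bit 24 is read as
   8 bits from the first word and 8 bits from the second word, the second
   part shifted left by 8 and IORed into the first.  */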
2159 static rtx
2160 extract_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
2161 unsigned HOST_WIDE_INT bitsize,
2162 unsigned HOST_WIDE_INT bitpos, int unsignedp,
2163 bool reverse)
2165 unsigned int unit;
2166 unsigned int bitsdone = 0;
2167 rtx result = NULL_RTX;
2168 int first = 1;
2170 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2171 much at a time. */
2172 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2173 unit = BITS_PER_WORD;
2174 else
2175 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2177 while (bitsdone < bitsize)
2179 unsigned HOST_WIDE_INT thissize;
2180 rtx part;
2181 unsigned HOST_WIDE_INT thispos;
2182 unsigned HOST_WIDE_INT offset;
2184 offset = (bitpos + bitsdone) / unit;
2185 thispos = (bitpos + bitsdone) % unit;
2187 /* THISSIZE must not overrun a word boundary. Otherwise,
2188 extract_fixed_bit_field will call us again, and we will mutually
2189 recurse forever. */
2190 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2191 thissize = MIN (thissize, unit - thispos);
2193 /* If OP0 is a register, then handle OFFSET here. */
2194 rtx op0_piece = op0;
2195 opt_scalar_int_mode op0_piece_mode = op0_mode;
2196 if (SUBREG_P (op0) || REG_P (op0))
2198 op0_piece = operand_subword_force (op0, offset, op0_mode.require ());
2199 op0_piece_mode = word_mode;
2200 offset = 0;
2203 /* Extract the parts in bit-counting order,
2204 whose meaning is determined by BYTES_PER_UNIT.
2205 OFFSET is in UNITs, and UNIT is in bits. */
2206 part = extract_fixed_bit_field (word_mode, op0_piece, op0_piece_mode,
2207 thissize, offset * unit + thispos,
2208 0, 1, reverse);
2209 bitsdone += thissize;
2211 /* Shift this part into place for the result. */
2212 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2214 if (bitsize != bitsdone)
2215 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2216 bitsize - bitsdone, 0, 1);
2218 else
2220 if (bitsdone != thissize)
2221 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2222 bitsdone - thissize, 0, 1);
2225 if (first)
2226 result = part;
2227 else
2228 /* Combine the parts with bitwise or. This works
2229 because we extracted each part as an unsigned bit field. */
2230 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2231 OPTAB_LIB_WIDEN);
2233 first = 0;
2236 /* Unsigned bit field: we are done. */
2237 if (unsignedp)
2238 return result;
2239 /* Signed bit field: sign-extend with two arithmetic shifts. */
2240 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2241 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2242 return expand_shift (RSHIFT_EXPR, word_mode, result,
2243 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2246 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2247 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2248 MODE, fill the upper bits with zeros. Fail if the layout of either
2249 mode is unknown (as for CC modes) or if the extraction would involve
2250 unprofitable mode punning. Return the value on success, otherwise
2251 return null.
2253 This is different from gen_lowpart* in these respects:
2255 - the returned value must always be considered an rvalue
2257 - when MODE is wider than SRC_MODE, the extraction involves
2258 a zero extension
2260 - when MODE is smaller than SRC_MODE, the extraction involves
2261 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2263 In other words, this routine performs a computation, whereas the
2264 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2265 operations. */
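/* For example (illustrative), extract_low_bits (SImode, DImode, x)
   typically yields the low 32 bits of X as an SImode rvalue, while
   extract_low_bits (DImode, SImode, y) zero-extends Y to 64 bits; either
   call may return null if the modes cannot be tied cheaply.  */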
2267 rtx
2268 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2270 scalar_int_mode int_mode, src_int_mode;
2272 if (mode == src_mode)
2273 return src;
2275 if (CONSTANT_P (src))
2277 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2278 fails, it will happily create (subreg (symbol_ref)) or similar
2279 invalid SUBREGs. */
2280 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2281 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2282 if (ret)
2283 return ret;
2285 if (GET_MODE (src) == VOIDmode
2286 || !validate_subreg (mode, src_mode, src, byte))
2287 return NULL_RTX;
2289 src = force_reg (GET_MODE (src), src);
2290 return gen_rtx_SUBREG (mode, src, byte);
2293 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2294 return NULL_RTX;
2296 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2297 && MODES_TIEABLE_P (mode, src_mode))
2299 rtx x = gen_lowpart_common (mode, src);
2300 if (x)
2301 return x;
2304 if (!int_mode_for_mode (src_mode).exists (&src_int_mode)
2305 || !int_mode_for_mode (mode).exists (&int_mode))
2306 return NULL_RTX;
2308 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2309 return NULL_RTX;
2310 if (!MODES_TIEABLE_P (int_mode, mode))
2311 return NULL_RTX;
2313 src = gen_lowpart (src_int_mode, src);
2314 src = convert_modes (int_mode, src_int_mode, src, true);
2315 src = gen_lowpart (mode, src);
2316 return src;
2319 /* Add INC into TARGET. */
2321 void
2322 expand_inc (rtx target, rtx inc)
2324 rtx value = expand_binop (GET_MODE (target), add_optab,
2325 target, inc,
2326 target, 0, OPTAB_LIB_WIDEN);
2327 if (value != target)
2328 emit_move_insn (target, value);
2331 /* Subtract DEC from TARGET. */
2333 void
2334 expand_dec (rtx target, rtx dec)
2336 rtx value = expand_binop (GET_MODE (target), sub_optab,
2337 target, dec,
2338 target, 0, OPTAB_LIB_WIDEN);
2339 if (value != target)
2340 emit_move_insn (target, value);
2343 /* Output a shift instruction for expression code CODE,
2344 with SHIFTED being the rtx for the value to shift,
2345 and AMOUNT the rtx for the amount to shift by.
2346 Store the result in the rtx TARGET, if that is convenient.
2347 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2348 Return the rtx for where the value is.
2349 If that cannot be done, abort the compilation unless MAY_FAIL is true,
2350 in which case 0 is returned. */
2352 static rtx
2353 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2354 rtx amount, rtx target, int unsignedp, bool may_fail = false)
2356 rtx op1, temp = 0;
2357 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2358 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2359 optab lshift_optab = ashl_optab;
2360 optab rshift_arith_optab = ashr_optab;
2361 optab rshift_uns_optab = lshr_optab;
2362 optab lrotate_optab = rotl_optab;
2363 optab rrotate_optab = rotr_optab;
2364 machine_mode op1_mode;
2365 machine_mode scalar_mode = mode;
2366 int attempt;
2367 bool speed = optimize_insn_for_speed_p ();
2369 if (VECTOR_MODE_P (mode))
2370 scalar_mode = GET_MODE_INNER (mode);
2371 op1 = amount;
2372 op1_mode = GET_MODE (op1);
2374 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2375 shift amount is a vector, use the vector/vector shift patterns. */
2376 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2378 lshift_optab = vashl_optab;
2379 rshift_arith_optab = vashr_optab;
2380 rshift_uns_optab = vlshr_optab;
2381 lrotate_optab = vrotl_optab;
2382 rrotate_optab = vrotr_optab;
2385 /* We used to detect shift counts computed by NEGATE_EXPR here and
2386 shift in the other direction, but that does not work
2387 on all machines. */
2389 if (SHIFT_COUNT_TRUNCATED)
2391 if (CONST_INT_P (op1)
2392 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2393 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2394 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2395 % GET_MODE_BITSIZE (scalar_mode));
2396 else if (GET_CODE (op1) == SUBREG
2397 && subreg_lowpart_p (op1)
2398 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2399 && SCALAR_INT_MODE_P (GET_MODE (op1)))
2400 op1 = SUBREG_REG (op1);
2403 /* Canonicalize rotates by constant amount. If op1 is bitsize / 2,
2404 prefer left rotation, if op1 is from bitsize / 2 + 1 to
2405 bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2406 amount instead. */
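/* E.g. (illustrative) in SImode a rotate left by 25 is rewritten as a
   rotate right by 32 - 25 == 7.  */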
2407 if (rotate
2408 && CONST_INT_P (op1)
2409 && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2410 GET_MODE_BITSIZE (scalar_mode) - 1))
2412 op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2413 left = !left;
2414 code = left ? LROTATE_EXPR : RROTATE_EXPR;
2417 /* Rotation of 16-bit values by 8 bits is effectively equivalent to a bswaphi.
2418 Note that this is not the case for bigger values. For instance a rotation
2419 of 0x01020304 by 16 bits gives 0x03040102 which is different from
2420 0x04030201 (bswapsi). */
2421 if (rotate
2422 && CONST_INT_P (op1)
2423 && INTVAL (op1) == BITS_PER_UNIT
2424 && GET_MODE_SIZE (scalar_mode) == 2
2425 && optab_handler (bswap_optab, HImode) != CODE_FOR_nothing)
2426 return expand_unop (HImode, bswap_optab, shifted, NULL_RTX,
2427 unsignedp);
2429 if (op1 == const0_rtx)
2430 return shifted;
2432 /* Check whether it's cheaper to implement a left shift by a constant
2433 bit count by a sequence of additions. */
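/* Illustrative example (not from the original source): if a shift by 2
   costs more than two additions, x << 2 is emitted as
   t = x + x; t = t + t.  */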
2434 if (code == LSHIFT_EXPR
2435 && CONST_INT_P (op1)
2436 && INTVAL (op1) > 0
2437 && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2438 && INTVAL (op1) < MAX_BITS_PER_WORD
2439 && (shift_cost (speed, mode, INTVAL (op1))
2440 > INTVAL (op1) * add_cost (speed, mode))
2441 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2443 int i;
2444 for (i = 0; i < INTVAL (op1); i++)
2446 temp = force_reg (mode, shifted);
2447 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2448 unsignedp, OPTAB_LIB_WIDEN);
2450 return shifted;
2453 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2455 enum optab_methods methods;
2457 if (attempt == 0)
2458 methods = OPTAB_DIRECT;
2459 else if (attempt == 1)
2460 methods = OPTAB_WIDEN;
2461 else
2462 methods = OPTAB_LIB_WIDEN;
2464 if (rotate)
2466 /* Widening does not work for rotation. */
2467 if (methods == OPTAB_WIDEN)
2468 continue;
2469 else if (methods == OPTAB_LIB_WIDEN)
2471 /* If we have been unable to open-code this by a rotation,
2472 do it as the IOR of two shifts. I.e., to rotate A
2473 by N bits, compute
2474 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2475 where C is the bitsize of A.
2477 It is theoretically possible that the target machine might
2478 not be able to perform either shift and hence we would
2479 be making two libcalls rather than just the one for the
2480 shift (similarly if IOR could not be done). We will allow
2481 this extremely unlikely lossage to avoid complicating the
2482 code below. */
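/* Illustrative sketch (assuming a 32-bit mode): for a variable amount N
   this emits (A << N) | ((unsigned) A >> ((-N) & 31)), which is also
   well defined for N == 0.  */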
2484 rtx subtarget = target == shifted ? 0 : target;
2485 rtx new_amount, other_amount;
2486 rtx temp1;
2488 new_amount = op1;
2489 if (op1 == const0_rtx)
2490 return shifted;
2491 else if (CONST_INT_P (op1))
2492 other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode)
2493 - INTVAL (op1));
2494 else
2496 other_amount
2497 = simplify_gen_unary (NEG, GET_MODE (op1),
2498 op1, GET_MODE (op1));
2499 HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2500 other_amount
2501 = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2502 gen_int_mode (mask, GET_MODE (op1)));
2505 shifted = force_reg (mode, shifted);
2507 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2508 mode, shifted, new_amount, 0, 1);
2509 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2510 mode, shifted, other_amount,
2511 subtarget, 1);
2512 return expand_binop (mode, ior_optab, temp, temp1, target,
2513 unsignedp, methods);
2516 temp = expand_binop (mode,
2517 left ? lrotate_optab : rrotate_optab,
2518 shifted, op1, target, unsignedp, methods);
2520 else if (unsignedp)
2521 temp = expand_binop (mode,
2522 left ? lshift_optab : rshift_uns_optab,
2523 shifted, op1, target, unsignedp, methods);
2525 /* Do arithmetic shifts.
2526 Also, if we are going to widen the operand, we can just as well
2527 use an arithmetic right-shift instead of a logical one. */
2528 if (temp == 0 && ! rotate
2529 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2531 enum optab_methods methods1 = methods;
2533 /* If trying to widen a log shift to an arithmetic shift,
2534 don't accept an arithmetic shift of the same size. */
2535 if (unsignedp)
2536 methods1 = OPTAB_MUST_WIDEN;
2538 /* Arithmetic shift */
2540 temp = expand_binop (mode,
2541 left ? lshift_optab : rshift_arith_optab,
2542 shifted, op1, target, unsignedp, methods1);
2545 /* We used to try extzv here for logical right shifts, but that was
2546 only useful for one machine, the VAX, and caused poor code
2547 generation there for lshrdi3, so the code was deleted and a
2548 define_expand for lshrsi3 was added to vax.md. */
2551 gcc_assert (temp != NULL_RTX || may_fail);
2552 return temp;
2555 /* Output a shift instruction for expression code CODE,
2556 with SHIFTED being the rtx for the value to shift,
2557 and AMOUNT the amount to shift by.
2558 Store the result in the rtx TARGET, if that is convenient.
2559 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2560 Return the rtx for where the value is. */
2562 rtx
2563 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2564 int amount, rtx target, int unsignedp)
2566 return expand_shift_1 (code, mode,
2567 shifted, GEN_INT (amount), target, unsignedp);
2570 /* Likewise, but return 0 if that cannot be done. */
2572 static rtx
2573 maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2574 int amount, rtx target, int unsignedp)
2576 return expand_shift_1 (code, mode,
2577 shifted, GEN_INT (amount), target, unsignedp, true);
2580 /* Output a shift instruction for expression code CODE,
2581 with SHIFTED being the rtx for the value to shift,
2582 and AMOUNT the tree for the amount to shift by.
2583 Store the result in the rtx TARGET, if that is convenient.
2584 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2585 Return the rtx for where the value is. */
2587 rtx
2588 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2589 tree amount, rtx target, int unsignedp)
2591 return expand_shift_1 (code, mode,
2592 shifted, expand_normal (amount), target, unsignedp);
2596 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2597 const struct mult_cost *, machine_mode mode);
2598 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2599 const struct algorithm *, enum mult_variant);
2600 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2601 static rtx extract_high_half (scalar_int_mode, rtx);
2602 static rtx expmed_mult_highpart (scalar_int_mode, rtx, rtx, rtx, int, int);
2603 static rtx expmed_mult_highpart_optab (scalar_int_mode, rtx, rtx, rtx,
2604 int, int);
2605 /* Compute and return the best algorithm for multiplying by T.
2606 The algorithm must cost less than cost_limit.
2607 If retval.cost >= COST_LIMIT, no algorithm was found and all
2608 other fields of the returned struct are undefined.
2609 MODE is the machine mode of the multiplication. */
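/* Illustrative example (not from the original source): T == 10 can be
   synthesized as 10 == ((1 << 2) + 1) << 1, i.e. x*10 == ((x << 2) + x) << 1,
   one shift-add followed by a final shift.  */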
2611 static void
2612 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2613 const struct mult_cost *cost_limit, machine_mode mode)
2615 int m;
2616 struct algorithm *alg_in, *best_alg;
2617 struct mult_cost best_cost;
2618 struct mult_cost new_limit;
2619 int op_cost, op_latency;
2620 unsigned HOST_WIDE_INT orig_t = t;
2621 unsigned HOST_WIDE_INT q;
2622 int maxm, hash_index;
2623 bool cache_hit = false;
2624 enum alg_code cache_alg = alg_zero;
2625 bool speed = optimize_insn_for_speed_p ();
2626 scalar_int_mode imode;
2627 struct alg_hash_entry *entry_ptr;
2629 /* Indicate that no algorithm is yet found. If no algorithm
2630 is found, this value will be returned and indicate failure. */
2631 alg_out->cost.cost = cost_limit->cost + 1;
2632 alg_out->cost.latency = cost_limit->latency + 1;
2634 if (cost_limit->cost < 0
2635 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2636 return;
2638 /* Be prepared for vector modes. */
2639 imode = as_a <scalar_int_mode> (GET_MODE_INNER (mode));
2641 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2643 /* Restrict the bits of "t" to the multiplication's mode. */
2644 t &= GET_MODE_MASK (imode);
2646 /* t == 1 can be done in zero cost. */
2647 if (t == 1)
2649 alg_out->ops = 1;
2650 alg_out->cost.cost = 0;
2651 alg_out->cost.latency = 0;
2652 alg_out->op[0] = alg_m;
2653 return;
2656 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2657 fail now. */
2658 if (t == 0)
2660 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2661 return;
2662 else
2664 alg_out->ops = 1;
2665 alg_out->cost.cost = zero_cost (speed);
2666 alg_out->cost.latency = zero_cost (speed);
2667 alg_out->op[0] = alg_zero;
2668 return;
2672 /* We'll be needing a couple extra algorithm structures now. */
2674 alg_in = XALLOCA (struct algorithm);
2675 best_alg = XALLOCA (struct algorithm);
2676 best_cost = *cost_limit;
2678 /* Compute the hash index. */
2679 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2681 /* See if we already know what to do for T. */
2682 entry_ptr = alg_hash_entry_ptr (hash_index);
2683 if (entry_ptr->t == t
2684 && entry_ptr->mode == mode
2685 && entry_ptr->speed == speed
2686 && entry_ptr->alg != alg_unknown)
2688 cache_alg = entry_ptr->alg;
2690 if (cache_alg == alg_impossible)
2692 /* The cache tells us that it's impossible to synthesize
2693 multiplication by T within entry_ptr->cost. */
2694 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2695 /* COST_LIMIT is at least as restrictive as the one
2696 recorded in the hash table, in which case we have no
2697 hope of synthesizing a multiplication. Just
2698 return. */
2699 return;
2701 /* If we get here, COST_LIMIT is less restrictive than the
2702 one recorded in the hash table, so we may be able to
2703 synthesize a multiplication. Proceed as if we didn't
2704 have the cache entry. */
2706 else
2708 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2709 /* The cached algorithm shows that this multiplication
2710 requires more cost than COST_LIMIT. Just return. This
2711 way, we don't clobber this cache entry with
2712 alg_impossible but retain useful information. */
2713 return;
2715 cache_hit = true;
2717 switch (cache_alg)
2719 case alg_shift:
2720 goto do_alg_shift;
2722 case alg_add_t_m2:
2723 case alg_sub_t_m2:
2724 goto do_alg_addsub_t_m2;
2726 case alg_add_factor:
2727 case alg_sub_factor:
2728 goto do_alg_addsub_factor;
2730 case alg_add_t2_m:
2731 goto do_alg_add_t2_m;
2733 case alg_sub_t2_m:
2734 goto do_alg_sub_t2_m;
2736 default:
2737 gcc_unreachable ();
2742 /* If we have a group of zero bits at the low-order part of T, try
2743 multiplying by the remaining bits and then doing a shift. */
2745 if ((t & 1) == 0)
2747 do_alg_shift:
2748 m = ctz_or_zero (t); /* m = number of low zero bits */
2749 if (m < maxm)
2751 q = t >> m;
2752 /* The function expand_shift will choose between a shift and
2753 a sequence of additions, so the observed cost is given as
2754 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2755 op_cost = m * add_cost (speed, mode);
2756 if (shift_cost (speed, mode, m) < op_cost)
2757 op_cost = shift_cost (speed, mode, m);
2758 new_limit.cost = best_cost.cost - op_cost;
2759 new_limit.latency = best_cost.latency - op_cost;
2760 synth_mult (alg_in, q, &new_limit, mode);
2762 alg_in->cost.cost += op_cost;
2763 alg_in->cost.latency += op_cost;
2764 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2766 best_cost = alg_in->cost;
2767 std::swap (alg_in, best_alg);
2768 best_alg->log[best_alg->ops] = m;
2769 best_alg->op[best_alg->ops] = alg_shift;
2772 /* See if treating ORIG_T as a signed number yields a better
2773 sequence. Try this sequence only for a negative ORIG_T
2774 as it would be useless for a non-negative ORIG_T. */
2775 if ((HOST_WIDE_INT) orig_t < 0)
2777 /* Shift ORIG_T as follows because a right shift of a
2778 negative-valued signed type is implementation
2779 defined. */
2780 q = ~(~orig_t >> m);
2781 /* The function expand_shift will choose between a shift
2782 and a sequence of additions, so the observed cost is
2783 given as MIN (m * add_cost(speed, mode),
2784 shift_cost(speed, mode, m)). */
2785 op_cost = m * add_cost (speed, mode);
2786 if (shift_cost (speed, mode, m) < op_cost)
2787 op_cost = shift_cost (speed, mode, m);
2788 new_limit.cost = best_cost.cost - op_cost;
2789 new_limit.latency = best_cost.latency - op_cost;
2790 synth_mult (alg_in, q, &new_limit, mode);
2792 alg_in->cost.cost += op_cost;
2793 alg_in->cost.latency += op_cost;
2794 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2796 best_cost = alg_in->cost;
2797 std::swap (alg_in, best_alg);
2798 best_alg->log[best_alg->ops] = m;
2799 best_alg->op[best_alg->ops] = alg_shift;
2803 if (cache_hit)
2804 goto done;
2807 /* If we have an odd number, add or subtract one. */
2808 if ((t & 1) != 0)
2810 unsigned HOST_WIDE_INT w;
2812 do_alg_addsub_t_m2:
2813 for (w = 1; (w & t) != 0; w <<= 1)
2814 ;
2815 /* If T was -1, then W will be zero after the loop. This is another
2816 case where T ends with ...111. Handling this with (T + 1) and
2817 subtract 1 produces slightly better code and results in algorithm
2818 selection much faster than treating it like the ...0111 case
2819 below. */
2820 if (w == 0
2821 || (w > 2
2822 /* Reject the case where t is 3.
2823 Thus we prefer addition in that case. */
2824 && t != 3))
2826 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2828 op_cost = add_cost (speed, mode);
2829 new_limit.cost = best_cost.cost - op_cost;
2830 new_limit.latency = best_cost.latency - op_cost;
2831 synth_mult (alg_in, t + 1, &new_limit, mode);
2833 alg_in->cost.cost += op_cost;
2834 alg_in->cost.latency += op_cost;
2835 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2837 best_cost = alg_in->cost;
2838 std::swap (alg_in, best_alg);
2839 best_alg->log[best_alg->ops] = 0;
2840 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2843 else
2845 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2847 op_cost = add_cost (speed, mode);
2848 new_limit.cost = best_cost.cost - op_cost;
2849 new_limit.latency = best_cost.latency - op_cost;
2850 synth_mult (alg_in, t - 1, &new_limit, mode);
2852 alg_in->cost.cost += op_cost;
2853 alg_in->cost.latency += op_cost;
2854 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2856 best_cost = alg_in->cost;
2857 std::swap (alg_in, best_alg);
2858 best_alg->log[best_alg->ops] = 0;
2859 best_alg->op[best_alg->ops] = alg_add_t_m2;
2863 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2864 quickly with a - a * n for some appropriate constant n. */
2865 m = exact_log2 (-orig_t + 1);
2866 if (m >= 0 && m < maxm)
2868 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2869 /* If the target has a cheap shift-and-subtract insn use
2870 that in preference to a shift insn followed by a sub insn.
2871 Assume that the shift-and-sub is "atomic" with a latency
2872 equal to its cost, otherwise assume that on superscalar
2873 hardware the shift may be executed concurrently with the
2874 earlier steps in the algorithm. */
2875 if (shiftsub1_cost (speed, mode, m) <= op_cost)
2877 op_cost = shiftsub1_cost (speed, mode, m);
2878 op_latency = op_cost;
2880 else
2881 op_latency = add_cost (speed, mode);
2883 new_limit.cost = best_cost.cost - op_cost;
2884 new_limit.latency = best_cost.latency - op_latency;
2885 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2886 &new_limit, mode);
2888 alg_in->cost.cost += op_cost;
2889 alg_in->cost.latency += op_latency;
2890 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2892 best_cost = alg_in->cost;
2893 std::swap (alg_in, best_alg);
2894 best_alg->log[best_alg->ops] = m;
2895 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2899 if (cache_hit)
2900 goto done;
2903 /* Look for factors of t of the form
2904 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2905 If we find such a factor, we can multiply by t using an algorithm that
2906 multiplies by q, shift the result by m and add/subtract it to itself.
2908 We search for large factors first and loop down, even if large factors
2909 are less probable than small; if we find a large factor we will find a
2910 good sequence quickly, and therefore be able to prune (by decreasing
2911 COST_LIMIT) the search. */
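/* E.g. (illustrative) t == 45 has the factor 9 == (1 << 3) + 1, so x*45
   can be formed as x*5 followed by (x*5 << 3) + x*5.  */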
2913 do_alg_addsub_factor:
2914 for (m = floor_log2 (t - 1); m >= 2; m--)
2916 unsigned HOST_WIDE_INT d;
2918 d = (HOST_WIDE_INT_1U << m) + 1;
2919 if (t % d == 0 && t > d && m < maxm
2920 && (!cache_hit || cache_alg == alg_add_factor))
2922 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2923 if (shiftadd_cost (speed, mode, m) <= op_cost)
2924 op_cost = shiftadd_cost (speed, mode, m);
2926 op_latency = op_cost;
2929 new_limit.cost = best_cost.cost - op_cost;
2930 new_limit.latency = best_cost.latency - op_latency;
2931 synth_mult (alg_in, t / d, &new_limit, mode);
2933 alg_in->cost.cost += op_cost;
2934 alg_in->cost.latency += op_latency;
2935 if (alg_in->cost.latency < op_cost)
2936 alg_in->cost.latency = op_cost;
2937 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2939 best_cost = alg_in->cost;
2940 std::swap (alg_in, best_alg);
2941 best_alg->log[best_alg->ops] = m;
2942 best_alg->op[best_alg->ops] = alg_add_factor;
2944 /* Other factors will have been taken care of in the recursion. */
2945 break;
2948 d = (HOST_WIDE_INT_1U << m) - 1;
2949 if (t % d == 0 && t > d && m < maxm
2950 && (!cache_hit || cache_alg == alg_sub_factor))
2952 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2953 if (shiftsub0_cost (speed, mode, m) <= op_cost)
2954 op_cost = shiftsub0_cost (speed, mode, m);
2956 op_latency = op_cost;
2958 new_limit.cost = best_cost.cost - op_cost;
2959 new_limit.latency = best_cost.latency - op_latency;
2960 synth_mult (alg_in, t / d, &new_limit, mode);
2962 alg_in->cost.cost += op_cost;
2963 alg_in->cost.latency += op_latency;
2964 if (alg_in->cost.latency < op_cost)
2965 alg_in->cost.latency = op_cost;
2966 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2968 best_cost = alg_in->cost;
2969 std::swap (alg_in, best_alg);
2970 best_alg->log[best_alg->ops] = m;
2971 best_alg->op[best_alg->ops] = alg_sub_factor;
2973 break;
2976 if (cache_hit)
2977 goto done;
2979 /* Try shift-and-add (load effective address) instructions,
2980 i.e. do a*3, a*5, a*9. */
2981 if ((t & 1) != 0)
2983 do_alg_add_t2_m:
2984 q = t - 1;
2985 m = ctz_hwi (q);
2986 if (q && m < maxm)
2988 op_cost = shiftadd_cost (speed, mode, m);
2989 new_limit.cost = best_cost.cost - op_cost;
2990 new_limit.latency = best_cost.latency - op_cost;
2991 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2993 alg_in->cost.cost += op_cost;
2994 alg_in->cost.latency += op_cost;
2995 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2997 best_cost = alg_in->cost;
2998 std::swap (alg_in, best_alg);
2999 best_alg->log[best_alg->ops] = m;
3000 best_alg->op[best_alg->ops] = alg_add_t2_m;
3003 if (cache_hit)
3004 goto done;
3006 do_alg_sub_t2_m:
3007 q = t + 1;
3008 m = ctz_hwi (q);
3009 if (q && m < maxm)
3011 op_cost = shiftsub0_cost (speed, mode, m);
3012 new_limit.cost = best_cost.cost - op_cost;
3013 new_limit.latency = best_cost.latency - op_cost;
3014 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
3016 alg_in->cost.cost += op_cost;
3017 alg_in->cost.latency += op_cost;
3018 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3020 best_cost = alg_in->cost;
3021 std::swap (alg_in, best_alg);
3022 best_alg->log[best_alg->ops] = m;
3023 best_alg->op[best_alg->ops] = alg_sub_t2_m;
3026 if (cache_hit)
3027 goto done;
3030 done:
3031 /* If best_cost has not decreased, we have not found any algorithm. */
3032 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
3034 /* We failed to find an algorithm. Record alg_impossible for
3035 this case (that is, <T, MODE, COST_LIMIT>) so that next time
3036 we are asked to find an algorithm for T within the same or
3037 lower COST_LIMIT, we can immediately return to the
3038 caller. */
3039 entry_ptr->t = t;
3040 entry_ptr->mode = mode;
3041 entry_ptr->speed = speed;
3042 entry_ptr->alg = alg_impossible;
3043 entry_ptr->cost = *cost_limit;
3044 return;
3047 /* Cache the result. */
3048 if (!cache_hit)
3050 entry_ptr->t = t;
3051 entry_ptr->mode = mode;
3052 entry_ptr->speed = speed;
3053 entry_ptr->alg = best_alg->op[best_alg->ops];
3054 entry_ptr->cost.cost = best_cost.cost;
3055 entry_ptr->cost.latency = best_cost.latency;
3058 /* If we are getting a too long sequence for `struct algorithm'
3059 to record, make this search fail. */
3060 if (best_alg->ops == MAX_BITS_PER_WORD)
3061 return;
3063 /* Copy the algorithm from temporary space to the space at alg_out.
3064 We avoid using structure assignment because the majority of
3065 best_alg is normally undefined, and this is a critical function. */
3066 alg_out->ops = best_alg->ops + 1;
3067 alg_out->cost = best_cost;
3068 memcpy (alg_out->op, best_alg->op,
3069 alg_out->ops * sizeof *alg_out->op);
3070 memcpy (alg_out->log, best_alg->log,
3071 alg_out->ops * sizeof *alg_out->log);
3074 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
3075 Try three variations:
3077 - a shift/add sequence based on VAL itself
3078 - a shift/add sequence based on -VAL, followed by a negation
3079 - a shift/add sequence based on VAL - 1, followed by an addition.
3081 Return true if the cheapest of these cost less than MULT_COST,
3082 describing the algorithm in *ALG and final fixup in *VARIANT. */
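/* Illustrative examples (not from the original source): a multiply by -3
   may come out as -((x << 1) + x) via negate_variant, and a multiply by 7
   as (((x << 1) + x) << 1) + x via add_variant (VAL - 1 == 6), whenever
   those beat the direct decomposition of VAL.  */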
3084 bool
3085 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
3086 struct algorithm *alg, enum mult_variant *variant,
3087 int mult_cost)
3089 struct algorithm alg2;
3090 struct mult_cost limit;
3091 int op_cost;
3092 bool speed = optimize_insn_for_speed_p ();
3094 /* Fail quickly for impossible bounds. */
3095 if (mult_cost < 0)
3096 return false;
3098 /* Ensure that mult_cost provides a reasonable upper bound.
3099 Any constant multiplication can be performed with less
3100 than 2 * bits additions. */
3101 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
3102 if (mult_cost > op_cost)
3103 mult_cost = op_cost;
3105 *variant = basic_variant;
3106 limit.cost = mult_cost;
3107 limit.latency = mult_cost;
3108 synth_mult (alg, val, &limit, mode);
3110 /* This works only if the inverted value actually fits in an
3111 `unsigned int' */
3112 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
3114 op_cost = neg_cost (speed, mode);
3115 if (MULT_COST_LESS (&alg->cost, mult_cost))
3117 limit.cost = alg->cost.cost - op_cost;
3118 limit.latency = alg->cost.latency - op_cost;
3120 else
3122 limit.cost = mult_cost - op_cost;
3123 limit.latency = mult_cost - op_cost;
3126 synth_mult (&alg2, -val, &limit, mode);
3127 alg2.cost.cost += op_cost;
3128 alg2.cost.latency += op_cost;
3129 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3130 *alg = alg2, *variant = negate_variant;
3133 /* This proves very useful for division-by-constant. */
3134 op_cost = add_cost (speed, mode);
3135 if (MULT_COST_LESS (&alg->cost, mult_cost))
3137 limit.cost = alg->cost.cost - op_cost;
3138 limit.latency = alg->cost.latency - op_cost;
3140 else
3142 limit.cost = mult_cost - op_cost;
3143 limit.latency = mult_cost - op_cost;
3146 synth_mult (&alg2, val - 1, &limit, mode);
3147 alg2.cost.cost += op_cost;
3148 alg2.cost.latency += op_cost;
3149 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3150 *alg = alg2, *variant = add_variant;
3152 return MULT_COST_LESS (&alg->cost, mult_cost);
3155 /* A subroutine of expand_mult, used for constant multiplications.
3156 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3157 convenient. Use the shift/add sequence described by ALG and apply
3158 the final fixup specified by VARIANT. */
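/* Illustrative example (not from the original source): for VAL == 10 one
   possible ALG is accum = op0; accum = (accum << 2) + op0; accum <<= 1,
   with val_so_far tracking 1, 5 and then 10.  */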
3160 static rtx
3161 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
3162 rtx target, const struct algorithm *alg,
3163 enum mult_variant variant)
3165 unsigned HOST_WIDE_INT val_so_far;
3166 rtx_insn *insn;
3167 rtx accum, tem;
3168 int opno;
3169 machine_mode nmode;
3171 /* Avoid referencing memory over and over and invalid sharing
3172 on SUBREGs. */
3173 op0 = force_reg (mode, op0);
3175 /* ACCUM starts out either as OP0 or as a zero, depending on
3176 the first operation. */
3178 if (alg->op[0] == alg_zero)
3180 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3181 val_so_far = 0;
3183 else if (alg->op[0] == alg_m)
3185 accum = copy_to_mode_reg (mode, op0);
3186 val_so_far = 1;
3188 else
3189 gcc_unreachable ();
3191 for (opno = 1; opno < alg->ops; opno++)
3193 int log = alg->log[opno];
3194 rtx shift_subtarget = optimize ? 0 : accum;
3195 rtx add_target
3196 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3197 && !optimize)
3198 ? target : 0;
3199 rtx accum_target = optimize ? 0 : accum;
3200 rtx accum_inner;
3202 switch (alg->op[opno])
3204 case alg_shift:
3205 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3206 /* REG_EQUAL note will be attached to the following insn. */
3207 emit_move_insn (accum, tem);
3208 val_so_far <<= log;
3209 break;
3211 case alg_add_t_m2:
3212 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3213 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3214 add_target ? add_target : accum_target);
3215 val_so_far += HOST_WIDE_INT_1U << log;
3216 break;
3218 case alg_sub_t_m2:
3219 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3220 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3221 add_target ? add_target : accum_target);
3222 val_so_far -= HOST_WIDE_INT_1U << log;
3223 break;
3225 case alg_add_t2_m:
3226 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3227 log, shift_subtarget, 0);
3228 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3229 add_target ? add_target : accum_target);
3230 val_so_far = (val_so_far << log) + 1;
3231 break;
3233 case alg_sub_t2_m:
3234 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3235 log, shift_subtarget, 0);
3236 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3237 add_target ? add_target : accum_target);
3238 val_so_far = (val_so_far << log) - 1;
3239 break;
3241 case alg_add_factor:
3242 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3243 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3244 add_target ? add_target : accum_target);
3245 val_so_far += val_so_far << log;
3246 break;
3248 case alg_sub_factor:
3249 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3250 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3251 (add_target
3252 ? add_target : (optimize ? 0 : tem)));
3253 val_so_far = (val_so_far << log) - val_so_far;
3254 break;
3256 default:
3257 gcc_unreachable ();
3260 if (SCALAR_INT_MODE_P (mode))
3262 /* Write a REG_EQUAL note on the last insn so that we can cse
3263 multiplication sequences. Note that if ACCUM is a SUBREG,
3264 we've set the inner register and must properly indicate that. */
3265 tem = op0, nmode = mode;
3266 accum_inner = accum;
3267 if (GET_CODE (accum) == SUBREG)
3269 accum_inner = SUBREG_REG (accum);
3270 nmode = GET_MODE (accum_inner);
3271 tem = gen_lowpart (nmode, op0);
3274 insn = get_last_insn ();
3275 set_dst_reg_note (insn, REG_EQUAL,
3276 gen_rtx_MULT (nmode, tem,
3277 gen_int_mode (val_so_far, nmode)),
3278 accum_inner);
3282 if (variant == negate_variant)
3284 val_so_far = -val_so_far;
3285 accum = expand_unop (mode, neg_optab, accum, target, 0);
3287 else if (variant == add_variant)
3289 val_so_far = val_so_far + 1;
3290 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3293 /* Compare only the bits of val and val_so_far that are significant
3294 in the result mode, to avoid sign-/zero-extension confusion. */
3295 nmode = GET_MODE_INNER (mode);
3296 val &= GET_MODE_MASK (nmode);
3297 val_so_far &= GET_MODE_MASK (nmode);
3298 gcc_assert (val == (HOST_WIDE_INT) val_so_far);
3300 return accum;
3303 /* Perform a multiplication and return an rtx for the result.
3304 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3305 TARGET is a suggestion for where to store the result (an rtx).
3307 We check specially for a constant integer as OP1.
3308 If you want this check for OP0 as well, then before calling
3309 you should swap the two operands if OP0 would be constant. */
3311 rtx
3312 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3313 int unsignedp)
3315 enum mult_variant variant;
3316 struct algorithm algorithm;
3317 rtx scalar_op1;
3318 int max_cost;
3319 bool speed = optimize_insn_for_speed_p ();
3320 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3322 if (CONSTANT_P (op0))
3323 std::swap (op0, op1);
3325 /* For vectors, there are several simplifications that can be made if
3326 all elements of the vector constant are identical. */
3327 scalar_op1 = unwrap_const_vec_duplicate (op1);
3329 if (INTEGRAL_MODE_P (mode))
3331 rtx fake_reg;
3332 HOST_WIDE_INT coeff;
3333 bool is_neg;
3334 int mode_bitsize;
3336 if (op1 == CONST0_RTX (mode))
3337 return op1;
3338 if (op1 == CONST1_RTX (mode))
3339 return op0;
3340 if (op1 == CONSTM1_RTX (mode))
3341 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3342 op0, target, 0);
3344 if (do_trapv)
3345 goto skip_synth;
3347 /* If mode is integer vector mode, check if the backend supports
3348 vector lshift (by scalar or vector) at all. If not, we can't use
3349 synthesized multiply. */
3350 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3351 && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3352 && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3353 goto skip_synth;
3355 /* These are the operations that are potentially turned into
3356 a sequence of shifts and additions. */
3357 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3359 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3360 less than or equal in size to `unsigned int' this doesn't matter.
3361 If the mode is larger than `unsigned int', then synth_mult works
3362 only if the constant value exactly fits in an `unsigned int' without
3363 any truncation. This means that multiplying by negative values does
3364 not work; results are off by 2^32 on a 32 bit machine. */
3365 if (CONST_INT_P (scalar_op1))
3367 coeff = INTVAL (scalar_op1);
3368 is_neg = coeff < 0;
3370 #if TARGET_SUPPORTS_WIDE_INT
3371 else if (CONST_WIDE_INT_P (scalar_op1))
3372 #else
3373 else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3374 #endif
3376 int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode));
3377 /* Perfect power of 2 (other than 1, which is handled above). */
3378 if (shift > 0)
3379 return expand_shift (LSHIFT_EXPR, mode, op0,
3380 shift, target, unsignedp);
3381 else
3382 goto skip_synth;
3384 else
3385 goto skip_synth;
3387 /* We used to test optimize here, on the grounds that it's better to
3388 produce a smaller program when -O is not used. But this causes
3389 such a terrible slowdown sometimes that it seems better to always
3390 use synth_mult. */
3392 /* Special case powers of two. */
3393 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3394 && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3395 return expand_shift (LSHIFT_EXPR, mode, op0,
3396 floor_log2 (coeff), target, unsignedp);
3398 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3400 /* Attempt to handle multiplication of DImode values by negative
3401 coefficients, by performing the multiplication by a positive
3402 multiplier and then inverting the result. */
3403 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3405 /* It's safe to use -coeff even for INT_MIN, as the
3406 result is interpreted as an unsigned coefficient.
3407 Exclude cost of op0 from max_cost to match the cost
3408 calculation of the synth_mult. */
3409 coeff = -(unsigned HOST_WIDE_INT) coeff;
3410 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3411 mode, speed)
3412 - neg_cost (speed, mode));
3413 if (max_cost <= 0)
3414 goto skip_synth;
3416 /* Special case powers of two. */
3417 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3419 rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3420 floor_log2 (coeff), target, unsignedp);
3421 return expand_unop (mode, neg_optab, temp, target, 0);
3424 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3425 max_cost))
3427 rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3428 &algorithm, variant);
3429 return expand_unop (mode, neg_optab, temp, target, 0);
3431 goto skip_synth;
3434 /* Exclude cost of op0 from max_cost to match the cost
3435 calculation of the synth_mult. */
3436 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3437 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3438 return expand_mult_const (mode, op0, coeff, target,
3439 &algorithm, variant);
3441 skip_synth:
3443 /* Expand x*2.0 as x+x. */
3444 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3445 && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
3447 op0 = force_reg (GET_MODE (op0), op0);
3448 return expand_binop (mode, add_optab, op0, op0,
3449 target, unsignedp, OPTAB_LIB_WIDEN);
3452 /* This used to use umul_optab if unsigned, but for non-widening multiply
3453 there is no difference between signed and unsigned. */
3454 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3455 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3456 gcc_assert (op0);
3457 return op0;
3460 /* Return a cost estimate for multiplying a register by the given
3461 COEFFicient in the given MODE and SPEED. */
3463 int
3464 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3466 int max_cost;
3467 struct algorithm algorithm;
3468 enum mult_variant variant;
3470 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3471 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3472 mode, speed);
3473 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3474 return algorithm.cost.cost;
3475 else
3476 return max_cost;
3479 /* Perform a widening multiplication and return an rtx for the result.
3480 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3481 TARGET is a suggestion for where to store the result (an rtx).
3482 THIS_OPTAB is the optab we should use; it must be either umul_widen_optab
3483 or smul_widen_optab.
3485 We check specially for a constant integer as OP1, comparing the
3486 cost of a widening multiply against the cost of a sequence of shifts
3487 and adds. */
3490 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3491 int unsignedp, optab this_optab)
3493 bool speed = optimize_insn_for_speed_p ();
3494 rtx cop1;
3496 if (CONST_INT_P (op1)
3497 && GET_MODE (op0) != VOIDmode
3498 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3499 this_optab == umul_widen_optab))
3500 && CONST_INT_P (cop1)
3501 && (INTVAL (cop1) >= 0
3502 || HWI_COMPUTABLE_MODE_P (mode)))
3504 HOST_WIDE_INT coeff = INTVAL (cop1);
3505 int max_cost;
3506 enum mult_variant variant;
3507 struct algorithm algorithm;
3509 if (coeff == 0)
3510 return CONST0_RTX (mode);
3512 /* Special case powers of two. */
3513 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3515 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3516 return expand_shift (LSHIFT_EXPR, mode, op0,
3517 floor_log2 (coeff), target, unsignedp);
3520 /* Exclude cost of op0 from max_cost to match the cost
3521 calculation of the synth_mult. */
3522 max_cost = mul_widen_cost (speed, mode);
3523 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3524 max_cost))
3526 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3527 return expand_mult_const (mode, op0, coeff, target,
3528 &algorithm, variant);
3531 return expand_binop (mode, this_optab, op0, op1, target,
3532 unsignedp, OPTAB_LIB_WIDEN);
3535 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3536 replace division by D, and put the least significant N bits of the result
3537 in *MULTIPLIER_PTR and return the most significant bit.
3539 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT); the
3540 needed precision is in PRECISION (should be <= N).
3542 PRECISION should be as small as possible so this function can choose
3543 a multiplier more freely.
3545 The rounded-up logarithm of D is placed in *LGUP_PTR. A shift count that
3546 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3548 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3549 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
3551 unsigned HOST_WIDE_INT
3552 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3553 unsigned HOST_WIDE_INT *multiplier_ptr,
3554 int *post_shift_ptr, int *lgup_ptr)
3556 int lgup, post_shift;
3557 int pow, pow2;
3559 /* lgup = ceil(log2(divisor)); */
3560 lgup = ceil_log2 (d);
3562 gcc_assert (lgup <= n);
3564 pow = n + lgup;
3565 pow2 = n + lgup - precision;
3567 /* mlow = 2^(N + lgup)/d */
3568 wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3569 wide_int mlow = wi::udiv_trunc (val, d);
3571 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3572 val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3573 wide_int mhigh = wi::udiv_trunc (val, d);
3575 /* If precision == N, then mlow, mhigh exceed 2^N
3576 (but they do not exceed 2^(N+1)). */
3578 /* Reduce to lowest terms. */
3579 for (post_shift = lgup; post_shift > 0; post_shift--)
3581 unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3582 HOST_BITS_PER_WIDE_INT);
3583 unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3584 HOST_BITS_PER_WIDE_INT);
3585 if (ml_lo >= mh_lo)
3586 break;
3588 mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3589 mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3592 *post_shift_ptr = post_shift;
3593 *lgup_ptr = lgup;
3594 if (n < HOST_BITS_PER_WIDE_INT)
3596 unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;
3597 *multiplier_ptr = mhigh.to_uhwi () & mask;
3598 return mhigh.to_uhwi () >= mask;
3600 else
3602 *multiplier_ptr = mhigh.to_uhwi ();
3603 return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
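
/* Illustrative sketch (not part of the original file): for N == 32 and
   PRECISION == 32, choose_multiplier (3, 32, 32, ...) produces the multiplier
   0xAAAAAAAB with *POST_SHIFT_PTR == 1 and a zero return value, while
   choose_multiplier (7, 32, 32, ...) produces 0x24924925 with
   *POST_SHIFT_PTR == 3 and returns 1, because the real multiplier needs 33
   bits.  The hypothetical helpers below show how those two outcomes are used;
   they assume a 32-bit unsigned int and a 64-bit unsigned long long.  */

static unsigned int
example_udiv3 (unsigned int x)
{
  /* Returned bit == 0: quotient is a high-part multiply plus one shift.  */
  unsigned int hi = (unsigned long long) x * 0xAAAAAAABu >> 32;
  return hi >> 1;                              /* == x / 3 for all 32-bit x */
}

static unsigned int
example_udiv7 (unsigned int x)
{
  /* Returned bit == 1: the multiplier is really 2^32 + 0x24924925, so the
     "missing" x is added back before shifting by POST_SHIFT - 1.  */
  unsigned int hi = (unsigned long long) x * 0x24924925u >> 32;
  return (((x - hi) >> 1) + hi) >> 2;          /* == x / 7 for all 32-bit x */
}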
3607 /* Compute the inverse of X mod 2**N, i.e., find Y such that X * Y is
3608 congruent to 1 (mod 2**N). */
3610 static unsigned HOST_WIDE_INT
3611 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3613 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3615 /* The algorithm notes that the choice y = x satisfies
3616 x*y == 1 mod 2^3, since x is assumed odd.
3617 Each iteration doubles the number of bits of significance in y. */
3619 unsigned HOST_WIDE_INT mask;
3620 unsigned HOST_WIDE_INT y = x;
3621 int nbit = 3;
3623 mask = (n == HOST_BITS_PER_WIDE_INT
3624 ? HOST_WIDE_INT_M1U
3625 : (HOST_WIDE_INT_1U << n) - 1);
3627 while (nbit < n)
3629 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3630 nbit *= 2;
3632 return y;
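
/* Illustrative sketch (not part of the original file): the iteration above
   gives, for example, invert_mod2n (3, 32) == 0xAAAAAAAB, since
   3 * 0xAAAAAAAB == 0x200000001 == 1 (mod 2^32).  The EXACT_DIV_EXPR case of
   expand_divmod uses this: an exact division by an odd constant is just a
   multiplication by that constant's inverse modulo 2^N.  The helper name is
   hypothetical; it assumes a 32-bit unsigned int.  */

static unsigned int
example_exact_div3 (unsigned int x)
{
  /* Correct only when x is known to be a multiple of 3, e.g. 21 -> 7.  */
  return x * 0xAAAAAAABu;
}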
3635 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3636 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3637 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3638 to become unsigned; if UNSIGNEDP is zero, adjust the unsigned product to
3639 become signed.
3641 The result is put in TARGET if that is convenient.
3643 MODE is the mode of operation. */
3646 expand_mult_highpart_adjust (scalar_int_mode mode, rtx adj_operand, rtx op0,
3647 rtx op1, rtx target, int unsignedp)
3649 rtx tem;
3650 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3652 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3653 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3654 tem = expand_and (mode, tem, op1, NULL_RTX);
3655 adj_operand
3656 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3657 adj_operand);
3659 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3660 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3661 tem = expand_and (mode, tem, op0, NULL_RTX);
3662 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3663 target);
3665 return target;
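
/* Illustrative sketch (not part of the original file): the adjustment above
   implements the identity, for N-bit two's-complement operands,

     uhigh (a, b) == shigh (a, b) + (a < 0 ? b : 0) + (b < 0 ? a : 0)  (mod 2^N)

   so a high-part product of the wrong signedness is repaired with two
   sign-mask ANDs and two additions (or subtractions, in the other direction).
   The hypothetical check below assumes 32-bit int and 64-bit long long.  */

static unsigned int
example_highpart_adjust (int a, int b)
{
  unsigned int uhigh = (unsigned long long) (unsigned int) a
		       * (unsigned int) b >> 32;
  unsigned int shigh = (unsigned long long) ((long long) a * b) >> 32;
  unsigned int adjusted = shigh
			  + (a < 0 ? (unsigned int) b : 0)
			  + (b < 0 ? (unsigned int) a : 0);
  return uhigh == adjusted;   /* always 1 */
}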
3668 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3670 static rtx
3671 extract_high_half (scalar_int_mode mode, rtx op)
3673 if (mode == word_mode)
3674 return gen_highpart (mode, op);
3676 scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3678 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3679 GET_MODE_BITSIZE (mode), 0, 1);
3680 return convert_modes (mode, wider_mode, op, 0);
3683 /* Like expmed_mult_highpart, but only consider using a multiplication
3684 optab. OP1 is an rtx for the constant operand. */
3686 static rtx
3687 expmed_mult_highpart_optab (scalar_int_mode mode, rtx op0, rtx op1,
3688 rtx target, int unsignedp, int max_cost)
3690 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3691 optab moptab;
3692 rtx tem;
3693 int size;
3694 bool speed = optimize_insn_for_speed_p ();
3696 scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3698 size = GET_MODE_BITSIZE (mode);
3700 /* Firstly, try using a multiplication insn that only generates the needed
3701 high part of the product, and in the sign flavor of unsignedp. */
3702 if (mul_highpart_cost (speed, mode) < max_cost)
3704 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3705 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3706 unsignedp, OPTAB_DIRECT);
3707 if (tem)
3708 return tem;
3711 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3712 Need to adjust the result after the multiplication. */
3713 if (size - 1 < BITS_PER_WORD
3714 && (mul_highpart_cost (speed, mode)
3715 + 2 * shift_cost (speed, mode, size-1)
3716 + 4 * add_cost (speed, mode) < max_cost))
3718 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3719 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3720 unsignedp, OPTAB_DIRECT);
3721 if (tem)
3722 /* We used the wrong signedness. Adjust the result. */
3723 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3724 tem, unsignedp);
3727 /* Try widening multiplication. */
3728 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3729 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3730 && mul_widen_cost (speed, wider_mode) < max_cost)
3732 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3733 unsignedp, OPTAB_WIDEN);
3734 if (tem)
3735 return extract_high_half (mode, tem);
3738 /* Try widening the mode and perform a non-widening multiplication. */
3739 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3740 && size - 1 < BITS_PER_WORD
3741 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3742 < max_cost))
3744 rtx_insn *insns;
3745 rtx wop0, wop1;
3747 /* We need to widen the operands, for example to ensure the
3748 constant multiplier is correctly sign or zero extended.
3749 Use a sequence to clean up any instructions emitted by
3750 the conversions if things don't work out. */
3751 start_sequence ();
3752 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3753 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3754 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3755 unsignedp, OPTAB_WIDEN);
3756 insns = get_insns ();
3757 end_sequence ();
3759 if (tem)
3761 emit_insn (insns);
3762 return extract_high_half (mode, tem);
3766 /* Try widening multiplication of opposite signedness, and adjust. */
3767 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3768 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3769 && size - 1 < BITS_PER_WORD
3770 && (mul_widen_cost (speed, wider_mode)
3771 + 2 * shift_cost (speed, mode, size-1)
3772 + 4 * add_cost (speed, mode) < max_cost))
3774 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3775 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3776 if (tem != 0)
3778 tem = extract_high_half (mode, tem);
3779 /* We used the wrong signedness. Adjust the result. */
3780 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3781 target, unsignedp);
3785 return 0;
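
/* Illustrative sketch (not part of the original file): throughout this code
   the "high part" of a product means the upper half of the double-width
   result.  For a 32-bit mode and unsigned operands it is simply the following
   (assuming a 32-bit unsigned int and a 64-bit unsigned long long); all of
   the strategies tried above are just different ways of obtaining this value
   without materializing a full double-width multiply in MODE.  */

static unsigned int
example_umul_highpart (unsigned int a, unsigned int b)
{
  return (unsigned long long) a * b >> 32;
}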
3788 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3789 putting the high half of the result in TARGET if that is convenient,
3790 and return where the result is. If the operation cannot be performed,
3791 0 is returned.
3793 MODE is the mode of operation and result.
3795 UNSIGNEDP nonzero means unsigned multiply.
3797 MAX_COST is the total allowed cost for the expanded RTL. */
3799 static rtx
3800 expmed_mult_highpart (scalar_int_mode mode, rtx op0, rtx op1,
3801 rtx target, int unsignedp, int max_cost)
3803 unsigned HOST_WIDE_INT cnst1;
3804 int extra_cost;
3805 bool sign_adjust = false;
3806 enum mult_variant variant;
3807 struct algorithm alg;
3808 rtx tem;
3809 bool speed = optimize_insn_for_speed_p ();
3811 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3812 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3814 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3816 /* We can't optimize modes wider than BITS_PER_WORD.
3817 ??? We might be able to perform double-word arithmetic if
3818 mode == word_mode; however, all the cost calculations in
3819 synth_mult etc. assume single-word operations. */
3820 scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3821 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3822 return expmed_mult_highpart_optab (mode, op0, op1, target,
3823 unsignedp, max_cost);
3825 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3827 /* Check whether we try to multiply by a negative constant. */
3828 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3830 sign_adjust = true;
3831 extra_cost += add_cost (speed, mode);
3834 /* See whether shift/add multiplication is cheap enough. */
3835 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3836 max_cost - extra_cost))
3838 /* See whether the specialized multiplication optabs are
3839 cheaper than the shift/add version. */
3840 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3841 alg.cost.cost + extra_cost);
3842 if (tem)
3843 return tem;
3845 tem = convert_to_mode (wider_mode, op0, unsignedp);
3846 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3847 tem = extract_high_half (mode, tem);
3849 /* Adjust result for signedness. */
3850 if (sign_adjust)
3851 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3853 return tem;
3855 return expmed_mult_highpart_optab (mode, op0, op1, target,
3856 unsignedp, max_cost);
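
/* Illustrative sketch (not part of the original file): the sign_adjust step
   above relies on the fact that masking a negative constant C with
   GET_MODE_MASK (mode) adds exactly 2^N to it, and that extra 2^N contributes
   exactly OP0 to the high half of the product, i.e.

     shigh (a, c) == high (a, c & GET_MODE_MASK) - a     when c < 0.

   The hypothetical check below assumes 32-bit int and 64-bit long long.  */

static unsigned int
example_sign_adjust_highpart (int a)
{
  const int c = -7;
  /* What the synthesized wider-mode multiply by the masked constant yields.  */
  unsigned int computed
    = (unsigned long long) ((long long) a * (unsigned int) c) >> 32;
  /* The signed high part we actually wanted.  */
  unsigned int shigh = (unsigned long long) ((long long) a * c) >> 32;
  return (unsigned int) (computed - (unsigned int) a) == shigh;   /* always 1 */
}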
3860 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3862 static rtx
3863 expand_smod_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
3865 rtx result, temp, shift;
3866 rtx_code_label *label;
3867 int logd;
3868 int prec = GET_MODE_PRECISION (mode);
3870 logd = floor_log2 (d);
3871 result = gen_reg_rtx (mode);
3873 /* Avoid conditional branches when they're expensive. */
3874 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3875 && optimize_insn_for_speed_p ())
3877 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3878 mode, 0, -1);
3879 if (signmask)
3881 HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
3882 signmask = force_reg (mode, signmask);
3883 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3885 /* Use the rtx_cost of an LSHIFTRT instruction to determine
3886 which instruction sequence to use. If logical right shifts
3887 are expensive, then use 2 XORs, 2 SUBs and an AND; otherwise
3888 use an LSHIFTRT, 1 ADD, 1 SUB and an AND. */
3890 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3891 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3892 || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
3893 > COSTS_N_INSNS (2)))
3895 temp = expand_binop (mode, xor_optab, op0, signmask,
3896 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3897 temp = expand_binop (mode, sub_optab, temp, signmask,
3898 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3899 temp = expand_binop (mode, and_optab, temp,
3900 gen_int_mode (masklow, mode),
3901 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3902 temp = expand_binop (mode, xor_optab, temp, signmask,
3903 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3904 temp = expand_binop (mode, sub_optab, temp, signmask,
3905 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3907 else
3909 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3910 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3911 signmask = force_reg (mode, signmask);
3913 temp = expand_binop (mode, add_optab, op0, signmask,
3914 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3915 temp = expand_binop (mode, and_optab, temp,
3916 gen_int_mode (masklow, mode),
3917 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3918 temp = expand_binop (mode, sub_optab, temp, signmask,
3919 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3921 return temp;
3925 /* Mask contains the mode's signbit and the significant bits of the
3926 modulus. By including the signbit in the operation, many targets
3927 can avoid an explicit compare operation in the following comparison
3928 against zero. */
3929 wide_int mask = wi::mask (logd, false, prec);
3930 mask = wi::set_bit (mask, prec - 1);
3932 temp = expand_binop (mode, and_optab, op0,
3933 immed_wide_int_const (mask, mode),
3934 result, 1, OPTAB_LIB_WIDEN);
3935 if (temp != result)
3936 emit_move_insn (result, temp);
3938 label = gen_label_rtx ();
3939 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3941 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3942 0, OPTAB_LIB_WIDEN);
3944 mask = wi::mask (logd, true, prec);
3945 temp = expand_binop (mode, ior_optab, temp,
3946 immed_wide_int_const (mask, mode),
3947 result, 1, OPTAB_LIB_WIDEN);
3948 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3949 0, OPTAB_LIB_WIDEN);
3950 if (temp != result)
3951 emit_move_insn (result, temp);
3952 emit_label (label);
3953 return result;
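
/* Illustrative sketch (not part of the original file): the branch-free paths
   above compute the truncating signed remainder by d == 2^logd as

     bias = (x < 0 ? d - 1 : 0);   r = ((x + bias) & (d - 1)) - bias;

   with BIAS materialized from a sign mask instead of a branch.  The
   hypothetical helper assumes 32-bit int, 0 < logd < 32, and an arithmetic
   right shift of negative values.  */

static int
example_smod_pow2 (int x, int logd)
{
  int d_minus_1 = (int) ((1u << logd) - 1);
  int bias = (x >> 31) & d_minus_1;        /* d - 1 if x < 0, else 0 */
  return ((x + bias) & d_minus_1) - bias;  /* == x % (1 << logd) */
}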
3956 /* Expand signed division of OP0 by a power of two D in mode MODE.
3957 This routine is only called for positive values of D. */
3959 static rtx
3960 expand_sdiv_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
3962 rtx temp;
3963 rtx_code_label *label;
3964 int logd;
3966 logd = floor_log2 (d);
3968 if (d == 2
3969 && BRANCH_COST (optimize_insn_for_speed_p (),
3970 false) >= 1)
3972 temp = gen_reg_rtx (mode);
3973 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3974 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3975 0, OPTAB_LIB_WIDEN);
3976 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3979 if (HAVE_conditional_move
3980 && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
3982 rtx temp2;
3984 start_sequence ();
3985 temp2 = copy_to_mode_reg (mode, op0);
3986 temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
3987 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3988 temp = force_reg (mode, temp);
3990 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3991 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3992 mode, temp, temp2, mode, 0);
3993 if (temp2)
3995 rtx_insn *seq = get_insns ();
3996 end_sequence ();
3997 emit_insn (seq);
3998 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
4000 end_sequence ();
4003 if (BRANCH_COST (optimize_insn_for_speed_p (),
4004 false) >= 2)
4006 int ushift = GET_MODE_BITSIZE (mode) - logd;
4008 temp = gen_reg_rtx (mode);
4009 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
4010 if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
4011 || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
4012 > COSTS_N_INSNS (1))
4013 temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
4014 NULL_RTX, 0, OPTAB_LIB_WIDEN);
4015 else
4016 temp = expand_shift (RSHIFT_EXPR, mode, temp,
4017 ushift, NULL_RTX, 1);
4018 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4019 0, OPTAB_LIB_WIDEN);
4020 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4023 label = gen_label_rtx ();
4024 temp = copy_to_mode_reg (mode, op0);
4025 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
4026 expand_inc (temp, gen_int_mode (d - 1, mode));
4027 emit_label (label);
4028 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
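
/* Illustrative sketch (not part of the original file): every variant above
   computes the truncating signed division by d == 2^logd as

     q = (x + (x < 0 ? d - 1 : 0)) >> logd;

   and they differ only in how the conditional bias is produced (store-flag,
   conditional move, sign mask, or an explicit branch).  The hypothetical
   helper assumes 32-bit int, 0 < logd < 32, and an arithmetic right shift of
   negative values.  */

static int
example_sdiv_pow2 (int x, int logd)
{
  /* Sign mask shifted down so it holds d - 1 when x is negative.  */
  int bias = (int) ((unsigned int) (x >> 31) >> (32 - logd));
  return (x + bias) >> logd;       /* == x / (1 << logd), truncating */
}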
4031 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
4032 if that is convenient, and returning where the result is.
4033 You may request either the quotient or the remainder as the result;
4034 specify REM_FLAG nonzero to get the remainder.
4036 CODE is the expression code for which kind of division this is;
4037 it controls how rounding is done. MODE is the machine mode to use.
4038 UNSIGNEDP nonzero means do unsigned division. */
4040 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
4041 and then correct it by or'ing in missing high bits
4042 if result of ANDI is nonzero.
4043 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
4044 This could optimize to a bfexts instruction.
4045 But C doesn't use these operations, so their optimizations are
4046 left for later. */
4047 /* ??? For modulo, we don't actually need the highpart of the first product,
4048 the low part will do nicely. And for small divisors, the second multiply
4049 can also be a low-part only multiply or even be completely left out.
4050 E.g. to calculate the remainder of a division by 3 with a 32 bit
4051 multiply, multiply with 0x55555556 and extract the upper two bits;
4052 the result is exact for inputs up to 0x1fffffff.
4053 The input range can be reduced by using cross-sum rules.
4054 For odd divisors >= 3, the following table gives right shift counts
4055 so that if a number is shifted by an integer multiple of the given
4056 amount, the remainder stays the same:
4057 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
4058 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
4059 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
4060 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
4061 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
4063 Cross-sum rules for even numbers can be derived by leaving as many bits
4064 to the right alone as the divisor has zeros to the right.
4065 E.g. if x is an unsigned 32 bit number:
4066 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
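
/* Illustrative sketch (not part of the original file): the remainder-by-3
   trick from the note above, for unsigned 32-bit x.  0x55555556 is 2^32 / 3
   rounded, so the low 32 bits of x * 0x55555556 are approximately
   (x mod 3) * 2^32 / 3, and their top two bits recover the remainder; as the
   note says, this is exact only for x <= 0x1fffffff.  Assumes a 32-bit
   unsigned int.  */

static unsigned int
example_rem3_by_mult (unsigned int x)
{
  return (x * 0x55555556u) >> 30;   /* == x % 3 for x <= 0x1fffffff */
}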
4070 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
4071 rtx op0, rtx op1, rtx target, int unsignedp)
4073 machine_mode compute_mode;
4074 rtx tquotient;
4075 rtx quotient = 0, remainder = 0;
4076 rtx_insn *last;
4077 rtx_insn *insn;
4078 optab optab1, optab2;
4079 int op1_is_constant, op1_is_pow2 = 0;
4080 int max_cost, extra_cost;
4081 static HOST_WIDE_INT last_div_const = 0;
4082 bool speed = optimize_insn_for_speed_p ();
4084 op1_is_constant = CONST_INT_P (op1);
4085 if (op1_is_constant)
4087 wide_int ext_op1 = rtx_mode_t (op1, mode);
4088 op1_is_pow2 = (wi::popcount (ext_op1) == 1
4089 || (! unsignedp
4090 && wi::popcount (wi::neg (ext_op1)) == 1));
4094 This is the structure of expand_divmod:
4096 First comes code to fix up the operands so we can perform the operations
4097 correctly and efficiently.
4099 Second comes a switch statement with code specific for each rounding mode.
4100 For some special operands this code emits all RTL for the desired
4101 operation, for other cases, it generates only a quotient and stores it in
4102 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
4103 to indicate that it has not done anything.
4105 Last comes code that finishes the operation. If QUOTIENT is set and
4106 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
4107 QUOTIENT is not set, it is computed using trunc rounding.
4109 We try to generate special code for division and remainder when OP1 is a
4110 constant. If |OP1| = 2**n we can use shifts and some other fast
4111 operations. For other values of OP1, we compute a carefully selected
4112 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4113 by m.
4115 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4116 half of the product. Different strategies for generating the product are
4117 implemented in expmed_mult_highpart.
4119 If what we actually want is the remainder, we generate that by another
4120 by-constant multiplication and a subtraction. */
4122 /* We shouldn't be called with OP1 == const1_rtx, but some of the
4123 code below will malfunction if we are, so check here and handle
4124 the special case if so. */
4125 if (op1 == const1_rtx)
4126 return rem_flag ? const0_rtx : op0;
4128 /* When dividing by -1, we could get an overflow.
4129 negv_optab can handle overflows. */
4130 if (! unsignedp && op1 == constm1_rtx)
4132 if (rem_flag)
4133 return const0_rtx;
4134 return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4135 ? negv_optab : neg_optab, op0, target, 0);
4138 if (target
4139 /* Don't use the function value register as a target
4140 since we have to read it as well as write it,
4141 and function-inlining gets confused by this. */
4142 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4143 /* Don't clobber an operand while doing a multi-step calculation. */
4144 || ((rem_flag || op1_is_constant)
4145 && (reg_mentioned_p (target, op0)
4146 || (MEM_P (op0) && MEM_P (target))))
4147 || reg_mentioned_p (target, op1)
4148 || (MEM_P (op1) && MEM_P (target))))
4149 target = 0;
4151 /* Get the mode in which to perform this computation. Normally it will
4152 be MODE, but sometimes we can't do the desired operation in MODE.
4153 If so, pick a wider mode in which we can do the operation. Convert
4154 to that mode at the start to avoid repeated conversions.
4156 First see what operations we need. These depend on the expression
4157 we are evaluating. (We assume that divxx3 insns exist under the
4158 same conditions as modxx3 insns do, and that these insns don't normally
4159 fail. If these assumptions are not correct, we may generate less
4160 efficient code in some cases.)
4162 Then see if we find a mode in which we can open-code that operation
4163 (either a division, modulus, or shift). Finally, check for the smallest
4164 mode for which we can do the operation with a library call. */
4166 /* We might want to refine this now that we have division-by-constant
4167 optimization. Since expmed_mult_highpart tries so many variants, it is
4168 not straightforward to generalize this. Maybe we should make an array
4169 of possible modes in init_expmed? Save this for GCC 2.7. */
4171 optab1 = (op1_is_pow2
4172 ? (unsignedp ? lshr_optab : ashr_optab)
4173 : (unsignedp ? udiv_optab : sdiv_optab));
4174 optab2 = (op1_is_pow2 ? optab1
4175 : (unsignedp ? udivmod_optab : sdivmod_optab));
4177 FOR_EACH_MODE_FROM (compute_mode, mode)
4178 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4179 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4180 break;
4182 if (compute_mode == VOIDmode)
4183 FOR_EACH_MODE_FROM (compute_mode, mode)
4184 if (optab_libfunc (optab1, compute_mode)
4185 || optab_libfunc (optab2, compute_mode))
4186 break;
4188 /* If we still couldn't find a mode, use MODE, but expand_binop will
4189 probably die. */
4190 if (compute_mode == VOIDmode)
4191 compute_mode = mode;
4193 if (target && GET_MODE (target) == compute_mode)
4194 tquotient = target;
4195 else
4196 tquotient = gen_reg_rtx (compute_mode);
4198 #if 0
4199 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4200 (mode), and thereby get better code when OP1 is a constant. Do that
4201 later. It will require going over all usages of SIZE below. */
4202 size = GET_MODE_BITSIZE (mode);
4203 #endif
4205 /* Only deduct something for a REM if the last divide done was
4206 for a different constant. Then set the constant of the last
4207 divide. */
4208 max_cost = (unsignedp
4209 ? udiv_cost (speed, compute_mode)
4210 : sdiv_cost (speed, compute_mode));
4211 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4212 && INTVAL (op1) == last_div_const))
4213 max_cost -= (mul_cost (speed, compute_mode)
4214 + add_cost (speed, compute_mode));
4216 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4218 /* Now convert to the best mode to use. */
4219 if (compute_mode != mode)
4221 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4222 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4224 /* convert_modes may have placed op1 into a register, so we
4225 must recompute the following. */
4226 op1_is_constant = CONST_INT_P (op1);
4227 if (op1_is_constant)
4229 wide_int ext_op1 = rtx_mode_t (op1, compute_mode);
4230 op1_is_pow2 = (wi::popcount (ext_op1) == 1
4231 || (! unsignedp
4232 && wi::popcount (wi::neg (ext_op1)) == 1));
4234 else
4235 op1_is_pow2 = 0;
4238 /* If one of the operands is a volatile MEM, copy it into a register. */
4240 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4241 op0 = force_reg (compute_mode, op0);
4242 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4243 op1 = force_reg (compute_mode, op1);
4245 /* If we need the remainder or if OP1 is constant, we need to
4246 put OP0 in a register in case it has any queued subexpressions. */
4247 if (rem_flag || op1_is_constant)
4248 op0 = force_reg (compute_mode, op0);
4250 last = get_last_insn ();
4252 /* Promote floor rounding to trunc rounding for unsigned operations. */
4253 if (unsignedp)
4255 if (code == FLOOR_DIV_EXPR)
4256 code = TRUNC_DIV_EXPR;
4257 if (code == FLOOR_MOD_EXPR)
4258 code = TRUNC_MOD_EXPR;
4259 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4260 code = TRUNC_DIV_EXPR;
4263 if (op1 != const0_rtx)
4264 switch (code)
4266 case TRUNC_MOD_EXPR:
4267 case TRUNC_DIV_EXPR:
4268 if (op1_is_constant)
4270 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4271 int size = GET_MODE_BITSIZE (int_mode);
4272 if (unsignedp)
4274 unsigned HOST_WIDE_INT mh, ml;
4275 int pre_shift, post_shift;
4276 int dummy;
4277 wide_int wd = rtx_mode_t (op1, int_mode);
4278 unsigned HOST_WIDE_INT d = wd.to_uhwi ();
4280 if (wi::popcount (wd) == 1)
4282 pre_shift = floor_log2 (d);
4283 if (rem_flag)
4285 unsigned HOST_WIDE_INT mask
4286 = (HOST_WIDE_INT_1U << pre_shift) - 1;
4287 remainder
4288 = expand_binop (int_mode, and_optab, op0,
4289 gen_int_mode (mask, int_mode),
4290 remainder, 1,
4291 OPTAB_LIB_WIDEN);
4292 if (remainder)
4293 return gen_lowpart (mode, remainder);
4295 quotient = expand_shift (RSHIFT_EXPR, int_mode, op0,
4296 pre_shift, tquotient, 1);
4298 else if (size <= HOST_BITS_PER_WIDE_INT)
4300 if (d >= (HOST_WIDE_INT_1U << (size - 1)))
4302 /* Most significant bit of divisor is set; emit an scc
4303 insn. */
4304 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4305 int_mode, 1, 1);
4307 else
4309 /* Find a suitable multiplier and right shift count
4310 instead of multiplying with D. */
4312 mh = choose_multiplier (d, size, size,
4313 &ml, &post_shift, &dummy);
4315 /* If the suggested multiplier is more than SIZE bits,
4316 we can do better for even divisors, using an
4317 initial right shift. */
4318 if (mh != 0 && (d & 1) == 0)
4320 pre_shift = ctz_or_zero (d);
4321 mh = choose_multiplier (d >> pre_shift, size,
4322 size - pre_shift,
4323 &ml, &post_shift, &dummy);
4324 gcc_assert (!mh);
4326 else
4327 pre_shift = 0;
4329 if (mh != 0)
4331 rtx t1, t2, t3, t4;
4333 if (post_shift - 1 >= BITS_PER_WORD)
4334 goto fail1;
4336 extra_cost
4337 = (shift_cost (speed, int_mode, post_shift - 1)
4338 + shift_cost (speed, int_mode, 1)
4339 + 2 * add_cost (speed, int_mode));
4340 t1 = expmed_mult_highpart
4341 (int_mode, op0, gen_int_mode (ml, int_mode),
4342 NULL_RTX, 1, max_cost - extra_cost);
4343 if (t1 == 0)
4344 goto fail1;
4345 t2 = force_operand (gen_rtx_MINUS (int_mode,
4346 op0, t1),
4347 NULL_RTX);
4348 t3 = expand_shift (RSHIFT_EXPR, int_mode,
4349 t2, 1, NULL_RTX, 1);
4350 t4 = force_operand (gen_rtx_PLUS (int_mode,
4351 t1, t3),
4352 NULL_RTX);
4353 quotient = expand_shift
4354 (RSHIFT_EXPR, int_mode, t4,
4355 post_shift - 1, tquotient, 1);
4357 else
4359 rtx t1, t2;
4361 if (pre_shift >= BITS_PER_WORD
4362 || post_shift >= BITS_PER_WORD)
4363 goto fail1;
4365 t1 = expand_shift
4366 (RSHIFT_EXPR, int_mode, op0,
4367 pre_shift, NULL_RTX, 1);
4368 extra_cost
4369 = (shift_cost (speed, int_mode, pre_shift)
4370 + shift_cost (speed, int_mode, post_shift));
4371 t2 = expmed_mult_highpart
4372 (int_mode, t1,
4373 gen_int_mode (ml, int_mode),
4374 NULL_RTX, 1, max_cost - extra_cost);
4375 if (t2 == 0)
4376 goto fail1;
4377 quotient = expand_shift
4378 (RSHIFT_EXPR, int_mode, t2,
4379 post_shift, tquotient, 1);
4383 else /* Mode too wide to use the tricky code */
4384 break;
4386 insn = get_last_insn ();
4387 if (insn != last)
4388 set_dst_reg_note (insn, REG_EQUAL,
4389 gen_rtx_UDIV (int_mode, op0, op1),
4390 quotient);
4392 else /* TRUNC_DIV, signed */
4394 unsigned HOST_WIDE_INT ml;
4395 int lgup, post_shift;
4396 rtx mlr;
4397 HOST_WIDE_INT d = INTVAL (op1);
4398 unsigned HOST_WIDE_INT abs_d;
4400 /* Since d might be INT_MIN, we have to cast to
4401 unsigned HOST_WIDE_INT before negating to avoid
4402 undefined signed overflow. */
4403 abs_d = (d >= 0
4404 ? (unsigned HOST_WIDE_INT) d
4405 : - (unsigned HOST_WIDE_INT) d);
4407 /* n rem d = n rem -d */
4408 if (rem_flag && d < 0)
4410 d = abs_d;
4411 op1 = gen_int_mode (abs_d, int_mode);
4414 if (d == 1)
4415 quotient = op0;
4416 else if (d == -1)
4417 quotient = expand_unop (int_mode, neg_optab, op0,
4418 tquotient, 0);
4419 else if (size <= HOST_BITS_PER_WIDE_INT
4420 && abs_d == HOST_WIDE_INT_1U << (size - 1))
4422 /* This case is not handled correctly below. */
4423 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4424 int_mode, 1, 1);
4425 if (quotient == 0)
4426 goto fail1;
4428 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4429 && (size <= HOST_BITS_PER_WIDE_INT || d >= 0)
4430 && (rem_flag
4431 ? smod_pow2_cheap (speed, int_mode)
4432 : sdiv_pow2_cheap (speed, int_mode))
4433 /* We assume that the cheap metric is true if the
4434 optab has an expander for this mode. */
4435 && ((optab_handler ((rem_flag ? smod_optab
4436 : sdiv_optab),
4437 int_mode)
4438 != CODE_FOR_nothing)
4439 || (optab_handler (sdivmod_optab, int_mode)
4440 != CODE_FOR_nothing)))
4442 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d)
4443 && (size <= HOST_BITS_PER_WIDE_INT
4444 || abs_d != (unsigned HOST_WIDE_INT) d))
4446 if (rem_flag)
4448 remainder = expand_smod_pow2 (int_mode, op0, d);
4449 if (remainder)
4450 return gen_lowpart (mode, remainder);
4453 if (sdiv_pow2_cheap (speed, int_mode)
4454 && ((optab_handler (sdiv_optab, int_mode)
4455 != CODE_FOR_nothing)
4456 || (optab_handler (sdivmod_optab, int_mode)
4457 != CODE_FOR_nothing)))
4458 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4459 int_mode, op0,
4460 gen_int_mode (abs_d,
4461 int_mode),
4462 NULL_RTX, 0);
4463 else
4464 quotient = expand_sdiv_pow2 (int_mode, op0, abs_d);
4466 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4467 negate the quotient. */
4468 if (d < 0)
4470 insn = get_last_insn ();
4471 if (insn != last
4472 && abs_d < (HOST_WIDE_INT_1U
4473 << (HOST_BITS_PER_WIDE_INT - 1)))
4474 set_dst_reg_note (insn, REG_EQUAL,
4475 gen_rtx_DIV (int_mode, op0,
4476 gen_int_mode
4477 (abs_d,
4478 int_mode)),
4479 quotient);
4481 quotient = expand_unop (int_mode, neg_optab,
4482 quotient, quotient, 0);
4485 else if (size <= HOST_BITS_PER_WIDE_INT)
4487 choose_multiplier (abs_d, size, size - 1,
4488 &ml, &post_shift, &lgup);
4489 if (ml < HOST_WIDE_INT_1U << (size - 1))
4491 rtx t1, t2, t3;
4493 if (post_shift >= BITS_PER_WORD
4494 || size - 1 >= BITS_PER_WORD)
4495 goto fail1;
4497 extra_cost = (shift_cost (speed, int_mode, post_shift)
4498 + shift_cost (speed, int_mode, size - 1)
4499 + add_cost (speed, int_mode));
4500 t1 = expmed_mult_highpart
4501 (int_mode, op0, gen_int_mode (ml, int_mode),
4502 NULL_RTX, 0, max_cost - extra_cost);
4503 if (t1 == 0)
4504 goto fail1;
4505 t2 = expand_shift
4506 (RSHIFT_EXPR, int_mode, t1,
4507 post_shift, NULL_RTX, 0);
4508 t3 = expand_shift
4509 (RSHIFT_EXPR, int_mode, op0,
4510 size - 1, NULL_RTX, 0);
4511 if (d < 0)
4512 quotient
4513 = force_operand (gen_rtx_MINUS (int_mode, t3, t2),
4514 tquotient);
4515 else
4516 quotient
4517 = force_operand (gen_rtx_MINUS (int_mode, t2, t3),
4518 tquotient);
4520 else
4522 rtx t1, t2, t3, t4;
4524 if (post_shift >= BITS_PER_WORD
4525 || size - 1 >= BITS_PER_WORD)
4526 goto fail1;
4528 ml |= HOST_WIDE_INT_M1U << (size - 1);
4529 mlr = gen_int_mode (ml, int_mode);
4530 extra_cost = (shift_cost (speed, int_mode, post_shift)
4531 + shift_cost (speed, int_mode, size - 1)
4532 + 2 * add_cost (speed, int_mode));
4533 t1 = expmed_mult_highpart (int_mode, op0, mlr,
4534 NULL_RTX, 0,
4535 max_cost - extra_cost);
4536 if (t1 == 0)
4537 goto fail1;
4538 t2 = force_operand (gen_rtx_PLUS (int_mode, t1, op0),
4539 NULL_RTX);
4540 t3 = expand_shift
4541 (RSHIFT_EXPR, int_mode, t2,
4542 post_shift, NULL_RTX, 0);
4543 t4 = expand_shift
4544 (RSHIFT_EXPR, int_mode, op0,
4545 size - 1, NULL_RTX, 0);
4546 if (d < 0)
4547 quotient
4548 = force_operand (gen_rtx_MINUS (int_mode, t4, t3),
4549 tquotient);
4550 else
4551 quotient
4552 = force_operand (gen_rtx_MINUS (int_mode, t3, t4),
4553 tquotient);
4556 else /* Mode too wide to use the tricky code */
4557 break;
4559 insn = get_last_insn ();
4560 if (insn != last)
4561 set_dst_reg_note (insn, REG_EQUAL,
4562 gen_rtx_DIV (int_mode, op0, op1),
4563 quotient);
4565 break;
4567 fail1:
4568 delete_insns_since (last);
4569 break;
4571 case FLOOR_DIV_EXPR:
4572 case FLOOR_MOD_EXPR:
4573 /* We will come here only for signed operations. */
4574 if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
4576 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4577 int size = GET_MODE_BITSIZE (int_mode);
4578 unsigned HOST_WIDE_INT mh, ml;
4579 int pre_shift, lgup, post_shift;
4580 HOST_WIDE_INT d = INTVAL (op1);
4582 if (d > 0)
4584 /* We could just as easily deal with negative constants here,
4585 but it does not seem worth the trouble for GCC 2.6. */
4586 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4588 pre_shift = floor_log2 (d);
4589 if (rem_flag)
4591 unsigned HOST_WIDE_INT mask
4592 = (HOST_WIDE_INT_1U << pre_shift) - 1;
4593 remainder = expand_binop
4594 (int_mode, and_optab, op0,
4595 gen_int_mode (mask, int_mode),
4596 remainder, 0, OPTAB_LIB_WIDEN);
4597 if (remainder)
4598 return gen_lowpart (mode, remainder);
4600 quotient = expand_shift
4601 (RSHIFT_EXPR, int_mode, op0,
4602 pre_shift, tquotient, 0);
4604 else
4606 rtx t1, t2, t3, t4;
4608 mh = choose_multiplier (d, size, size - 1,
4609 &ml, &post_shift, &lgup);
4610 gcc_assert (!mh);
4612 if (post_shift < BITS_PER_WORD
4613 && size - 1 < BITS_PER_WORD)
4615 t1 = expand_shift
4616 (RSHIFT_EXPR, int_mode, op0,
4617 size - 1, NULL_RTX, 0);
4618 t2 = expand_binop (int_mode, xor_optab, op0, t1,
4619 NULL_RTX, 0, OPTAB_WIDEN);
4620 extra_cost = (shift_cost (speed, int_mode, post_shift)
4621 + shift_cost (speed, int_mode, size - 1)
4622 + 2 * add_cost (speed, int_mode));
4623 t3 = expmed_mult_highpart
4624 (int_mode, t2, gen_int_mode (ml, int_mode),
4625 NULL_RTX, 1, max_cost - extra_cost);
4626 if (t3 != 0)
4628 t4 = expand_shift
4629 (RSHIFT_EXPR, int_mode, t3,
4630 post_shift, NULL_RTX, 1);
4631 quotient = expand_binop (int_mode, xor_optab,
4632 t4, t1, tquotient, 0,
4633 OPTAB_WIDEN);
4638 else
4640 rtx nsign, t1, t2, t3, t4;
4641 t1 = force_operand (gen_rtx_PLUS (int_mode,
4642 op0, constm1_rtx), NULL_RTX);
4643 t2 = expand_binop (int_mode, ior_optab, op0, t1, NULL_RTX,
4644 0, OPTAB_WIDEN);
4645 nsign = expand_shift (RSHIFT_EXPR, int_mode, t2,
4646 size - 1, NULL_RTX, 0);
4647 t3 = force_operand (gen_rtx_MINUS (int_mode, t1, nsign),
4648 NULL_RTX);
4649 t4 = expand_divmod (0, TRUNC_DIV_EXPR, int_mode, t3, op1,
4650 NULL_RTX, 0);
4651 if (t4)
4653 rtx t5;
4654 t5 = expand_unop (int_mode, one_cmpl_optab, nsign,
4655 NULL_RTX, 0);
4656 quotient = force_operand (gen_rtx_PLUS (int_mode, t4, t5),
4657 tquotient);
4662 if (quotient != 0)
4663 break;
4664 delete_insns_since (last);
4666 /* Try using an instruction that produces both the quotient and
4667 remainder, using truncation. We can easily compensate the quotient
4668 or remainder to get floor rounding, once we have the remainder.
4669 Notice that we compute also the final remainder value here,
4670 and return the result right away. */
4671 if (target == 0 || GET_MODE (target) != compute_mode)
4672 target = gen_reg_rtx (compute_mode);
4674 if (rem_flag)
4676 remainder
4677 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4678 quotient = gen_reg_rtx (compute_mode);
4680 else
4682 quotient
4683 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4684 remainder = gen_reg_rtx (compute_mode);
4687 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4688 quotient, remainder, 0))
4690 /* This could be computed with a branch-less sequence.
4691 Save that for later. */
4692 rtx tem;
4693 rtx_code_label *label = gen_label_rtx ();
4694 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4695 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4696 NULL_RTX, 0, OPTAB_WIDEN);
4697 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4698 expand_dec (quotient, const1_rtx);
4699 expand_inc (remainder, op1);
4700 emit_label (label);
4701 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4704 /* No luck with division elimination or divmod. Have to do it
4705 by conditionally adjusting op0 *and* the result. */
4707 rtx_code_label *label1, *label2, *label3, *label4, *label5;
4708 rtx adjusted_op0;
4709 rtx tem;
4711 quotient = gen_reg_rtx (compute_mode);
4712 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4713 label1 = gen_label_rtx ();
4714 label2 = gen_label_rtx ();
4715 label3 = gen_label_rtx ();
4716 label4 = gen_label_rtx ();
4717 label5 = gen_label_rtx ();
4718 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4719 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4720 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4721 quotient, 0, OPTAB_LIB_WIDEN);
4722 if (tem != quotient)
4723 emit_move_insn (quotient, tem);
4724 emit_jump_insn (targetm.gen_jump (label5));
4725 emit_barrier ();
4726 emit_label (label1);
4727 expand_inc (adjusted_op0, const1_rtx);
4728 emit_jump_insn (targetm.gen_jump (label4));
4729 emit_barrier ();
4730 emit_label (label2);
4731 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4732 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4733 quotient, 0, OPTAB_LIB_WIDEN);
4734 if (tem != quotient)
4735 emit_move_insn (quotient, tem);
4736 emit_jump_insn (targetm.gen_jump (label5));
4737 emit_barrier ();
4738 emit_label (label3);
4739 expand_dec (adjusted_op0, const1_rtx);
4740 emit_label (label4);
4741 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4742 quotient, 0, OPTAB_LIB_WIDEN);
4743 if (tem != quotient)
4744 emit_move_insn (quotient, tem);
4745 expand_dec (quotient, const1_rtx);
4746 emit_label (label5);
4748 break;
4750 case CEIL_DIV_EXPR:
4751 case CEIL_MOD_EXPR:
4752 if (unsignedp)
4754 if (op1_is_constant
4755 && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4756 && (HWI_COMPUTABLE_MODE_P (compute_mode)
4757 || INTVAL (op1) >= 0))
4759 scalar_int_mode int_mode
4760 = as_a <scalar_int_mode> (compute_mode);
4761 rtx t1, t2, t3;
4762 unsigned HOST_WIDE_INT d = INTVAL (op1);
4763 t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
4764 floor_log2 (d), tquotient, 1);
4765 t2 = expand_binop (int_mode, and_optab, op0,
4766 gen_int_mode (d - 1, int_mode),
4767 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4768 t3 = gen_reg_rtx (int_mode);
4769 t3 = emit_store_flag (t3, NE, t2, const0_rtx, int_mode, 1, 1);
4770 if (t3 == 0)
4772 rtx_code_label *lab;
4773 lab = gen_label_rtx ();
4774 do_cmp_and_jump (t2, const0_rtx, EQ, int_mode, lab);
4775 expand_inc (t1, const1_rtx);
4776 emit_label (lab);
4777 quotient = t1;
4779 else
4780 quotient = force_operand (gen_rtx_PLUS (int_mode, t1, t3),
4781 tquotient);
4782 break;
4785 /* Try using an instruction that produces both the quotient and
4786 remainder, using truncation. We can easily compensate the
4787 quotient or remainder to get ceiling rounding, once we have the
4788 remainder. Notice that we compute also the final remainder
4789 value here, and return the result right away. */
4790 if (target == 0 || GET_MODE (target) != compute_mode)
4791 target = gen_reg_rtx (compute_mode);
4793 if (rem_flag)
4795 remainder = (REG_P (target)
4796 ? target : gen_reg_rtx (compute_mode));
4797 quotient = gen_reg_rtx (compute_mode);
4799 else
4801 quotient = (REG_P (target)
4802 ? target : gen_reg_rtx (compute_mode));
4803 remainder = gen_reg_rtx (compute_mode);
4806 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4807 remainder, 1))
4809 /* This could be computed with a branch-less sequence.
4810 Save that for later. */
4811 rtx_code_label *label = gen_label_rtx ();
4812 do_cmp_and_jump (remainder, const0_rtx, EQ,
4813 compute_mode, label);
4814 expand_inc (quotient, const1_rtx);
4815 expand_dec (remainder, op1);
4816 emit_label (label);
4817 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4820 /* No luck with division elimination or divmod. Have to do it
4821 by conditionally adjusting op0 *and* the result. */
4823 rtx_code_label *label1, *label2;
4824 rtx adjusted_op0, tem;
4826 quotient = gen_reg_rtx (compute_mode);
4827 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4828 label1 = gen_label_rtx ();
4829 label2 = gen_label_rtx ();
4830 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4831 compute_mode, label1);
4832 emit_move_insn (quotient, const0_rtx);
4833 emit_jump_insn (targetm.gen_jump (label2));
4834 emit_barrier ();
4835 emit_label (label1);
4836 expand_dec (adjusted_op0, const1_rtx);
4837 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4838 quotient, 1, OPTAB_LIB_WIDEN);
4839 if (tem != quotient)
4840 emit_move_insn (quotient, tem);
4841 expand_inc (quotient, const1_rtx);
4842 emit_label (label2);
4845 else /* signed */
4847 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4848 && INTVAL (op1) >= 0)
4850 /* This is extremely similar to the code for the unsigned case
4851 above. For 2.7 we should merge these variants, but for
4852 2.6.1 I don't want to touch the code for unsigned since that
4853 gets used in C. The signed case will only be used by other
4854 languages (Ada). */
4856 rtx t1, t2, t3;
4857 unsigned HOST_WIDE_INT d = INTVAL (op1);
4858 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4859 floor_log2 (d), tquotient, 0);
4860 t2 = expand_binop (compute_mode, and_optab, op0,
4861 gen_int_mode (d - 1, compute_mode),
4862 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4863 t3 = gen_reg_rtx (compute_mode);
4864 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4865 compute_mode, 1, 1);
4866 if (t3 == 0)
4868 rtx_code_label *lab;
4869 lab = gen_label_rtx ();
4870 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4871 expand_inc (t1, const1_rtx);
4872 emit_label (lab);
4873 quotient = t1;
4875 else
4876 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4877 t1, t3),
4878 tquotient);
4879 break;
4882 /* Try using an instruction that produces both the quotient and
4883 remainder, using truncation. We can easily compensate the
4884 quotient or remainder to get ceiling rounding, once we have the
4885 remainder. Notice that we compute also the final remainder
4886 value here, and return the result right away. */
4887 if (target == 0 || GET_MODE (target) != compute_mode)
4888 target = gen_reg_rtx (compute_mode);
4889 if (rem_flag)
4891 remainder = (REG_P (target)
4892 ? target : gen_reg_rtx (compute_mode));
4893 quotient = gen_reg_rtx (compute_mode);
4895 else
4897 quotient = (REG_P (target)
4898 ? target : gen_reg_rtx (compute_mode));
4899 remainder = gen_reg_rtx (compute_mode);
4902 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4903 remainder, 0))
4905 /* This could be computed with a branch-less sequence.
4906 Save that for later. */
4907 rtx tem;
4908 rtx_code_label *label = gen_label_rtx ();
4909 do_cmp_and_jump (remainder, const0_rtx, EQ,
4910 compute_mode, label);
4911 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4912 NULL_RTX, 0, OPTAB_WIDEN);
4913 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4914 expand_inc (quotient, const1_rtx);
4915 expand_dec (remainder, op1);
4916 emit_label (label);
4917 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4920 /* No luck with division elimination or divmod. Have to do it
4921 by conditionally adjusting op0 *and* the result. */
4923 rtx_code_label *label1, *label2, *label3, *label4, *label5;
4924 rtx adjusted_op0;
4925 rtx tem;
4927 quotient = gen_reg_rtx (compute_mode);
4928 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4929 label1 = gen_label_rtx ();
4930 label2 = gen_label_rtx ();
4931 label3 = gen_label_rtx ();
4932 label4 = gen_label_rtx ();
4933 label5 = gen_label_rtx ();
4934 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4935 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4936 compute_mode, label1);
4937 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4938 quotient, 0, OPTAB_LIB_WIDEN);
4939 if (tem != quotient)
4940 emit_move_insn (quotient, tem);
4941 emit_jump_insn (targetm.gen_jump (label5));
4942 emit_barrier ();
4943 emit_label (label1);
4944 expand_dec (adjusted_op0, const1_rtx);
4945 emit_jump_insn (targetm.gen_jump (label4));
4946 emit_barrier ();
4947 emit_label (label2);
4948 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4949 compute_mode, label3);
4950 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4951 quotient, 0, OPTAB_LIB_WIDEN);
4952 if (tem != quotient)
4953 emit_move_insn (quotient, tem);
4954 emit_jump_insn (targetm.gen_jump (label5));
4955 emit_barrier ();
4956 emit_label (label3);
4957 expand_inc (adjusted_op0, const1_rtx);
4958 emit_label (label4);
4959 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4960 quotient, 0, OPTAB_LIB_WIDEN);
4961 if (tem != quotient)
4962 emit_move_insn (quotient, tem);
4963 expand_inc (quotient, const1_rtx);
4964 emit_label (label5);
4967 break;
4969 case EXACT_DIV_EXPR:
4970 if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
4972 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4973 int size = GET_MODE_BITSIZE (int_mode);
4974 HOST_WIDE_INT d = INTVAL (op1);
4975 unsigned HOST_WIDE_INT ml;
4976 int pre_shift;
4977 rtx t1;
4979 pre_shift = ctz_or_zero (d);
4980 ml = invert_mod2n (d >> pre_shift, size);
4981 t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
4982 pre_shift, NULL_RTX, unsignedp);
4983 quotient = expand_mult (int_mode, t1, gen_int_mode (ml, int_mode),
4984 NULL_RTX, 1);
4986 insn = get_last_insn ();
4987 set_dst_reg_note (insn, REG_EQUAL,
4988 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4989 int_mode, op0, op1),
4990 quotient);
4992 break;
4994 case ROUND_DIV_EXPR:
4995 case ROUND_MOD_EXPR:
4996 if (unsignedp)
4998 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4999 rtx tem;
5000 rtx_code_label *label;
5001 label = gen_label_rtx ();
5002 quotient = gen_reg_rtx (int_mode);
5003 remainder = gen_reg_rtx (int_mode);
5004 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
5006 rtx tem;
5007 quotient = expand_binop (int_mode, udiv_optab, op0, op1,
5008 quotient, 1, OPTAB_LIB_WIDEN);
5009 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 1);
5010 remainder = expand_binop (int_mode, sub_optab, op0, tem,
5011 remainder, 1, OPTAB_LIB_WIDEN);
5013 tem = plus_constant (int_mode, op1, -1);
5014 tem = expand_shift (RSHIFT_EXPR, int_mode, tem, 1, NULL_RTX, 1);
5015 do_cmp_and_jump (remainder, tem, LEU, int_mode, label);
5016 expand_inc (quotient, const1_rtx);
5017 expand_dec (remainder, op1);
5018 emit_label (label);
5020 else
5022 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5023 int size = GET_MODE_BITSIZE (int_mode);
5024 rtx abs_rem, abs_op1, tem, mask;
5025 rtx_code_label *label;
5026 label = gen_label_rtx ();
5027 quotient = gen_reg_rtx (int_mode);
5028 remainder = gen_reg_rtx (int_mode);
5029 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
5031 rtx tem;
5032 quotient = expand_binop (int_mode, sdiv_optab, op0, op1,
5033 quotient, 0, OPTAB_LIB_WIDEN);
5034 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 0);
5035 remainder = expand_binop (int_mode, sub_optab, op0, tem,
5036 remainder, 0, OPTAB_LIB_WIDEN);
5038 abs_rem = expand_abs (int_mode, remainder, NULL_RTX, 1, 0);
5039 abs_op1 = expand_abs (int_mode, op1, NULL_RTX, 1, 0);
5040 tem = expand_shift (LSHIFT_EXPR, int_mode, abs_rem,
5041 1, NULL_RTX, 1);
5042 do_cmp_and_jump (tem, abs_op1, LTU, int_mode, label);
5043 tem = expand_binop (int_mode, xor_optab, op0, op1,
5044 NULL_RTX, 0, OPTAB_WIDEN);
5045 mask = expand_shift (RSHIFT_EXPR, int_mode, tem,
5046 size - 1, NULL_RTX, 0);
5047 tem = expand_binop (int_mode, xor_optab, mask, const1_rtx,
5048 NULL_RTX, 0, OPTAB_WIDEN);
5049 tem = expand_binop (int_mode, sub_optab, tem, mask,
5050 NULL_RTX, 0, OPTAB_WIDEN);
5051 expand_inc (quotient, tem);
5052 tem = expand_binop (int_mode, xor_optab, mask, op1,
5053 NULL_RTX, 0, OPTAB_WIDEN);
5054 tem = expand_binop (int_mode, sub_optab, tem, mask,
5055 NULL_RTX, 0, OPTAB_WIDEN);
5056 expand_dec (remainder, tem);
5057 emit_label (label);
5059 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5061 default:
5062 gcc_unreachable ();
5065 if (quotient == 0)
5067 if (target && GET_MODE (target) != compute_mode)
5068 target = 0;
5070 if (rem_flag)
5072 /* Try to produce the remainder without producing the quotient.
5073 If we seem to have a divmod pattern that does not require widening,
5074 don't try widening here. We should really have a WIDEN argument
5075 to expand_twoval_binop, since what we'd really like to do here is
5076 1) try a mod insn in compute_mode
5077 2) try a divmod insn in compute_mode
5078 3) try a div insn in compute_mode and multiply-subtract to get
5079 remainder
5080 4) try the same things with widening allowed. */
5081 remainder
5082 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5083 op0, op1, target,
5084 unsignedp,
5085 ((optab_handler (optab2, compute_mode)
5086 != CODE_FOR_nothing)
5087 ? OPTAB_DIRECT : OPTAB_WIDEN));
5088 if (remainder == 0)
5090 /* No luck there. Can we do remainder and divide at once
5091 without a library call? */
5092 remainder = gen_reg_rtx (compute_mode);
5093 if (! expand_twoval_binop ((unsignedp
5094 ? udivmod_optab
5095 : sdivmod_optab),
5096 op0, op1,
5097 NULL_RTX, remainder, unsignedp))
5098 remainder = 0;
5101 if (remainder)
5102 return gen_lowpart (mode, remainder);
5105 /* Produce the quotient. Try a quotient insn, but not a library call.
5106 If we have a divmod in this mode, use it in preference to widening
5107 the div (for this test we assume it will not fail). Note that optab2
5108 is set to the one of the two optabs that the call below will use. */
5109 quotient
5110 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5111 op0, op1, rem_flag ? NULL_RTX : target,
5112 unsignedp,
5113 ((optab_handler (optab2, compute_mode)
5114 != CODE_FOR_nothing)
5115 ? OPTAB_DIRECT : OPTAB_WIDEN));
5117 if (quotient == 0)
5119 /* No luck there. Try a quotient-and-remainder insn,
5120 keeping the quotient alone. */
5121 quotient = gen_reg_rtx (compute_mode);
5122 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5123 op0, op1,
5124 quotient, NULL_RTX, unsignedp))
5126 quotient = 0;
5127 if (! rem_flag)
5128 /* Still no luck. If we are not computing the remainder,
5129 use a library call for the quotient. */
5130 quotient = sign_expand_binop (compute_mode,
5131 udiv_optab, sdiv_optab,
5132 op0, op1, target,
5133 unsignedp, OPTAB_LIB_WIDEN);
5138 if (rem_flag)
5140 if (target && GET_MODE (target) != compute_mode)
5141 target = 0;
5143 if (quotient == 0)
5145 /* No divide instruction either. Use library for remainder. */
5146 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5147 op0, op1, target,
5148 unsignedp, OPTAB_LIB_WIDEN);
5149 /* No remainder function. Try a quotient-and-remainder
5150 function, keeping the remainder. */
5151 if (!remainder)
5153 remainder = gen_reg_rtx (compute_mode);
5154 if (!expand_twoval_binop_libfunc
5155 (unsignedp ? udivmod_optab : sdivmod_optab,
5156 op0, op1,
5157 NULL_RTX, remainder,
5158 unsignedp ? UMOD : MOD))
5159 remainder = NULL_RTX;
5162 else
5164 /* We divided. Now finish doing X - Y * (X / Y). */
5165 remainder = expand_mult (compute_mode, quotient, op1,
5166 NULL_RTX, unsignedp);
5167 remainder = expand_binop (compute_mode, sub_optab, op0,
5168 remainder, target, unsignedp,
5169 OPTAB_LIB_WIDEN);
5173 return gen_lowpart (mode, rem_flag ? remainder : quotient);
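
/* Illustrative sketch (not part of the original file): when none of the
   special cases above apply, the FLOOR_, CEIL_ and ROUND_ variants fall back
   to a truncating divmod followed by a conditional fix-up of quotient and
   remainder.  The hypothetical helper below shows the equivalent fix-ups for
   32-bit int operands (d != 0, and ignoring the INT_MIN / -1 overflow
   case).  */

static void
example_rounding_fixups (int n, int d, int *floor_q, int *ceil_q, int *round_q)
{
  int q = n / d, r = n % d;                    /* truncating divmod */
  unsigned int abs_r = r < 0 ? -(unsigned int) r : (unsigned int) r;
  unsigned int abs_d = d < 0 ? -(unsigned int) d : (unsigned int) d;

  /* Floor: step towards -infinity when r != 0 and the signs differ.  */
  *floor_q = q - (r != 0 && (n < 0) != (d < 0));

  /* Ceiling: step towards +infinity when r != 0 and the signs agree.  */
  *ceil_q = q + (r != 0 && (n < 0) == (d < 0));

  /* Round to nearest, ties away from zero: adjust when 2*|r| >= |d|.  */
  if (2 * abs_r >= abs_d)
    *round_q = q + ((n < 0) == (d < 0) ? 1 : -1);
  else
    *round_q = q;
}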
5176 /* Return a tree node with data type TYPE, describing the value of X.
5177 Usually this is a VAR_DECL, if there is no obvious better choice.
5178 X may be an expression; however, we only support those expressions
5179 generated by loop.c. */
5181 tree
5182 make_tree (tree type, rtx x)
5184 tree t;
5186 switch (GET_CODE (x))
5188 case CONST_INT:
5189 case CONST_WIDE_INT:
5190 t = wide_int_to_tree (type, rtx_mode_t (x, TYPE_MODE (type)));
5191 return t;
5193 case CONST_DOUBLE:
5194 STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5195 if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5196 t = wide_int_to_tree (type,
5197 wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5198 HOST_BITS_PER_WIDE_INT * 2));
5199 else
5200 t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5202 return t;
5204 case CONST_VECTOR:
5206 int units = CONST_VECTOR_NUNITS (x);
5207 tree itype = TREE_TYPE (type);
5208 tree *elts;
5209 int i;
5211 /* Build a tree with vector elements. */
5212 elts = XALLOCAVEC (tree, units);
5213 for (i = units - 1; i >= 0; --i)
5215 rtx elt = CONST_VECTOR_ELT (x, i);
5216 elts[i] = make_tree (itype, elt);
5219 return build_vector (type, elts);
5222 case PLUS:
5223 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5224 make_tree (type, XEXP (x, 1)));
5226 case MINUS:
5227 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5228 make_tree (type, XEXP (x, 1)));
5230 case NEG:
5231 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5233 case MULT:
5234 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5235 make_tree (type, XEXP (x, 1)));
5237 case ASHIFT:
5238 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5239 make_tree (type, XEXP (x, 1)));
5241 case LSHIFTRT:
5242 t = unsigned_type_for (type);
5243 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5244 make_tree (t, XEXP (x, 0)),
5245 make_tree (type, XEXP (x, 1))));
5247 case ASHIFTRT:
5248 t = signed_type_for (type);
5249 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5250 make_tree (t, XEXP (x, 0)),
5251 make_tree (type, XEXP (x, 1))));
5253 case DIV:
5254 if (TREE_CODE (type) != REAL_TYPE)
5255 t = signed_type_for (type);
5256 else
5257 t = type;
5259 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5260 make_tree (t, XEXP (x, 0)),
5261 make_tree (t, XEXP (x, 1))));
5262 case UDIV:
5263 t = unsigned_type_for (type);
5264 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5265 make_tree (t, XEXP (x, 0)),
5266 make_tree (t, XEXP (x, 1))));
5268 case SIGN_EXTEND:
5269 case ZERO_EXTEND:
5270 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5271 GET_CODE (x) == ZERO_EXTEND);
5272 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5274 case CONST:
5275 return make_tree (type, XEXP (x, 0));
5277 case SYMBOL_REF:
5278 t = SYMBOL_REF_DECL (x);
5279 if (t)
5280 return fold_convert (type, build_fold_addr_expr (t));
5281 /* fall through. */
5283 default:
5284 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5286 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5287 address mode to pointer mode. */
5288 if (POINTER_TYPE_P (type))
5289 x = convert_memory_address_addr_space
5290 (SCALAR_INT_TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5292 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5293 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5294 t->decl_with_rtl.rtl = x;
5296 return t;
5300 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5301 and returning TARGET.
5303 If TARGET is 0, a pseudo-register or constant is returned. */
5306 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5308 rtx tem = 0;
5310 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5311 tem = simplify_binary_operation (AND, mode, op0, op1);
5312 if (tem == 0)
5313 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5315 if (target == 0)
5316 target = tem;
5317 else if (tem != target)
5318 emit_move_insn (target, tem);
5319 return target;
5322 /* Helper function for emit_store_flag. */
5324 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5325 machine_mode mode, machine_mode compare_mode,
5326 int unsignedp, rtx x, rtx y, int normalizep,
5327 machine_mode target_mode)
5329 struct expand_operand ops[4];
5330 rtx op0, comparison, subtarget;
5331 rtx_insn *last;
5332 scalar_int_mode result_mode = targetm.cstore_mode (icode);
5333 scalar_int_mode int_target_mode;
5335 last = get_last_insn ();
5336 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5337 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5338 if (!x || !y)
5340 delete_insns_since (last);
5341 return NULL_RTX;
5344 if (target_mode == VOIDmode)
5345 int_target_mode = result_mode;
5346 else
5347 int_target_mode = as_a <scalar_int_mode> (target_mode);
5348 if (!target)
5349 target = gen_reg_rtx (int_target_mode);
5351 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5353 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5354 create_fixed_operand (&ops[1], comparison);
5355 create_fixed_operand (&ops[2], x);
5356 create_fixed_operand (&ops[3], y);
5357 if (!maybe_expand_insn (icode, 4, ops))
5359 delete_insns_since (last);
5360 return NULL_RTX;
5362 subtarget = ops[0].value;
5364 /* If we are converting to a wider mode, first convert to
5365 INT_TARGET_MODE, then normalize. This produces better combining
5366 opportunities on machines that have a SIGN_EXTRACT when we are
5367 testing a single bit. This mostly benefits the 68k.
5369 If STORE_FLAG_VALUE does not have the sign bit set when
5370 interpreted in MODE, we can do this conversion as unsigned, which
5371 is usually more efficient. */
5372 if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (result_mode))
5374 convert_move (target, subtarget,
5375 val_signbit_known_clear_p (result_mode,
5376 STORE_FLAG_VALUE));
5377 op0 = target;
5378 result_mode = int_target_mode;
5380 else
5381 op0 = subtarget;
5383 /* If we want to keep subexpressions around, don't reuse our last
5384 target. */
5385 if (optimize)
5386 subtarget = 0;
5388 /* Now normalize to the proper value in MODE. Sometimes we don't
5389 have to do anything. */
5390 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5392 /* STORE_FLAG_VALUE might be the most negative number, so write
5393 the comparison this way to avoid a compile-time warning. */
5394 else if (- normalizep == STORE_FLAG_VALUE)
5395 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5397 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5398 it hard to use a value of just the sign bit due to ANSI integer
5399 constant typing rules. */
5400 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5401 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5402 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5403 normalizep == 1);
5404 else
5406 gcc_assert (STORE_FLAG_VALUE & 1);
5408 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5409 if (normalizep == -1)
5410 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
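 /* Example of the normalization above (illustrative): if STORE_FLAG_VALUE
    is -1 and NORMALIZEP is 1, the negation branch turns the raw -1/0
    result of the cstore pattern into the requested 1/0 value.  */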
5413 /* If we were converting to a smaller mode, do the conversion now. */
5414 if (int_target_mode != result_mode)
5416 convert_move (target, op0, 0);
5417 return target;
5419 else
5420 return op0;
5424 /* A subroutine of emit_store_flag only including "tricks" that do not
5425 need a recursive call. These are kept separate to avoid infinite
5426 loops. */
5428 static rtx
5429 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5430 machine_mode mode, int unsignedp, int normalizep,
5431 machine_mode target_mode)
5433 rtx subtarget;
5434 enum insn_code icode;
5435 machine_mode compare_mode;
5436 enum mode_class mclass;
5437 enum rtx_code scode;
5439 if (unsignedp)
5440 code = unsigned_condition (code);
5441 scode = swap_condition (code);
5443 /* If one operand is constant, make it the second one. Only do this
5444 if the other operand is not constant as well. */
5446 if (swap_commutative_operands_p (op0, op1))
5448 std::swap (op0, op1);
5449 code = swap_condition (code);
5452 if (mode == VOIDmode)
5453 mode = GET_MODE (op0);
5455 /* For some comparisons with 1 and -1, we can convert this to
5456 comparisons with zero. This will often produce more opportunities for
5457 store-flag insns. */
5459 switch (code)
5461 case LT:
5462 if (op1 == const1_rtx)
5463 op1 = const0_rtx, code = LE;
5464 break;
5465 case LE:
5466 if (op1 == constm1_rtx)
5467 op1 = const0_rtx, code = LT;
5468 break;
5469 case GE:
5470 if (op1 == const1_rtx)
5471 op1 = const0_rtx, code = GT;
5472 break;
5473 case GT:
5474 if (op1 == constm1_rtx)
5475 op1 = const0_rtx, code = GE;
5476 break;
5477 case GEU:
5478 if (op1 == const1_rtx)
5479 op1 = const0_rtx, code = NE;
5480 break;
5481 case LTU:
5482 if (op1 == const1_rtx)
5483 op1 = const0_rtx, code = EQ;
5484 break;
5485 default:
5486 break;
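 /* For instance, "X < 1" is the same as "X <= 0", and an unsigned
    "X >= 1" is the same as "X != 0"; the rewrites above expose the
    zero comparisons that store-flag insns handle best (illustrative
    restatement of the switch above).  */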
5489 /* If we are comparing a double-word integer with zero or -1, we can
5490 convert the comparison into one involving a single word. */
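 /* Illustrative example, assuming 32-bit words: a 64-bit X == 0 becomes
    (lowpart | highpart) == 0, X == -1 becomes (lowpart & highpart) == -1,
    and the signed test X < 0 only needs to look at the high word.  */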
5491 scalar_int_mode int_mode;
5492 if (is_int_mode (mode, &int_mode)
5493 && GET_MODE_BITSIZE (int_mode) == BITS_PER_WORD * 2
5494 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5496 rtx tem;
5497 if ((code == EQ || code == NE)
5498 && (op1 == const0_rtx || op1 == constm1_rtx))
5500 rtx op00, op01;
5502 /* Do a logical OR or AND of the two words and compare the
5503 result. */
5504 op00 = simplify_gen_subreg (word_mode, op0, int_mode, 0);
5505 op01 = simplify_gen_subreg (word_mode, op0, int_mode, UNITS_PER_WORD);
5506 tem = expand_binop (word_mode,
5507 op1 == const0_rtx ? ior_optab : and_optab,
5508 op00, op01, NULL_RTX, unsignedp,
5509 OPTAB_DIRECT);
5511 if (tem != 0)
5512 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5513 unsignedp, normalizep);
5515 else if ((code == LT || code == GE) && op1 == const0_rtx)
5517 rtx op0h;
5519 /* If testing the sign bit, can just test on high word. */
5520 op0h = simplify_gen_subreg (word_mode, op0, int_mode,
5521 subreg_highpart_offset (word_mode,
5522 int_mode));
5523 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5524 unsignedp, normalizep);
5526 else
5527 tem = NULL_RTX;
5529 if (tem)
5531 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5532 return tem;
5533 if (!target)
5534 target = gen_reg_rtx (target_mode);
5536 convert_move (target, tem,
5537 !val_signbit_known_set_p (word_mode,
5538 (normalizep ? normalizep
5539 : STORE_FLAG_VALUE)));
5540 return target;
5544 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5545 complement of A (for GE) and shifting the sign bit to the low bit. */
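 /* Illustrative example for a 32-bit mode with a 0/1 result:
    "A < 0" becomes the logical shift A >> 31, and "A >= 0" becomes
    (~A) >> 31; for a -1/0 result an arithmetic shift is used instead.  */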
5546 if (op1 == const0_rtx && (code == LT || code == GE)
5547 && is_int_mode (mode, &int_mode)
5548 && (normalizep || STORE_FLAG_VALUE == 1
5549 || val_signbit_p (int_mode, STORE_FLAG_VALUE)))
5551 scalar_int_mode int_target_mode;
5552 subtarget = target;
5554 if (!target)
5555 int_target_mode = int_mode;
5556 else
5558 /* If the result is to be wider than OP0, it is best to convert it
5559 first. If it is to be narrower, it is *incorrect* to convert it
5560 first. */
5561 int_target_mode = as_a <scalar_int_mode> (target_mode);
5562 if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (int_mode))
5564 op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5565 int_mode = int_target_mode;
5569 if (int_target_mode != int_mode)
5570 subtarget = 0;
5572 if (code == GE)
5573 op0 = expand_unop (int_mode, one_cmpl_optab, op0,
5574 ((STORE_FLAG_VALUE == 1 || normalizep)
5575 ? 0 : subtarget), 0);
5577 if (STORE_FLAG_VALUE == 1 || normalizep)
5578 /* If we are supposed to produce a 0/1 value, we want to do
5579 a logical shift from the sign bit to the low-order bit; for
5580 a -1/0 value, we do an arithmetic shift. */
5581 op0 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5582 GET_MODE_BITSIZE (int_mode) - 1,
5583 subtarget, normalizep != -1);
5585 if (int_mode != int_target_mode)
5586 op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5588 return op0;
5591 mclass = GET_MODE_CLASS (mode);
5592 FOR_EACH_MODE_FROM (compare_mode, mode)
5594 machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5595 icode = optab_handler (cstore_optab, optab_mode);
5596 if (icode != CODE_FOR_nothing)
5598 do_pending_stack_adjust ();
5599 rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5600 unsignedp, op0, op1, normalizep, target_mode);
5601 if (tem)
5602 return tem;
5604 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5606 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5607 unsignedp, op1, op0, normalizep, target_mode);
5608 if (tem)
5609 return tem;
5611 break;
5615 return 0;
5618 /* Subroutine of emit_store_flag that handles cases in which the operands
5619 are scalar integers. SUBTARGET is the target to use for temporary
5620 operations and TRUEVAL is the value to store when the condition is
5621 true. All other arguments are as for emit_store_flag. */
5624 emit_store_flag_int (rtx target, rtx subtarget, enum rtx_code code, rtx op0,
5625 rtx op1, scalar_int_mode mode, int unsignedp,
5626 int normalizep, rtx trueval)
5628 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5629 rtx_insn *last = get_last_insn ();
5630 rtx tem;
5632 /* If this is an equality comparison of integers, we can try to exclusive-or
5633 (or subtract) the two operands and use a recursive call to try the
5634 comparison with zero. Don't do any of these cases if branches are
5635 very cheap. */
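 /* For example (illustrative), "X == Y" is recast as "(X ^ Y) == 0"
    (or "(X - Y) == 0" when no XOR pattern is available), and the
    comparison with zero is then retried recursively.  */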
5637 if ((code == EQ || code == NE) && op1 != const0_rtx)
5639 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5640 OPTAB_WIDEN);
5642 if (tem == 0)
5643 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5644 OPTAB_WIDEN);
5645 if (tem != 0)
5646 tem = emit_store_flag (target, code, tem, const0_rtx,
5647 mode, unsignedp, normalizep);
5648 if (tem != 0)
5649 return tem;
5651 delete_insns_since (last);
5654 /* For integer comparisons, try the reverse comparison. However, for
5655 small X, if we would have to extend it anyway, implementing "X != 0"
5656 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5657 rtx_code rcode = reverse_condition (code);
5658 if (can_compare_p (rcode, mode, ccp_store_flag)
5659 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5660 && code == NE
5661 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5662 && op1 == const0_rtx))
5664 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5665 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5667 /* Again, for the reverse comparison, use either an addition or a XOR. */
5668 if (want_add
5669 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5670 optimize_insn_for_speed_p ()) == 0)
5672 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5673 STORE_FLAG_VALUE, target_mode);
5674 if (tem != 0)
5675 tem = expand_binop (target_mode, add_optab, tem,
5676 gen_int_mode (normalizep, target_mode),
5677 target, 0, OPTAB_WIDEN);
5679 else if (!want_add
5680 && rtx_cost (trueval, mode, XOR, 1,
5681 optimize_insn_for_speed_p ()) == 0)
5683 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5684 normalizep, target_mode);
5685 if (tem != 0)
5686 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5687 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5690 if (tem != 0)
5691 return tem;
5692 delete_insns_since (last);
5695 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5696 the constant zero. Reject all other comparisons at this point. Only
5697 do LE and GT if branches are expensive since they are expensive on
5698 2-operand machines. */
5700 if (op1 != const0_rtx
5701 || (code != EQ && code != NE
5702 && (BRANCH_COST (optimize_insn_for_speed_p (),
5703 false) <= 1 || (code != LE && code != GT))))
5704 return 0;
5706 /* Try to put the result of the comparison in the sign bit. Assume we can't
5707 do the necessary operation below. */
5709 tem = 0;
5711 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5712 the sign bit set. */
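 /* Worked example (illustrative): for A = 5, 5 | 4 = 5 leaves the sign
    bit clear; for A = 0, 0 | -1 = -1 sets it; for A = -3, -3 | -4 = -3
    keeps it set.  So the sign bit of (A | (A - 1)) is set exactly when
    A <= 0.  */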
5714 if (code == LE)
5716 /* This is destructive, so SUBTARGET can't be OP0. */
5717 if (rtx_equal_p (subtarget, op0))
5718 subtarget = 0;
5720 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5721 OPTAB_WIDEN);
5722 if (tem)
5723 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5724 OPTAB_WIDEN);
5727 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5728 number of bits in the mode of OP0, minus one. */
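 /* Worked example for a 32-bit mode (illustrative): for A = 5,
    (5 >> 31) - 5 = -5 has the sign bit set; for A = 0 the result is 0;
    for A = -3, (-3 >> 31) - (-3) = -1 + 3 = 2 leaves it clear.  So the
    sign bit is set exactly when A > 0.  */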
5730 if (code == GT)
5732 if (rtx_equal_p (subtarget, op0))
5733 subtarget = 0;
5735 tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
5736 GET_MODE_BITSIZE (mode) - 1,
5737 subtarget, 0);
5738 if (tem)
5739 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5740 OPTAB_WIDEN);
5743 if (code == EQ || code == NE)
5745 /* For EQ or NE, one way to do the comparison is to apply an operation
5746 that converts the operand into a positive number if it is nonzero
5747 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5748 for NE we negate. This puts the result in the sign bit. Then we
5749 normalize with a shift, if needed.
5751 Two operations that can do the above actions are ABS and FFS, so try
5752 them. If that doesn't work, and MODE is smaller than a full word,
5753 we can use zero-extension to the wider mode (an unsigned conversion)
5754 as the operation. */
5756 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5757 that is compensated by the subsequent overflow when subtracting
5758 one / negating. */
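 /* Illustrative example using ABS: abs (A) - 1 has the sign bit set only
    when A == 0 (for A == INT_MIN the two overflows cancel out), while
    -abs (A) has it set only when A != 0; the shift at the end of the
    function then normalizes the sign bit if requested.  */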
5760 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5761 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5762 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5763 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5764 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5766 tem = convert_modes (word_mode, mode, op0, 1);
5767 mode = word_mode;
5770 if (tem != 0)
5772 if (code == EQ)
5773 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5774 0, OPTAB_WIDEN);
5775 else
5776 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5779 /* If we couldn't do it that way, for NE we can "or" the two's complement
5780 of the value with itself. For EQ, we take the one's complement of
5781 that "or", which is an extra insn, so we only handle EQ if branches
5782 are expensive. */
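 /* Illustrative example: for A = 5, -5 | 5 has the sign bit set, while
    for A = 0 the result is 0; so the sign bit of (-A) | A is set exactly
    when A != 0, and its complement gives the EQ test.  */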
5784 if (tem == 0
5785 && (code == NE
5786 || BRANCH_COST (optimize_insn_for_speed_p (),
5787 false) > 1))
5789 if (rtx_equal_p (subtarget, op0))
5790 subtarget = 0;
5792 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5793 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5794 OPTAB_WIDEN);
5796 if (tem && code == EQ)
5797 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5801 if (tem && normalizep)
5802 tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem,
5803 GET_MODE_BITSIZE (mode) - 1,
5804 subtarget, normalizep == 1);
5806 if (tem)
5808 if (!target)
5810 else if (GET_MODE (tem) != target_mode)
5812 convert_move (target, tem, 0);
5813 tem = target;
5815 else if (!subtarget)
5817 emit_move_insn (target, tem);
5818 tem = target;
5821 else
5822 delete_insns_since (last);
5824 return tem;
5827 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5828 and storing in TARGET. Normally return TARGET.
5829 Return 0 if that cannot be done.
5831 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5832 it is VOIDmode, they cannot both be CONST_INT.
5834 UNSIGNEDP is for the case where we have to widen the operands
5835 to perform the operation. It says to use zero-extension.
5837 NORMALIZEP is 1 if we should convert the result to be either zero
5838 or one. NORMALIZEP is -1 if we should convert the result to be
5839 either zero or -1. If NORMALIZEP is zero, the result will be left
5840 "raw" out of the scc insn. */
5843 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5844 machine_mode mode, int unsignedp, int normalizep)
5846 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5847 enum rtx_code rcode;
5848 rtx subtarget;
5849 rtx tem, trueval;
5850 rtx_insn *last;
5852 /* If we compare constants, we shouldn't use a store-flag operation,
5853 but a constant load. We can get there via the vanilla route that
5854 usually generates a compare-branch sequence, but will in this case
5855 fold the comparison to a constant, and thus elide the branch. */
5856 if (CONSTANT_P (op0) && CONSTANT_P (op1))
5857 return NULL_RTX;
5859 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5860 target_mode);
5861 if (tem)
5862 return tem;
5864 /* If we reached here, we can't do this with a scc insn, however there
5865 are some comparisons that can be done in other ways. Don't do any
5866 of these cases if branches are very cheap. */
5867 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5868 return 0;
5870 /* See what we need to return. We can only return a 1, -1, or the
5871 sign bit. */
5873 if (normalizep == 0)
5875 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5876 normalizep = STORE_FLAG_VALUE;
5878 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5880 else
5881 return 0;
5884 last = get_last_insn ();
5886 /* If optimizing, use different pseudo registers for each insn, instead
5887 of reusing the same pseudo. This leads to better CSE, but slows
5888 down the compiler, since there are more pseudos. */
5889 subtarget = (!optimize
5890 && (target_mode == mode)) ? target : NULL_RTX;
5891 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5893 /* For floating-point comparisons, try the reverse comparison or try
5894 changing the "orderedness" of the comparison. */
5895 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5897 enum rtx_code first_code;
5898 bool and_them;
5900 rcode = reverse_condition_maybe_unordered (code);
5901 if (can_compare_p (rcode, mode, ccp_store_flag)
5902 && (code == ORDERED || code == UNORDERED
5903 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5904 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5906 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5907 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5909 /* For the reverse comparison, use either an addition or a XOR. */
5910 if (want_add
5911 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5912 optimize_insn_for_speed_p ()) == 0)
5914 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5915 STORE_FLAG_VALUE, target_mode);
5916 if (tem)
5917 return expand_binop (target_mode, add_optab, tem,
5918 gen_int_mode (normalizep, target_mode),
5919 target, 0, OPTAB_WIDEN);
5921 else if (!want_add
5922 && rtx_cost (trueval, mode, XOR, 1,
5923 optimize_insn_for_speed_p ()) == 0)
5925 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5926 normalizep, target_mode);
5927 if (tem)
5928 return expand_binop (target_mode, xor_optab, tem, trueval,
5929 target, INTVAL (trueval) >= 0,
5930 OPTAB_WIDEN);
5934 delete_insns_since (last);
5936 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5937 if (code == ORDERED || code == UNORDERED)
5938 return 0;
5940 and_them = split_comparison (code, mode, &first_code, &code);
5942 /* If there are no NaNs, the first comparison should always fall through.
5943 Effectively change the comparison to the other one. */
5944 if (!HONOR_NANS (mode))
5946 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5947 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5948 target_mode);
5951 if (!HAVE_conditional_move)
5952 return 0;
5954 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5955 conditional move. */
5956 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5957 normalizep, target_mode);
5958 if (tem == 0)
5959 return 0;
5961 if (and_them)
5962 tem = emit_conditional_move (target, code, op0, op1, mode,
5963 tem, const0_rtx, GET_MODE (tem), 0);
5964 else
5965 tem = emit_conditional_move (target, code, op0, op1, mode,
5966 trueval, tem, GET_MODE (tem), 0);
5968 if (tem == 0)
5969 delete_insns_since (last);
5970 return tem;
5973 /* The remaining tricks only apply to integer comparisons. */
5975 scalar_int_mode int_mode;
5976 if (is_int_mode (mode, &int_mode))
5977 return emit_store_flag_int (target, subtarget, code, op0, op1, int_mode,
5978 unsignedp, normalizep, trueval);
5980 return 0;
5983 /* Like emit_store_flag, but always succeeds. */
5986 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5987 machine_mode mode, int unsignedp, int normalizep)
5989 rtx tem;
5990 rtx_code_label *label;
5991 rtx trueval, falseval;
5993 /* First see if emit_store_flag can do the job. */
5994 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5995 if (tem != 0)
5996 return tem;
5998 if (!target)
5999 target = gen_reg_rtx (word_mode);
6001 /* If this failed, we have to do this with set/compare/jump/set code.
6002 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
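 /* A sketch of the special case handled just below (illustrative):
        if (op0 == 0) goto label;
        op0 = trueval;
      label:
    which leaves OP0 (== TARGET) unchanged, i.e. zero, when it was zero.  */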
6003 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
6004 if (code == NE
6005 && GET_MODE_CLASS (mode) == MODE_INT
6006 && REG_P (target)
6007 && op0 == target
6008 && op1 == const0_rtx)
6010 label = gen_label_rtx ();
6011 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
6012 NULL_RTX, NULL, label,
6013 profile_probability::uninitialized ());
6014 emit_move_insn (target, trueval);
6015 emit_label (label);
6016 return target;
6019 if (!REG_P (target)
6020 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
6021 target = gen_reg_rtx (GET_MODE (target));
6023 /* Jump in the right direction if the target cannot implement CODE
6024 but can jump on its reverse condition. */
6025 falseval = const0_rtx;
6026 if (! can_compare_p (code, mode, ccp_jump)
6027 && (! FLOAT_MODE_P (mode)
6028 || code == ORDERED || code == UNORDERED
6029 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6030 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6032 enum rtx_code rcode;
6033 if (FLOAT_MODE_P (mode))
6034 rcode = reverse_condition_maybe_unordered (code);
6035 else
6036 rcode = reverse_condition (code);
6038 /* Canonicalize to UNORDERED for the libcall. */
6039 if (can_compare_p (rcode, mode, ccp_jump)
6040 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
6042 falseval = trueval;
6043 trueval = const0_rtx;
6044 code = rcode;
6048 emit_move_insn (target, trueval);
6049 label = gen_label_rtx ();
6050 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
6051 label, profile_probability::uninitialized ());
6053 emit_move_insn (target, falseval);
6054 emit_label (label);
6056 return target;
6059 /* Perform possibly multi-word comparison and conditional jump to LABEL
6060 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
6061 now a thin wrapper around do_compare_rtx_and_jump. */
6063 static void
6064 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
6065 rtx_code_label *label)
6067 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
6068 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
6069 NULL, label, profile_probability::uninitialized ());