/* gcc/expmed.c */
1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987-2019 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "predict.h"
30 #include "memmodel.h"
31 #include "tm_p.h"
32 #include "expmed.h"
33 #include "optabs.h"
34 #include "regs.h"
35 #include "emit-rtl.h"
36 #include "diagnostic-core.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "dojump.h"
40 #include "explow.h"
41 #include "expr.h"
42 #include "langhooks.h"
43 #include "tree-vector-builder.h"
45 struct target_expmed default_target_expmed;
46 #if SWITCHABLE_TARGET
47 struct target_expmed *this_target_expmed = &default_target_expmed;
48 #endif
50 static bool store_integral_bit_field (rtx, opt_scalar_int_mode,
51 unsigned HOST_WIDE_INT,
52 unsigned HOST_WIDE_INT,
53 poly_uint64, poly_uint64,
54 machine_mode, rtx, bool, bool);
55 static void store_fixed_bit_field (rtx, opt_scalar_int_mode,
56 unsigned HOST_WIDE_INT,
57 unsigned HOST_WIDE_INT,
58 poly_uint64, poly_uint64,
59 rtx, scalar_int_mode, bool);
60 static void store_fixed_bit_field_1 (rtx, scalar_int_mode,
61 unsigned HOST_WIDE_INT,
62 unsigned HOST_WIDE_INT,
63 rtx, scalar_int_mode, bool);
64 static void store_split_bit_field (rtx, opt_scalar_int_mode,
65 unsigned HOST_WIDE_INT,
66 unsigned HOST_WIDE_INT,
67 poly_uint64, poly_uint64,
68 rtx, scalar_int_mode, bool);
69 static rtx extract_integral_bit_field (rtx, opt_scalar_int_mode,
70 unsigned HOST_WIDE_INT,
71 unsigned HOST_WIDE_INT, int, rtx,
72 machine_mode, machine_mode, bool, bool);
73 static rtx extract_fixed_bit_field (machine_mode, rtx, opt_scalar_int_mode,
74 unsigned HOST_WIDE_INT,
75 unsigned HOST_WIDE_INT, rtx, int, bool);
76 static rtx extract_fixed_bit_field_1 (machine_mode, rtx, scalar_int_mode,
77 unsigned HOST_WIDE_INT,
78 unsigned HOST_WIDE_INT, rtx, int, bool);
79 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
80 static rtx extract_split_bit_field (rtx, opt_scalar_int_mode,
81 unsigned HOST_WIDE_INT,
82 unsigned HOST_WIDE_INT, int, bool);
83 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
84 static rtx expand_smod_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
85 static rtx expand_sdiv_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
87 /* Return a constant integer mask value of mode MODE with BITSIZE ones
88 followed by BITPOS zeros, or the complement of that if COMPLEMENT.
89 The mask is truncated if necessary to the width of mode MODE. The
90 mask is zero-extended if BITSIZE+BITPOS is too small for MODE. */
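/* Illustrative example (editor's sketch, assuming a 32-bit SImode):
   with BITPOS == 8 and BITSIZE == 4 the mask has bits 8..11 set, so
   mask_rtx (SImode, 8, 4, false) yields a constant with value 0xf00,
   and passing COMPLEMENT == true yields its SImode complement.  */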
92 static inline rtx
93 mask_rtx (scalar_int_mode mode, int bitpos, int bitsize, bool complement)
95 return immed_wide_int_const
96 (wi::shifted_mask (bitpos, bitsize, complement,
97 GET_MODE_PRECISION (mode)), mode);
100 /* Test whether a value is zero or a power of two. */
101 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
102 (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
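/* Worked example (editor's note): for x == 8 (binary 1000), x - 1 == 0111
   and x & (x - 1) == 0, so the test is true; for x == 6 (binary 0110),
   x & (x - 1) == 0100, so it is false.  x == 0 also passes, hence the
   _OR_ZERO in the macro name.  */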
104 struct init_expmed_rtl
106 rtx reg;
107 rtx plus;
108 rtx neg;
109 rtx mult;
110 rtx sdiv;
111 rtx udiv;
112 rtx sdiv_32;
113 rtx smod_32;
114 rtx wide_mult;
115 rtx wide_lshr;
116 rtx wide_trunc;
117 rtx shift;
118 rtx shift_mult;
119 rtx shift_add;
120 rtx shift_sub0;
121 rtx shift_sub1;
122 rtx zext;
123 rtx trunc;
125 rtx pow2[MAX_BITS_PER_WORD];
126 rtx cint[MAX_BITS_PER_WORD];
129 static void
130 init_expmed_one_conv (struct init_expmed_rtl *all, scalar_int_mode to_mode,
131 scalar_int_mode from_mode, bool speed)
133 int to_size, from_size;
134 rtx which;
136 to_size = GET_MODE_PRECISION (to_mode);
137 from_size = GET_MODE_PRECISION (from_mode);
139 /* Most partial integer modes have a precision less than that of the
140 "full" integer mode required for their storage.  If one does not,
141 reduce its bit size by one for the comparison done here. */
143 if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
144 && pow2p_hwi (to_size))
145 to_size --;
146 if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
147 && pow2p_hwi (from_size))
148 from_size --;
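/* Editor's illustration (hypothetical target): a partial-integer mode that
   nevertheless reports a power-of-two precision, say 32 bits, is treated
   here as having 31 bits, so converting it to the corresponding full
   32-bit mode is still costed as an extension rather than a truncation.  */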
150 /* Assume cost of zero-extend and sign-extend is the same. */
151 which = (to_size < from_size ? all->trunc : all->zext);
153 PUT_MODE (all->reg, from_mode);
154 set_convert_cost (to_mode, from_mode, speed,
155 set_src_cost (which, to_mode, speed));
158 static void
159 init_expmed_one_mode (struct init_expmed_rtl *all,
160 machine_mode mode, int speed)
162 int m, n, mode_bitsize;
163 machine_mode mode_from;
165 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
167 PUT_MODE (all->reg, mode);
168 PUT_MODE (all->plus, mode);
169 PUT_MODE (all->neg, mode);
170 PUT_MODE (all->mult, mode);
171 PUT_MODE (all->sdiv, mode);
172 PUT_MODE (all->udiv, mode);
173 PUT_MODE (all->sdiv_32, mode);
174 PUT_MODE (all->smod_32, mode);
175 PUT_MODE (all->wide_trunc, mode);
176 PUT_MODE (all->shift, mode);
177 PUT_MODE (all->shift_mult, mode);
178 PUT_MODE (all->shift_add, mode);
179 PUT_MODE (all->shift_sub0, mode);
180 PUT_MODE (all->shift_sub1, mode);
181 PUT_MODE (all->zext, mode);
182 PUT_MODE (all->trunc, mode);
184 set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
185 set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
186 set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
187 set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
188 set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
190 set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
191 <= 2 * add_cost (speed, mode)));
192 set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
193 <= 4 * add_cost (speed, mode)));
195 set_shift_cost (speed, mode, 0, 0);
197 int cost = add_cost (speed, mode);
198 set_shiftadd_cost (speed, mode, 0, cost);
199 set_shiftsub0_cost (speed, mode, 0, cost);
200 set_shiftsub1_cost (speed, mode, 0, cost);
203 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
204 for (m = 1; m < n; m++)
206 XEXP (all->shift, 1) = all->cint[m];
207 XEXP (all->shift_mult, 1) = all->pow2[m];
209 set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
210 set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
211 speed));
212 set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
213 speed));
214 set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
215 speed));
218 scalar_int_mode int_mode_to;
219 if (is_a <scalar_int_mode> (mode, &int_mode_to))
221 for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
222 mode_from = (machine_mode)(mode_from + 1))
223 init_expmed_one_conv (all, int_mode_to,
224 as_a <scalar_int_mode> (mode_from), speed);
226 scalar_int_mode wider_mode;
227 if (GET_MODE_CLASS (int_mode_to) == MODE_INT
228 && GET_MODE_WIDER_MODE (int_mode_to).exists (&wider_mode))
230 PUT_MODE (all->zext, wider_mode);
231 PUT_MODE (all->wide_mult, wider_mode);
232 PUT_MODE (all->wide_lshr, wider_mode);
233 XEXP (all->wide_lshr, 1)
234 = gen_int_shift_amount (wider_mode, mode_bitsize);
236 set_mul_widen_cost (speed, wider_mode,
237 set_src_cost (all->wide_mult, wider_mode, speed));
238 set_mul_highpart_cost (speed, int_mode_to,
239 set_src_cost (all->wide_trunc,
240 int_mode_to, speed));
245 void
246 init_expmed (void)
248 struct init_expmed_rtl all;
249 machine_mode mode = QImode;
250 int m, speed;
252 memset (&all, 0, sizeof all);
253 for (m = 1; m < MAX_BITS_PER_WORD; m++)
255 all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
256 all.cint[m] = GEN_INT (m);
259 /* Avoid using hard regs in ways which may be unsupported. */
260 all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
261 all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
262 all.neg = gen_rtx_NEG (mode, all.reg);
263 all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
264 all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
265 all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
266 all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
267 all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
268 all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
269 all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
270 all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
271 all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
272 all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
273 all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
274 all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
275 all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
276 all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
277 all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
279 for (speed = 0; speed < 2; speed++)
281 crtl->maybe_hot_insn_p = speed;
282 set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
284 for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
285 mode = (machine_mode)(mode + 1))
286 init_expmed_one_mode (&all, mode, speed);
288 if (MIN_MODE_PARTIAL_INT != VOIDmode)
289 for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
290 mode = (machine_mode)(mode + 1))
291 init_expmed_one_mode (&all, mode, speed);
293 if (MIN_MODE_VECTOR_INT != VOIDmode)
294 for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
295 mode = (machine_mode)(mode + 1))
296 init_expmed_one_mode (&all, mode, speed);
299 if (alg_hash_used_p ())
301 struct alg_hash_entry *p = alg_hash_entry_ptr (0);
302 memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
304 else
305 set_alg_hash_used_p (true);
306 default_rtl_profile ();
308 ggc_free (all.trunc);
309 ggc_free (all.shift_sub1);
310 ggc_free (all.shift_sub0);
311 ggc_free (all.shift_add);
312 ggc_free (all.shift_mult);
313 ggc_free (all.shift);
314 ggc_free (all.wide_trunc);
315 ggc_free (all.wide_lshr);
316 ggc_free (all.wide_mult);
317 ggc_free (all.zext);
318 ggc_free (all.smod_32);
319 ggc_free (all.sdiv_32);
320 ggc_free (all.udiv);
321 ggc_free (all.sdiv);
322 ggc_free (all.mult);
323 ggc_free (all.neg);
324 ggc_free (all.plus);
325 ggc_free (all.reg);
328 /* Return an rtx representing minus the value of X.
329 MODE is the intended mode of the result,
330 useful if X is a CONST_INT. */
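/* Illustrative example (editor's note): negate_rtx (SImode, const1_rtx)
   folds directly via simplify_unary_operation to (const_int -1), while a
   register operand falls through to expand_unop and expands a negation
   through neg_optab.  */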
333 negate_rtx (machine_mode mode, rtx x)
335 rtx result = simplify_unary_operation (NEG, mode, x, mode);
337 if (result == 0)
338 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
340 return result;
343 /* Whether reverse storage order is supported on the target. */
344 static int reverse_storage_order_supported = -1;
346 /* Check whether reverse storage order is supported on the target. */
348 static void
349 check_reverse_storage_order_support (void)
351 if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
353 reverse_storage_order_supported = 0;
354 sorry ("reverse scalar storage order");
356 else
357 reverse_storage_order_supported = 1;
360 /* Whether reverse FP storage order is supported on the target. */
361 static int reverse_float_storage_order_supported = -1;
363 /* Check whether reverse FP storage order is supported on the target. */
365 static void
366 check_reverse_float_storage_order_support (void)
368 if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
370 reverse_float_storage_order_supported = 0;
371 sorry ("reverse floating-point scalar storage order");
373 else
374 reverse_float_storage_order_supported = 1;
377 /* Return an rtx representing the value of X with reverse storage order.
378 MODE is the intended mode of the result,
379 useful if X is a CONST_INT. */
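/* Illustrative example (editor's note): for a 32-bit scalar integer the
   reversal amounts to a byte swap, so a value of 0x12345678 becomes
   0x78563412; complex modes are handled by recursing on the real and
   imaginary parts separately.  */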
382 flip_storage_order (machine_mode mode, rtx x)
384 scalar_int_mode int_mode;
385 rtx result;
387 if (mode == QImode)
388 return x;
390 if (COMPLEX_MODE_P (mode))
392 rtx real = read_complex_part (x, false);
393 rtx imag = read_complex_part (x, true);
395 real = flip_storage_order (GET_MODE_INNER (mode), real);
396 imag = flip_storage_order (GET_MODE_INNER (mode), imag);
398 return gen_rtx_CONCAT (mode, real, imag);
401 if (__builtin_expect (reverse_storage_order_supported < 0, 0))
402 check_reverse_storage_order_support ();
404 if (!is_a <scalar_int_mode> (mode, &int_mode))
406 if (FLOAT_MODE_P (mode)
407 && __builtin_expect (reverse_float_storage_order_supported < 0, 0))
408 check_reverse_float_storage_order_support ();
410 if (!int_mode_for_size (GET_MODE_PRECISION (mode), 0).exists (&int_mode))
412 sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
413 return x;
415 x = gen_lowpart (int_mode, x);
418 result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
419 if (result == 0)
420 result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
422 if (int_mode != mode)
423 result = gen_lowpart (mode, result);
425 return result;
428 /* If MODE is set, adjust bitfield memory MEM so that it points to the
429 first unit of mode MODE that contains a bitfield of size BITSIZE at
430 bit position BITNUM. If MODE is not set, return a BLKmode reference
431 to every byte in the bitfield. Set *NEW_BITNUM to the bit position
432 of the field within the new memory. */
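/* Worked example (editor's note, assuming 8-bit units and a 32-bit MODE):
   for BITNUM == 37 the field starts 37 % 32 == 5 bits into the second
   32-bit unit, so the MEM is advanced by (37 - 5) / 8 == 4 bytes and
   *NEW_BITNUM becomes 5.  */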
434 static rtx
435 narrow_bit_field_mem (rtx mem, opt_scalar_int_mode mode,
436 unsigned HOST_WIDE_INT bitsize,
437 unsigned HOST_WIDE_INT bitnum,
438 unsigned HOST_WIDE_INT *new_bitnum)
440 scalar_int_mode imode;
441 if (mode.exists (&imode))
443 unsigned int unit = GET_MODE_BITSIZE (imode);
444 *new_bitnum = bitnum % unit;
445 HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
446 return adjust_bitfield_address (mem, imode, offset);
448 else
450 *new_bitnum = bitnum % BITS_PER_UNIT;
451 HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
452 HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
453 / BITS_PER_UNIT);
454 return adjust_bitfield_address_size (mem, BLKmode, offset, size);
458 /* The caller wants to perform insertion or extraction PATTERN on a
459 bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
460 BITREGION_START and BITREGION_END are as for store_bit_field
461 and FIELDMODE is the natural mode of the field.
463 Search for a mode that is compatible with the memory access
464 restrictions and (where applicable) with a register insertion or
465 extraction. Return the new memory on success, storing the adjusted
466 bit position in *NEW_BITNUM. Return null otherwise. */
468 static rtx
469 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
470 rtx op0, HOST_WIDE_INT bitsize,
471 HOST_WIDE_INT bitnum,
472 poly_uint64 bitregion_start,
473 poly_uint64 bitregion_end,
474 machine_mode fieldmode,
475 unsigned HOST_WIDE_INT *new_bitnum)
477 bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
478 bitregion_end, MEM_ALIGN (op0),
479 MEM_VOLATILE_P (op0));
480 scalar_int_mode best_mode;
481 if (iter.next_mode (&best_mode))
483 /* We can use a memory in BEST_MODE. See whether this is true for
484 any wider modes. All other things being equal, we prefer to
485 use the widest mode possible because it tends to expose more
486 CSE opportunities. */
487 if (!iter.prefer_smaller_modes ())
489 /* Limit the search to the mode required by the corresponding
490 register insertion or extraction instruction, if any. */
491 scalar_int_mode limit_mode = word_mode;
492 extraction_insn insn;
493 if (get_best_reg_extraction_insn (&insn, pattern,
494 GET_MODE_BITSIZE (best_mode),
495 fieldmode))
496 limit_mode = insn.field_mode;
498 scalar_int_mode wider_mode;
499 while (iter.next_mode (&wider_mode)
500 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
501 best_mode = wider_mode;
503 return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
504 new_bitnum);
506 return NULL_RTX;
509 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
510 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
511 offset is then BITNUM / BITS_PER_UNIT. */
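/* Illustrative example (editor's note): on a little-endian target whose
   REGMODE_NATURAL_SIZE is 4 bytes, a field is a lowpart only if BITNUM is
   a multiple of 32; on a big-endian target the field must instead start on
   a byte boundary and end on such a 32-bit boundary (or at the end of the
   structure).  */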
513 static bool
514 lowpart_bit_field_p (poly_uint64 bitnum, poly_uint64 bitsize,
515 machine_mode struct_mode)
517 poly_uint64 regsize = REGMODE_NATURAL_SIZE (struct_mode);
518 if (BYTES_BIG_ENDIAN)
519 return (multiple_p (bitnum, BITS_PER_UNIT)
520 && (known_eq (bitnum + bitsize, GET_MODE_BITSIZE (struct_mode))
521 || multiple_p (bitnum + bitsize,
522 regsize * BITS_PER_UNIT)));
523 else
524 return multiple_p (bitnum, regsize * BITS_PER_UNIT);
527 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
528 containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
529 Return false if the access would touch memory outside the range
530 BITREGION_START to BITREGION_END for conformance to the C++ memory
531 model. */
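/* Worked example (editor's note): with a 32-bit FIELDMODE, a field of
   BITSIZE == 8 starting at BITNUM == 28 gives 28 % 32 + 8 == 36 > 32, so
   the access would have to be split and the function returns false.  */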
533 static bool
534 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
535 unsigned HOST_WIDE_INT bitnum,
536 scalar_int_mode fieldmode,
537 poly_uint64 bitregion_start,
538 poly_uint64 bitregion_end)
540 unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
542 /* -fstrict-volatile-bitfields must be enabled and we must have a
543 volatile MEM. */
544 if (!MEM_P (op0)
545 || !MEM_VOLATILE_P (op0)
546 || flag_strict_volatile_bitfields <= 0)
547 return false;
549 /* The bit size must not be larger than the field mode, and
550 the field mode must not be larger than a word. */
551 if (bitsize > modesize || modesize > BITS_PER_WORD)
552 return false;
554 /* Check for cases of unaligned fields that must be split. */
555 if (bitnum % modesize + bitsize > modesize)
556 return false;
558 /* The memory must be sufficiently aligned for a MODESIZE access.
559 This condition guarantees that the memory access will not
560 touch anything after the end of the structure. */
561 if (MEM_ALIGN (op0) < modesize)
562 return false;
564 /* Check for cases where the C++ memory model applies. */
565 if (maybe_ne (bitregion_end, 0U)
566 && (maybe_lt (bitnum - bitnum % modesize, bitregion_start)
567 || maybe_gt (bitnum - bitnum % modesize + modesize - 1,
568 bitregion_end)))
569 return false;
571 return true;
574 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
575 bit number BITNUM can be treated as a simple value of mode MODE.
576 Store the byte offset in *BYTENUM if so. */
578 static bool
579 simple_mem_bitfield_p (rtx op0, poly_uint64 bitsize, poly_uint64 bitnum,
580 machine_mode mode, poly_uint64 *bytenum)
582 return (MEM_P (op0)
583 && multiple_p (bitnum, BITS_PER_UNIT, bytenum)
584 && known_eq (bitsize, GET_MODE_BITSIZE (mode))
585 && (!targetm.slow_unaligned_access (mode, MEM_ALIGN (op0))
586 || (multiple_p (bitnum, GET_MODE_ALIGNMENT (mode))
587 && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
590 /* Try to use instruction INSV to store VALUE into a field of OP0.
591 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is a
592 BLKmode MEM. VALUE_MODE is the mode of VALUE. BITSIZE and BITNUM
593 are as for store_bit_field. */
595 static bool
596 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
597 opt_scalar_int_mode op0_mode,
598 unsigned HOST_WIDE_INT bitsize,
599 unsigned HOST_WIDE_INT bitnum,
600 rtx value, scalar_int_mode value_mode)
602 class expand_operand ops[4];
603 rtx value1;
604 rtx xop0 = op0;
605 rtx_insn *last = get_last_insn ();
606 bool copy_back = false;
608 scalar_int_mode op_mode = insv->field_mode;
609 unsigned int unit = GET_MODE_BITSIZE (op_mode);
610 if (bitsize == 0 || bitsize > unit)
611 return false;
613 if (MEM_P (xop0))
614 /* Get a reference to the first byte of the field. */
615 xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
616 &bitnum);
617 else
619 /* Convert from counting within OP0 to counting in OP_MODE. */
620 if (BYTES_BIG_ENDIAN)
621 bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
623 /* If xop0 is a register, we need it in OP_MODE
624 to make it acceptable to the format of insv. */
625 if (GET_CODE (xop0) == SUBREG)
626 /* We can't just change the mode, because this might clobber op0,
627 and we will need the original value of op0 if insv fails. */
628 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
629 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
630 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
633 /* If the destination is a paradoxical subreg such that we need a
634 truncate to the inner mode, perform the insertion on a temporary and
635 truncate the result to the original destination. Note that we can't
636 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
637 X) 0)) is (reg:N X). */
638 if (GET_CODE (xop0) == SUBREG
639 && REG_P (SUBREG_REG (xop0))
640 && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
641 op_mode))
643 rtx tem = gen_reg_rtx (op_mode);
644 emit_move_insn (tem, xop0);
645 xop0 = tem;
646 copy_back = true;
649 /* There is a similar overflow check at the start of store_bit_field_1,
650 but it only handles the case where the field lies completely outside
651 the register.  The field can also lie partially in the register, and
652 BITSIZE needs to be adjusted for that partial overlap.  Without this
653 fix, pr48335-2.c breaks on big-endian targets that have a bit-insert
654 instruction, such as arm and aarch64. */
656 if (bitsize + bitnum > unit && bitnum < unit)
658 warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
659 "destination object, data truncated into %wu-bit",
660 bitsize, unit - bitnum);
661 bitsize = unit - bitnum;
664 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
665 "backwards" from the size of the unit we are inserting into.
666 Otherwise, we count bits from the most significant on a
667 BYTES/BITS_BIG_ENDIAN machine. */
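/* Worked example (editor's note): when the two conventions differ and
   UNIT == 32, a field of BITSIZE == 8 at BITNUM == 4 is renumbered to
   32 - 8 - 4 == 20 before being handed to the insv pattern.  */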
669 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
670 bitnum = unit - bitsize - bitnum;
672 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
673 value1 = value;
674 if (value_mode != op_mode)
676 if (GET_MODE_BITSIZE (value_mode) >= bitsize)
678 rtx tmp;
679 /* Optimization: Don't bother really extending VALUE
680 if it has all the bits we will actually use. However,
681 if we must narrow it, be sure we do it correctly. */
683 if (GET_MODE_SIZE (value_mode) < GET_MODE_SIZE (op_mode))
685 tmp = simplify_subreg (op_mode, value1, value_mode, 0);
686 if (! tmp)
687 tmp = simplify_gen_subreg (op_mode,
688 force_reg (value_mode, value1),
689 value_mode, 0);
691 else
693 tmp = gen_lowpart_if_possible (op_mode, value1);
694 if (! tmp)
695 tmp = gen_lowpart (op_mode, force_reg (value_mode, value1));
697 value1 = tmp;
699 else if (CONST_INT_P (value))
700 value1 = gen_int_mode (INTVAL (value), op_mode);
701 else
702 /* The parsing phase is supposed to make VALUE's data type
703 match that of the component reference, which is a type
704 at least as wide as the field; so VALUE should have
705 a mode that corresponds to that type. */
706 gcc_assert (CONSTANT_P (value));
709 create_fixed_operand (&ops[0], xop0);
710 create_integer_operand (&ops[1], bitsize);
711 create_integer_operand (&ops[2], bitnum);
712 create_input_operand (&ops[3], value1, op_mode);
713 if (maybe_expand_insn (insv->icode, 4, ops))
715 if (copy_back)
716 convert_move (op0, xop0, true);
717 return true;
719 delete_insns_since (last);
720 return false;
723 /* A subroutine of store_bit_field, with the same arguments. Return true
724 if the operation could be implemented.
726 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
727 no other way of implementing the operation. If FALLBACK_P is false,
728 return false instead. */
730 static bool
731 store_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
732 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
733 machine_mode fieldmode,
734 rtx value, bool reverse, bool fallback_p)
736 rtx op0 = str_rtx;
738 while (GET_CODE (op0) == SUBREG)
740 bitnum += subreg_memory_offset (op0) * BITS_PER_UNIT;
741 op0 = SUBREG_REG (op0);
744 /* No action is needed if the target is a register and if the field
745 lies completely outside that register. This can occur if the source
746 code contains an out-of-bounds access to a small array. */
747 if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
748 return true;
750 /* Use vec_set patterns for inserting parts of vectors whenever
751 available. */
752 machine_mode outermode = GET_MODE (op0);
753 scalar_mode innermode = GET_MODE_INNER (outermode);
754 poly_uint64 pos;
755 if (VECTOR_MODE_P (outermode)
756 && !MEM_P (op0)
757 && optab_handler (vec_set_optab, outermode) != CODE_FOR_nothing
758 && fieldmode == innermode
759 && known_eq (bitsize, GET_MODE_BITSIZE (innermode))
760 && multiple_p (bitnum, GET_MODE_BITSIZE (innermode), &pos))
762 class expand_operand ops[3];
763 enum insn_code icode = optab_handler (vec_set_optab, outermode);
765 create_fixed_operand (&ops[0], op0);
766 create_input_operand (&ops[1], value, innermode);
767 create_integer_operand (&ops[2], pos);
768 if (maybe_expand_insn (icode, 3, ops))
769 return true;
772 /* If the target is a register, overwriting the entire object, or storing
773 a full-word or multi-word field can be done with just a SUBREG. */
774 if (!MEM_P (op0)
775 && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode)))
777 /* Use the subreg machinery either to narrow OP0 to the required
778 words or to cope with mode punning between equal-sized modes.
779 In the latter case, use subreg on the rhs side, not lhs. */
780 rtx sub;
781 HOST_WIDE_INT regnum;
782 poly_uint64 regsize = REGMODE_NATURAL_SIZE (GET_MODE (op0));
783 if (known_eq (bitnum, 0U)
784 && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
786 sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
787 if (sub)
789 if (reverse)
790 sub = flip_storage_order (GET_MODE (op0), sub);
791 emit_move_insn (op0, sub);
792 return true;
795 else if (constant_multiple_p (bitnum, regsize * BITS_PER_UNIT, &regnum)
796 && multiple_p (bitsize, regsize * BITS_PER_UNIT))
798 sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
799 regnum * regsize);
800 if (sub)
802 if (reverse)
803 value = flip_storage_order (fieldmode, value);
804 emit_move_insn (sub, value);
805 return true;
810 /* If the target is memory, storing any naturally aligned field can be
811 done with a simple store. For targets that support fast unaligned
812 memory, any naturally sized, unit aligned field can be done directly. */
813 poly_uint64 bytenum;
814 if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode, &bytenum))
816 op0 = adjust_bitfield_address (op0, fieldmode, bytenum);
817 if (reverse)
818 value = flip_storage_order (fieldmode, value);
819 emit_move_insn (op0, value);
820 return true;
823 /* It's possible we'll need to handle other cases here for
824 polynomial bitnum and bitsize. */
826 /* From here on we need to be looking at a fixed-size insertion. */
827 unsigned HOST_WIDE_INT ibitsize = bitsize.to_constant ();
828 unsigned HOST_WIDE_INT ibitnum = bitnum.to_constant ();
830 /* Make sure we are playing with integral modes. Pun with subregs
831 if we aren't. This must come after the entire register case above,
832 since that case is valid for any mode. The following cases are only
833 valid for integral modes. */
834 opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
835 scalar_int_mode imode;
836 if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
838 if (MEM_P (op0))
839 op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
840 0, MEM_SIZE (op0));
841 else
842 op0 = gen_lowpart (op0_mode.require (), op0);
845 return store_integral_bit_field (op0, op0_mode, ibitsize, ibitnum,
846 bitregion_start, bitregion_end,
847 fieldmode, value, reverse, fallback_p);
850 /* Subroutine of store_bit_field_1, with the same arguments, except
851 that BITSIZE and BITNUM are constant. Handle cases specific to
852 integral modes. If OP0_MODE is defined, it is the mode of OP0,
853 otherwise OP0 is a BLKmode MEM. */
855 static bool
856 store_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
857 unsigned HOST_WIDE_INT bitsize,
858 unsigned HOST_WIDE_INT bitnum,
859 poly_uint64 bitregion_start,
860 poly_uint64 bitregion_end,
861 machine_mode fieldmode,
862 rtx value, bool reverse, bool fallback_p)
864 /* Storing an lsb-aligned field in a register
865 can be done with a movstrict instruction. */
867 if (!MEM_P (op0)
868 && !reverse
869 && lowpart_bit_field_p (bitnum, bitsize, op0_mode.require ())
870 && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode))
871 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
873 class expand_operand ops[2];
874 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
875 rtx arg0 = op0;
876 unsigned HOST_WIDE_INT subreg_off;
878 if (GET_CODE (arg0) == SUBREG)
880 /* Else we've got some float mode source being extracted into
881 a different float mode destination -- this combination of
882 subregs results in Severe Tire Damage. */
883 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
884 || GET_MODE_CLASS (fieldmode) == MODE_INT
885 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
886 arg0 = SUBREG_REG (arg0);
889 subreg_off = bitnum / BITS_PER_UNIT;
890 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
892 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
894 create_fixed_operand (&ops[0], arg0);
895 /* Shrink the source operand to FIELDMODE. */
896 create_convert_operand_to (&ops[1], value, fieldmode, false);
897 if (maybe_expand_insn (icode, 2, ops))
898 return true;
902 /* Handle fields bigger than a word. */
904 if (bitsize > BITS_PER_WORD)
906 /* Here we transfer the words of the field
907 in the order least significant first.
908 This is because the most significant word is the one which may
909 be less than full.
910 However, only do that if the value is not BLKmode. */
912 const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
913 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
914 unsigned int i;
915 rtx_insn *last;
917 /* This is the mode we must force value to, so that there will be enough
918 subwords to extract. Note that fieldmode will often (always?) be
919 VOIDmode, because that is what store_field uses to indicate that this
920 is a bit field, but passing VOIDmode to operand_subword_force
921 is not allowed.
923 The mode must be fixed-size, since insertions into variable-sized
924 objects are meant to be handled before calling this function. */
925 fixed_size_mode value_mode = as_a <fixed_size_mode> (GET_MODE (value));
926 if (value_mode == VOIDmode)
927 value_mode = smallest_int_mode_for_size (nwords * BITS_PER_WORD);
929 last = get_last_insn ();
930 for (i = 0; i < nwords; i++)
932 /* If I is 0, use the low-order word in both field and target;
933 if I is 1, use the next to lowest word; and so on. */
934 unsigned int wordnum = (backwards
935 ? GET_MODE_SIZE (value_mode) / UNITS_PER_WORD
936 - i - 1
937 : i);
938 unsigned int bit_offset = (backwards ^ reverse
939 ? MAX ((int) bitsize - ((int) i + 1)
940 * BITS_PER_WORD,
942 : (int) i * BITS_PER_WORD);
943 rtx value_word = operand_subword_force (value, wordnum, value_mode);
944 unsigned HOST_WIDE_INT new_bitsize =
945 MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
947 /* If the remaining chunk is not a full word, we have to make sure
948 that for big-endian machines the higher-order bits are used. */
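/* Worked example (editor's note, assuming 32-bit words): if only 20 bits
   remain to be stored, the source word is shifted right by 32 - 20 == 12
   so that its 20 most significant bits end up as the low 20 bits consumed
   by the recursive store_bit_field_1 call below.  */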
950 if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
952 int shift = BITS_PER_WORD - new_bitsize;
953 rtx shift_rtx = gen_int_shift_amount (word_mode, shift);
954 value_word = simplify_expand_binop (word_mode, lshr_optab,
955 value_word, shift_rtx,
956 NULL_RTX, true,
957 OPTAB_LIB_WIDEN);
960 if (!store_bit_field_1 (op0, new_bitsize,
961 bitnum + bit_offset,
962 bitregion_start, bitregion_end,
963 word_mode,
964 value_word, reverse, fallback_p))
966 delete_insns_since (last);
967 return false;
970 return true;
973 /* If VALUE has a floating-point or complex mode, access it as an
974 integer of the corresponding size. This can occur on a machine
975 with 64 bit registers that uses SFmode for float. It can also
976 occur for unaligned float or complex fields. */
977 rtx orig_value = value;
978 scalar_int_mode value_mode;
979 if (GET_MODE (value) == VOIDmode)
980 /* By this point we've dealt with values that are bigger than a word,
981 so word_mode is a conservatively correct choice. */
982 value_mode = word_mode;
983 else if (!is_a <scalar_int_mode> (GET_MODE (value), &value_mode))
985 value_mode = int_mode_for_mode (GET_MODE (value)).require ();
986 value = gen_reg_rtx (value_mode);
987 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
990 /* If OP0 is a multi-word register, narrow it to the affected word.
991 If the region spans two words, defer to store_split_bit_field.
992 Don't do this if op0 is a single hard register wider than word
993 such as a float or vector register. */
994 if (!MEM_P (op0)
995 && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD
996 && (!REG_P (op0)
997 || !HARD_REGISTER_P (op0)
998 || hard_regno_nregs (REGNO (op0), op0_mode.require ()) != 1))
1000 if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1002 if (!fallback_p)
1003 return false;
1005 store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1006 bitregion_start, bitregion_end,
1007 value, value_mode, reverse);
1008 return true;
1010 op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
1011 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1012 gcc_assert (op0);
1013 op0_mode = word_mode;
1014 bitnum %= BITS_PER_WORD;
1017 /* From here on we can assume that the field to be stored in fits
1018 within a word. If the destination is a register, it too fits
1019 in a word. */
1021 extraction_insn insv;
1022 if (!MEM_P (op0)
1023 && !reverse
1024 && get_best_reg_extraction_insn (&insv, EP_insv,
1025 GET_MODE_BITSIZE (op0_mode.require ()),
1026 fieldmode)
1027 && store_bit_field_using_insv (&insv, op0, op0_mode,
1028 bitsize, bitnum, value, value_mode))
1029 return true;
1031 /* If OP0 is a memory, try copying it to a register and seeing if a
1032 cheap register alternative is available. */
1033 if (MEM_P (op0) && !reverse)
1035 if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
1036 fieldmode)
1037 && store_bit_field_using_insv (&insv, op0, op0_mode,
1038 bitsize, bitnum, value, value_mode))
1039 return true;
1041 rtx_insn *last = get_last_insn ();
1043 /* Try loading part of OP0 into a register, inserting the bitfield
1044 into that, and then copying the result back to OP0. */
1045 unsigned HOST_WIDE_INT bitpos;
1046 rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1047 bitregion_start, bitregion_end,
1048 fieldmode, &bitpos);
1049 if (xop0)
1051 rtx tempreg = copy_to_reg (xop0);
1052 if (store_bit_field_1 (tempreg, bitsize, bitpos,
1053 bitregion_start, bitregion_end,
1054 fieldmode, orig_value, reverse, false))
1056 emit_move_insn (xop0, tempreg);
1057 return true;
1059 delete_insns_since (last);
1063 if (!fallback_p)
1064 return false;
1066 store_fixed_bit_field (op0, op0_mode, bitsize, bitnum, bitregion_start,
1067 bitregion_end, value, value_mode, reverse);
1068 return true;
1071 /* Generate code to store value from rtx VALUE
1072 into a bit-field within structure STR_RTX
1073 containing BITSIZE bits starting at bit BITNUM.
1075 BITREGION_START is bitpos of the first bitfield in this region.
1076 BITREGION_END is the bitpos of the ending bitfield in this region.
1077 These two fields are 0, if the C++ memory model does not apply,
1078 or we are not interested in keeping track of bitfield regions.
1080 FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1082 If REVERSE is true, the store is to be done in reverse order. */
1084 void
1085 store_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
1086 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1087 machine_mode fieldmode,
1088 rtx value, bool reverse)
1090 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1091 unsigned HOST_WIDE_INT ibitsize = 0, ibitnum = 0;
1092 scalar_int_mode int_mode;
1093 if (bitsize.is_constant (&ibitsize)
1094 && bitnum.is_constant (&ibitnum)
1095 && is_a <scalar_int_mode> (fieldmode, &int_mode)
1096 && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum, int_mode,
1097 bitregion_start, bitregion_end))
1099 /* Storing a full word can be done with a simple store.
1100 We know here that the field can be accessed with one single
1101 instruction. For targets that support unaligned memory,
1102 an unaligned access may be necessary. */
1103 if (ibitsize == GET_MODE_BITSIZE (int_mode))
1105 str_rtx = adjust_bitfield_address (str_rtx, int_mode,
1106 ibitnum / BITS_PER_UNIT);
1107 if (reverse)
1108 value = flip_storage_order (int_mode, value);
1109 gcc_assert (ibitnum % BITS_PER_UNIT == 0);
1110 emit_move_insn (str_rtx, value);
1112 else
1114 rtx temp;
1116 str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize,
1117 ibitnum, &ibitnum);
1118 gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
1119 temp = copy_to_reg (str_rtx);
1120 if (!store_bit_field_1 (temp, ibitsize, ibitnum, 0, 0,
1121 int_mode, value, reverse, true))
1122 gcc_unreachable ();
1124 emit_move_insn (str_rtx, temp);
1127 return;
1130 /* Under the C++0x memory model, we must not touch bits outside the
1131 bit region. Adjust the address to start at the beginning of the
1132 bit region. */
1133 if (MEM_P (str_rtx) && maybe_ne (bitregion_start, 0U))
1135 scalar_int_mode best_mode;
1136 machine_mode addr_mode = VOIDmode;
1138 poly_uint64 offset = exact_div (bitregion_start, BITS_PER_UNIT);
1139 bitnum -= bitregion_start;
1140 poly_int64 size = bits_to_bytes_round_up (bitnum + bitsize);
1141 bitregion_end -= bitregion_start;
1142 bitregion_start = 0;
1143 if (bitsize.is_constant (&ibitsize)
1144 && bitnum.is_constant (&ibitnum)
1145 && get_best_mode (ibitsize, ibitnum,
1146 bitregion_start, bitregion_end,
1147 MEM_ALIGN (str_rtx), INT_MAX,
1148 MEM_VOLATILE_P (str_rtx), &best_mode))
1149 addr_mode = best_mode;
1150 str_rtx = adjust_bitfield_address_size (str_rtx, addr_mode,
1151 offset, size);
1154 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1155 bitregion_start, bitregion_end,
1156 fieldmode, value, reverse, true))
1157 gcc_unreachable ();
1160 /* Use shifts and boolean operations to store VALUE into a bit field of
1161 width BITSIZE in OP0, starting at bit BITNUM. If OP0_MODE is defined,
1162 it is the mode of OP0, otherwise OP0 is a BLKmode MEM. VALUE_MODE is
1163 the mode of VALUE.
1165 If REVERSE is true, the store is to be done in reverse order. */
1167 static void
1168 store_fixed_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1169 unsigned HOST_WIDE_INT bitsize,
1170 unsigned HOST_WIDE_INT bitnum,
1171 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1172 rtx value, scalar_int_mode value_mode, bool reverse)
1174 /* There is a case not handled here:
1175 a structure with a known alignment of just a halfword
1176 and a field split across two aligned halfwords within the structure.
1177 Or likewise a structure with a known alignment of just a byte
1178 and a field split across two bytes.
1179 Such cases are not supposed to be able to occur. */
1181 scalar_int_mode best_mode;
1182 if (MEM_P (op0))
1184 unsigned int max_bitsize = BITS_PER_WORD;
1185 scalar_int_mode imode;
1186 if (op0_mode.exists (&imode) && GET_MODE_BITSIZE (imode) < max_bitsize)
1187 max_bitsize = GET_MODE_BITSIZE (imode);
1189 if (!get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1190 MEM_ALIGN (op0), max_bitsize, MEM_VOLATILE_P (op0),
1191 &best_mode))
1193 /* The only way this should occur is if the field spans word
1194 boundaries. */
1195 store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1196 bitregion_start, bitregion_end,
1197 value, value_mode, reverse);
1198 return;
1201 op0 = narrow_bit_field_mem (op0, best_mode, bitsize, bitnum, &bitnum);
1203 else
1204 best_mode = op0_mode.require ();
1206 store_fixed_bit_field_1 (op0, best_mode, bitsize, bitnum,
1207 value, value_mode, reverse);
1210 /* Helper function for store_fixed_bit_field, stores
1211 the bit field always using MODE, which is the mode of OP0. The other
1212 arguments are as for store_fixed_bit_field. */
1214 static void
1215 store_fixed_bit_field_1 (rtx op0, scalar_int_mode mode,
1216 unsigned HOST_WIDE_INT bitsize,
1217 unsigned HOST_WIDE_INT bitnum,
1218 rtx value, scalar_int_mode value_mode, bool reverse)
1220 rtx temp;
1221 int all_zero = 0;
1222 int all_one = 0;
1224 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1225 for invalid input, such as f5 from gcc.dg/pr48335-2.c. */
1227 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1228 /* BITNUM is the distance between our msb
1229 and that of the containing datum.
1230 Convert it to the distance from the lsb. */
1231 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1233 /* Now BITNUM is always the distance between our lsb
1234 and that of OP0. */
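/* Worked example (editor's note): in a 64-bit MODE on a big-endian target,
   a field of BITSIZE == 8 whose msb-relative position was 16 ends up with
   BITNUM == 64 - 8 - 16 == 40 bits above the lsb of OP0.  */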
1236 /* Shift VALUE left by BITNUM bits. If VALUE is not constant,
1237 we must first convert its mode to MODE. */
1239 if (CONST_INT_P (value))
1241 unsigned HOST_WIDE_INT v = UINTVAL (value);
1243 if (bitsize < HOST_BITS_PER_WIDE_INT)
1244 v &= (HOST_WIDE_INT_1U << bitsize) - 1;
1246 if (v == 0)
1247 all_zero = 1;
1248 else if ((bitsize < HOST_BITS_PER_WIDE_INT
1249 && v == (HOST_WIDE_INT_1U << bitsize) - 1)
1250 || (bitsize == HOST_BITS_PER_WIDE_INT
1251 && v == HOST_WIDE_INT_M1U))
1252 all_one = 1;
1254 value = lshift_value (mode, v, bitnum);
1256 else
1258 int must_and = (GET_MODE_BITSIZE (value_mode) != bitsize
1259 && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1261 if (value_mode != mode)
1262 value = convert_to_mode (mode, value, 1);
1264 if (must_and)
1265 value = expand_binop (mode, and_optab, value,
1266 mask_rtx (mode, 0, bitsize, 0),
1267 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1268 if (bitnum > 0)
1269 value = expand_shift (LSHIFT_EXPR, mode, value,
1270 bitnum, NULL_RTX, 1);
1273 if (reverse)
1274 value = flip_storage_order (mode, value);
1276 /* Now clear the chosen bits in OP0,
1277 except that if VALUE is -1 we need not bother. */
1278 /* We keep the intermediates in registers to allow CSE to combine
1279 consecutive bitfield assignments. */
1281 temp = force_reg (mode, op0);
1283 if (! all_one)
1285 rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
1286 if (reverse)
1287 mask = flip_storage_order (mode, mask);
1288 temp = expand_binop (mode, and_optab, temp, mask,
1289 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1290 temp = force_reg (mode, temp);
1293 /* Now logical-or VALUE into OP0, unless it is zero. */
1295 if (! all_zero)
1297 temp = expand_binop (mode, ior_optab, temp, value,
1298 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1299 temp = force_reg (mode, temp);
1302 if (op0 != temp)
1304 op0 = copy_rtx (op0);
1305 emit_move_insn (op0, temp);
1309 /* Store a bit field that is split across multiple accessible memory objects.
1311 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1312 BITSIZE is the field width; BITPOS the position of its first bit
1313 (within the word).
1314 VALUE is the value to store, which has mode VALUE_MODE.
1315 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
1316 a BLKmode MEM.
1318 If REVERSE is true, the store is to be done in reverse order.
1320 This does not yet handle fields wider than BITS_PER_WORD. */
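/* Worked example (editor's note, assuming UNIT == 32): a 6-bit field at
   BITPOS == 30 is stored by the loop below in two pieces: 2 bits at
   position 30 of the first unit, then the remaining 4 bits at position 0
   of the second unit.  */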
1322 static void
1323 store_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1324 unsigned HOST_WIDE_INT bitsize,
1325 unsigned HOST_WIDE_INT bitpos,
1326 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1327 rtx value, scalar_int_mode value_mode, bool reverse)
1329 unsigned int unit, total_bits, bitsdone = 0;
1331 /* Make sure UNIT isn't larger than BITS_PER_WORD, since we can only
1332 handle that much at a time. */
1333 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1334 unit = BITS_PER_WORD;
1335 else
1336 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1338 /* If OP0 is a memory with a mode, then UNIT must not be larger than
1339 OP0's mode as well. Otherwise, store_fixed_bit_field will call us
1340 again, and we will mutually recurse forever. */
1341 if (MEM_P (op0) && op0_mode.exists ())
1342 unit = MIN (unit, GET_MODE_BITSIZE (op0_mode.require ()));
1344 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1345 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1346 that VALUE might be a floating-point constant. */
1347 if (CONSTANT_P (value) && !CONST_INT_P (value))
1349 rtx word = gen_lowpart_common (word_mode, value);
1351 if (word && (value != word))
1352 value = word;
1353 else
1354 value = gen_lowpart_common (word_mode, force_reg (value_mode, value));
1355 value_mode = word_mode;
1358 total_bits = GET_MODE_BITSIZE (value_mode);
1360 while (bitsdone < bitsize)
1362 unsigned HOST_WIDE_INT thissize;
1363 unsigned HOST_WIDE_INT thispos;
1364 unsigned HOST_WIDE_INT offset;
1365 rtx part;
1367 offset = (bitpos + bitsdone) / unit;
1368 thispos = (bitpos + bitsdone) % unit;
1370 /* When the region of bytes we can touch is restricted, decrease
1371 UNIT close to the end of the region as needed. If op0 is a REG
1372 or SUBREG of REG, don't do this, as there can't be data races
1373 on a register and we can expand shorter code in some cases. */
1374 if (maybe_ne (bitregion_end, 0U)
1375 && unit > BITS_PER_UNIT
1376 && maybe_gt (bitpos + bitsdone - thispos + unit, bitregion_end + 1)
1377 && !REG_P (op0)
1378 && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1380 unit = unit / 2;
1381 continue;
1384 /* THISSIZE must not overrun a word boundary. Otherwise,
1385 store_fixed_bit_field will call us again, and we will mutually
1386 recurse forever. */
1387 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1388 thissize = MIN (thissize, unit - thispos);
1390 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1392 /* Fetch successively less significant portions. */
1393 if (CONST_INT_P (value))
1394 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1395 >> (bitsize - bitsdone - thissize))
1396 & ((HOST_WIDE_INT_1 << thissize) - 1));
1397 /* Likewise, but the source is little-endian. */
1398 else if (reverse)
1399 part = extract_fixed_bit_field (word_mode, value, value_mode,
1400 thissize,
1401 bitsize - bitsdone - thissize,
1402 NULL_RTX, 1, false);
1403 else
1404 /* The args are chosen so that the last part includes the
1405 lsb. Give extract_bit_field the value it needs (with
1406 endianness compensation) to fetch the piece we want. */
1407 part = extract_fixed_bit_field (word_mode, value, value_mode,
1408 thissize,
1409 total_bits - bitsize + bitsdone,
1410 NULL_RTX, 1, false);
1412 else
1414 /* Fetch successively more significant portions. */
1415 if (CONST_INT_P (value))
1416 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1417 >> bitsdone)
1418 & ((HOST_WIDE_INT_1 << thissize) - 1));
1419 /* Likewise, but the source is big-endian. */
1420 else if (reverse)
1421 part = extract_fixed_bit_field (word_mode, value, value_mode,
1422 thissize,
1423 total_bits - bitsdone - thissize,
1424 NULL_RTX, 1, false);
1425 else
1426 part = extract_fixed_bit_field (word_mode, value, value_mode,
1427 thissize, bitsdone, NULL_RTX,
1428 1, false);
1431 /* If OP0 is a register, then handle OFFSET here. */
1432 rtx op0_piece = op0;
1433 opt_scalar_int_mode op0_piece_mode = op0_mode;
1434 if (SUBREG_P (op0) || REG_P (op0))
1436 scalar_int_mode imode;
1437 if (op0_mode.exists (&imode)
1438 && GET_MODE_SIZE (imode) < UNITS_PER_WORD)
1440 if (offset)
1441 op0_piece = const0_rtx;
1443 else
1445 op0_piece = operand_subword_force (op0,
1446 offset * unit / BITS_PER_WORD,
1447 GET_MODE (op0));
1448 op0_piece_mode = word_mode;
1450 offset &= BITS_PER_WORD / unit - 1;
1453 /* OFFSET is in UNITs, and UNIT is in bits.  If OP0_PIECE is const0_rtx,
1454 it is just an out-of-bounds access.  Ignore it. */
1455 if (op0_piece != const0_rtx)
1456 store_fixed_bit_field (op0_piece, op0_piece_mode, thissize,
1457 offset * unit + thispos, bitregion_start,
1458 bitregion_end, part, word_mode, reverse);
1459 bitsdone += thissize;
1463 /* A subroutine of extract_bit_field_1 that converts return value X
1464 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1465 to extract_bit_field. */
1467 static rtx
1468 convert_extracted_bit_field (rtx x, machine_mode mode,
1469 machine_mode tmode, bool unsignedp)
1471 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1472 return x;
1474 /* If the x mode is not a scalar integral, first convert to the
1475 integer mode of that size and then access it as a floating-point
1476 value via a SUBREG. */
1477 if (!SCALAR_INT_MODE_P (tmode))
1479 scalar_int_mode int_mode = int_mode_for_mode (tmode).require ();
1480 x = convert_to_mode (int_mode, x, unsignedp);
1481 x = force_reg (int_mode, x);
1482 return gen_lowpart (tmode, x);
1485 return convert_to_mode (tmode, x, unsignedp);
1488 /* Try to use an ext(z)v pattern to extract a field from OP0.
1489 Return the extracted value on success, otherwise return null.
1490 EXTV describes the extraction instruction to use. If OP0_MODE
1491 is defined, it is the mode of OP0, otherwise OP0 is a BLKmode MEM.
1492 The other arguments are as for extract_bit_field. */
1494 static rtx
1495 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1496 opt_scalar_int_mode op0_mode,
1497 unsigned HOST_WIDE_INT bitsize,
1498 unsigned HOST_WIDE_INT bitnum,
1499 int unsignedp, rtx target,
1500 machine_mode mode, machine_mode tmode)
1502 class expand_operand ops[4];
1503 rtx spec_target = target;
1504 rtx spec_target_subreg = 0;
1505 scalar_int_mode ext_mode = extv->field_mode;
1506 unsigned unit = GET_MODE_BITSIZE (ext_mode);
1508 if (bitsize == 0 || unit < bitsize)
1509 return NULL_RTX;
1511 if (MEM_P (op0))
1512 /* Get a reference to the first byte of the field. */
1513 op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1514 &bitnum);
1515 else
1517 /* Convert from counting within OP0 to counting in EXT_MODE. */
1518 if (BYTES_BIG_ENDIAN)
1519 bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
1521 /* If op0 is a register, we need it in EXT_MODE to make it
1522 acceptable to the format of ext(z)v. */
1523 if (GET_CODE (op0) == SUBREG && op0_mode.require () != ext_mode)
1524 return NULL_RTX;
1525 if (REG_P (op0) && op0_mode.require () != ext_mode)
1526 op0 = gen_lowpart_SUBREG (ext_mode, op0);
1529 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1530 "backwards" from the size of the unit we are extracting from.
1531 Otherwise, we count bits from the most significant on a
1532 BYTES/BITS_BIG_ENDIAN machine. */
1534 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1535 bitnum = unit - bitsize - bitnum;
1537 if (target == 0)
1538 target = spec_target = gen_reg_rtx (tmode);
1540 if (GET_MODE (target) != ext_mode)
1542 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1543 between the mode of the extraction (word_mode) and the target
1544 mode. Instead, create a temporary and use convert_move to set
1545 the target. */
1546 if (REG_P (target)
1547 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1549 target = gen_lowpart (ext_mode, target);
1550 if (partial_subreg_p (GET_MODE (spec_target), ext_mode))
1551 spec_target_subreg = target;
1553 else
1554 target = gen_reg_rtx (ext_mode);
1557 create_output_operand (&ops[0], target, ext_mode);
1558 create_fixed_operand (&ops[1], op0);
1559 create_integer_operand (&ops[2], bitsize);
1560 create_integer_operand (&ops[3], bitnum);
1561 if (maybe_expand_insn (extv->icode, 4, ops))
1563 target = ops[0].value;
1564 if (target == spec_target)
1565 return target;
1566 if (target == spec_target_subreg)
1567 return spec_target;
1568 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1570 return NULL_RTX;
1573 /* See whether it would be valid to extract the part of OP0 described
1574 by BITNUM and BITSIZE into a value of mode MODE using a subreg
1575 operation. Return the subreg if so, otherwise return null. */
1577 static rtx
1578 extract_bit_field_as_subreg (machine_mode mode, rtx op0,
1579 poly_uint64 bitsize, poly_uint64 bitnum)
1581 poly_uint64 bytenum;
1582 if (multiple_p (bitnum, BITS_PER_UNIT, &bytenum)
1583 && known_eq (bitsize, GET_MODE_BITSIZE (mode))
1584 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1585 && TRULY_NOOP_TRUNCATION_MODES_P (mode, GET_MODE (op0)))
1586 return simplify_gen_subreg (mode, op0, GET_MODE (op0), bytenum);
1587 return NULL_RTX;
1590 /* A subroutine of extract_bit_field, with the same arguments.
1591 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1592 if we can find no other means of implementing the operation.
1593 If FALLBACK_P is false, return NULL instead. */
1595 static rtx
1596 extract_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
1597 int unsignedp, rtx target, machine_mode mode,
1598 machine_mode tmode, bool reverse, bool fallback_p,
1599 rtx *alt_rtl)
1601 rtx op0 = str_rtx;
1602 machine_mode mode1;
1604 if (tmode == VOIDmode)
1605 tmode = mode;
1607 while (GET_CODE (op0) == SUBREG)
1609 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1610 op0 = SUBREG_REG (op0);
1613 /* If we have an out-of-bounds access to a register, just return an
1614 uninitialized register of the required mode. This can occur if the
1615 source code contains an out-of-bounds access to a small array. */
1616 if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
1617 return gen_reg_rtx (tmode);
1619 if (REG_P (op0)
1620 && mode == GET_MODE (op0)
1621 && known_eq (bitnum, 0U)
1622 && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
1624 if (reverse)
1625 op0 = flip_storage_order (mode, op0);
1626 /* We're trying to extract a full register from itself. */
1627 return op0;
1630 /* First try to check for vector from vector extractions. */
1631 if (VECTOR_MODE_P (GET_MODE (op0))
1632 && !MEM_P (op0)
1633 && VECTOR_MODE_P (tmode)
1634 && known_eq (bitsize, GET_MODE_BITSIZE (tmode))
1635 && maybe_gt (GET_MODE_SIZE (GET_MODE (op0)), GET_MODE_SIZE (tmode)))
1637 machine_mode new_mode = GET_MODE (op0);
1638 if (GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode))
1640 scalar_mode inner_mode = GET_MODE_INNER (tmode);
1641 poly_uint64 nunits;
1642 if (!multiple_p (GET_MODE_BITSIZE (GET_MODE (op0)),
1643 GET_MODE_UNIT_BITSIZE (tmode), &nunits)
1644 || !mode_for_vector (inner_mode, nunits).exists (&new_mode)
1645 || !VECTOR_MODE_P (new_mode)
1646 || maybe_ne (GET_MODE_SIZE (new_mode),
1647 GET_MODE_SIZE (GET_MODE (op0)))
1648 || GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode)
1649 || !targetm.vector_mode_supported_p (new_mode))
1650 new_mode = VOIDmode;
1652 poly_uint64 pos;
1653 if (new_mode != VOIDmode
1654 && (convert_optab_handler (vec_extract_optab, new_mode, tmode)
1655 != CODE_FOR_nothing)
1656 && multiple_p (bitnum, GET_MODE_BITSIZE (tmode), &pos))
1658 class expand_operand ops[3];
1659 machine_mode outermode = new_mode;
1660 machine_mode innermode = tmode;
1661 enum insn_code icode
1662 = convert_optab_handler (vec_extract_optab, outermode, innermode);
1664 if (new_mode != GET_MODE (op0))
1665 op0 = gen_lowpart (new_mode, op0);
1666 create_output_operand (&ops[0], target, innermode);
1667 ops[0].target = 1;
1668 create_input_operand (&ops[1], op0, outermode);
1669 create_integer_operand (&ops[2], pos);
1670 if (maybe_expand_insn (icode, 3, ops))
1672 if (alt_rtl && ops[0].target)
1673 *alt_rtl = target;
1674 target = ops[0].value;
1675 if (GET_MODE (target) != mode)
1676 return gen_lowpart (tmode, target);
1677 return target;
1682 /* See if we can get a better vector mode before extracting. */
1683 if (VECTOR_MODE_P (GET_MODE (op0))
1684 && !MEM_P (op0)
1685 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1687 machine_mode new_mode;
1689 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1690 new_mode = MIN_MODE_VECTOR_FLOAT;
1691 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1692 new_mode = MIN_MODE_VECTOR_FRACT;
1693 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1694 new_mode = MIN_MODE_VECTOR_UFRACT;
1695 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1696 new_mode = MIN_MODE_VECTOR_ACCUM;
1697 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1698 new_mode = MIN_MODE_VECTOR_UACCUM;
1699 else
1700 new_mode = MIN_MODE_VECTOR_INT;
1702 FOR_EACH_MODE_FROM (new_mode, new_mode)
1703 if (known_eq (GET_MODE_SIZE (new_mode), GET_MODE_SIZE (GET_MODE (op0)))
1704 && known_eq (GET_MODE_UNIT_SIZE (new_mode), GET_MODE_SIZE (tmode))
1705 && targetm.vector_mode_supported_p (new_mode))
1706 break;
1707 if (new_mode != VOIDmode)
1708 op0 = gen_lowpart (new_mode, op0);
1711 /* Use vec_extract patterns for extracting parts of vectors whenever
1712 available. If that fails, see whether the current modes and bitregion
1713 give a natural subreg. */
1714 machine_mode outermode = GET_MODE (op0);
1715 if (VECTOR_MODE_P (outermode) && !MEM_P (op0))
1717 scalar_mode innermode = GET_MODE_INNER (outermode);
1718 enum insn_code icode
1719 = convert_optab_handler (vec_extract_optab, outermode, innermode);
1720 poly_uint64 pos;
1721 if (icode != CODE_FOR_nothing
1722 && known_eq (bitsize, GET_MODE_BITSIZE (innermode))
1723 && multiple_p (bitnum, GET_MODE_BITSIZE (innermode), &pos))
1725 class expand_operand ops[3];
1727 create_output_operand (&ops[0], target, innermode);
1728 ops[0].target = 1;
1729 create_input_operand (&ops[1], op0, outermode);
1730 create_integer_operand (&ops[2], pos);
1731 if (maybe_expand_insn (icode, 3, ops))
1733 if (alt_rtl && ops[0].target)
1734 *alt_rtl = target;
1735 target = ops[0].value;
1736 if (GET_MODE (target) != mode)
1737 return gen_lowpart (tmode, target);
1738 return target;
1741 /* Using subregs is useful if we're extracting one register vector
1742 from a multi-register vector. extract_bit_field_as_subreg checks
1743 for valid bitsize and bitnum, so we don't need to do that here. */
1744 if (VECTOR_MODE_P (mode))
1746 rtx sub = extract_bit_field_as_subreg (mode, op0, bitsize, bitnum);
1747 if (sub)
1748 return sub;
1752 /* Make sure we are playing with integral modes. Pun with subregs
1753 if we aren't. */
1754 opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
1755 scalar_int_mode imode;
1756 if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
1758 if (MEM_P (op0))
1759 op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
1760 0, MEM_SIZE (op0));
1761 else if (op0_mode.exists (&imode))
1763 op0 = gen_lowpart (imode, op0);
1765 /* If we got a SUBREG, force it into a register since we
1766 aren't going to be able to do another SUBREG on it. */
1767 if (GET_CODE (op0) == SUBREG)
1768 op0 = force_reg (imode, op0);
1770 else
1772 poly_int64 size = GET_MODE_SIZE (GET_MODE (op0));
1773 rtx mem = assign_stack_temp (GET_MODE (op0), size);
1774 emit_move_insn (mem, op0);
1775 op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1779 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1780 If that's wrong, the solution is to test for it and set TARGET to 0
1781 if needed. */
1783 /* Get the mode of the field to use for atomic access or subreg
1784 conversion. */
1785 if (!SCALAR_INT_MODE_P (tmode)
1786 || !mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0).exists (&mode1))
1787 mode1 = mode;
1788 gcc_assert (mode1 != BLKmode);
1790 /* Extraction of a full MODE1 value can be done with a subreg as long
1791 as the least significant bit of the value is the least significant
1792 bit of either OP0 or a word of OP0. */
1793 if (!MEM_P (op0) && !reverse)
1795 rtx sub = extract_bit_field_as_subreg (mode1, op0, bitsize, bitnum);
1796 if (sub)
1797 return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1800 /* Extraction of a full MODE1 value can be done with a load as long as
1801 the field is on a byte boundary and is sufficiently aligned. */
1802 poly_uint64 bytenum;
1803 if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1, &bytenum))
1805 op0 = adjust_bitfield_address (op0, mode1, bytenum);
1806 if (reverse)
1807 op0 = flip_storage_order (mode1, op0);
1808 return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1811 /* If we have a memory source and a non-constant bit offset, restrict
1812 the memory to the referenced bytes. This is a worst-case fallback
1813 but is useful for things like vector booleans. */
1814 if (MEM_P (op0) && !bitnum.is_constant ())
1816 bytenum = bits_to_bytes_round_down (bitnum);
1817 bitnum = num_trailing_bits (bitnum);
1818 poly_uint64 bytesize = bits_to_bytes_round_up (bitnum + bitsize);
1819 op0 = adjust_bitfield_address_size (op0, BLKmode, bytenum, bytesize);
1820 op0_mode = opt_scalar_int_mode ();
1823 /* It's possible we'll need to handle other cases here for
1824 polynomial bitnum and bitsize. */
1826 /* From here on we need to be looking at a fixed-size extraction. */
1827 return extract_integral_bit_field (op0, op0_mode, bitsize.to_constant (),
1828 bitnum.to_constant (), unsignedp,
1829 target, mode, tmode, reverse, fallback_p);
1832 /* Subroutine of extract_bit_field_1, with the same arguments, except
1833 that BITSIZE and BITNUM are constant. Handle cases specific to
1834 integral modes. If OP0_MODE is defined, it is the mode of OP0,
1835 otherwise OP0 is a BLKmode MEM. */
1837 static rtx
1838 extract_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1839 unsigned HOST_WIDE_INT bitsize,
1840 unsigned HOST_WIDE_INT bitnum, int unsignedp,
1841 rtx target, machine_mode mode, machine_mode tmode,
1842 bool reverse, bool fallback_p)
1844 /* Handle fields bigger than a word. */
1846 if (bitsize > BITS_PER_WORD)
1848 /* Here we transfer the words of the field
1849 in the order least significant first.
1850 This is because the most significant word is the one which may
1851 be less than full. */
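/* For instance, extracting a 48-bit field with 32-bit words needs
   nwords == 2 passes: the low 32 bits are fetched first and the
   partial (16-bit) most significant word last.  */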
1853 const bool backwards = WORDS_BIG_ENDIAN;
1854 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1855 unsigned int i;
1856 rtx_insn *last;
1858 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1859 target = gen_reg_rtx (mode);
1861 /* In case we're about to clobber a base register or something
1862 (see gcc.c-torture/execute/20040625-1.c). */
1863 if (reg_mentioned_p (target, op0))
1864 target = gen_reg_rtx (mode);
1866 /* Indicate for flow that the entire target reg is being set. */
1867 emit_clobber (target);
1869 /* The mode must be fixed-size, since extract_bit_field_1 handles
1870 extractions from variable-sized objects before calling this
1871 function. */
1872 unsigned int target_size
1873 = GET_MODE_SIZE (GET_MODE (target)).to_constant ();
1874 last = get_last_insn ();
1875 for (i = 0; i < nwords; i++)
1877 /* If I is 0, use the low-order word in both field and target;
1878 if I is 1, use the next to lowest word; and so on. */
1879 /* Word number in TARGET to use. */
1880 unsigned int wordnum
1881 = (backwards ? target_size / UNITS_PER_WORD - i - 1 : i);
1882 /* Offset from start of field in OP0. */
1883 unsigned int bit_offset = (backwards ^ reverse
1884 ? MAX ((int) bitsize - ((int) i + 1)
1885 * BITS_PER_WORD,
1886 0)
1887 : (int) i * BITS_PER_WORD);
1888 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1889 rtx result_part
1890 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1891 bitsize - i * BITS_PER_WORD),
1892 bitnum + bit_offset, 1, target_part,
1893 mode, word_mode, reverse, fallback_p, NULL);
1895 gcc_assert (target_part);
1896 if (!result_part)
1898 delete_insns_since (last);
1899 return NULL;
1902 if (result_part != target_part)
1903 emit_move_insn (target_part, result_part);
1906 if (unsignedp)
1908 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1909 need to be zero'd out. */
1910 if (target_size > nwords * UNITS_PER_WORD)
1912 unsigned int i, total_words;
1914 total_words = target_size / UNITS_PER_WORD;
1915 for (i = nwords; i < total_words; i++)
1916 emit_move_insn
1917 (operand_subword (target,
1918 backwards ? total_words - i - 1 : i,
1919 1, VOIDmode),
1920 const0_rtx);
1922 return target;
1925 /* Signed bit field: sign-extend with two arithmetic shifts. */
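/* E.g. for a 32-bit MODE and an 8-bit field already sitting in the low
   bits, the pair of shifts is (v << 24) >> 24 with an arithmetic right
   shift, so the pattern 0xf0 becomes 0xfffffff0.  */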
1926 target = expand_shift (LSHIFT_EXPR, mode, target,
1927 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1928 return expand_shift (RSHIFT_EXPR, mode, target,
1929 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1932 /* If OP0 is a multi-word register, narrow it to the affected word.
1933 If the region spans two words, defer to extract_split_bit_field. */
1934 if (!MEM_P (op0) && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD)
1936 if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1938 if (!fallback_p)
1939 return NULL_RTX;
1940 target = extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
1941 unsignedp, reverse);
1942 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1944 op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
1945 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1946 op0_mode = word_mode;
1947 bitnum %= BITS_PER_WORD;
1950 /* From here on we know the desired field is smaller than a word.
1951 If OP0 is a register, it too fits within a word. */
1952 enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1953 extraction_insn extv;
1954 if (!MEM_P (op0)
1955 && !reverse
1956 /* ??? We could limit the structure size to the part of OP0 that
1957 contains the field, with appropriate checks for endianness
1958 and TARGET_TRULY_NOOP_TRUNCATION. */
1959 && get_best_reg_extraction_insn (&extv, pattern,
1960 GET_MODE_BITSIZE (op0_mode.require ()),
1961 tmode))
1963 rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
1964 bitsize, bitnum,
1965 unsignedp, target, mode,
1966 tmode);
1967 if (result)
1968 return result;
1971 /* If OP0 is a memory, try copying it to a register and seeing if a
1972 cheap register alternative is available. */
1973 if (MEM_P (op0) && !reverse)
1975 if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1976 tmode))
1978 rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
1979 bitsize, bitnum,
1980 unsignedp, target, mode,
1981 tmode);
1982 if (result)
1983 return result;
1986 rtx_insn *last = get_last_insn ();
1988 /* Try loading part of OP0 into a register and extracting the
1989 bitfield from that. */
1990 unsigned HOST_WIDE_INT bitpos;
1991 rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1992 0, 0, tmode, &bitpos);
1993 if (xop0)
1995 xop0 = copy_to_reg (xop0);
1996 rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1997 unsignedp, target,
1998 mode, tmode, reverse, false, NULL);
1999 if (result)
2000 return result;
2001 delete_insns_since (last);
2005 if (!fallback_p)
2006 return NULL;
2008 /* Find a correspondingly-sized integer field, so we can apply
2009 shifts and masks to it. */
2010 scalar_int_mode int_mode;
2011 if (!int_mode_for_mode (tmode).exists (&int_mode))
2012 /* If this fails, we should probably push op0 out to memory and then
2013 do a load. */
2014 int_mode = int_mode_for_mode (mode).require ();
2016 target = extract_fixed_bit_field (int_mode, op0, op0_mode, bitsize,
2017 bitnum, target, unsignedp, reverse);
2019 /* Complex values must be reversed piecewise, so we need to undo the global
2020 reversal, convert to the complex mode and reverse again. */
2021 if (reverse && COMPLEX_MODE_P (tmode))
2023 target = flip_storage_order (int_mode, target);
2024 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2025 target = flip_storage_order (tmode, target);
2027 else
2028 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2030 return target;
2033 /* Generate code to extract a bit-field from STR_RTX
2034 containing BITSIZE bits, starting at BITNUM,
2035 and put it in TARGET if possible (if TARGET is nonzero).
2036 Regardless of TARGET, we return the rtx for where the value is placed.
2038 STR_RTX is the structure containing the byte (a REG or MEM).
2039 UNSIGNEDP is nonzero if this is an unsigned bit field.
2040 MODE is the natural mode of the field value once extracted.
2041 TMODE is the mode the caller would like the value to have;
2042 but the value may be returned with type MODE instead.
2044 If REVERSE is true, the extraction is to be done in reverse order.
2046 If a TARGET is specified and we can store in it at no extra cost,
2047 we do so, and return TARGET.
2048 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
2049 if they are equally easy.
2051 If the result can be stored at TARGET, and ALT_RTL is non-NULL,
2052 then *ALT_RTL is set to TARGET (before legitimization). */
2055 extract_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
2056 int unsignedp, rtx target, machine_mode mode,
2057 machine_mode tmode, bool reverse, rtx *alt_rtl)
2059 machine_mode mode1;
2061 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
2062 if (maybe_ne (GET_MODE_BITSIZE (GET_MODE (str_rtx)), 0))
2063 mode1 = GET_MODE (str_rtx);
2064 else if (target && maybe_ne (GET_MODE_BITSIZE (GET_MODE (target)), 0))
2065 mode1 = GET_MODE (target);
2066 else
2067 mode1 = tmode;
2069 unsigned HOST_WIDE_INT ibitsize, ibitnum;
2070 scalar_int_mode int_mode;
2071 if (bitsize.is_constant (&ibitsize)
2072 && bitnum.is_constant (&ibitnum)
2073 && is_a <scalar_int_mode> (mode1, &int_mode)
2074 && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum,
2075 int_mode, 0, 0))
2077 /* Extraction of a full INT_MODE value can be done with a simple load.
2078 We know here that the field can be accessed with one single
2079 instruction. For targets that support unaligned memory,
2080 an unaligned access may be necessary. */
2081 if (ibitsize == GET_MODE_BITSIZE (int_mode))
2083 rtx result = adjust_bitfield_address (str_rtx, int_mode,
2084 ibitnum / BITS_PER_UNIT);
2085 if (reverse)
2086 result = flip_storage_order (int_mode, result);
2087 gcc_assert (ibitnum % BITS_PER_UNIT == 0);
2088 return convert_extracted_bit_field (result, mode, tmode, unsignedp);
2091 str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize, ibitnum,
2092 &ibitnum);
2093 gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
2094 str_rtx = copy_to_reg (str_rtx);
2095 return extract_bit_field_1 (str_rtx, ibitsize, ibitnum, unsignedp,
2096 target, mode, tmode, reverse, true, alt_rtl);
2099 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
2100 target, mode, tmode, reverse, true, alt_rtl);
2103 /* Use shifts and boolean operations to extract a field of BITSIZE bits
2104 from bit BITNUM of OP0. If OP0_MODE is defined, it is the mode of OP0,
2105 otherwise OP0 is a BLKmode MEM.
2107 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
2108 If REVERSE is true, the extraction is to be done in reverse order.
2110 If TARGET is nonzero, attempts to store the value there
2111 and return TARGET, but this is not guaranteed.
2112 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
2114 static rtx
2115 extract_fixed_bit_field (machine_mode tmode, rtx op0,
2116 opt_scalar_int_mode op0_mode,
2117 unsigned HOST_WIDE_INT bitsize,
2118 unsigned HOST_WIDE_INT bitnum, rtx target,
2119 int unsignedp, bool reverse)
2121 scalar_int_mode mode;
2122 if (MEM_P (op0))
2124 if (!get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
2125 BITS_PER_WORD, MEM_VOLATILE_P (op0), &mode))
2126 /* The only way this should occur is if the field spans word
2127 boundaries. */
2128 return extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
2129 unsignedp, reverse);
2131 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
2133 else
2134 mode = op0_mode.require ();
2136 return extract_fixed_bit_field_1 (tmode, op0, mode, bitsize, bitnum,
2137 target, unsignedp, reverse);
2140 /* Helper function for extract_fixed_bit_field, extracts
2141 the bit field always using MODE, which is the mode of OP0.
2142 The other arguments are as for extract_fixed_bit_field. */
2144 static rtx
2145 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0, scalar_int_mode mode,
2146 unsigned HOST_WIDE_INT bitsize,
2147 unsigned HOST_WIDE_INT bitnum, rtx target,
2148 int unsignedp, bool reverse)
2150 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
2151 for invalid input, such as extract equivalent of f5 from
2152 gcc.dg/pr48335-2.c. */
2154 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2155 /* BITNUM is the distance between our msb and that of OP0.
2156 Convert it to the distance from the lsb. */
2157 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
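/* E.g. in a 32-bit mode, an 8-bit field whose msb-relative position is 4
   ends up at lsb-relative position 32 - 8 - 4 == 20.  */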
2159 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
2160 We have reduced the big-endian case to the little-endian case. */
2161 if (reverse)
2162 op0 = flip_storage_order (mode, op0);
2164 if (unsignedp)
2166 if (bitnum)
2168 /* If the field does not already start at the lsb,
2169 shift it so it does. */
2170 /* Maybe propagate the target for the shift. */
2171 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2172 if (tmode != mode)
2173 subtarget = 0;
2174 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
2176 /* Convert the value to the desired mode. TMODE must also be a
2177 scalar integer for this conversion to make sense, since we
2178 shouldn't reinterpret the bits. */
2179 scalar_int_mode new_mode = as_a <scalar_int_mode> (tmode);
2180 if (mode != new_mode)
2181 op0 = convert_to_mode (new_mode, op0, 1);
2183 /* Unless the msb of the field used to be the msb when we shifted,
2184 mask out the upper bits. */
2186 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
2187 return expand_binop (new_mode, and_optab, op0,
2188 mask_rtx (new_mode, 0, bitsize, 0),
2189 target, 1, OPTAB_LIB_WIDEN);
2190 return op0;
2193 /* To extract a signed bit-field, first shift its msb to the msb of the word,
2194 then arithmetic-shift its lsb to the lsb of the word. */
2195 op0 = force_reg (mode, op0);
2197 /* Find the narrowest integer mode that contains the field. */
2199 opt_scalar_int_mode mode_iter;
2200 FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT)
2201 if (GET_MODE_BITSIZE (mode_iter.require ()) >= bitsize + bitnum)
2202 break;
2204 mode = mode_iter.require ();
2205 op0 = convert_to_mode (mode, op0, 0);
2207 if (mode != tmode)
2208 target = 0;
2210 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2212 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2213 /* Maybe propagate the target for the shift. */
2214 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2215 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2218 return expand_shift (RSHIFT_EXPR, mode, op0,
2219 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
2222 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2223 VALUE << BITPOS. */
2225 static rtx
2226 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2227 int bitpos)
2229 return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2232 /* Extract a bit field that is split across two words
2233 and return an RTX for the result.
2235 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2236 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2237 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2238 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
2239 a BLKmode MEM.
2241 If REVERSE is true, the extraction is to be done in reverse order. */
2243 static rtx
2244 extract_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
2245 unsigned HOST_WIDE_INT bitsize,
2246 unsigned HOST_WIDE_INT bitpos, int unsignedp,
2247 bool reverse)
2249 unsigned int unit;
2250 unsigned int bitsdone = 0;
2251 rtx result = NULL_RTX;
2252 int first = 1;
2254 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
2255 much at a time. */
2256 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2257 unit = BITS_PER_WORD;
2258 else
2259 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2261 while (bitsdone < bitsize)
2263 unsigned HOST_WIDE_INT thissize;
2264 rtx part;
2265 unsigned HOST_WIDE_INT thispos;
2266 unsigned HOST_WIDE_INT offset;
2268 offset = (bitpos + bitsdone) / unit;
2269 thispos = (bitpos + bitsdone) % unit;
2271 /* THISSIZE must not overrun a word boundary. Otherwise,
2272 extract_fixed_bit_field will call us again, and we will mutually
2273 recurse forever. */
2274 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2275 thissize = MIN (thissize, unit - thispos);
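/* For example, with BITSIZE == 20, BITPOS == 26 and UNIT == 32 the first
   iteration extracts only 32 - 26 == 6 bits; the remaining 14 bits are
   taken from the next word on the following iteration.  */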
2277 /* If OP0 is a register, then handle OFFSET here. */
2278 rtx op0_piece = op0;
2279 opt_scalar_int_mode op0_piece_mode = op0_mode;
2280 if (SUBREG_P (op0) || REG_P (op0))
2282 op0_piece = operand_subword_force (op0, offset, op0_mode.require ());
2283 op0_piece_mode = word_mode;
2284 offset = 0;
2287 /* Extract the parts in bit-counting order,
2288 whose meaning is determined by BYTES_PER_UNIT.
2289 OFFSET is in UNITs, and UNIT is in bits. */
2290 part = extract_fixed_bit_field (word_mode, op0_piece, op0_piece_mode,
2291 thissize, offset * unit + thispos,
2292 0, 1, reverse);
2293 bitsdone += thissize;
2295 /* Shift this part into place for the result. */
2296 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2298 if (bitsize != bitsdone)
2299 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2300 bitsize - bitsdone, 0, 1);
2302 else
2304 if (bitsdone != thissize)
2305 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2306 bitsdone - thissize, 0, 1);
2309 if (first)
2310 result = part;
2311 else
2312 /* Combine the parts with bitwise or. This works
2313 because we extracted each part as an unsigned bit field. */
2314 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2315 OPTAB_LIB_WIDEN);
2317 first = 0;
2320 /* Unsigned bit field: we are done. */
2321 if (unsignedp)
2322 return result;
2323 /* Signed bit field: sign-extend with two arithmetic shifts. */
2324 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2325 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2326 return expand_shift (RSHIFT_EXPR, word_mode, result,
2327 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2330 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2331 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2332 MODE, fill the upper bits with zeros. Fail if the layout of either
2333 mode is unknown (as for CC modes) or if the extraction would involve
2334 unprofitable mode punning. Return the value on success, otherwise
2335 return null.
2337 This is different from gen_lowpart* in these respects:
2339 - the returned value must always be considered an rvalue
2341 - when MODE is wider than SRC_MODE, the extraction involves
2342 a zero extension
2344 - when MODE is smaller than SRC_MODE, the extraction involves
2345 a truncation (and is thus subject to TARGET_TRULY_NOOP_TRUNCATION).
2347 In other words, this routine performs a computation, whereas the
2348 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2349 operations. */
2352 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2354 scalar_int_mode int_mode, src_int_mode;
2356 if (mode == src_mode)
2357 return src;
2359 if (CONSTANT_P (src))
2361 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2362 fails, it will happily create (subreg (symbol_ref)) or similar
2363 invalid SUBREGs. */
2364 poly_uint64 byte = subreg_lowpart_offset (mode, src_mode);
2365 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2366 if (ret)
2367 return ret;
2369 if (GET_MODE (src) == VOIDmode
2370 || !validate_subreg (mode, src_mode, src, byte))
2371 return NULL_RTX;
2373 src = force_reg (GET_MODE (src), src);
2374 return gen_rtx_SUBREG (mode, src, byte);
2377 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2378 return NULL_RTX;
2380 if (known_eq (GET_MODE_BITSIZE (mode), GET_MODE_BITSIZE (src_mode))
2381 && targetm.modes_tieable_p (mode, src_mode))
2383 rtx x = gen_lowpart_common (mode, src);
2384 if (x)
2385 return x;
2388 if (!int_mode_for_mode (src_mode).exists (&src_int_mode)
2389 || !int_mode_for_mode (mode).exists (&int_mode))
2390 return NULL_RTX;
2392 if (!targetm.modes_tieable_p (src_int_mode, src_mode))
2393 return NULL_RTX;
2394 if (!targetm.modes_tieable_p (int_mode, mode))
2395 return NULL_RTX;
2397 src = gen_lowpart (src_int_mode, src);
2398 if (!validate_subreg (int_mode, src_int_mode, src,
2399 subreg_lowpart_offset (int_mode, src_int_mode)))
2400 return NULL_RTX;
2402 src = convert_modes (int_mode, src_int_mode, src, true);
2403 src = gen_lowpart (mode, src);
2404 return src;
2407 /* Add INC into TARGET. */
2409 void
2410 expand_inc (rtx target, rtx inc)
2412 rtx value = expand_binop (GET_MODE (target), add_optab,
2413 target, inc,
2414 target, 0, OPTAB_LIB_WIDEN);
2415 if (value != target)
2416 emit_move_insn (target, value);
2419 /* Subtract DEC from TARGET. */
2421 void
2422 expand_dec (rtx target, rtx dec)
2424 rtx value = expand_binop (GET_MODE (target), sub_optab,
2425 target, dec,
2426 target, 0, OPTAB_LIB_WIDEN);
2427 if (value != target)
2428 emit_move_insn (target, value);
2431 /* Output a shift instruction for expression code CODE,
2432 with SHIFTED being the rtx for the value to shift,
2433 and AMOUNT the rtx for the amount to shift by.
2434 Store the result in the rtx TARGET, if that is convenient.
2435 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2436 Return the rtx for where the value is.
2437 If that cannot be done, abort the compilation unless MAY_FAIL is true,
2438 in which case 0 is returned. */
2440 static rtx
2441 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2442 rtx amount, rtx target, int unsignedp, bool may_fail = false)
2444 rtx op1, temp = 0;
2445 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2446 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2447 optab lshift_optab = ashl_optab;
2448 optab rshift_arith_optab = ashr_optab;
2449 optab rshift_uns_optab = lshr_optab;
2450 optab lrotate_optab = rotl_optab;
2451 optab rrotate_optab = rotr_optab;
2452 machine_mode op1_mode;
2453 scalar_mode scalar_mode = GET_MODE_INNER (mode);
2454 int attempt;
2455 bool speed = optimize_insn_for_speed_p ();
2457 op1 = amount;
2458 op1_mode = GET_MODE (op1);
2460 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2461 shift amount is a vector, use the vector/vector shift patterns. */
2462 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2464 lshift_optab = vashl_optab;
2465 rshift_arith_optab = vashr_optab;
2466 rshift_uns_optab = vlshr_optab;
2467 lrotate_optab = vrotl_optab;
2468 rrotate_optab = vrotr_optab;
2471 /* Previously detected shift-counts computed by NEGATE_EXPR
2472 and shifted in the other direction; but that does not work
2473 on all machines. */
2475 if (SHIFT_COUNT_TRUNCATED)
2477 if (CONST_INT_P (op1)
2478 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2479 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2480 op1 = gen_int_shift_amount (mode,
2481 (unsigned HOST_WIDE_INT) INTVAL (op1)
2482 % GET_MODE_BITSIZE (scalar_mode));
2483 else if (GET_CODE (op1) == SUBREG
2484 && subreg_lowpart_p (op1)
2485 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2486 && SCALAR_INT_MODE_P (GET_MODE (op1)))
2487 op1 = SUBREG_REG (op1);
2490 /* Canonicalize rotates by constant amount. If op1 is bitsize / 2,
2491 prefer left rotation, if op1 is from bitsize / 2 + 1 to
2492 bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2493 amount instead. */
2494 if (rotate
2495 && CONST_INT_P (op1)
2496 && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2497 GET_MODE_BITSIZE (scalar_mode) - 1))
2499 op1 = gen_int_shift_amount (mode, (GET_MODE_BITSIZE (scalar_mode)
2500 - INTVAL (op1)));
2501 left = !left;
2502 code = left ? LROTATE_EXPR : RROTATE_EXPR;
2505 /* Rotation of 16-bit values by 8 bits is effectively equivalent to a bswaphi.
2506 Note that this is not the case for bigger values. For instance a rotation
2507 of 0x01020304 by 16 bits gives 0x03040102 which is different from
2508 0x04030201 (bswapsi). */
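/* E.g. rotating the 16-bit value 0x0102 by 8 in either direction gives
   0x0201, which is exactly the bswaphi result.  */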
2509 if (rotate
2510 && CONST_INT_P (op1)
2511 && INTVAL (op1) == BITS_PER_UNIT
2512 && GET_MODE_SIZE (scalar_mode) == 2
2513 && optab_handler (bswap_optab, mode) != CODE_FOR_nothing)
2514 return expand_unop (mode, bswap_optab, shifted, NULL_RTX, unsignedp);
2516 if (op1 == const0_rtx)
2517 return shifted;
2519 /* Check whether it's cheaper to implement a left shift by a constant
2520 bit count as a sequence of additions. */
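/* E.g. x << 2 can be done as t = x + x; t = t + t, which is preferred
   below only when the two additions are cheaper than the shift itself.  */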
2521 if (code == LSHIFT_EXPR
2522 && CONST_INT_P (op1)
2523 && INTVAL (op1) > 0
2524 && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2525 && INTVAL (op1) < MAX_BITS_PER_WORD
2526 && (shift_cost (speed, mode, INTVAL (op1))
2527 > INTVAL (op1) * add_cost (speed, mode))
2528 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2530 int i;
2531 for (i = 0; i < INTVAL (op1); i++)
2533 temp = force_reg (mode, shifted);
2534 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2535 unsignedp, OPTAB_LIB_WIDEN);
2537 return shifted;
2540 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2542 enum optab_methods methods;
2544 if (attempt == 0)
2545 methods = OPTAB_DIRECT;
2546 else if (attempt == 1)
2547 methods = OPTAB_WIDEN;
2548 else
2549 methods = OPTAB_LIB_WIDEN;
2551 if (rotate)
2553 /* Widening does not work for rotation. */
2554 if (methods == OPTAB_WIDEN)
2555 continue;
2556 else if (methods == OPTAB_LIB_WIDEN)
2558 /* If we have been unable to open-code this by a rotation,
2559 do it as the IOR of two shifts. I.e., to rotate A
2560 by N bits, compute
2561 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2562 where C is the bitsize of A.
2564 It is theoretically possible that the target machine might
2565 not be able to perform either shift and hence we would
2566 be making two libcalls rather than just the one for the
2567 shift (similarly if IOR could not be done). We will allow
2568 this extremely unlikely lossage to avoid complicating the
2569 code below. */
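/* As an illustration, for a 32-bit A rotated left by N this computes
   (A << N) | ((unsigned) A >> ((-N) & 31)); with N == 8 that is
   (A << 8) | (A >> 24), and the masking keeps the right-shift amount
   in range when N is 0.  */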
2571 rtx subtarget = target == shifted ? 0 : target;
2572 rtx new_amount, other_amount;
2573 rtx temp1;
2575 new_amount = op1;
2576 if (op1 == const0_rtx)
2577 return shifted;
2578 else if (CONST_INT_P (op1))
2579 other_amount = gen_int_shift_amount
2580 (mode, GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2581 else
2583 other_amount
2584 = simplify_gen_unary (NEG, GET_MODE (op1),
2585 op1, GET_MODE (op1));
2586 HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2587 other_amount
2588 = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2589 gen_int_mode (mask, GET_MODE (op1)));
2592 shifted = force_reg (mode, shifted);
2594 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2595 mode, shifted, new_amount, 0, 1);
2596 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2597 mode, shifted, other_amount,
2598 subtarget, 1);
2599 return expand_binop (mode, ior_optab, temp, temp1, target,
2600 unsignedp, methods);
2603 temp = expand_binop (mode,
2604 left ? lrotate_optab : rrotate_optab,
2605 shifted, op1, target, unsignedp, methods);
2607 else if (unsignedp)
2608 temp = expand_binop (mode,
2609 left ? lshift_optab : rshift_uns_optab,
2610 shifted, op1, target, unsignedp, methods);
2612 /* Do arithmetic shifts.
2613 Also, if we are going to widen the operand, we can just as well
2614 use an arithmetic right-shift instead of a logical one. */
2615 if (temp == 0 && ! rotate
2616 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2618 enum optab_methods methods1 = methods;
2620 /* If trying to widen a log shift to an arithmetic shift,
2621 don't accept an arithmetic shift of the same size. */
2622 if (unsignedp)
2623 methods1 = OPTAB_MUST_WIDEN;
2625 /* Arithmetic shift */
2627 temp = expand_binop (mode,
2628 left ? lshift_optab : rshift_arith_optab,
2629 shifted, op1, target, unsignedp, methods1);
2632 /* We used to try extzv here for logical right shifts, but that was
2633 only useful for one machine, the VAX, and caused poor code
2634 generation there for lshrdi3, so the code was deleted and a
2635 define_expand for lshrsi3 was added to vax.md. */
2638 gcc_assert (temp != NULL_RTX || may_fail);
2639 return temp;
2642 /* Output a shift instruction for expression code CODE,
2643 with SHIFTED being the rtx for the value to shift,
2644 and AMOUNT the amount to shift by.
2645 Store the result in the rtx TARGET, if that is convenient.
2646 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2647 Return the rtx for where the value is. */
2650 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2651 poly_int64 amount, rtx target, int unsignedp)
2653 return expand_shift_1 (code, mode, shifted,
2654 gen_int_shift_amount (mode, amount),
2655 target, unsignedp);
2658 /* Likewise, but return 0 if that cannot be done. */
2660 static rtx
2661 maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2662 int amount, rtx target, int unsignedp)
2664 return expand_shift_1 (code, mode,
2665 shifted, GEN_INT (amount), target, unsignedp, true);
2668 /* Output a shift instruction for expression code CODE,
2669 with SHIFTED being the rtx for the value to shift,
2670 and AMOUNT the tree for the amount to shift by.
2671 Store the result in the rtx TARGET, if that is convenient.
2672 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2673 Return the rtx for where the value is. */
2676 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2677 tree amount, rtx target, int unsignedp)
2679 return expand_shift_1 (code, mode,
2680 shifted, expand_normal (amount), target, unsignedp);
2684 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2685 const struct mult_cost *, machine_mode mode);
2686 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2687 const struct algorithm *, enum mult_variant);
2688 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2689 static rtx extract_high_half (scalar_int_mode, rtx);
2690 static rtx expmed_mult_highpart (scalar_int_mode, rtx, rtx, rtx, int, int);
2691 static rtx expmed_mult_highpart_optab (scalar_int_mode, rtx, rtx, rtx,
2692 int, int);
2693 /* Compute and return the best algorithm for multiplying by T.
2694 The algorithm must cost less than COST_LIMIT.
2695 If retval.cost >= COST_LIMIT, no algorithm was found and all
2696 other fields of the returned struct are undefined.
2697 MODE is the machine mode of the multiplication. */
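/* As a rough illustration, a multiplication by 10 can come out of this
   search as ((x << 2) + x) << 1, i.e. one shift-add followed by one
   shift instead of a full multiply.  */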
2699 static void
2700 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2701 const struct mult_cost *cost_limit, machine_mode mode)
2703 int m;
2704 struct algorithm *alg_in, *best_alg;
2705 struct mult_cost best_cost;
2706 struct mult_cost new_limit;
2707 int op_cost, op_latency;
2708 unsigned HOST_WIDE_INT orig_t = t;
2709 unsigned HOST_WIDE_INT q;
2710 int maxm, hash_index;
2711 bool cache_hit = false;
2712 enum alg_code cache_alg = alg_zero;
2713 bool speed = optimize_insn_for_speed_p ();
2714 scalar_int_mode imode;
2715 struct alg_hash_entry *entry_ptr;
2717 /* Indicate that no algorithm is yet found. If no algorithm
2718 is found, this value will be returned to indicate failure. */
2719 alg_out->cost.cost = cost_limit->cost + 1;
2720 alg_out->cost.latency = cost_limit->latency + 1;
2722 if (cost_limit->cost < 0
2723 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2724 return;
2726 /* Be prepared for vector modes. */
2727 imode = as_a <scalar_int_mode> (GET_MODE_INNER (mode));
2729 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2731 /* Restrict the bits of "t" to the multiplication's mode. */
2732 t &= GET_MODE_MASK (imode);
2734 /* t == 1 can be done in zero cost. */
2735 if (t == 1)
2737 alg_out->ops = 1;
2738 alg_out->cost.cost = 0;
2739 alg_out->cost.latency = 0;
2740 alg_out->op[0] = alg_m;
2741 return;
2744 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2745 fail now. */
2746 if (t == 0)
2748 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2749 return;
2750 else
2752 alg_out->ops = 1;
2753 alg_out->cost.cost = zero_cost (speed);
2754 alg_out->cost.latency = zero_cost (speed);
2755 alg_out->op[0] = alg_zero;
2756 return;
2760 /* We'll be needing a couple extra algorithm structures now. */
2762 alg_in = XALLOCA (struct algorithm);
2763 best_alg = XALLOCA (struct algorithm);
2764 best_cost = *cost_limit;
2766 /* Compute the hash index. */
2767 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2769 /* See if we already know what to do for T. */
2770 entry_ptr = alg_hash_entry_ptr (hash_index);
2771 if (entry_ptr->t == t
2772 && entry_ptr->mode == mode
2773 && entry_ptr->speed == speed
2774 && entry_ptr->alg != alg_unknown)
2776 cache_alg = entry_ptr->alg;
2778 if (cache_alg == alg_impossible)
2780 /* The cache tells us that it's impossible to synthesize
2781 multiplication by T within entry_ptr->cost. */
2782 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2783 /* COST_LIMIT is at least as restrictive as the one
2784 recorded in the hash table, in which case we have no
2785 hope of synthesizing a multiplication. Just
2786 return. */
2787 return;
2789 /* If we get here, COST_LIMIT is less restrictive than the
2790 one recorded in the hash table, so we may be able to
2791 synthesize a multiplication. Proceed as if we didn't
2792 have the cache entry. */
2794 else
2796 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2797 /* The cached algorithm shows that this multiplication
2798 requires more cost than COST_LIMIT. Just return. This
2799 way, we don't clobber this cache entry with
2800 alg_impossible but retain useful information. */
2801 return;
2803 cache_hit = true;
2805 switch (cache_alg)
2807 case alg_shift:
2808 goto do_alg_shift;
2810 case alg_add_t_m2:
2811 case alg_sub_t_m2:
2812 goto do_alg_addsub_t_m2;
2814 case alg_add_factor:
2815 case alg_sub_factor:
2816 goto do_alg_addsub_factor;
2818 case alg_add_t2_m:
2819 goto do_alg_add_t2_m;
2821 case alg_sub_t2_m:
2822 goto do_alg_sub_t2_m;
2824 default:
2825 gcc_unreachable ();
2830 /* If we have a group of zero bits at the low-order part of T, try
2831 multiplying by the remaining bits and then doing a shift. */
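/* E.g. T == 40 (binary 101000) has m == 3 low zero bits, so we try to
   synthesize a multiplication by q == 5 and shift the result left by 3.  */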
2833 if ((t & 1) == 0)
2835 do_alg_shift:
2836 m = ctz_or_zero (t); /* m = number of low zero bits */
2837 if (m < maxm)
2839 q = t >> m;
2840 /* The function expand_shift will choose between a shift and
2841 a sequence of additions, so the observed cost is given as
2842 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2843 op_cost = m * add_cost (speed, mode);
2844 if (shift_cost (speed, mode, m) < op_cost)
2845 op_cost = shift_cost (speed, mode, m);
2846 new_limit.cost = best_cost.cost - op_cost;
2847 new_limit.latency = best_cost.latency - op_cost;
2848 synth_mult (alg_in, q, &new_limit, mode);
2850 alg_in->cost.cost += op_cost;
2851 alg_in->cost.latency += op_cost;
2852 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2854 best_cost = alg_in->cost;
2855 std::swap (alg_in, best_alg);
2856 best_alg->log[best_alg->ops] = m;
2857 best_alg->op[best_alg->ops] = alg_shift;
2860 /* See if treating ORIG_T as a signed number yields a better
2861 sequence. Try this sequence only for a negative ORIG_T
2862 as it would be useless for a non-negative ORIG_T. */
2863 if ((HOST_WIDE_INT) orig_t < 0)
2865 /* Shift ORIG_T as follows because a right shift of a
2866 negative-valued signed type is implementation
2867 defined. */
2868 q = ~(~orig_t >> m);
2869 /* The function expand_shift will choose between a shift
2870 and a sequence of additions, so the observed cost is
2871 given as MIN (m * add_cost(speed, mode),
2872 shift_cost(speed, mode, m)). */
2873 op_cost = m * add_cost (speed, mode);
2874 if (shift_cost (speed, mode, m) < op_cost)
2875 op_cost = shift_cost (speed, mode, m);
2876 new_limit.cost = best_cost.cost - op_cost;
2877 new_limit.latency = best_cost.latency - op_cost;
2878 synth_mult (alg_in, q, &new_limit, mode);
2880 alg_in->cost.cost += op_cost;
2881 alg_in->cost.latency += op_cost;
2882 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2884 best_cost = alg_in->cost;
2885 std::swap (alg_in, best_alg);
2886 best_alg->log[best_alg->ops] = m;
2887 best_alg->op[best_alg->ops] = alg_shift;
2891 if (cache_hit)
2892 goto done;
2895 /* If we have an odd number, add or subtract one. */
2896 if ((t & 1) != 0)
2898 unsigned HOST_WIDE_INT w;
2900 do_alg_addsub_t_m2:
2901 for (w = 1; (w & t) != 0; w <<= 1)
2903 /* If T was -1, then W will be zero after the loop. This is another
2904 case where T ends with ...111. Handling this with (T + 1) and
2905 subtracting 1 produces slightly better code and results in algorithm
2906 selection much faster than treating it like the ...0111 case
2907 below. */
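/* E.g. T == 15 (binary 1111) is handled as x*16 - x, i.e. (x << 4) - x.  */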
2908 if (w == 0
2909 || (w > 2
2910 /* Reject the case where t is 3.
2911 Thus we prefer addition in that case. */
2912 && t != 3))
2914 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2916 op_cost = add_cost (speed, mode);
2917 new_limit.cost = best_cost.cost - op_cost;
2918 new_limit.latency = best_cost.latency - op_cost;
2919 synth_mult (alg_in, t + 1, &new_limit, mode);
2921 alg_in->cost.cost += op_cost;
2922 alg_in->cost.latency += op_cost;
2923 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2925 best_cost = alg_in->cost;
2926 std::swap (alg_in, best_alg);
2927 best_alg->log[best_alg->ops] = 0;
2928 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2931 else
2933 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2935 op_cost = add_cost (speed, mode);
2936 new_limit.cost = best_cost.cost - op_cost;
2937 new_limit.latency = best_cost.latency - op_cost;
2938 synth_mult (alg_in, t - 1, &new_limit, mode);
2940 alg_in->cost.cost += op_cost;
2941 alg_in->cost.latency += op_cost;
2942 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2944 best_cost = alg_in->cost;
2945 std::swap (alg_in, best_alg);
2946 best_alg->log[best_alg->ops] = 0;
2947 best_alg->op[best_alg->ops] = alg_add_t_m2;
2951 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2952 quickly with a - a * n for some appropriate constant n. */
2953 m = exact_log2 (-orig_t + 1);
2954 if (m >= 0 && m < maxm)
2956 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2957 /* If the target has a cheap shift-and-subtract insn, use
2958 that in preference to a shift insn followed by a sub insn.
2959 Assume that the shift-and-sub is "atomic" with a latency
2960 equal to its cost, otherwise assume that on superscalar
2961 hardware the shift may be executed concurrently with the
2962 earlier steps in the algorithm. */
2963 if (shiftsub1_cost (speed, mode, m) <= op_cost)
2965 op_cost = shiftsub1_cost (speed, mode, m);
2966 op_latency = op_cost;
2968 else
2969 op_latency = add_cost (speed, mode);
2971 new_limit.cost = best_cost.cost - op_cost;
2972 new_limit.latency = best_cost.latency - op_latency;
2973 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2974 &new_limit, mode);
2976 alg_in->cost.cost += op_cost;
2977 alg_in->cost.latency += op_latency;
2978 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2980 best_cost = alg_in->cost;
2981 std::swap (alg_in, best_alg);
2982 best_alg->log[best_alg->ops] = m;
2983 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2987 if (cache_hit)
2988 goto done;
2991 /* Look for factors of t of the form
2992 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2993 If we find such a factor, we can multiply by t using an algorithm that
2994 multiplies by q, shift the result by m and add/subtract it to itself.
2996 We search for large factors first and loop down, even if large factors
2997 are less probable than small; if we find a large factor we will find a
2998 good sequence quickly, and therefore be able to prune (by decreasing
2999 COST_LIMIT) the search. */
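/* E.g. T == 45 factors as 5 * (2**3 + 1): synthesize q = x*5 first and
   then form (q << 3) + q.  */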
3001 do_alg_addsub_factor:
3002 for (m = floor_log2 (t - 1); m >= 2; m--)
3004 unsigned HOST_WIDE_INT d;
3006 d = (HOST_WIDE_INT_1U << m) + 1;
3007 if (t % d == 0 && t > d && m < maxm
3008 && (!cache_hit || cache_alg == alg_add_factor))
3010 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3011 if (shiftadd_cost (speed, mode, m) <= op_cost)
3012 op_cost = shiftadd_cost (speed, mode, m);
3014 op_latency = op_cost;
3017 new_limit.cost = best_cost.cost - op_cost;
3018 new_limit.latency = best_cost.latency - op_latency;
3019 synth_mult (alg_in, t / d, &new_limit, mode);
3021 alg_in->cost.cost += op_cost;
3022 alg_in->cost.latency += op_latency;
3023 if (alg_in->cost.latency < op_cost)
3024 alg_in->cost.latency = op_cost;
3025 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3027 best_cost = alg_in->cost;
3028 std::swap (alg_in, best_alg);
3029 best_alg->log[best_alg->ops] = m;
3030 best_alg->op[best_alg->ops] = alg_add_factor;
3032 /* Other factors will have been taken care of in the recursion. */
3033 break;
3036 d = (HOST_WIDE_INT_1U << m) - 1;
3037 if (t % d == 0 && t > d && m < maxm
3038 && (!cache_hit || cache_alg == alg_sub_factor))
3040 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3041 if (shiftsub0_cost (speed, mode, m) <= op_cost)
3042 op_cost = shiftsub0_cost (speed, mode, m);
3044 op_latency = op_cost;
3046 new_limit.cost = best_cost.cost - op_cost;
3047 new_limit.latency = best_cost.latency - op_latency;
3048 synth_mult (alg_in, t / d, &new_limit, mode);
3050 alg_in->cost.cost += op_cost;
3051 alg_in->cost.latency += op_latency;
3052 if (alg_in->cost.latency < op_cost)
3053 alg_in->cost.latency = op_cost;
3054 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3056 best_cost = alg_in->cost;
3057 std::swap (alg_in, best_alg);
3058 best_alg->log[best_alg->ops] = m;
3059 best_alg->op[best_alg->ops] = alg_sub_factor;
3061 break;
3064 if (cache_hit)
3065 goto done;
3067 /* Try shift-and-add (load effective address) instructions,
3068 i.e. do a*3, a*5, a*9. */
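/* E.g. for T == 9 we get q == 8, m == 3, so x*9 becomes the single
   shift-add (x << 3) + x.  */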
3069 if ((t & 1) != 0)
3071 do_alg_add_t2_m:
3072 q = t - 1;
3073 m = ctz_hwi (q);
3074 if (q && m < maxm)
3076 op_cost = shiftadd_cost (speed, mode, m);
3077 new_limit.cost = best_cost.cost - op_cost;
3078 new_limit.latency = best_cost.latency - op_cost;
3079 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
3081 alg_in->cost.cost += op_cost;
3082 alg_in->cost.latency += op_cost;
3083 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3085 best_cost = alg_in->cost;
3086 std::swap (alg_in, best_alg);
3087 best_alg->log[best_alg->ops] = m;
3088 best_alg->op[best_alg->ops] = alg_add_t2_m;
3091 if (cache_hit)
3092 goto done;
3094 do_alg_sub_t2_m:
3095 q = t + 1;
3096 m = ctz_hwi (q);
3097 if (q && m < maxm)
3099 op_cost = shiftsub0_cost (speed, mode, m);
3100 new_limit.cost = best_cost.cost - op_cost;
3101 new_limit.latency = best_cost.latency - op_cost;
3102 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
3104 alg_in->cost.cost += op_cost;
3105 alg_in->cost.latency += op_cost;
3106 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3108 best_cost = alg_in->cost;
3109 std::swap (alg_in, best_alg);
3110 best_alg->log[best_alg->ops] = m;
3111 best_alg->op[best_alg->ops] = alg_sub_t2_m;
3114 if (cache_hit)
3115 goto done;
3118 done:
3119 /* If best_cost has not decreased, we have not found any algorithm. */
3120 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
3122 /* We failed to find an algorithm. Record alg_impossible for
3123 this case (that is, <T, MODE, COST_LIMIT>) so that next time
3124 we are asked to find an algorithm for T within the same or
3125 lower COST_LIMIT, we can immediately return to the
3126 caller. */
3127 entry_ptr->t = t;
3128 entry_ptr->mode = mode;
3129 entry_ptr->speed = speed;
3130 entry_ptr->alg = alg_impossible;
3131 entry_ptr->cost = *cost_limit;
3132 return;
3135 /* Cache the result. */
3136 if (!cache_hit)
3138 entry_ptr->t = t;
3139 entry_ptr->mode = mode;
3140 entry_ptr->speed = speed;
3141 entry_ptr->alg = best_alg->op[best_alg->ops];
3142 entry_ptr->cost.cost = best_cost.cost;
3143 entry_ptr->cost.latency = best_cost.latency;
3146 /* If we are getting a sequence too long for `struct algorithm'
3147 to record, make this search fail. */
3148 if (best_alg->ops == MAX_BITS_PER_WORD)
3149 return;
3151 /* Copy the algorithm from temporary space to the space at alg_out.
3152 We avoid using structure assignment because the majority of
3153 best_alg is normally undefined, and this is a critical function. */
3154 alg_out->ops = best_alg->ops + 1;
3155 alg_out->cost = best_cost;
3156 memcpy (alg_out->op, best_alg->op,
3157 alg_out->ops * sizeof *alg_out->op);
3158 memcpy (alg_out->log, best_alg->log,
3159 alg_out->ops * sizeof *alg_out->log);
3162 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
3163 Try three variations:
3165 - a shift/add sequence based on VAL itself
3166 - a shift/add sequence based on -VAL, followed by a negation
3167 - a shift/add sequence based on VAL - 1, followed by an addition.
3169 Return true if the cheapest of these costs less than MULT_COST,
3170 describing the algorithm in *ALG and final fixup in *VARIANT. */
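/* For instance, the basic variant handles VAL == 5 as (x << 2) + x, the
   negate variant can handle VAL == -7 as -(x*8 - x), and the VAL - 1
   variant sometimes wins for the large odd multipliers that show up in
   division-by-constant expansions.  */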
3172 bool
3173 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
3174 struct algorithm *alg, enum mult_variant *variant,
3175 int mult_cost)
3177 struct algorithm alg2;
3178 struct mult_cost limit;
3179 int op_cost;
3180 bool speed = optimize_insn_for_speed_p ();
3182 /* Fail quickly for impossible bounds. */
3183 if (mult_cost < 0)
3184 return false;
3186 /* Ensure that mult_cost provides a reasonable upper bound.
3187 Any constant multiplication can be performed with less
3188 than 2 * bits additions. */
3189 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
3190 if (mult_cost > op_cost)
3191 mult_cost = op_cost;
3193 *variant = basic_variant;
3194 limit.cost = mult_cost;
3195 limit.latency = mult_cost;
3196 synth_mult (alg, val, &limit, mode);
3198 /* This works only if the inverted value actually fits in an
3199 `unsigned int'. */
3200 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
3202 op_cost = neg_cost (speed, mode);
3203 if (MULT_COST_LESS (&alg->cost, mult_cost))
3205 limit.cost = alg->cost.cost - op_cost;
3206 limit.latency = alg->cost.latency - op_cost;
3208 else
3210 limit.cost = mult_cost - op_cost;
3211 limit.latency = mult_cost - op_cost;
3214 synth_mult (&alg2, -val, &limit, mode);
3215 alg2.cost.cost += op_cost;
3216 alg2.cost.latency += op_cost;
3217 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3218 *alg = alg2, *variant = negate_variant;
3221 /* This proves very useful for division-by-constant. */
3222 op_cost = add_cost (speed, mode);
3223 if (MULT_COST_LESS (&alg->cost, mult_cost))
3225 limit.cost = alg->cost.cost - op_cost;
3226 limit.latency = alg->cost.latency - op_cost;
3228 else
3230 limit.cost = mult_cost - op_cost;
3231 limit.latency = mult_cost - op_cost;
3234 synth_mult (&alg2, val - 1, &limit, mode);
3235 alg2.cost.cost += op_cost;
3236 alg2.cost.latency += op_cost;
3237 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3238 *alg = alg2, *variant = add_variant;
3240 return MULT_COST_LESS (&alg->cost, mult_cost);
3243 /* A subroutine of expand_mult, used for constant multiplications.
3244 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3245 convenient. Use the shift/add sequence described by ALG and apply
3246 the final fixup specified by VARIANT. */
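/* As an illustrative trace, VAL == 10 with the algorithm
   { alg_m, alg_add_t2_m (log 2), alg_shift (log 1) } leaves ACCUM at
   x, then (x << 2) + x, then ((x << 2) + x) << 1, while VAL_SO_FAR
   goes 1, 5, 10.  */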
3248 static rtx
3249 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
3250 rtx target, const struct algorithm *alg,
3251 enum mult_variant variant)
3253 unsigned HOST_WIDE_INT val_so_far;
3254 rtx_insn *insn;
3255 rtx accum, tem;
3256 int opno;
3257 machine_mode nmode;
3259 /* Avoid referencing memory over and over, and avoid invalid sharing
3260 on SUBREGs. */
3261 op0 = force_reg (mode, op0);
3263 /* ACCUM starts out either as OP0 or as a zero, depending on
3264 the first operation. */
3266 if (alg->op[0] == alg_zero)
3268 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3269 val_so_far = 0;
3271 else if (alg->op[0] == alg_m)
3273 accum = copy_to_mode_reg (mode, op0);
3274 val_so_far = 1;
3276 else
3277 gcc_unreachable ();
3279 for (opno = 1; opno < alg->ops; opno++)
3281 int log = alg->log[opno];
3282 rtx shift_subtarget = optimize ? 0 : accum;
3283 rtx add_target
3284 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3285 && !optimize)
3286 ? target : 0;
3287 rtx accum_target = optimize ? 0 : accum;
3288 rtx accum_inner;
3290 switch (alg->op[opno])
3292 case alg_shift:
3293 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3294 /* REG_EQUAL note will be attached to the following insn. */
3295 emit_move_insn (accum, tem);
3296 val_so_far <<= log;
3297 break;
3299 case alg_add_t_m2:
3300 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3301 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3302 add_target ? add_target : accum_target);
3303 val_so_far += HOST_WIDE_INT_1U << log;
3304 break;
3306 case alg_sub_t_m2:
3307 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3308 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3309 add_target ? add_target : accum_target);
3310 val_so_far -= HOST_WIDE_INT_1U << log;
3311 break;
3313 case alg_add_t2_m:
3314 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3315 log, shift_subtarget, 0);
3316 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3317 add_target ? add_target : accum_target);
3318 val_so_far = (val_so_far << log) + 1;
3319 break;
3321 case alg_sub_t2_m:
3322 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3323 log, shift_subtarget, 0);
3324 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3325 add_target ? add_target : accum_target);
3326 val_so_far = (val_so_far << log) - 1;
3327 break;
3329 case alg_add_factor:
3330 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3331 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3332 add_target ? add_target : accum_target);
3333 val_so_far += val_so_far << log;
3334 break;
3336 case alg_sub_factor:
3337 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3338 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3339 (add_target
3340 ? add_target : (optimize ? 0 : tem)));
3341 val_so_far = (val_so_far << log) - val_so_far;
3342 break;
3344 default:
3345 gcc_unreachable ();
3348 if (SCALAR_INT_MODE_P (mode))
3350 /* Write a REG_EQUAL note on the last insn so that we can cse
3351 multiplication sequences. Note that if ACCUM is a SUBREG,
3352 we've set the inner register and must properly indicate that. */
3353 tem = op0, nmode = mode;
3354 accum_inner = accum;
3355 if (GET_CODE (accum) == SUBREG)
3357 accum_inner = SUBREG_REG (accum);
3358 nmode = GET_MODE (accum_inner);
3359 tem = gen_lowpart (nmode, op0);
3362 /* Don't add a REG_EQUAL note if tem is a paradoxical SUBREG.
3363 In that case, only the low bits of accum would be guaranteed to
3364 be equal to the content of the REG_EQUAL note, the upper bits
3365 can be anything. */
3366 if (!paradoxical_subreg_p (tem))
3368 insn = get_last_insn ();
3369 wide_int wval_so_far
3370 = wi::uhwi (val_so_far,
3371 GET_MODE_PRECISION (as_a <scalar_mode> (nmode)));
3372 rtx c = immed_wide_int_const (wval_so_far, nmode);
3373 set_dst_reg_note (insn, REG_EQUAL, gen_rtx_MULT (nmode, tem, c),
3374 accum_inner);
3379 if (variant == negate_variant)
3381 val_so_far = -val_so_far;
3382 accum = expand_unop (mode, neg_optab, accum, target, 0);
3384 else if (variant == add_variant)
3386 val_so_far = val_so_far + 1;
3387 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3390 /* Compare only the bits of val and val_so_far that are significant
3391 in the result mode, to avoid sign-/zero-extension confusion. */
3392 nmode = GET_MODE_INNER (mode);
3393 val &= GET_MODE_MASK (nmode);
3394 val_so_far &= GET_MODE_MASK (nmode);
3395 gcc_assert (val == (HOST_WIDE_INT) val_so_far);
3397 return accum;
3400 /* Perform a multiplication and return an rtx for the result.
3401 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3402 TARGET is a suggestion for where to store the result (an rtx).
3404 We check specially for a constant integer as OP1.
3405 If you want this check for OP0 as well, then before calling
3406 you should swap the two operands if OP0 would be constant. */
3409 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3410 int unsignedp, bool no_libcall)
3412 enum mult_variant variant;
3413 struct algorithm algorithm;
3414 rtx scalar_op1;
3415 int max_cost;
3416 bool speed = optimize_insn_for_speed_p ();
3417 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3419 if (CONSTANT_P (op0))
3420 std::swap (op0, op1);
3422 /* For vectors, there are several simplifications that can be made if
3423 all elements of the vector constant are identical. */
3424 scalar_op1 = unwrap_const_vec_duplicate (op1);
3426 if (INTEGRAL_MODE_P (mode))
3428 rtx fake_reg;
3429 HOST_WIDE_INT coeff;
3430 bool is_neg;
3431 int mode_bitsize;
3433 if (op1 == CONST0_RTX (mode))
3434 return op1;
3435 if (op1 == CONST1_RTX (mode))
3436 return op0;
3437 if (op1 == CONSTM1_RTX (mode))
3438 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3439 op0, target, 0);
3441 if (do_trapv)
3442 goto skip_synth;
3444 /* If mode is an integer vector mode, check whether the backend supports
3445 vector lshift (by scalar or vector) at all. If not, we can't use
3446 a synthesized multiply. */
3447 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3448 && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3449 && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3450 goto skip_synth;
3452 /* These are the operations that are potentially turned into
3453 a sequence of shifts and additions. */
3454 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3456 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3457 less than or equal in size to `unsigned int' this doesn't matter.
3458 If the mode is larger than `unsigned int', then synth_mult works
3459 only if the constant value exactly fits in an `unsigned int' without
3460 any truncation. This means that multiplying by negative values does
3461 not work; results are off by 2^32 on a 32-bit machine. */
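/* Editorial illustration keyed to the HOST_BITS_PER_WIDE_INT checks below
   (an assumption, not from the original sources): with a 64-bit
   HOST_WIDE_INT, a TImode multiply by -3 would need the coefficient
   2^128 - 3, which does not fit in the unsigned HOST_WIDE_INT that
   synth_mult works with.  The is_neg path further down therefore
   synthesizes the multiply by 3 and negates the result instead.  */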
3462 if (CONST_INT_P (scalar_op1))
3464 coeff = INTVAL (scalar_op1);
3465 is_neg = coeff < 0;
3467 #if TARGET_SUPPORTS_WIDE_INT
3468 else if (CONST_WIDE_INT_P (scalar_op1))
3469 #else
3470 else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3471 #endif
3473 int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode));
3474 /* Perfect power of 2 (other than 1, which is handled above). */
3475 if (shift > 0)
3476 return expand_shift (LSHIFT_EXPR, mode, op0,
3477 shift, target, unsignedp);
3478 else
3479 goto skip_synth;
3481 else
3482 goto skip_synth;
3484 /* We used to test optimize here, on the grounds that it's better to
3485 produce a smaller program when -O is not used. But this causes
3486 such a terrible slowdown sometimes that it seems better to always
3487 use synth_mult. */
3489 /* Special case powers of two. */
3490 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3491 && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3492 return expand_shift (LSHIFT_EXPR, mode, op0,
3493 floor_log2 (coeff), target, unsignedp);
3495 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3497 /* Attempt to handle multiplication of DImode values by negative
3498 coefficients, by performing the multiplication by a positive
3499 multiplier and then inverting the result. */
3500 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3502 /* It's safe to use -coeff even for INT_MIN, as the
3503 result is interpreted as an unsigned coefficient.
3504 Exclude cost of op0 from max_cost to match the cost
3505 calculation of the synth_mult. */
3506 coeff = -(unsigned HOST_WIDE_INT) coeff;
3507 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3508 mode, speed)
3509 - neg_cost (speed, mode));
3510 if (max_cost <= 0)
3511 goto skip_synth;
3513 /* Special case powers of two. */
3514 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3516 rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3517 floor_log2 (coeff), target, unsignedp);
3518 return expand_unop (mode, neg_optab, temp, target, 0);
3521 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3522 max_cost))
3524 rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3525 &algorithm, variant);
3526 return expand_unop (mode, neg_optab, temp, target, 0);
3528 goto skip_synth;
3531 /* Exclude cost of op0 from max_cost to match the cost
3532 calculation of the synth_mult. */
3533 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3534 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3535 return expand_mult_const (mode, op0, coeff, target,
3536 &algorithm, variant);
3538 skip_synth:
3540 /* Expand x*2.0 as x+x. */
3541 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3542 && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
3544 op0 = force_reg (GET_MODE (op0), op0);
3545 return expand_binop (mode, add_optab, op0, op0,
3546 target, unsignedp,
3547 no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3550 /* This used to use umul_optab if unsigned, but for non-widening multiply
3551 there is no difference between signed and unsigned. */
3552 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3553 op0, op1, target, unsignedp,
3554 no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3555 gcc_assert (op0 || no_libcall);
3556 return op0;
3559 /* Return a cost estimate for multiplying a register by the given
3560 COEFFicient in the given MODE and SPEED. */
3563 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3565 int max_cost;
3566 struct algorithm algorithm;
3567 enum mult_variant variant;
3569 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3570 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3571 mode, speed);
3572 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3573 return algorithm.cost.cost;
3574 else
3575 return max_cost;
3578 /* Perform a widening multiplication and return an rtx for the result.
3579 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3580 TARGET is a suggestion for where to store the result (an rtx).
3581 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3582 or smul_widen_optab.
3584 We check specially for a constant integer as OP1, comparing the
3585 cost of a widening multiply against the cost of a sequence of shifts
3586 and adds. */
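/* Editorial illustration (not from the original sources): a widening
   HImode -> SImode multiply by the constant 5 may be expanded by first
   extending OP0 to SImode and then computing (x << 2) + x, whenever
   choose_mult_variant finds such a shift/add sequence cheaper than the
   mul_widen_cost bound used below.  */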
3589 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3590 int unsignedp, optab this_optab)
3592 bool speed = optimize_insn_for_speed_p ();
3593 rtx cop1;
3595 if (CONST_INT_P (op1)
3596 && GET_MODE (op0) != VOIDmode
3597 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3598 this_optab == umul_widen_optab))
3599 && CONST_INT_P (cop1)
3600 && (INTVAL (cop1) >= 0
3601 || HWI_COMPUTABLE_MODE_P (mode)))
3603 HOST_WIDE_INT coeff = INTVAL (cop1);
3604 int max_cost;
3605 enum mult_variant variant;
3606 struct algorithm algorithm;
3608 if (coeff == 0)
3609 return CONST0_RTX (mode);
3611 /* Special case powers of two. */
3612 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3614 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3615 return expand_shift (LSHIFT_EXPR, mode, op0,
3616 floor_log2 (coeff), target, unsignedp);
3619 /* Exclude cost of op0 from max_cost to match the cost
3620 calculation of the synth_mult. */
3621 max_cost = mul_widen_cost (speed, mode);
3622 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3623 max_cost))
3625 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3626 return expand_mult_const (mode, op0, coeff, target,
3627 &algorithm, variant);
3630 return expand_binop (mode, this_optab, op0, op1, target,
3631 unsignedp, OPTAB_LIB_WIDEN);
3634 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3635 replace division by D, and put the least significant N bits of the result
3636 in *MULTIPLIER_PTR and return the most significant bit.
3638 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3639 needed precision is in PRECISION (should be <= N).
3641 PRECISION should be as small as possible so this function can choose
3642 the multiplier more freely.
3644 The rounded-up logarithm of D is placed in *LGUP_PTR. A shift count that
3645 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3647 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3648 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
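/* Worked example (editorial illustration, not from the original sources):
   for D = 3 with N = PRECISION = 32 this yields *MULTIPLIER_PTR =
   0xAAAAAAAB, *POST_SHIFT_PTR = 1, *LGUP_PTR = 2 and a return value of 0,
   so for a 32-bit unsigned x

     x / 3 == (unsigned) (((unsigned long long) x * 0xAAAAAAAB) >> 33)

   i.e. a highpart multiply followed by one further right shift.  */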
3650 unsigned HOST_WIDE_INT
3651 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3652 unsigned HOST_WIDE_INT *multiplier_ptr,
3653 int *post_shift_ptr, int *lgup_ptr)
3655 int lgup, post_shift;
3656 int pow, pow2;
3658 /* lgup = ceil(log2(divisor)); */
3659 lgup = ceil_log2 (d);
3661 gcc_assert (lgup <= n);
3663 pow = n + lgup;
3664 pow2 = n + lgup - precision;
3666 /* mlow = 2^(N + lgup)/d */
3667 wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3668 wide_int mlow = wi::udiv_trunc (val, d);
3670 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3671 val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3672 wide_int mhigh = wi::udiv_trunc (val, d);
3674 /* If precision == N, then mlow, mhigh exceed 2^N
3675 (but they do not exceed 2^(N+1)). */
3677 /* Reduce to lowest terms. */
3678 for (post_shift = lgup; post_shift > 0; post_shift--)
3680 unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3681 HOST_BITS_PER_WIDE_INT);
3682 unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3683 HOST_BITS_PER_WIDE_INT);
3684 if (ml_lo >= mh_lo)
3685 break;
3687 mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3688 mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3691 *post_shift_ptr = post_shift;
3692 *lgup_ptr = lgup;
3693 if (n < HOST_BITS_PER_WIDE_INT)
3695 unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;
3696 *multiplier_ptr = mhigh.to_uhwi () & mask;
3697 return mhigh.to_uhwi () > mask;
3699 else
3701 *multiplier_ptr = mhigh.to_uhwi ();
3702 return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3706 /* Compute the inverse of X mod 2**N, i.e., find Y such that X * Y is
3707 congruent to 1 (mod 2**N). */
3709 static unsigned HOST_WIDE_INT
3710 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3712 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3714 /* The algorithm notes that the choice y = x satisfies
3715 x*y == 1 mod 2^3, since x is assumed odd.
3716 Each iteration doubles the number of bits of significance in y. */
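/* Worked example (editorial illustration, not from the original sources):
   invert_mod2n (3, 8) starts from y = 3 (3*3 == 9 == 1 mod 2^3), then
   y = 3*(2 - 9) mod 256 == 235 (an inverse mod 2^6), then
   y = 235*(2 - 3*235) mod 256 == 171, and indeed 3*171 == 513 == 1
   (mod 2^8).  */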
3718 unsigned HOST_WIDE_INT mask;
3719 unsigned HOST_WIDE_INT y = x;
3720 int nbit = 3;
3722 mask = (n == HOST_BITS_PER_WIDE_INT
3723 ? HOST_WIDE_INT_M1U
3724 : (HOST_WIDE_INT_1U << n) - 1);
3726 while (nbit < n)
3728 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3729 nbit *= 2;
3731 return y;
3734 /* Emit code to adjust ADJ_OPERAND after a multiplication of OP0 and OP1
3735 in the wrong signedness flavor. ADJ_OPERAND is already the high half of the
3736 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3737 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3738 become signed.
3740 The result is put in TARGET if that is convenient.
3742 MODE is the mode of operation. */
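/* Sketch of the identity being used (editorial note, not from the
   original sources): for N-bit two's complement inputs, the unsigned and
   signed high halves of the same 2N-bit product satisfy, modulo 2^N,

     uhigh == shigh + (OP0 < 0 ? OP1 : 0) + (OP1 < 0 ? OP0 : 0)

   The two shift-and-AND pairs below materialize the conditional terms,
   and ADJ_CODE selects PLUS (signed -> unsigned) or MINUS
   (unsigned -> signed).  */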
3745 expand_mult_highpart_adjust (scalar_int_mode mode, rtx adj_operand, rtx op0,
3746 rtx op1, rtx target, int unsignedp)
3748 rtx tem;
3749 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3751 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3752 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3753 tem = expand_and (mode, tem, op1, NULL_RTX);
3754 adj_operand
3755 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3756 adj_operand);
3758 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3759 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3760 tem = expand_and (mode, tem, op0, NULL_RTX);
3761 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3762 target);
3764 return target;
3767 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3769 static rtx
3770 extract_high_half (scalar_int_mode mode, rtx op)
3772 if (mode == word_mode)
3773 return gen_highpart (mode, op);
3775 scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3777 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3778 GET_MODE_BITSIZE (mode), 0, 1);
3779 return convert_modes (mode, wider_mode, op, 0);
3782 /* Like expmed_mult_highpart, but only consider using a multiplication
3783 optab. OP1 is an rtx for the constant operand. */
3785 static rtx
3786 expmed_mult_highpart_optab (scalar_int_mode mode, rtx op0, rtx op1,
3787 rtx target, int unsignedp, int max_cost)
3789 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3790 optab moptab;
3791 rtx tem;
3792 int size;
3793 bool speed = optimize_insn_for_speed_p ();
3795 scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3797 size = GET_MODE_BITSIZE (mode);
3799 /* Firstly, try using a multiplication insn that only generates the needed
3800 high part of the product, and in the sign flavor of unsignedp. */
3801 if (mul_highpart_cost (speed, mode) < max_cost)
3803 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3804 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3805 unsignedp, OPTAB_DIRECT);
3806 if (tem)
3807 return tem;
3810 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3811 Need to adjust the result after the multiplication. */
3812 if (size - 1 < BITS_PER_WORD
3813 && (mul_highpart_cost (speed, mode)
3814 + 2 * shift_cost (speed, mode, size-1)
3815 + 4 * add_cost (speed, mode) < max_cost))
3817 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3818 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3819 unsignedp, OPTAB_DIRECT);
3820 if (tem)
3821 /* We used the wrong signedness. Adjust the result. */
3822 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3823 tem, unsignedp);
3826 /* Try widening multiplication. */
3827 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3828 if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3829 && mul_widen_cost (speed, wider_mode) < max_cost)
3831 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3832 unsignedp, OPTAB_WIDEN);
3833 if (tem)
3834 return extract_high_half (mode, tem);
3837 /* Try widening the mode and perform a non-widening multiplication. */
3838 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3839 && size - 1 < BITS_PER_WORD
3840 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3841 < max_cost))
3843 rtx_insn *insns;
3844 rtx wop0, wop1;
3846 /* We need to widen the operands, for example to ensure the
3847 constant multiplier is correctly sign or zero extended.
3848 Use a sequence to clean up any instructions emitted by
3849 the conversions if things don't work out. */
3850 start_sequence ();
3851 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3852 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3853 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3854 unsignedp, OPTAB_WIDEN);
3855 insns = get_insns ();
3856 end_sequence ();
3858 if (tem)
3860 emit_insn (insns);
3861 return extract_high_half (mode, tem);
3865 /* Try widening multiplication of opposite signedness, and adjust. */
3866 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3867 if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3868 && size - 1 < BITS_PER_WORD
3869 && (mul_widen_cost (speed, wider_mode)
3870 + 2 * shift_cost (speed, mode, size-1)
3871 + 4 * add_cost (speed, mode) < max_cost))
3873 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3874 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3875 if (tem != 0)
3877 tem = extract_high_half (mode, tem);
3878 /* We used the wrong signedness. Adjust the result. */
3879 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3880 target, unsignedp);
3884 return 0;
3887 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3888 putting the high half of the result in TARGET if that is convenient,
3889 and return where the result is. If the operation cannot be performed,
3890 0 is returned.
3892 MODE is the mode of operation and result.
3894 UNSIGNEDP nonzero means unsigned multiply.
3896 MAX_COST is the total allowed cost for the expanded RTL. */
3898 static rtx
3899 expmed_mult_highpart (scalar_int_mode mode, rtx op0, rtx op1,
3900 rtx target, int unsignedp, int max_cost)
3902 unsigned HOST_WIDE_INT cnst1;
3903 int extra_cost;
3904 bool sign_adjust = false;
3905 enum mult_variant variant;
3906 struct algorithm alg;
3907 rtx tem;
3908 bool speed = optimize_insn_for_speed_p ();
3910 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3911 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3913 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3915 /* We can't optimize modes wider than BITS_PER_WORD.
3916 ??? We might be able to perform double-word arithmetic if
3917 mode == word_mode; however, all the cost calculations in
3918 synth_mult etc. assume single-word operations. */
3919 scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3920 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3921 return expmed_mult_highpart_optab (mode, op0, op1, target,
3922 unsignedp, max_cost);
3924 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3926 /* Check whether we try to multiply by a negative constant. */
3927 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3929 sign_adjust = true;
3930 extra_cost += add_cost (speed, mode);
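/* Editorial note (not from the original sources): read as an unsigned
   number, a negative constant equals the true coefficient plus 2^N, so
   the unsigned highpart product computed below is too large by exactly
   OP0 (mod 2^N).  The sign_adjust subtraction of OP0 after the multiply
   corrects for that.  */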
3933 /* See whether shift/add multiplication is cheap enough. */
3934 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3935 max_cost - extra_cost))
3937 /* See whether the specialized multiplication optabs are
3938 cheaper than the shift/add version. */
3939 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3940 alg.cost.cost + extra_cost);
3941 if (tem)
3942 return tem;
3944 tem = convert_to_mode (wider_mode, op0, unsignedp);
3945 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3946 tem = extract_high_half (mode, tem);
3948 /* Adjust result for signedness. */
3949 if (sign_adjust)
3950 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3952 return tem;
3954 return expmed_mult_highpart_optab (mode, op0, op1, target,
3955 unsignedp, max_cost);
3959 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3961 static rtx
3962 expand_smod_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
3964 rtx result, temp, shift;
3965 rtx_code_label *label;
3966 int logd;
3967 int prec = GET_MODE_PRECISION (mode);
3969 logd = floor_log2 (d);
3970 result = gen_reg_rtx (mode);
3972 /* Avoid conditional branches when they're expensive. */
3973 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3974 && optimize_insn_for_speed_p ())
3976 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3977 mode, 0, -1);
3978 if (signmask)
3980 HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
3981 signmask = force_reg (mode, signmask);
3982 shift = gen_int_shift_amount (mode, GET_MODE_BITSIZE (mode) - logd);
3984 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3985 which instruction sequence to use. If logical right shifts
3986 are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3987 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
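/* Editorial illustration of the LSHIFTRT sequence (not from the original
   sources): for d = 4 (logd = 2) and x = -5, the logically shifted sign
   mask is 3, and ((-5 + 3) & 3) - 3 == -1, which matches C's truncating
   -5 % 4.  */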
3989 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3990 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3991 || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
3992 > COSTS_N_INSNS (2)))
3994 temp = expand_binop (mode, xor_optab, op0, signmask,
3995 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3996 temp = expand_binop (mode, sub_optab, temp, signmask,
3997 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3998 temp = expand_binop (mode, and_optab, temp,
3999 gen_int_mode (masklow, mode),
4000 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4001 temp = expand_binop (mode, xor_optab, temp, signmask,
4002 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4003 temp = expand_binop (mode, sub_optab, temp, signmask,
4004 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4006 else
4008 signmask = expand_binop (mode, lshr_optab, signmask, shift,
4009 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4010 signmask = force_reg (mode, signmask);
4012 temp = expand_binop (mode, add_optab, op0, signmask,
4013 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4014 temp = expand_binop (mode, and_optab, temp,
4015 gen_int_mode (masklow, mode),
4016 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4017 temp = expand_binop (mode, sub_optab, temp, signmask,
4018 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4020 return temp;
4024 /* Mask contains the mode's signbit and the significant bits of the
4025 modulus. By including the signbit in the operation, many targets
4026 can avoid an explicit compare operation in the following comparison
4027 against zero. */
4028 wide_int mask = wi::mask (logd, false, prec);
4029 mask = wi::set_bit (mask, prec - 1);
4031 temp = expand_binop (mode, and_optab, op0,
4032 immed_wide_int_const (mask, mode),
4033 result, 1, OPTAB_LIB_WIDEN);
4034 if (temp != result)
4035 emit_move_insn (result, temp);
4037 label = gen_label_rtx ();
4038 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
4040 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
4041 0, OPTAB_LIB_WIDEN);
4043 mask = wi::mask (logd, true, prec);
4044 temp = expand_binop (mode, ior_optab, temp,
4045 immed_wide_int_const (mask, mode),
4046 result, 1, OPTAB_LIB_WIDEN);
4047 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
4048 0, OPTAB_LIB_WIDEN);
4049 if (temp != result)
4050 emit_move_insn (result, temp);
4051 emit_label (label);
4052 return result;
4055 /* Expand signed division of OP0 by a power of two D in mode MODE.
4056 This routine is only called for positive values of D. */
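/* Editorial illustration (not from the original sources): each variant
   below conditionally adds D - 1 to negative operands before the
   arithmetic shift so that the shift rounds toward zero.  E.g. for
   x = -7 and d = 4, (-7 + 3) >> 2 == -1, matching C's truncating -7 / 4,
   whereas -7 >> 2 alone would give -2.  */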
4058 static rtx
4059 expand_sdiv_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
4061 rtx temp;
4062 rtx_code_label *label;
4063 int logd;
4065 logd = floor_log2 (d);
4067 if (d == 2
4068 && BRANCH_COST (optimize_insn_for_speed_p (),
4069 false) >= 1)
4071 temp = gen_reg_rtx (mode);
4072 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
4073 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4074 0, OPTAB_LIB_WIDEN);
4075 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4078 if (HAVE_conditional_move
4079 && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
4081 rtx temp2;
4083 start_sequence ();
4084 temp2 = copy_to_mode_reg (mode, op0);
4085 temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
4086 NULL_RTX, 0, OPTAB_LIB_WIDEN);
4087 temp = force_reg (mode, temp);
4089 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
4090 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
4091 mode, temp, temp2, mode, 0);
4092 if (temp2)
4094 rtx_insn *seq = get_insns ();
4095 end_sequence ();
4096 emit_insn (seq);
4097 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
4099 end_sequence ();
4102 if (BRANCH_COST (optimize_insn_for_speed_p (),
4103 false) >= 2)
4105 int ushift = GET_MODE_BITSIZE (mode) - logd;
4107 temp = gen_reg_rtx (mode);
4108 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
4109 if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
4110 || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
4111 > COSTS_N_INSNS (1))
4112 temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
4113 NULL_RTX, 0, OPTAB_LIB_WIDEN);
4114 else
4115 temp = expand_shift (RSHIFT_EXPR, mode, temp,
4116 ushift, NULL_RTX, 1);
4117 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4118 0, OPTAB_LIB_WIDEN);
4119 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4122 label = gen_label_rtx ();
4123 temp = copy_to_mode_reg (mode, op0);
4124 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
4125 expand_inc (temp, gen_int_mode (d - 1, mode));
4126 emit_label (label);
4127 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4130 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
4131 if that is convenient, and returning where the result is.
4132 You may request either the quotient or the remainder as the result;
4133 specify REM_FLAG nonzero to get the remainder.
4135 CODE is the expression code for which kind of division this is;
4136 it controls how rounding is done. MODE is the machine mode to use.
4137 UNSIGNEDP nonzero means do unsigned division. */
4139 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
4140 and then correct it by or'ing in missing high bits
4141 if result of ANDI is nonzero.
4142 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
4143 This could optimize to a bfexts instruction.
4144 But C doesn't use these operations, so their optimizations are
4145 left for later. */
4146 /* ??? For modulo, we don't actually need the highpart of the first product,
4147 the low part will do nicely. And for small divisors, the second multiply
4148 can also be a low-part only multiply or even be completely left out.
4149 E.g. to calculate the remainder of a division by 3 with a 32 bit
4150 multiply, multiply with 0x55555556 and extract the upper two bits;
4151 the result is exact for inputs up to 0x1fffffff.
4152 The input range can be reduced by using cross-sum rules.
4153 For odd divisors >= 3, the following table gives right shift counts
4154 so that if a number is shifted by an integer multiple of the given
4155 amount, the remainder stays the same:
4156 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
4157 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
4158 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
4159 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
4160 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
4162 Cross-sum rules for even numbers can be derived by leaving as many bits
4163 to the right alone as the divisor has zeros to the right.
4164 E.g. if x is an unsigned 32 bit number:
4165 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
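/* Editorial illustration of the division-by-3 remark above (an assumed
   reading of "the upper two bits", not from the original sources): taking
   them as the top two bits of the low 32-bit half of the product,
   x % 3 == ((x * 0x55555556) & 0xffffffff) >> 30 within the stated range;
   e.g. x = 7 gives a low word of 0x5555555a (top bits 01 == 1) and x = 8
   gives 0xaaaaaab0 (top bits 10 == 2).  */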
4169 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
4170 rtx op0, rtx op1, rtx target, int unsignedp)
4172 machine_mode compute_mode;
4173 rtx tquotient;
4174 rtx quotient = 0, remainder = 0;
4175 rtx_insn *last;
4176 rtx_insn *insn;
4177 optab optab1, optab2;
4178 int op1_is_constant, op1_is_pow2 = 0;
4179 int max_cost, extra_cost;
4180 static HOST_WIDE_INT last_div_const = 0;
4181 bool speed = optimize_insn_for_speed_p ();
4183 op1_is_constant = CONST_INT_P (op1);
4184 if (op1_is_constant)
4186 wide_int ext_op1 = rtx_mode_t (op1, mode);
4187 op1_is_pow2 = (wi::popcount (ext_op1) == 1
4188 || (! unsignedp
4189 && wi::popcount (wi::neg (ext_op1)) == 1));
4193 This is the structure of expand_divmod:
4195 First comes code to fix up the operands so we can perform the operations
4196 correctly and efficiently.
4198 Second comes a switch statement with code specific for each rounding mode.
4199 For some special operands this code emits all RTL for the desired
4200 operation, for other cases, it generates only a quotient and stores it in
4201 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
4202 to indicate that it has not done anything.
4204 Last comes code that finishes the operation. If QUOTIENT is set and
4205 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
4206 QUOTIENT is not set, it is computed using trunc rounding.
4208 We try to generate special code for division and remainder when OP1 is a
4209 constant. If |OP1| = 2**n we can use shifts and some other fast
4210 operations. For other values of OP1, we compute a carefully selected
4211 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4212 by m.
4214 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4215 half of the product. Different strategies for generating the product are
4216 implemented in expmed_mult_highpart.
4218 If what we actually want is the remainder, we generate that by another
4219 by-constant multiplication and a subtraction. */
4221 /* We shouldn't be called with OP1 == const1_rtx, but some of the
4222 code below will malfunction if we are, so check here and handle
4223 the special case if so. */
4224 if (op1 == const1_rtx)
4225 return rem_flag ? const0_rtx : op0;
4227 /* When dividing by -1, we could get an overflow.
4228 negv_optab can handle overflows. */
4229 if (! unsignedp && op1 == constm1_rtx)
4231 if (rem_flag)
4232 return const0_rtx;
4233 return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4234 ? negv_optab : neg_optab, op0, target, 0);
4237 if (target
4238 /* Don't use the function value register as a target
4239 since we have to read it as well as write it,
4240 and function-inlining gets confused by this. */
4241 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4242 /* Don't clobber an operand while doing a multi-step calculation. */
4243 || ((rem_flag || op1_is_constant)
4244 && (reg_mentioned_p (target, op0)
4245 || (MEM_P (op0) && MEM_P (target))))
4246 || reg_mentioned_p (target, op1)
4247 || (MEM_P (op1) && MEM_P (target))))
4248 target = 0;
4250 /* Get the mode in which to perform this computation. Normally it will
4251 be MODE, but sometimes we can't do the desired operation in MODE.
4252 If so, pick a wider mode in which we can do the operation. Convert
4253 to that mode at the start to avoid repeated conversions.
4255 First see what operations we need. These depend on the expression
4256 we are evaluating. (We assume that divxx3 insns exist under the
4257 same conditions that modxx3 insns and that these insns don't normally
4258 fail. If these assumptions are not correct, we may generate less
4259 efficient code in some cases.)
4261 Then see if we find a mode in which we can open-code that operation
4262 (either a division, modulus, or shift). Finally, check for the smallest
4263 mode for which we can do the operation with a library call. */
4265 /* We might want to refine this now that we have division-by-constant
4266 optimization. Since expmed_mult_highpart tries so many variants, it is
4267 not straightforward to generalize this. Maybe we should make an array
4268 of possible modes in init_expmed? Save this for GCC 2.7. */
4270 optab1 = (op1_is_pow2
4271 ? (unsignedp ? lshr_optab : ashr_optab)
4272 : (unsignedp ? udiv_optab : sdiv_optab));
4273 optab2 = (op1_is_pow2 ? optab1
4274 : (unsignedp ? udivmod_optab : sdivmod_optab));
4276 FOR_EACH_MODE_FROM (compute_mode, mode)
4277 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4278 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4279 break;
4281 if (compute_mode == VOIDmode)
4282 FOR_EACH_MODE_FROM (compute_mode, mode)
4283 if (optab_libfunc (optab1, compute_mode)
4284 || optab_libfunc (optab2, compute_mode))
4285 break;
4287 /* If we still couldn't find a mode, use MODE, but expand_binop will
4288 probably die. */
4289 if (compute_mode == VOIDmode)
4290 compute_mode = mode;
4292 if (target && GET_MODE (target) == compute_mode)
4293 tquotient = target;
4294 else
4295 tquotient = gen_reg_rtx (compute_mode);
4297 #if 0
4298 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4299 (mode), and thereby get better code when OP1 is a constant. Do that
4300 later. It will require going over all usages of SIZE below. */
4301 size = GET_MODE_BITSIZE (mode);
4302 #endif
4304 /* Only deduct something for a REM if the last divide done was
4305 for a different constant. Then set the constant of the last
4306 divide. */
4307 max_cost = (unsignedp
4308 ? udiv_cost (speed, compute_mode)
4309 : sdiv_cost (speed, compute_mode));
4310 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4311 && INTVAL (op1) == last_div_const))
4312 max_cost -= (mul_cost (speed, compute_mode)
4313 + add_cost (speed, compute_mode));
4315 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4317 /* Now convert to the best mode to use. */
4318 if (compute_mode != mode)
4320 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4321 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4323 /* convert_modes may have placed op1 into a register, so we
4324 must recompute the following. */
4325 op1_is_constant = CONST_INT_P (op1);
4326 if (op1_is_constant)
4328 wide_int ext_op1 = rtx_mode_t (op1, compute_mode);
4329 op1_is_pow2 = (wi::popcount (ext_op1) == 1
4330 || (! unsignedp
4331 && wi::popcount (wi::neg (ext_op1)) == 1));
4333 else
4334 op1_is_pow2 = 0;
4337 /* If one of the operands is a volatile MEM, copy it into a register. */
4339 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4340 op0 = force_reg (compute_mode, op0);
4341 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4342 op1 = force_reg (compute_mode, op1);
4344 /* If we need the remainder or if OP1 is constant, we need to
4345 put OP0 in a register in case it has any queued subexpressions. */
4346 if (rem_flag || op1_is_constant)
4347 op0 = force_reg (compute_mode, op0);
4349 last = get_last_insn ();
4351 /* Promote floor rounding to trunc rounding for unsigned operations. */
4352 if (unsignedp)
4354 if (code == FLOOR_DIV_EXPR)
4355 code = TRUNC_DIV_EXPR;
4356 if (code == FLOOR_MOD_EXPR)
4357 code = TRUNC_MOD_EXPR;
4358 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4359 code = TRUNC_DIV_EXPR;
4362 if (op1 != const0_rtx)
4363 switch (code)
4365 case TRUNC_MOD_EXPR:
4366 case TRUNC_DIV_EXPR:
4367 if (op1_is_constant)
4369 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4370 int size = GET_MODE_BITSIZE (int_mode);
4371 if (unsignedp)
4373 unsigned HOST_WIDE_INT mh, ml;
4374 int pre_shift, post_shift;
4375 int dummy;
4376 wide_int wd = rtx_mode_t (op1, int_mode);
4377 unsigned HOST_WIDE_INT d = wd.to_uhwi ();
4379 if (wi::popcount (wd) == 1)
4381 pre_shift = floor_log2 (d);
4382 if (rem_flag)
4384 unsigned HOST_WIDE_INT mask
4385 = (HOST_WIDE_INT_1U << pre_shift) - 1;
4386 remainder
4387 = expand_binop (int_mode, and_optab, op0,
4388 gen_int_mode (mask, int_mode),
4389 remainder, 1,
4390 OPTAB_LIB_WIDEN);
4391 if (remainder)
4392 return gen_lowpart (mode, remainder);
4394 quotient = expand_shift (RSHIFT_EXPR, int_mode, op0,
4395 pre_shift, tquotient, 1);
4397 else if (size <= HOST_BITS_PER_WIDE_INT)
4399 if (d >= (HOST_WIDE_INT_1U << (size - 1)))
4401 /* Most significant bit of divisor is set; emit an scc
4402 insn. */
4403 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4404 int_mode, 1, 1);
4406 else
4408 /* Find a suitable multiplier and right shift count
4409 instead of multiplying with D. */
4411 mh = choose_multiplier (d, size, size,
4412 &ml, &post_shift, &dummy);
4414 /* If the suggested multiplier is more than SIZE bits,
4415 we can do better for even divisors, using an
4416 initial right shift. */
4417 if (mh != 0 && (d & 1) == 0)
4419 pre_shift = ctz_or_zero (d);
4420 mh = choose_multiplier (d >> pre_shift, size,
4421 size - pre_shift,
4422 &ml, &post_shift, &dummy);
4423 gcc_assert (!mh);
4425 else
4426 pre_shift = 0;
4428 if (mh != 0)
4430 rtx t1, t2, t3, t4;
4432 if (post_shift - 1 >= BITS_PER_WORD)
4433 goto fail1;
4435 extra_cost
4436 = (shift_cost (speed, int_mode, post_shift - 1)
4437 + shift_cost (speed, int_mode, 1)
4438 + 2 * add_cost (speed, int_mode));
4439 t1 = expmed_mult_highpart
4440 (int_mode, op0, gen_int_mode (ml, int_mode),
4441 NULL_RTX, 1, max_cost - extra_cost);
4442 if (t1 == 0)
4443 goto fail1;
4444 t2 = force_operand (gen_rtx_MINUS (int_mode,
4445 op0, t1),
4446 NULL_RTX);
4447 t3 = expand_shift (RSHIFT_EXPR, int_mode,
4448 t2, 1, NULL_RTX, 1);
4449 t4 = force_operand (gen_rtx_PLUS (int_mode,
4450 t1, t3),
4451 NULL_RTX);
4452 quotient = expand_shift
4453 (RSHIFT_EXPR, int_mode, t4,
4454 post_shift - 1, tquotient, 1);
4456 else
4458 rtx t1, t2;
4460 if (pre_shift >= BITS_PER_WORD
4461 || post_shift >= BITS_PER_WORD)
4462 goto fail1;
4464 t1 = expand_shift
4465 (RSHIFT_EXPR, int_mode, op0,
4466 pre_shift, NULL_RTX, 1);
4467 extra_cost
4468 = (shift_cost (speed, int_mode, pre_shift)
4469 + shift_cost (speed, int_mode, post_shift));
4470 t2 = expmed_mult_highpart
4471 (int_mode, t1,
4472 gen_int_mode (ml, int_mode),
4473 NULL_RTX, 1, max_cost - extra_cost);
4474 if (t2 == 0)
4475 goto fail1;
4476 quotient = expand_shift
4477 (RSHIFT_EXPR, int_mode, t2,
4478 post_shift, tquotient, 1);
4482 else /* Too wide mode to use tricky code */
4483 break;
4485 insn = get_last_insn ();
4486 if (insn != last)
4487 set_dst_reg_note (insn, REG_EQUAL,
4488 gen_rtx_UDIV (int_mode, op0, op1),
4489 quotient);
4491 else /* TRUNC_DIV, signed */
4493 unsigned HOST_WIDE_INT ml;
4494 int lgup, post_shift;
4495 rtx mlr;
4496 HOST_WIDE_INT d = INTVAL (op1);
4497 unsigned HOST_WIDE_INT abs_d;
4499 /* Not prepared to handle division/remainder by
4500 0xffffffffffffffff8000000000000000 etc. */
4501 if (d == HOST_WIDE_INT_MIN && size > HOST_BITS_PER_WIDE_INT)
4502 break;
4504 /* Since d might be INT_MIN, we have to cast to
4505 unsigned HOST_WIDE_INT before negating to avoid
4506 undefined signed overflow. */
4507 abs_d = (d >= 0
4508 ? (unsigned HOST_WIDE_INT) d
4509 : - (unsigned HOST_WIDE_INT) d);
4511 /* n rem d = n rem -d */
4512 if (rem_flag && d < 0)
4514 d = abs_d;
4515 op1 = gen_int_mode (abs_d, int_mode);
4518 if (d == 1)
4519 quotient = op0;
4520 else if (d == -1)
4521 quotient = expand_unop (int_mode, neg_optab, op0,
4522 tquotient, 0);
4523 else if (size <= HOST_BITS_PER_WIDE_INT
4524 && abs_d == HOST_WIDE_INT_1U << (size - 1))
4526 /* This case is not handled correctly below. */
4527 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4528 int_mode, 1, 1);
4529 if (quotient == 0)
4530 goto fail1;
4532 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4533 && (size <= HOST_BITS_PER_WIDE_INT || d >= 0)
4534 && (rem_flag
4535 ? smod_pow2_cheap (speed, int_mode)
4536 : sdiv_pow2_cheap (speed, int_mode))
4537 /* We assume that the cheap metric is true if the
4538 optab has an expander for this mode. */
4539 && ((optab_handler ((rem_flag ? smod_optab
4540 : sdiv_optab),
4541 int_mode)
4542 != CODE_FOR_nothing)
4543 || (optab_handler (sdivmod_optab, int_mode)
4544 != CODE_FOR_nothing)))
4546 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4548 if (rem_flag)
4550 remainder = expand_smod_pow2 (int_mode, op0, d);
4551 if (remainder)
4552 return gen_lowpart (mode, remainder);
4555 if (sdiv_pow2_cheap (speed, int_mode)
4556 && ((optab_handler (sdiv_optab, int_mode)
4557 != CODE_FOR_nothing)
4558 || (optab_handler (sdivmod_optab, int_mode)
4559 != CODE_FOR_nothing)))
4560 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4561 int_mode, op0,
4562 gen_int_mode (abs_d,
4563 int_mode),
4564 NULL_RTX, 0);
4565 else
4566 quotient = expand_sdiv_pow2 (int_mode, op0, abs_d);
4568 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4569 negate the quotient. */
4570 if (d < 0)
4572 insn = get_last_insn ();
4573 if (insn != last
4574 && abs_d < (HOST_WIDE_INT_1U
4575 << (HOST_BITS_PER_WIDE_INT - 1)))
4576 set_dst_reg_note (insn, REG_EQUAL,
4577 gen_rtx_DIV (int_mode, op0,
4578 gen_int_mode
4579 (abs_d,
4580 int_mode)),
4581 quotient);
4583 quotient = expand_unop (int_mode, neg_optab,
4584 quotient, quotient, 0);
4587 else if (size <= HOST_BITS_PER_WIDE_INT)
4589 choose_multiplier (abs_d, size, size - 1,
4590 &ml, &post_shift, &lgup);
4591 if (ml < HOST_WIDE_INT_1U << (size - 1))
4593 rtx t1, t2, t3;
4595 if (post_shift >= BITS_PER_WORD
4596 || size - 1 >= BITS_PER_WORD)
4597 goto fail1;
4599 extra_cost = (shift_cost (speed, int_mode, post_shift)
4600 + shift_cost (speed, int_mode, size - 1)
4601 + add_cost (speed, int_mode));
4602 t1 = expmed_mult_highpart
4603 (int_mode, op0, gen_int_mode (ml, int_mode),
4604 NULL_RTX, 0, max_cost - extra_cost);
4605 if (t1 == 0)
4606 goto fail1;
4607 t2 = expand_shift
4608 (RSHIFT_EXPR, int_mode, t1,
4609 post_shift, NULL_RTX, 0);
4610 t3 = expand_shift
4611 (RSHIFT_EXPR, int_mode, op0,
4612 size - 1, NULL_RTX, 0);
4613 if (d < 0)
4614 quotient
4615 = force_operand (gen_rtx_MINUS (int_mode, t3, t2),
4616 tquotient);
4617 else
4618 quotient
4619 = force_operand (gen_rtx_MINUS (int_mode, t2, t3),
4620 tquotient);
4622 else
4624 rtx t1, t2, t3, t4;
4626 if (post_shift >= BITS_PER_WORD
4627 || size - 1 >= BITS_PER_WORD)
4628 goto fail1;
4630 ml |= HOST_WIDE_INT_M1U << (size - 1);
4631 mlr = gen_int_mode (ml, int_mode);
4632 extra_cost = (shift_cost (speed, int_mode, post_shift)
4633 + shift_cost (speed, int_mode, size - 1)
4634 + 2 * add_cost (speed, int_mode));
4635 t1 = expmed_mult_highpart (int_mode, op0, mlr,
4636 NULL_RTX, 0,
4637 max_cost - extra_cost);
4638 if (t1 == 0)
4639 goto fail1;
4640 t2 = force_operand (gen_rtx_PLUS (int_mode, t1, op0),
4641 NULL_RTX);
4642 t3 = expand_shift
4643 (RSHIFT_EXPR, int_mode, t2,
4644 post_shift, NULL_RTX, 0);
4645 t4 = expand_shift
4646 (RSHIFT_EXPR, int_mode, op0,
4647 size - 1, NULL_RTX, 0);
4648 if (d < 0)
4649 quotient
4650 = force_operand (gen_rtx_MINUS (int_mode, t4, t3),
4651 tquotient);
4652 else
4653 quotient
4654 = force_operand (gen_rtx_MINUS (int_mode, t3, t4),
4655 tquotient);
4658 else /* Too wide mode to use tricky code */
4659 break;
4661 insn = get_last_insn ();
4662 if (insn != last)
4663 set_dst_reg_note (insn, REG_EQUAL,
4664 gen_rtx_DIV (int_mode, op0, op1),
4665 quotient);
4667 break;
4669 fail1:
4670 delete_insns_since (last);
4671 break;
4673 case FLOOR_DIV_EXPR:
4674 case FLOOR_MOD_EXPR:
4675 /* We will come here only for signed operations. */
4676 if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
4678 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4679 int size = GET_MODE_BITSIZE (int_mode);
4680 unsigned HOST_WIDE_INT mh, ml;
4681 int pre_shift, lgup, post_shift;
4682 HOST_WIDE_INT d = INTVAL (op1);
4684 if (d > 0)
4686 /* We could just as easily deal with negative constants here,
4687 but it does not seem worth the trouble for GCC 2.6. */
4688 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4690 pre_shift = floor_log2 (d);
4691 if (rem_flag)
4693 unsigned HOST_WIDE_INT mask
4694 = (HOST_WIDE_INT_1U << pre_shift) - 1;
4695 remainder = expand_binop
4696 (int_mode, and_optab, op0,
4697 gen_int_mode (mask, int_mode),
4698 remainder, 0, OPTAB_LIB_WIDEN);
4699 if (remainder)
4700 return gen_lowpart (mode, remainder);
4702 quotient = expand_shift
4703 (RSHIFT_EXPR, int_mode, op0,
4704 pre_shift, tquotient, 0);
4706 else
4708 rtx t1, t2, t3, t4;
4710 mh = choose_multiplier (d, size, size - 1,
4711 &ml, &post_shift, &lgup);
4712 gcc_assert (!mh);
4714 if (post_shift < BITS_PER_WORD
4715 && size - 1 < BITS_PER_WORD)
4717 t1 = expand_shift
4718 (RSHIFT_EXPR, int_mode, op0,
4719 size - 1, NULL_RTX, 0);
4720 t2 = expand_binop (int_mode, xor_optab, op0, t1,
4721 NULL_RTX, 0, OPTAB_WIDEN);
4722 extra_cost = (shift_cost (speed, int_mode, post_shift)
4723 + shift_cost (speed, int_mode, size - 1)
4724 + 2 * add_cost (speed, int_mode));
4725 t3 = expmed_mult_highpart
4726 (int_mode, t2, gen_int_mode (ml, int_mode),
4727 NULL_RTX, 1, max_cost - extra_cost);
4728 if (t3 != 0)
4730 t4 = expand_shift
4731 (RSHIFT_EXPR, int_mode, t3,
4732 post_shift, NULL_RTX, 1);
4733 quotient = expand_binop (int_mode, xor_optab,
4734 t4, t1, tquotient, 0,
4735 OPTAB_WIDEN);
4740 else
4742 rtx nsign, t1, t2, t3, t4;
4743 t1 = force_operand (gen_rtx_PLUS (int_mode,
4744 op0, constm1_rtx), NULL_RTX);
4745 t2 = expand_binop (int_mode, ior_optab, op0, t1, NULL_RTX,
4746 0, OPTAB_WIDEN);
4747 nsign = expand_shift (RSHIFT_EXPR, int_mode, t2,
4748 size - 1, NULL_RTX, 0);
4749 t3 = force_operand (gen_rtx_MINUS (int_mode, t1, nsign),
4750 NULL_RTX);
4751 t4 = expand_divmod (0, TRUNC_DIV_EXPR, int_mode, t3, op1,
4752 NULL_RTX, 0);
4753 if (t4)
4755 rtx t5;
4756 t5 = expand_unop (int_mode, one_cmpl_optab, nsign,
4757 NULL_RTX, 0);
4758 quotient = force_operand (gen_rtx_PLUS (int_mode, t4, t5),
4759 tquotient);
4764 if (quotient != 0)
4765 break;
4766 delete_insns_since (last);
4768 /* Try using an instruction that produces both the quotient and
4769 remainder, using truncation. We can easily compensate the quotient
4770 or remainder to get floor rounding, once we have the remainder.
4771 Notice that we also compute the final remainder value here,
4772 and return the result right away. */
4773 if (target == 0 || GET_MODE (target) != compute_mode)
4774 target = gen_reg_rtx (compute_mode);
4776 if (rem_flag)
4778 remainder
4779 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4780 quotient = gen_reg_rtx (compute_mode);
4782 else
4784 quotient
4785 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4786 remainder = gen_reg_rtx (compute_mode);
4789 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4790 quotient, remainder, 0))
4792 /* This could be computed with a branch-less sequence.
4793 Save that for later. */
4794 rtx tem;
4795 rtx_code_label *label = gen_label_rtx ();
4796 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4797 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4798 NULL_RTX, 0, OPTAB_WIDEN);
4799 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4800 expand_dec (quotient, const1_rtx);
4801 expand_inc (remainder, op1);
4802 emit_label (label);
4803 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4806 /* No luck with division elimination or divmod. Have to do it
4807 by conditionally adjusting op0 *and* the result. */
4809 rtx_code_label *label1, *label2, *label3, *label4, *label5;
4810 rtx adjusted_op0;
4811 rtx tem;
4813 quotient = gen_reg_rtx (compute_mode);
4814 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4815 label1 = gen_label_rtx ();
4816 label2 = gen_label_rtx ();
4817 label3 = gen_label_rtx ();
4818 label4 = gen_label_rtx ();
4819 label5 = gen_label_rtx ();
4820 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4821 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4822 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4823 quotient, 0, OPTAB_LIB_WIDEN);
4824 if (tem != quotient)
4825 emit_move_insn (quotient, tem);
4826 emit_jump_insn (targetm.gen_jump (label5));
4827 emit_barrier ();
4828 emit_label (label1);
4829 expand_inc (adjusted_op0, const1_rtx);
4830 emit_jump_insn (targetm.gen_jump (label4));
4831 emit_barrier ();
4832 emit_label (label2);
4833 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4834 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4835 quotient, 0, OPTAB_LIB_WIDEN);
4836 if (tem != quotient)
4837 emit_move_insn (quotient, tem);
4838 emit_jump_insn (targetm.gen_jump (label5));
4839 emit_barrier ();
4840 emit_label (label3);
4841 expand_dec (adjusted_op0, const1_rtx);
4842 emit_label (label4);
4843 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4844 quotient, 0, OPTAB_LIB_WIDEN);
4845 if (tem != quotient)
4846 emit_move_insn (quotient, tem);
4847 expand_dec (quotient, const1_rtx);
4848 emit_label (label5);
4850 break;
4852 case CEIL_DIV_EXPR:
4853 case CEIL_MOD_EXPR:
4854 if (unsignedp)
4856 if (op1_is_constant
4857 && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4858 && (HWI_COMPUTABLE_MODE_P (compute_mode)
4859 || INTVAL (op1) >= 0))
4861 scalar_int_mode int_mode
4862 = as_a <scalar_int_mode> (compute_mode);
4863 rtx t1, t2, t3;
4864 unsigned HOST_WIDE_INT d = INTVAL (op1);
4865 t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
4866 floor_log2 (d), tquotient, 1);
4867 t2 = expand_binop (int_mode, and_optab, op0,
4868 gen_int_mode (d - 1, int_mode),
4869 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4870 t3 = gen_reg_rtx (int_mode);
4871 t3 = emit_store_flag (t3, NE, t2, const0_rtx, int_mode, 1, 1);
4872 if (t3 == 0)
4874 rtx_code_label *lab;
4875 lab = gen_label_rtx ();
4876 do_cmp_and_jump (t2, const0_rtx, EQ, int_mode, lab);
4877 expand_inc (t1, const1_rtx);
4878 emit_label (lab);
4879 quotient = t1;
4881 else
4882 quotient = force_operand (gen_rtx_PLUS (int_mode, t1, t3),
4883 tquotient);
4884 break;
4887 /* Try using an instruction that produces both the quotient and
4888 remainder, using truncation. We can easily compensate the
4889 quotient or remainder to get ceiling rounding, once we have the
4890 remainder. Notice that we also compute the final remainder
4891 value here, and return the result right away. */
4892 if (target == 0 || GET_MODE (target) != compute_mode)
4893 target = gen_reg_rtx (compute_mode);
4895 if (rem_flag)
4897 remainder = (REG_P (target)
4898 ? target : gen_reg_rtx (compute_mode));
4899 quotient = gen_reg_rtx (compute_mode);
4901 else
4903 quotient = (REG_P (target)
4904 ? target : gen_reg_rtx (compute_mode));
4905 remainder = gen_reg_rtx (compute_mode);
4908 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4909 remainder, 1))
4911 /* This could be computed with a branch-less sequence.
4912 Save that for later. */
4913 rtx_code_label *label = gen_label_rtx ();
4914 do_cmp_and_jump (remainder, const0_rtx, EQ,
4915 compute_mode, label);
4916 expand_inc (quotient, const1_rtx);
4917 expand_dec (remainder, op1);
4918 emit_label (label);
4919 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4922 /* No luck with division elimination or divmod. Have to do it
4923 by conditionally adjusting op0 *and* the result. */
4925 rtx_code_label *label1, *label2;
4926 rtx adjusted_op0, tem;
4928 quotient = gen_reg_rtx (compute_mode);
4929 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4930 label1 = gen_label_rtx ();
4931 label2 = gen_label_rtx ();
4932 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4933 compute_mode, label1);
4934 emit_move_insn (quotient, const0_rtx);
4935 emit_jump_insn (targetm.gen_jump (label2));
4936 emit_barrier ();
4937 emit_label (label1);
4938 expand_dec (adjusted_op0, const1_rtx);
4939 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4940 quotient, 1, OPTAB_LIB_WIDEN);
4941 if (tem != quotient)
4942 emit_move_insn (quotient, tem);
4943 expand_inc (quotient, const1_rtx);
4944 emit_label (label2);
4947 else /* signed */
4949 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4950 && INTVAL (op1) >= 0)
4952 /* This is extremely similar to the code for the unsigned case
4953 above. For 2.7 we should merge these variants, but for
4954 2.6.1 I don't want to touch the code for unsigned since that
4955 gets used in C. The signed case will only be used by other
4956 languages (Ada). */
4958 rtx t1, t2, t3;
4959 unsigned HOST_WIDE_INT d = INTVAL (op1);
4960 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4961 floor_log2 (d), tquotient, 0);
4962 t2 = expand_binop (compute_mode, and_optab, op0,
4963 gen_int_mode (d - 1, compute_mode),
4964 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4965 t3 = gen_reg_rtx (compute_mode);
4966 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4967 compute_mode, 1, 1);
4968 if (t3 == 0)
4970 rtx_code_label *lab;
4971 lab = gen_label_rtx ();
4972 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4973 expand_inc (t1, const1_rtx);
4974 emit_label (lab);
4975 quotient = t1;
4977 else
4978 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4979 t1, t3),
4980 tquotient);
4981 break;
4984 /* Try using an instruction that produces both the quotient and
4985 remainder, using truncation. We can easily compensate the
4986 quotient or remainder to get ceiling rounding, once we have the
4987 remainder. Notice that we also compute the final remainder
4988 value here, and return the result right away. */
4989 if (target == 0 || GET_MODE (target) != compute_mode)
4990 target = gen_reg_rtx (compute_mode);
4991 if (rem_flag)
4993 remainder = (REG_P (target)
4994 ? target : gen_reg_rtx (compute_mode));
4995 quotient = gen_reg_rtx (compute_mode);
4997 else
4999 quotient = (REG_P (target)
5000 ? target : gen_reg_rtx (compute_mode));
5001 remainder = gen_reg_rtx (compute_mode);
5004 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
5005 remainder, 0))
5007 /* This could be computed with a branch-less sequence.
5008 Save that for later. */
5009 rtx tem;
5010 rtx_code_label *label = gen_label_rtx ();
5011 do_cmp_and_jump (remainder, const0_rtx, EQ,
5012 compute_mode, label);
5013 tem = expand_binop (compute_mode, xor_optab, op0, op1,
5014 NULL_RTX, 0, OPTAB_WIDEN);
5015 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
5016 expand_inc (quotient, const1_rtx);
5017 expand_dec (remainder, op1);
5018 emit_label (label);
5019 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5022 /* No luck with division elimination or divmod. Have to do it
5023 by conditionally adjusting op0 *and* the result. */
5025 rtx_code_label *label1, *label2, *label3, *label4, *label5;
5026 rtx adjusted_op0;
5027 rtx tem;
5029 quotient = gen_reg_rtx (compute_mode);
5030 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
5031 label1 = gen_label_rtx ();
5032 label2 = gen_label_rtx ();
5033 label3 = gen_label_rtx ();
5034 label4 = gen_label_rtx ();
5035 label5 = gen_label_rtx ();
5036 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
5037 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
5038 compute_mode, label1);
5039 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5040 quotient, 0, OPTAB_LIB_WIDEN);
5041 if (tem != quotient)
5042 emit_move_insn (quotient, tem);
5043 emit_jump_insn (targetm.gen_jump (label5));
5044 emit_barrier ();
5045 emit_label (label1);
5046 expand_dec (adjusted_op0, const1_rtx);
5047 emit_jump_insn (targetm.gen_jump (label4));
5048 emit_barrier ();
5049 emit_label (label2);
5050 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
5051 compute_mode, label3);
5052 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5053 quotient, 0, OPTAB_LIB_WIDEN);
5054 if (tem != quotient)
5055 emit_move_insn (quotient, tem);
5056 emit_jump_insn (targetm.gen_jump (label5));
5057 emit_barrier ();
5058 emit_label (label3);
5059 expand_inc (adjusted_op0, const1_rtx);
5060 emit_label (label4);
5061 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5062 quotient, 0, OPTAB_LIB_WIDEN);
5063 if (tem != quotient)
5064 emit_move_insn (quotient, tem);
5065 expand_inc (quotient, const1_rtx);
5066 emit_label (label5);
5069 break;
5071 case EXACT_DIV_EXPR:
5072 if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
5074 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5075 int size = GET_MODE_BITSIZE (int_mode);
5076 HOST_WIDE_INT d = INTVAL (op1);
5077 unsigned HOST_WIDE_INT ml;
5078 int pre_shift;
5079 rtx t1;
5081 pre_shift = ctz_or_zero (d);
5082 ml = invert_mod2n (d >> pre_shift, size);
5083 t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5084 pre_shift, NULL_RTX, unsignedp);
5085 quotient = expand_mult (int_mode, t1, gen_int_mode (ml, int_mode),
5086 NULL_RTX, 1);
5088 insn = get_last_insn ();
5089 set_dst_reg_note (insn, REG_EQUAL,
5090 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5091 int_mode, op0, op1),
5092 quotient);
5094 break;
5096 case ROUND_DIV_EXPR:
5097 case ROUND_MOD_EXPR:
5098 if (unsignedp)
5100 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5101 rtx tem;
5102 rtx_code_label *label;
5103 label = gen_label_rtx ();
5104 quotient = gen_reg_rtx (int_mode);
5105 remainder = gen_reg_rtx (int_mode);
5106 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
5108 rtx tem;
5109 quotient = expand_binop (int_mode, udiv_optab, op0, op1,
5110 quotient, 1, OPTAB_LIB_WIDEN);
5111 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 1);
5112 remainder = expand_binop (int_mode, sub_optab, op0, tem,
5113 remainder, 1, OPTAB_LIB_WIDEN);
5115 tem = plus_constant (int_mode, op1, -1);
5116 tem = expand_shift (RSHIFT_EXPR, int_mode, tem, 1, NULL_RTX, 1);
5117 do_cmp_and_jump (remainder, tem, LEU, int_mode, label);
5118 expand_inc (quotient, const1_rtx);
5119 expand_dec (remainder, op1);
5120 emit_label (label);
5122 else
5124 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5125 int size = GET_MODE_BITSIZE (int_mode);
5126 rtx abs_rem, abs_op1, tem, mask;
5127 rtx_code_label *label;
5128 label = gen_label_rtx ();
5129 quotient = gen_reg_rtx (int_mode);
5130 remainder = gen_reg_rtx (int_mode);
5131 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
5133 rtx tem;
5134 quotient = expand_binop (int_mode, sdiv_optab, op0, op1,
5135 quotient, 0, OPTAB_LIB_WIDEN);
5136 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 0);
5137 remainder = expand_binop (int_mode, sub_optab, op0, tem,
5138 remainder, 0, OPTAB_LIB_WIDEN);
5140 abs_rem = expand_abs (int_mode, remainder, NULL_RTX, 1, 0);
5141 abs_op1 = expand_abs (int_mode, op1, NULL_RTX, 1, 0);
5142 tem = expand_shift (LSHIFT_EXPR, int_mode, abs_rem,
5143 1, NULL_RTX, 1);
5144 do_cmp_and_jump (tem, abs_op1, LTU, int_mode, label);
5145 tem = expand_binop (int_mode, xor_optab, op0, op1,
5146 NULL_RTX, 0, OPTAB_WIDEN);
5147 mask = expand_shift (RSHIFT_EXPR, int_mode, tem,
5148 size - 1, NULL_RTX, 0);
5149 tem = expand_binop (int_mode, xor_optab, mask, const1_rtx,
5150 NULL_RTX, 0, OPTAB_WIDEN);
5151 tem = expand_binop (int_mode, sub_optab, tem, mask,
5152 NULL_RTX, 0, OPTAB_WIDEN);
5153 expand_inc (quotient, tem);
5154 tem = expand_binop (int_mode, xor_optab, mask, op1,
5155 NULL_RTX, 0, OPTAB_WIDEN);
5156 tem = expand_binop (int_mode, sub_optab, tem, mask,
5157 NULL_RTX, 0, OPTAB_WIDEN);
5158 expand_dec (remainder, tem);
5159 emit_label (label);
5161 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5163 default:
5164 gcc_unreachable ();
5167 if (quotient == 0)
5169 if (target && GET_MODE (target) != compute_mode)
5170 target = 0;
5172 if (rem_flag)
5174 /* Try to produce the remainder without producing the quotient.
5175 If we seem to have a divmod pattern that does not require widening,
5176 don't try widening here. We should really have a WIDEN argument
5177 to expand_twoval_binop, since what we'd really like to do here is
5178 1) try a mod insn in compute_mode
5179 2) try a divmod insn in compute_mode
5180 3) try a div insn in compute_mode and multiply-subtract to get
5181 remainder
5182 4) try the same things with widening allowed. */
5183 remainder
5184 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5185 op0, op1, target,
5186 unsignedp,
5187 ((optab_handler (optab2, compute_mode)
5188 != CODE_FOR_nothing)
5189 ? OPTAB_DIRECT : OPTAB_WIDEN));
5190 if (remainder == 0)
5192 /* No luck there. Can we do remainder and divide at once
5193 without a library call? */
5194 remainder = gen_reg_rtx (compute_mode);
5195 if (! expand_twoval_binop ((unsignedp
5196 ? udivmod_optab
5197 : sdivmod_optab),
5198 op0, op1,
5199 NULL_RTX, remainder, unsignedp))
5200 remainder = 0;
5203 if (remainder)
5204 return gen_lowpart (mode, remainder);
5207 /* Produce the quotient. Try a quotient insn, but not a library call.
5208 If we have a divmod in this mode, use it in preference to widening
5209 the div (for this test we assume it will not fail). Note that optab2
5210 is set to whichever of the two optabs the call below will use. */
5211 quotient
5212 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5213 op0, op1, rem_flag ? NULL_RTX : target,
5214 unsignedp,
5215 ((optab_handler (optab2, compute_mode)
5216 != CODE_FOR_nothing)
5217 ? OPTAB_DIRECT : OPTAB_WIDEN));
5219 if (quotient == 0)
5221 /* No luck there. Try a quotient-and-remainder insn,
5222 keeping the quotient alone. */
5223 quotient = gen_reg_rtx (compute_mode);
5224 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5225 op0, op1,
5226 quotient, NULL_RTX, unsignedp))
5228 quotient = 0;
5229 if (! rem_flag)
5230 /* Still no luck. If we are not computing the remainder,
5231 use a library call for the quotient. */
5232 quotient = sign_expand_binop (compute_mode,
5233 udiv_optab, sdiv_optab,
5234 op0, op1, target,
5235 unsignedp, OPTAB_LIB_WIDEN);
5240 if (rem_flag)
5242 if (target && GET_MODE (target) != compute_mode)
5243 target = 0;
5245 if (quotient == 0)
5247 /* No divide instruction either. Use library for remainder. */
5248 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5249 op0, op1, target,
5250 unsignedp, OPTAB_LIB_WIDEN);
5251 /* No remainder function. Try a quotient-and-remainder
5252 function, keeping the remainder. */
5253 if (!remainder)
5255 remainder = gen_reg_rtx (compute_mode);
5256 if (!expand_twoval_binop_libfunc
5257 (unsignedp ? udivmod_optab : sdivmod_optab,
5258 op0, op1,
5259 NULL_RTX, remainder,
5260 unsignedp ? UMOD : MOD))
5261 remainder = NULL_RTX;
5264 else
5266 /* We divided. Now finish doing X - Y * (X / Y). */
5267 remainder = expand_mult (compute_mode, quotient, op1,
5268 NULL_RTX, unsignedp);
5269 remainder = expand_binop (compute_mode, sub_optab, op0,
5270 remainder, target, unsignedp,
5271 OPTAB_LIB_WIDEN);
5275 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5278 /* Return a tree node with data type TYPE, describing the value of X.
5279 Usually this is a VAR_DECL, if there is no obvious better choice.
5280 X may be an expression; however, we only support those expressions
5281 generated by loop.c. */
5283 tree
5284 make_tree (tree type, rtx x)
5286 tree t;
5288 switch (GET_CODE (x))
5290 case CONST_INT:
5291 case CONST_WIDE_INT:
5292 t = wide_int_to_tree (type, rtx_mode_t (x, TYPE_MODE (type)));
5293 return t;
5295 case CONST_DOUBLE:
5296 STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5297 if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5298 t = wide_int_to_tree (type,
5299 wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5300 HOST_BITS_PER_WIDE_INT * 2));
5301 else
5302 t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5304 return t;
5306 case CONST_VECTOR:
5308 unsigned int npatterns = CONST_VECTOR_NPATTERNS (x);
5309 unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (x);
5310 tree itype = TREE_TYPE (type);
5312 /* Build a tree with vector elements. */
5313 tree_vector_builder elts (type, npatterns, nelts_per_pattern);
5314 unsigned int count = elts.encoded_nelts ();
5315 for (unsigned int i = 0; i < count; ++i)
5317 rtx elt = CONST_VECTOR_ELT (x, i);
5318 elts.quick_push (make_tree (itype, elt));
5321 return elts.build ();
5324 case PLUS:
5325 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5326 make_tree (type, XEXP (x, 1)));
5328 case MINUS:
5329 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5330 make_tree (type, XEXP (x, 1)));
5332 case NEG:
5333 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5335 case MULT:
5336 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5337 make_tree (type, XEXP (x, 1)));
5339 case ASHIFT:
5340 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5341 make_tree (type, XEXP (x, 1)));
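/* For the right-shift cases below, the signedness of the first
   operand's type selects between a logical and an arithmetic
   RSHIFT_EXPR, so LSHIFTRT forces an unsigned type and ASHIFTRT a
   signed one before converting the result back to TYPE.  */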
5343 case LSHIFTRT:
5344 t = unsigned_type_for (type);
5345 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5346 make_tree (t, XEXP (x, 0)),
5347 make_tree (type, XEXP (x, 1))));
5349 case ASHIFTRT:
5350 t = signed_type_for (type);
5351 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5352 make_tree (t, XEXP (x, 0)),
5353 make_tree (type, XEXP (x, 1))));
5355 case DIV:
5356 if (TREE_CODE (type) != REAL_TYPE)
5357 t = signed_type_for (type);
5358 else
5359 t = type;
5361 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5362 make_tree (t, XEXP (x, 0)),
5363 make_tree (t, XEXP (x, 1))));
5364 case UDIV:
5365 t = unsigned_type_for (type);
5366 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5367 make_tree (t, XEXP (x, 0)),
5368 make_tree (t, XEXP (x, 1))));
5370 case SIGN_EXTEND:
5371 case ZERO_EXTEND:
5372 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5373 GET_CODE (x) == ZERO_EXTEND);
5374 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5376 case CONST:
5377 return make_tree (type, XEXP (x, 0));
5379 case SYMBOL_REF:
5380 t = SYMBOL_REF_DECL (x);
5381 if (t)
5382 return fold_convert (type, build_fold_addr_expr (t));
5383 /* fall through. */
5385 default:
5386 if (CONST_POLY_INT_P (x))
5387 return wide_int_to_tree (type, const_poly_int_value (x));
5389 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5391 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5392 address mode to pointer mode. */
5393 if (POINTER_TYPE_P (type))
5394 x = convert_memory_address_addr_space
5395 (SCALAR_INT_TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5397 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5398 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5399 t->decl_with_rtl.rtl = x;
5401 return t;
5405 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5406 and returning TARGET.
5408 If TARGET is 0, a pseudo-register or constant is returned. */
5411 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5413 rtx tem = 0;
5415 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5416 tem = simplify_binary_operation (AND, mode, op0, op1);
5417 if (tem == 0)
5418 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5420 if (target == 0)
5421 target = tem;
5422 else if (tem != target)
5423 emit_move_insn (target, tem);
5424 return target;
5427 /* Helper function for emit_store_flag. */
5429 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5430 machine_mode mode, machine_mode compare_mode,
5431 int unsignedp, rtx x, rtx y, int normalizep,
5432 machine_mode target_mode)
5434 class expand_operand ops[4];
5435 rtx op0, comparison, subtarget;
5436 rtx_insn *last;
5437 scalar_int_mode result_mode = targetm.cstore_mode (icode);
5438 scalar_int_mode int_target_mode;
5440 last = get_last_insn ();
5441 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5442 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5443 if (!x || !y)
5445 delete_insns_since (last);
5446 return NULL_RTX;
5449 if (target_mode == VOIDmode)
5450 int_target_mode = result_mode;
5451 else
5452 int_target_mode = as_a <scalar_int_mode> (target_mode);
5453 if (!target)
5454 target = gen_reg_rtx (int_target_mode);
5456 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5458 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5459 create_fixed_operand (&ops[1], comparison);
5460 create_fixed_operand (&ops[2], x);
5461 create_fixed_operand (&ops[3], y);
5462 if (!maybe_expand_insn (icode, 4, ops))
5464 delete_insns_since (last);
5465 return NULL_RTX;
5467 subtarget = ops[0].value;
5469 /* If we are converting to a wider mode, first convert to
5470 INT_TARGET_MODE, then normalize. This produces better combining
5471 opportunities on machines that have a SIGN_EXTRACT when we are
5472 testing a single bit. This mostly benefits the 68k.
5474 If STORE_FLAG_VALUE does not have the sign bit set when
5475 interpreted in MODE, we can do this conversion as unsigned, which
5476 is usually more efficient. */
5477 if (GET_MODE_PRECISION (int_target_mode) > GET_MODE_PRECISION (result_mode))
5479 gcc_assert (GET_MODE_PRECISION (result_mode) != 1
5480 || STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1);
5482 bool unsignedp = (STORE_FLAG_VALUE >= 0);
5483 convert_move (target, subtarget, unsignedp);
5485 op0 = target;
5486 result_mode = int_target_mode;
5488 else
5489 op0 = subtarget;
5491 /* If we want to keep subexpressions around, don't reuse our last
5492 target. */
5493 if (optimize)
5494 subtarget = 0;
5496 /* Now normalize to the proper value in MODE. Sometimes we don't
5497 have to do anything. */
5498 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5500 /* STORE_FLAG_VALUE might be the most negative number, so write
5501 the comparison this way to avoid a compile-time warning. */
5502 else if (- normalizep == STORE_FLAG_VALUE)
5503 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5505 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5506 it hard to use a value of just the sign bit due to ANSI integer
5507 constant typing rules. */
5508 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5509 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5510 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5511 normalizep == 1);
5512 else
5514 gcc_assert (STORE_FLAG_VALUE & 1);
5516 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5517 if (normalizep == -1)
5518 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5521 /* If we were converting to a smaller mode, do the conversion now. */
5522 if (int_target_mode != result_mode)
5524 convert_move (target, op0, 0);
5525 return target;
5527 else
5528 return op0;
5532 /* A subroutine of emit_store_flag only including "tricks" that do not
5533 need a recursive call. These are kept separate to avoid infinite
5534 loops. */
5536 static rtx
5537 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5538 machine_mode mode, int unsignedp, int normalizep,
5539 machine_mode target_mode)
5541 rtx subtarget;
5542 enum insn_code icode;
5543 machine_mode compare_mode;
5544 enum mode_class mclass;
5545 enum rtx_code scode;
5547 if (unsignedp)
5548 code = unsigned_condition (code);
5549 scode = swap_condition (code);
5551 /* If one operand is constant, make it the second one. Only do this
5552 if the other operand is not constant as well. */
5554 if (swap_commutative_operands_p (op0, op1))
5556 std::swap (op0, op1);
5557 code = swap_condition (code);
5560 if (mode == VOIDmode)
5561 mode = GET_MODE (op0);
5563 if (CONST_SCALAR_INT_P (op1))
5564 canonicalize_comparison (mode, &code, &op1);
5566 /* For some comparisons with 1 and -1, we can convert this to
5567 comparisons with zero. This will often produce more opportunities for
5568 store-flag insns. */
5570 switch (code)
5572 case LT:
5573 if (op1 == const1_rtx)
5574 op1 = const0_rtx, code = LE;
5575 break;
5576 case LE:
5577 if (op1 == constm1_rtx)
5578 op1 = const0_rtx, code = LT;
5579 break;
5580 case GE:
5581 if (op1 == const1_rtx)
5582 op1 = const0_rtx, code = GT;
5583 break;
5584 case GT:
5585 if (op1 == constm1_rtx)
5586 op1 = const0_rtx, code = GE;
5587 break;
5588 case GEU:
5589 if (op1 == const1_rtx)
5590 op1 = const0_rtx, code = NE;
5591 break;
5592 case LTU:
5593 if (op1 == const1_rtx)
5594 op1 = const0_rtx, code = EQ;
5595 break;
5596 default:
5597 break;
5600 /* If we are comparing a double-word integer with zero or -1, we can
5601 convert the comparison into one involving a single word. */
5602 scalar_int_mode int_mode;
5603 if (is_int_mode (mode, &int_mode)
5604 && GET_MODE_BITSIZE (int_mode) == BITS_PER_WORD * 2
5605 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5607 rtx tem;
5608 if ((code == EQ || code == NE)
5609 && (op1 == const0_rtx || op1 == constm1_rtx))
5611 rtx op00, op01;
5613 /* Do a logical OR or AND of the two words and compare the
5614 result. */
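/* A double-word X is zero iff (LOW | HIGH) is zero, and X is -1 iff
   (LOW & HIGH) is -1, so a single word_mode store-flag suffices.  */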
5615 op00 = simplify_gen_subreg (word_mode, op0, int_mode, 0);
5616 op01 = simplify_gen_subreg (word_mode, op0, int_mode, UNITS_PER_WORD);
5617 tem = expand_binop (word_mode,
5618 op1 == const0_rtx ? ior_optab : and_optab,
5619 op00, op01, NULL_RTX, unsignedp,
5620 OPTAB_DIRECT);
5622 if (tem != 0)
5623 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5624 unsignedp, normalizep);
5626 else if ((code == LT || code == GE) && op1 == const0_rtx)
5628 rtx op0h;
5630 /* If testing the sign bit, can just test on high word. */
5631 op0h = simplify_gen_subreg (word_mode, op0, int_mode,
5632 subreg_highpart_offset (word_mode,
5633 int_mode));
5634 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5635 unsignedp, normalizep);
5637 else
5638 tem = NULL_RTX;
5640 if (tem)
5642 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5643 return tem;
5644 if (!target)
5645 target = gen_reg_rtx (target_mode);
5647 convert_move (target, tem,
5648 !val_signbit_known_set_p (word_mode,
5649 (normalizep ? normalizep
5650 : STORE_FLAG_VALUE)));
5651 return target;
5655 /* If this is A < 0 or A >= 0, we can do this by taking the one's
5656 complement of A (for GE) and shifting the sign bit to the low bit. */
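/* For instance, with a 32-bit A and STORE_FLAG_VALUE == 1, A < 0
   becomes (unsigned) A >> 31 and A >= 0 becomes (unsigned) ~A >> 31.  */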
5657 if (op1 == const0_rtx && (code == LT || code == GE)
5658 && is_int_mode (mode, &int_mode)
5659 && (normalizep || STORE_FLAG_VALUE == 1
5660 || val_signbit_p (int_mode, STORE_FLAG_VALUE)))
5662 scalar_int_mode int_target_mode;
5663 subtarget = target;
5665 if (!target)
5666 int_target_mode = int_mode;
5667 else
5669 /* If the result is to be wider than OP0, it is best to convert it
5670 first. If it is to be narrower, it is *incorrect* to convert it
5671 first. */
5672 int_target_mode = as_a <scalar_int_mode> (target_mode);
5673 if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (int_mode))
5675 op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5676 int_mode = int_target_mode;
5680 if (int_target_mode != int_mode)
5681 subtarget = 0;
5683 if (code == GE)
5684 op0 = expand_unop (int_mode, one_cmpl_optab, op0,
5685 ((STORE_FLAG_VALUE == 1 || normalizep)
5686 ? 0 : subtarget), 0);
5688 if (STORE_FLAG_VALUE == 1 || normalizep)
5689 /* If we are supposed to produce a 0/1 value, we want to do
5690 a logical shift from the sign bit to the low-order bit; for
5691 a -1/0 value, we do an arithmetic shift. */
5692 op0 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5693 GET_MODE_BITSIZE (int_mode) - 1,
5694 subtarget, normalizep != -1);
5696 if (int_mode != int_target_mode)
5697 op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5699 return op0;
5702 mclass = GET_MODE_CLASS (mode);
5703 FOR_EACH_MODE_FROM (compare_mode, mode)
5705 machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5706 icode = optab_handler (cstore_optab, optab_mode);
5707 if (icode != CODE_FOR_nothing)
5709 do_pending_stack_adjust ();
5710 rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5711 unsignedp, op0, op1, normalizep, target_mode);
5712 if (tem)
5713 return tem;
5715 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5717 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5718 unsignedp, op1, op0, normalizep, target_mode);
5719 if (tem)
5720 return tem;
5722 break;
5726 return 0;
5729 /* Subroutine of emit_store_flag that handles cases in which the operands
5730 are scalar integers. SUBTARGET is the target to use for temporary
5731 operations and TRUEVAL is the value to store when the condition is
5732 true. All other arguments are as for emit_store_flag. */
5735 emit_store_flag_int (rtx target, rtx subtarget, enum rtx_code code, rtx op0,
5736 rtx op1, scalar_int_mode mode, int unsignedp,
5737 int normalizep, rtx trueval)
5739 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5740 rtx_insn *last = get_last_insn ();
5742 /* If this is an equality comparison of integers, we can try to exclusive-or
5743 (or subtract) the two operands and use a recursive call to try the
5744 comparison with zero. Don't do any of these cases if branches are
5745 very cheap. */
5747 if ((code == EQ || code == NE) && op1 != const0_rtx)
5749 rtx tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5750 OPTAB_WIDEN);
5752 if (tem == 0)
5753 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5754 OPTAB_WIDEN);
5755 if (tem != 0)
5756 tem = emit_store_flag (target, code, tem, const0_rtx,
5757 mode, unsignedp, normalizep);
5758 if (tem != 0)
5759 return tem;
5761 delete_insns_since (last);
5764 /* For integer comparisons, try the reverse comparison. However, for
5765 small X, if we would have to extend it anyway, implementing "X != 0"
5766 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5767 rtx_code rcode = reverse_condition (code);
5768 if (can_compare_p (rcode, mode, ccp_store_flag)
5769 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5770 && code == NE
5771 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5772 && op1 == const0_rtx))
5774 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5775 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5777 /* Again, for the reverse comparison, use either an addition or a XOR. */
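/* E.g. with STORE_FLAG_VALUE == 1 and NORMALIZEP == -1, the reverse
   comparison yields 1 when CODE is false and 0 when it is true, so
   adding NORMALIZEP gives the required -1/0 result.  */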
5778 if (want_add
5779 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5780 optimize_insn_for_speed_p ()) == 0)
5782 rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5783 STORE_FLAG_VALUE, target_mode);
5784 if (tem != 0)
5785 tem = expand_binop (target_mode, add_optab, tem,
5786 gen_int_mode (normalizep, target_mode),
5787 target, 0, OPTAB_WIDEN);
5788 if (tem != 0)
5789 return tem;
5791 else if (!want_add
5792 && rtx_cost (trueval, mode, XOR, 1,
5793 optimize_insn_for_speed_p ()) == 0)
5795 rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5796 normalizep, target_mode);
5797 if (tem != 0)
5798 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5799 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5800 if (tem != 0)
5801 return tem;
5804 delete_insns_since (last);
5807 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5808 the constant zero. Reject all other comparisons at this point. Only
5809 do LE and GT if branches are expensive since they are expensive on
5810 2-operand machines. */
5812 if (op1 != const0_rtx
5813 || (code != EQ && code != NE
5814 && (BRANCH_COST (optimize_insn_for_speed_p (),
5815 false) <= 1 || (code != LE && code != GT))))
5816 return 0;
5818 /* Try to put the result of the comparison in the sign bit. Assume we can't
5819 do the necessary operation below. */
5821 rtx tem = 0;
5823 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5824 the sign bit set. */
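/* E.g. A = 0 gives 0 | -1 = -1 (sign bit set); A = 1 gives 1 | 0 = 1
   (clear); A = -5 gives -5 | -6, which is negative (set).  */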
5826 if (code == LE)
5828 /* This is destructive, so SUBTARGET can't be OP0. */
5829 if (rtx_equal_p (subtarget, op0))
5830 subtarget = 0;
5832 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5833 OPTAB_WIDEN);
5834 if (tem)
5835 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5836 OPTAB_WIDEN);
5839 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5840 number of bits in the mode of OP0, minus one. */
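/* E.g. for 32-bit A = 3: (3 >> 31) - 3 = -3 (sign bit set); A = 0
   gives 0 (clear); A = -7 gives -1 - (-7) = 6 (clear).  */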
5842 if (code == GT)
5844 if (rtx_equal_p (subtarget, op0))
5845 subtarget = 0;
5847 tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
5848 GET_MODE_BITSIZE (mode) - 1,
5849 subtarget, 0);
5850 if (tem)
5851 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5852 OPTAB_WIDEN);
5855 if (code == EQ || code == NE)
5857 /* For EQ or NE, one way to do the comparison is to apply an operation
5858 that converts the operand into a positive number if it is nonzero
5859 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5860 for NE we negate. This puts the result in the sign bit. Then we
5861 normalize with a shift, if needed.
5863 Two operations that can do the above actions are ABS and FFS, so try
5864 them. If that doesn't work, and MODE is smaller than a full word,
5865 we can use zero-extension to the wider mode (an unsigned conversion)
5866 as the operation. */
5868 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5869 that is compensated by the subsequent overflow when subtracting
5870 one / negating. */
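/* E.g. using ABS for EQ: |A| - 1 is negative only for A == 0; for
   A == INT_MIN, |A| overflows to INT_MIN and |A| - 1 wraps to INT_MAX,
   which is non-negative as required.  */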
5872 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5873 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5874 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5875 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5876 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5878 tem = convert_modes (word_mode, mode, op0, 1);
5879 mode = word_mode;
5882 if (tem != 0)
5884 if (code == EQ)
5885 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5886 0, OPTAB_WIDEN);
5887 else
5888 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5891 /* If we couldn't do it that way, for NE we can "or" the two's complement
5892 of the value with itself. For EQ, we take the one's complement of
5893 that "or", which is an extra insn, so we only handle EQ if branches
5894 are expensive. */
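/* E.g. A = 4 gives -4 | 4 = -4 (sign bit set, A != 0); A = 0 gives
   0 | 0 = 0 (clear).  */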
5896 if (tem == 0
5897 && (code == NE
5898 || BRANCH_COST (optimize_insn_for_speed_p (),
5899 false) > 1))
5901 if (rtx_equal_p (subtarget, op0))
5902 subtarget = 0;
5904 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5905 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5906 OPTAB_WIDEN);
5908 if (tem && code == EQ)
5909 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5913 if (tem && normalizep)
5914 tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem,
5915 GET_MODE_BITSIZE (mode) - 1,
5916 subtarget, normalizep == 1);
5918 if (tem)
5920 if (!target)
5922 else if (GET_MODE (tem) != target_mode)
5924 convert_move (target, tem, 0);
5925 tem = target;
5927 else if (!subtarget)
5929 emit_move_insn (target, tem);
5930 tem = target;
5933 else
5934 delete_insns_since (last);
5936 return tem;
5939 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5940 and storing in TARGET. Normally return TARGET.
5941 Return 0 if that cannot be done.
5943 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5944 it is VOIDmode, they cannot both be CONST_INT.
5946 UNSIGNEDP is for the case where we have to widen the operands
5947 to perform the operation. It says to use zero-extension.
5949 NORMALIZEP is 1 if we should convert the result to be either zero
5950 or one. NORMALIZEP is -1 if we should convert the result to be
5951 either zero or -1. If NORMALIZEP is zero, the result will be left
5952 "raw" out of the scc insn. */
5955 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5956 machine_mode mode, int unsignedp, int normalizep)
5958 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5959 enum rtx_code rcode;
5960 rtx subtarget;
5961 rtx tem, trueval;
5962 rtx_insn *last;
5964 /* If we compare constants, we shouldn't use a store-flag operation,
5965 but a constant load. We can get there via the vanilla route that
5966 usually generates a compare-branch sequence, but will in this case
5967 fold the comparison to a constant, and thus elide the branch. */
5968 if (CONSTANT_P (op0) && CONSTANT_P (op1))
5969 return NULL_RTX;
5971 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5972 target_mode);
5973 if (tem)
5974 return tem;
5976 /* If we reached here, we can't do this with a scc insn, however there
5977 are some comparisons that can be done in other ways. Don't do any
5978 of these cases if branches are very cheap. */
5979 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5980 return 0;
5982 /* See what we need to return. We can only return a 1, -1, or the
5983 sign bit. */
5985 if (normalizep == 0)
5987 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5988 normalizep = STORE_FLAG_VALUE;
5990 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5992 else
5993 return 0;
5996 last = get_last_insn ();
5998 /* If optimizing, use different pseudo registers for each insn, instead
5999 of reusing the same pseudo. This leads to better CSE, but slows
6000 down the compiler, since there are more pseudos. */
6001 subtarget = (!optimize
6002 && (target_mode == mode)) ? target : NULL_RTX;
6003 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
6005 /* For floating-point comparisons, try the reverse comparison or try
6006 changing the "orderedness" of the comparison. */
6007 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6009 enum rtx_code first_code;
6010 bool and_them;
6012 rcode = reverse_condition_maybe_unordered (code);
6013 if (can_compare_p (rcode, mode, ccp_store_flag)
6014 && (code == ORDERED || code == UNORDERED
6015 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6016 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6018 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
6019 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
6021 /* For the reverse comparison, use either an addition or a XOR. */
6022 if (want_add
6023 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
6024 optimize_insn_for_speed_p ()) == 0)
6026 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
6027 STORE_FLAG_VALUE, target_mode);
6028 if (tem)
6029 return expand_binop (target_mode, add_optab, tem,
6030 gen_int_mode (normalizep, target_mode),
6031 target, 0, OPTAB_WIDEN);
6033 else if (!want_add
6034 && rtx_cost (trueval, mode, XOR, 1,
6035 optimize_insn_for_speed_p ()) == 0)
6037 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
6038 normalizep, target_mode);
6039 if (tem)
6040 return expand_binop (target_mode, xor_optab, tem, trueval,
6041 target, INTVAL (trueval) >= 0,
6042 OPTAB_WIDEN);
6046 delete_insns_since (last);
6048 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
6049 if (code == ORDERED || code == UNORDERED)
6050 return 0;
6052 and_them = split_comparison (code, mode, &first_code, &code);
6054 /* If there are no NaNs, the first comparison should always fall through.
6055 Effectively change the comparison to the other one. */
6056 if (!HONOR_NANS (mode))
6058 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
6059 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
6060 target_mode);
6063 if (!HAVE_conditional_move)
6064 return 0;
6066 /* Do not turn a trapping comparison into a non-trapping one. */
6067 if ((code != EQ && code != NE && code != UNEQ && code != LTGT)
6068 && flag_trapping_math)
6069 return 0;
6071 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
6072 conditional move. */
6073 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
6074 normalizep, target_mode);
6075 if (tem == 0)
6076 return 0;
6078 if (and_them)
6079 tem = emit_conditional_move (target, code, op0, op1, mode,
6080 tem, const0_rtx, GET_MODE (tem), 0);
6081 else
6082 tem = emit_conditional_move (target, code, op0, op1, mode,
6083 trueval, tem, GET_MODE (tem), 0);
6085 if (tem == 0)
6086 delete_insns_since (last);
6087 return tem;
6090 /* The remaining tricks only apply to integer comparisons. */
6092 scalar_int_mode int_mode;
6093 if (is_int_mode (mode, &int_mode))
6094 return emit_store_flag_int (target, subtarget, code, op0, op1, int_mode,
6095 unsignedp, normalizep, trueval);
6097 return 0;
6100 /* Like emit_store_flag, but always succeeds. */
6103 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
6104 machine_mode mode, int unsignedp, int normalizep)
6106 rtx tem;
6107 rtx_code_label *label;
6108 rtx trueval, falseval;
6110 /* First see if emit_store_flag can do the job. */
6111 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
6112 if (tem != 0)
6113 return tem;
6115 /* If one operand is constant, make it the second one. Only do this
6116 if the other operand is not constant as well. */
6117 if (swap_commutative_operands_p (op0, op1))
6119 std::swap (op0, op1);
6120 code = swap_condition (code);
6123 if (mode == VOIDmode)
6124 mode = GET_MODE (op0);
6126 if (!target)
6127 target = gen_reg_rtx (word_mode);
6129 /* If this failed, we have to do this with set/compare/jump/set code.
6130 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
6131 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
6132 if (code == NE
6133 && GET_MODE_CLASS (mode) == MODE_INT
6134 && REG_P (target)
6135 && op0 == target
6136 && op1 == const0_rtx)
6138 label = gen_label_rtx ();
6139 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
6140 NULL_RTX, NULL, label,
6141 profile_probability::uninitialized ());
6142 emit_move_insn (target, trueval);
6143 emit_label (label);
6144 return target;
6147 if (!REG_P (target)
6148 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
6149 target = gen_reg_rtx (GET_MODE (target));
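/* The code below emits: TARGET = TRUEVAL; if (OP0 CODE OP1) goto LABEL;
   TARGET = FALSEVAL; LABEL:, possibly with CODE reversed and
   TRUEVAL/FALSEVAL swapped when only the reverse condition can be
   branched on.  */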
6151 /* Jump in the right direction if the target cannot implement CODE
6152 but can jump on its reverse condition. */
6153 falseval = const0_rtx;
6154 if (! can_compare_p (code, mode, ccp_jump)
6155 && (! FLOAT_MODE_P (mode)
6156 || code == ORDERED || code == UNORDERED
6157 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6158 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6160 enum rtx_code rcode;
6161 if (FLOAT_MODE_P (mode))
6162 rcode = reverse_condition_maybe_unordered (code);
6163 else
6164 rcode = reverse_condition (code);
6166 /* Canonicalize to UNORDERED for the libcall. */
6167 if (can_compare_p (rcode, mode, ccp_jump)
6168 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
6170 falseval = trueval;
6171 trueval = const0_rtx;
6172 code = rcode;
6176 emit_move_insn (target, trueval);
6177 label = gen_label_rtx ();
6178 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
6179 label, profile_probability::uninitialized ());
6181 emit_move_insn (target, falseval);
6182 emit_label (label);
6184 return target;
6187 /* Helper function for canonicalize_comparison. Swap between inclusive
6188 and exclusive ranges in order to create an equivalent comparison. See
6189 canonicalize_comparison for the possible cases. */
6191 static enum rtx_code
6192 equivalent_cmp_code (enum rtx_code code)
6194 switch (code)
6196 case GT:
6197 return GE;
6198 case GE:
6199 return GT;
6200 case LT:
6201 return LE;
6202 case LE:
6203 return LT;
6204 case GTU:
6205 return GEU;
6206 case GEU:
6207 return GTU;
6208 case LTU:
6209 return LEU;
6210 case LEU:
6211 return LTU;
6213 default:
6214 return code;
6218 /* Choose the more appropriate immediate in scalar integer comparisons. The
6219 purpose of this is to end up with an immediate which can be loaded into a
6220 register in fewer moves, if possible.
6222 For each integer comparison there exists an equivalent choice:
6223 i) a > b or a >= b + 1
6224 ii) a <= b or a < b + 1
6225 iii) a >= b or a > b - 1
6226 iv) a < b or a <= b - 1
6228 MODE is the mode of the first operand.
6229 CODE points to the comparison code.
6230 IMM points to the rtx containing the immediate. *IMM must satisfy
6231 CONST_SCALAR_INT_P on entry and continues to satisfy CONST_SCALAR_INT_P
6232 on exit. */
6234 void
6235 canonicalize_comparison (machine_mode mode, enum rtx_code *code, rtx *imm)
6237 if (!SCALAR_INT_MODE_P (mode))
6238 return;
6240 int to_add = 0;
6241 enum signop sgn = unsigned_condition_p (*code) ? UNSIGNED : SIGNED;
6243 /* Extract the immediate value from the rtx. */
6244 wide_int imm_val = rtx_mode_t (*imm, mode);
6246 if (*code == GT || *code == GTU || *code == LE || *code == LEU)
6247 to_add = 1;
6248 else if (*code == GE || *code == GEU || *code == LT || *code == LTU)
6249 to_add = -1;
6250 else
6251 return;
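/* For instance, A > 0xffff (TO_ADD == 1) can become A >= 0x10000 when
   the constant 0x10000 is cheaper to load into a register than 0xffff.  */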
6253 /* Check for overflow/underflow in the case of signed values and
6254 wrapping around in the case of unsigned values. If any occur
6255 cancel the optimization. */
6256 wi::overflow_type overflow = wi::OVF_NONE;
6257 wide_int imm_modif;
6259 if (to_add == 1)
6260 imm_modif = wi::add (imm_val, 1, sgn, &overflow);
6261 else
6262 imm_modif = wi::sub (imm_val, 1, sgn, &overflow);
6264 if (overflow)
6265 return;
6267 /* The following creates a pseudo; if we cannot do that, bail out. */
6268 if (!can_create_pseudo_p ())
6269 return;
6271 rtx reg = gen_rtx_REG (mode, LAST_VIRTUAL_REGISTER + 1);
6272 rtx new_imm = immed_wide_int_const (imm_modif, mode);
6274 rtx_insn *old_rtx = gen_move_insn (reg, *imm);
6275 rtx_insn *new_rtx = gen_move_insn (reg, new_imm);
6277 /* Update the immediate and the code. */
6278 if (insn_cost (old_rtx, true) > insn_cost (new_rtx, true))
6280 *code = equivalent_cmp_code (*code);
6281 *imm = new_imm;
6287 /* Perform possibly multi-word comparison and conditional jump to LABEL
6288 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
6289 now a thin wrapper around do_compare_rtx_and_jump. */
6291 static void
6292 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
6293 rtx_code_label *label)
6295 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
6296 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
6297 NULL, label, profile_probability::uninitialized ());