gcc/expmed.c (blob 0461027620941ffb9c00a9ede440521f165b69cd)
1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987-2020 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* Work around tree-optimization/91825. */
22 #pragma GCC diagnostic warning "-Wmaybe-uninitialized"
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "predict.h"
32 #include "memmodel.h"
33 #include "tm_p.h"
34 #include "expmed.h"
35 #include "optabs.h"
36 #include "regs.h"
37 #include "emit-rtl.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
41 #include "dojump.h"
42 #include "explow.h"
43 #include "expr.h"
44 #include "langhooks.h"
45 #include "tree-vector-builder.h"
47 struct target_expmed default_target_expmed;
48 #if SWITCHABLE_TARGET
49 struct target_expmed *this_target_expmed = &default_target_expmed;
50 #endif
52 static bool store_integral_bit_field (rtx, opt_scalar_int_mode,
53 unsigned HOST_WIDE_INT,
54 unsigned HOST_WIDE_INT,
55 poly_uint64, poly_uint64,
56 machine_mode, rtx, bool, bool);
57 static void store_fixed_bit_field (rtx, opt_scalar_int_mode,
58 unsigned HOST_WIDE_INT,
59 unsigned HOST_WIDE_INT,
60 poly_uint64, poly_uint64,
61 rtx, scalar_int_mode, bool);
62 static void store_fixed_bit_field_1 (rtx, scalar_int_mode,
63 unsigned HOST_WIDE_INT,
64 unsigned HOST_WIDE_INT,
65 rtx, scalar_int_mode, bool);
66 static void store_split_bit_field (rtx, opt_scalar_int_mode,
67 unsigned HOST_WIDE_INT,
68 unsigned HOST_WIDE_INT,
69 poly_uint64, poly_uint64,
70 rtx, scalar_int_mode, bool);
71 static rtx extract_integral_bit_field (rtx, opt_scalar_int_mode,
72 unsigned HOST_WIDE_INT,
73 unsigned HOST_WIDE_INT, int, rtx,
74 machine_mode, machine_mode, bool, bool);
75 static rtx extract_fixed_bit_field (machine_mode, rtx, opt_scalar_int_mode,
76 unsigned HOST_WIDE_INT,
77 unsigned HOST_WIDE_INT, rtx, int, bool);
78 static rtx extract_fixed_bit_field_1 (machine_mode, rtx, scalar_int_mode,
79 unsigned HOST_WIDE_INT,
80 unsigned HOST_WIDE_INT, rtx, int, bool);
81 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
82 static rtx extract_split_bit_field (rtx, opt_scalar_int_mode,
83 unsigned HOST_WIDE_INT,
84 unsigned HOST_WIDE_INT, int, bool);
85 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
86 static rtx expand_smod_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
87 static rtx expand_sdiv_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
89 /* Return a constant integer mask value of mode MODE with BITSIZE ones
90 followed by BITPOS zeros, or the complement of that if COMPLEMENT.
91 The mask is truncated if necessary to the width of mode MODE. The
92 mask is zero-extended if BITSIZE+BITPOS is too small for MODE. */
94 static inline rtx
95 mask_rtx (scalar_int_mode mode, int bitpos, int bitsize, bool complement)
97 return immed_wide_int_const
98 (wi::shifted_mask (bitpos, bitsize, complement,
99 GET_MODE_PRECISION (mode)), mode);
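/* As an illustrative sketch, assuming a 32-bit SImode:
   mask_rtx (SImode, 8, 4, false) builds the constant

     ((HOST_WIDE_INT_1U << 4) - 1) << 8 == 0x00000f00,

   i.e. four ones starting at bit 8, while the complemented form
   mask_rtx (SImode, 8, 4, true) yields 0xfffff0ff.  */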
102 /* Test whether a value is zero or a power of two. */
103 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
104 (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
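/* As an illustrative sketch of the macro above, which uses the usual
   x & (x - 1) trick:

     EXACT_POWER_OF_2_OR_ZERO_P (8) is (8 & 7) == 0, so true;
     EXACT_POWER_OF_2_OR_ZERO_P (6) is (6 & 5) == 4, so false;
     EXACT_POWER_OF_2_OR_ZERO_P (0) is (0 & ~0) == 0, so true.  */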
106 struct init_expmed_rtl
108 rtx reg;
109 rtx plus;
110 rtx neg;
111 rtx mult;
112 rtx sdiv;
113 rtx udiv;
114 rtx sdiv_32;
115 rtx smod_32;
116 rtx wide_mult;
117 rtx wide_lshr;
118 rtx wide_trunc;
119 rtx shift;
120 rtx shift_mult;
121 rtx shift_add;
122 rtx shift_sub0;
123 rtx shift_sub1;
124 rtx zext;
125 rtx trunc;
127 rtx pow2[MAX_BITS_PER_WORD];
128 rtx cint[MAX_BITS_PER_WORD];
131 static void
132 init_expmed_one_conv (struct init_expmed_rtl *all, scalar_int_mode to_mode,
133 scalar_int_mode from_mode, bool speed)
135 int to_size, from_size;
136 rtx which;
138 to_size = GET_MODE_PRECISION (to_mode);
139 from_size = GET_MODE_PRECISION (from_mode);
141 /* Most partial integers have a precision less than the "full"
142 integer they require for storage. If one does not, reduce
143 its bit size by one for the purposes of the comparison
144 here. */
145 if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
146 && pow2p_hwi (to_size))
147 to_size --;
148 if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
149 && pow2p_hwi (from_size))
150 from_size --;
152 /* Assume cost of zero-extend and sign-extend is the same. */
153 which = (to_size < from_size ? all->trunc : all->zext);
155 PUT_MODE (all->reg, from_mode);
156 set_convert_cost (to_mode, from_mode, speed,
157 set_src_cost (which, to_mode, speed));
160 static void
161 init_expmed_one_mode (struct init_expmed_rtl *all,
162 machine_mode mode, int speed)
164 int m, n, mode_bitsize;
165 machine_mode mode_from;
167 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
169 PUT_MODE (all->reg, mode);
170 PUT_MODE (all->plus, mode);
171 PUT_MODE (all->neg, mode);
172 PUT_MODE (all->mult, mode);
173 PUT_MODE (all->sdiv, mode);
174 PUT_MODE (all->udiv, mode);
175 PUT_MODE (all->sdiv_32, mode);
176 PUT_MODE (all->smod_32, mode);
177 PUT_MODE (all->wide_trunc, mode);
178 PUT_MODE (all->shift, mode);
179 PUT_MODE (all->shift_mult, mode);
180 PUT_MODE (all->shift_add, mode);
181 PUT_MODE (all->shift_sub0, mode);
182 PUT_MODE (all->shift_sub1, mode);
183 PUT_MODE (all->zext, mode);
184 PUT_MODE (all->trunc, mode);
186 set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
187 set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
188 set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
189 set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
190 set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
192 set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
193 <= 2 * add_cost (speed, mode)));
194 set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
195 <= 4 * add_cost (speed, mode)));
197 set_shift_cost (speed, mode, 0, 0);
199 int cost = add_cost (speed, mode);
200 set_shiftadd_cost (speed, mode, 0, cost);
201 set_shiftsub0_cost (speed, mode, 0, cost);
202 set_shiftsub1_cost (speed, mode, 0, cost);
205 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
206 for (m = 1; m < n; m++)
208 XEXP (all->shift, 1) = all->cint[m];
209 XEXP (all->shift_mult, 1) = all->pow2[m];
211 set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
212 set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
213 speed));
214 set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
215 speed));
216 set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
217 speed));
220 scalar_int_mode int_mode_to;
221 if (is_a <scalar_int_mode> (mode, &int_mode_to))
223 for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
224 mode_from = (machine_mode)(mode_from + 1))
225 init_expmed_one_conv (all, int_mode_to,
226 as_a <scalar_int_mode> (mode_from), speed);
228 scalar_int_mode wider_mode;
229 if (GET_MODE_CLASS (int_mode_to) == MODE_INT
230 && GET_MODE_WIDER_MODE (int_mode_to).exists (&wider_mode))
232 PUT_MODE (all->zext, wider_mode);
233 PUT_MODE (all->wide_mult, wider_mode);
234 PUT_MODE (all->wide_lshr, wider_mode);
235 XEXP (all->wide_lshr, 1)
236 = gen_int_shift_amount (wider_mode, mode_bitsize);
238 set_mul_widen_cost (speed, wider_mode,
239 set_src_cost (all->wide_mult, wider_mode, speed));
240 set_mul_highpart_cost (speed, int_mode_to,
241 set_src_cost (all->wide_trunc,
242 int_mode_to, speed));
247 void
248 init_expmed (void)
250 struct init_expmed_rtl all;
251 machine_mode mode = QImode;
252 int m, speed;
254 memset (&all, 0, sizeof all);
255 for (m = 1; m < MAX_BITS_PER_WORD; m++)
257 all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
258 all.cint[m] = GEN_INT (m);
261 /* Avoid using hard regs in ways which may be unsupported. */
262 all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
263 all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
264 all.neg = gen_rtx_NEG (mode, all.reg);
265 all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
266 all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
267 all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
268 all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
269 all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
270 all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
271 all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
272 all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
273 all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
274 all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
275 all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
276 all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
277 all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
278 all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
279 all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
281 for (speed = 0; speed < 2; speed++)
283 crtl->maybe_hot_insn_p = speed;
284 set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
286 for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
287 mode = (machine_mode)(mode + 1))
288 init_expmed_one_mode (&all, mode, speed);
290 if (MIN_MODE_PARTIAL_INT != VOIDmode)
291 for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
292 mode = (machine_mode)(mode + 1))
293 init_expmed_one_mode (&all, mode, speed);
295 if (MIN_MODE_VECTOR_INT != VOIDmode)
296 for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
297 mode = (machine_mode)(mode + 1))
298 init_expmed_one_mode (&all, mode, speed);
301 if (alg_hash_used_p ())
303 struct alg_hash_entry *p = alg_hash_entry_ptr (0);
304 memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
306 else
307 set_alg_hash_used_p (true);
308 default_rtl_profile ();
310 ggc_free (all.trunc);
311 ggc_free (all.shift_sub1);
312 ggc_free (all.shift_sub0);
313 ggc_free (all.shift_add);
314 ggc_free (all.shift_mult);
315 ggc_free (all.shift);
316 ggc_free (all.wide_trunc);
317 ggc_free (all.wide_lshr);
318 ggc_free (all.wide_mult);
319 ggc_free (all.zext);
320 ggc_free (all.smod_32);
321 ggc_free (all.sdiv_32);
322 ggc_free (all.udiv);
323 ggc_free (all.sdiv);
324 ggc_free (all.mult);
325 ggc_free (all.neg);
326 ggc_free (all.plus);
327 ggc_free (all.reg);
330 /* Return an rtx representing minus the value of X.
331 MODE is the intended mode of the result,
332 useful if X is a CONST_INT. */
334 rtx
335 negate_rtx (machine_mode mode, rtx x)
337 rtx result = simplify_unary_operation (NEG, mode, x, mode);
339 if (result == 0)
340 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
342 return result;
345 /* Whether reverse storage order is supported on the target. */
346 static int reverse_storage_order_supported = -1;
348 /* Check whether reverse storage order is supported on the target. */
350 static void
351 check_reverse_storage_order_support (void)
353 if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
355 reverse_storage_order_supported = 0;
356 sorry ("reverse scalar storage order");
358 else
359 reverse_storage_order_supported = 1;
362 /* Whether reverse FP storage order is supported on the target. */
363 static int reverse_float_storage_order_supported = -1;
365 /* Check whether reverse FP storage order is supported on the target. */
367 static void
368 check_reverse_float_storage_order_support (void)
370 if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
372 reverse_float_storage_order_supported = 0;
373 sorry ("reverse floating-point scalar storage order");
375 else
376 reverse_float_storage_order_supported = 1;
379 /* Return an rtx representing value of X with reverse storage order.
380 MODE is the intended mode of the result,
381 useful if X is a CONST_INT. */
383 rtx
384 flip_storage_order (machine_mode mode, rtx x)
386 scalar_int_mode int_mode;
387 rtx result;
389 if (mode == QImode)
390 return x;
392 if (COMPLEX_MODE_P (mode))
394 rtx real = read_complex_part (x, false);
395 rtx imag = read_complex_part (x, true);
397 real = flip_storage_order (GET_MODE_INNER (mode), real);
398 imag = flip_storage_order (GET_MODE_INNER (mode), imag);
400 return gen_rtx_CONCAT (mode, real, imag);
403 if (__builtin_expect (reverse_storage_order_supported < 0, 0))
404 check_reverse_storage_order_support ();
406 if (!is_a <scalar_int_mode> (mode, &int_mode))
408 if (FLOAT_MODE_P (mode)
409 && __builtin_expect (reverse_float_storage_order_supported < 0, 0))
410 check_reverse_float_storage_order_support ();
412 if (!int_mode_for_size (GET_MODE_PRECISION (mode), 0).exists (&int_mode))
414 sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
415 return x;
417 x = gen_lowpart (int_mode, x);
420 result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
421 if (result == 0)
422 result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
424 if (int_mode != mode)
425 result = gen_lowpart (mode, result);
427 return result;
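/* As an illustrative sketch, assuming a 32-bit SImode: the reversal above
   reduces to a byte swap for scalar integers, e.g.

     flip_storage_order (SImode, GEN_INT (0x12345678)) --> 0x78563412.

   QImode values are returned unchanged, and complex modes are handled by
   flipping the real and imaginary parts separately.  */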
430 /* If MODE is set, adjust bitfield memory MEM so that it points to the
431 first unit of mode MODE that contains a bitfield of size BITSIZE at
432 bit position BITNUM. If MODE is not set, return a BLKmode reference
433 to every byte in the bitfield. Set *NEW_BITNUM to the bit position
434 of the field within the new memory. */
436 static rtx
437 narrow_bit_field_mem (rtx mem, opt_scalar_int_mode mode,
438 unsigned HOST_WIDE_INT bitsize,
439 unsigned HOST_WIDE_INT bitnum,
440 unsigned HOST_WIDE_INT *new_bitnum)
442 scalar_int_mode imode;
443 if (mode.exists (&imode))
445 unsigned int unit = GET_MODE_BITSIZE (imode);
446 *new_bitnum = bitnum % unit;
447 HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
448 return adjust_bitfield_address (mem, imode, offset);
450 else
452 *new_bitnum = bitnum % BITS_PER_UNIT;
453 HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
454 HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
455 / BITS_PER_UNIT);
456 return adjust_bitfield_address_size (mem, BLKmode, offset, size);
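/* As a worked example, assuming BITSIZE == 5 and BITNUM == 37 with a
   16-bit HImode access: the code above computes

     *new_bitnum = 37 % 16 == 5,  offset = (37 - 5) / 8 == 4 bytes,

   so the returned MEM points 4 bytes into the object and the field starts
   at bit 5 of that unit.  In the BLKmode case the same field gives
   *new_bitnum == 5, offset == 4 and size == (5 + 5 + 7) / 8 == 2 bytes.  */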
460 /* The caller wants to perform insertion or extraction PATTERN on a
461 bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
462 BITREGION_START and BITREGION_END are as for store_bit_field
463 and FIELDMODE is the natural mode of the field.
465 Search for a mode that is compatible with the memory access
466 restrictions and (where applicable) with a register insertion or
467 extraction. Return the new memory on success, storing the adjusted
468 bit position in *NEW_BITNUM. Return null otherwise. */
470 static rtx
471 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
472 rtx op0, HOST_WIDE_INT bitsize,
473 HOST_WIDE_INT bitnum,
474 poly_uint64 bitregion_start,
475 poly_uint64 bitregion_end,
476 machine_mode fieldmode,
477 unsigned HOST_WIDE_INT *new_bitnum)
479 bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
480 bitregion_end, MEM_ALIGN (op0),
481 MEM_VOLATILE_P (op0));
482 scalar_int_mode best_mode;
483 if (iter.next_mode (&best_mode))
485 /* We can use a memory in BEST_MODE. See whether this is true for
486 any wider modes. All other things being equal, we prefer to
487 use the widest mode possible because it tends to expose more
488 CSE opportunities. */
489 if (!iter.prefer_smaller_modes ())
491 /* Limit the search to the mode required by the corresponding
492 register insertion or extraction instruction, if any. */
493 scalar_int_mode limit_mode = word_mode;
494 extraction_insn insn;
495 if (get_best_reg_extraction_insn (&insn, pattern,
496 GET_MODE_BITSIZE (best_mode),
497 fieldmode))
498 limit_mode = insn.field_mode;
500 scalar_int_mode wider_mode;
501 while (iter.next_mode (&wider_mode)
502 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
503 best_mode = wider_mode;
505 return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
506 new_bitnum);
508 return NULL_RTX;
511 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
512 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
513 offset is then BITNUM / BITS_PER_UNIT. */
515 static bool
516 lowpart_bit_field_p (poly_uint64 bitnum, poly_uint64 bitsize,
517 machine_mode struct_mode)
519 poly_uint64 regsize = REGMODE_NATURAL_SIZE (struct_mode);
520 if (BYTES_BIG_ENDIAN)
521 return (multiple_p (bitnum, BITS_PER_UNIT)
522 && (known_eq (bitnum + bitsize, GET_MODE_BITSIZE (struct_mode))
523 || multiple_p (bitnum + bitsize,
524 regsize * BITS_PER_UNIT)));
525 else
526 return multiple_p (bitnum, regsize * BITS_PER_UNIT);
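/* As an illustrative sketch, assuming 4-byte registers (so
   REGMODE_NATURAL_SIZE is 4): on a little-endian target a field starting
   at BITNUM 0, 32 or 64 is a lowpart, while one starting at BITNUM 8 is
   not.  On a big-endian target the field must instead be byte-aligned and
   end either at the last bit of STRUCT_MODE or on a 32-bit register
   boundary.  */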
529 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
530 containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
531 Return false if the access would touch memory outside the range
532 BITREGION_START to BITREGION_END for conformance to the C++ memory
533 model. */
535 static bool
536 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
537 unsigned HOST_WIDE_INT bitnum,
538 scalar_int_mode fieldmode,
539 poly_uint64 bitregion_start,
540 poly_uint64 bitregion_end)
542 unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
544 /* -fstrict-volatile-bitfields must be enabled and we must have a
545 volatile MEM. */
546 if (!MEM_P (op0)
547 || !MEM_VOLATILE_P (op0)
548 || flag_strict_volatile_bitfields <= 0)
549 return false;
551 /* The bit size must not be larger than the field mode, and
552 the field mode must not be larger than a word. */
553 if (bitsize > modesize || modesize > BITS_PER_WORD)
554 return false;
556 /* Check for cases of unaligned fields that must be split. */
557 if (bitnum % modesize + bitsize > modesize)
558 return false;
560 /* The memory must be sufficiently aligned for a MODESIZE access.
561 This condition guarantees that the memory access will not
562 touch anything after the end of the structure. */
563 if (MEM_ALIGN (op0) < modesize)
564 return false;
566 /* Check for cases where the C++ memory model applies. */
567 if (maybe_ne (bitregion_end, 0U)
568 && (maybe_lt (bitnum - bitnum % modesize, bitregion_start)
569 || maybe_gt (bitnum - bitnum % modesize + modesize - 1,
570 bitregion_end)))
571 return false;
573 return true;
576 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
577 bit number BITNUM can be treated as a simple value of mode MODE.
578 Store the byte offset in *BYTENUM if so. */
580 static bool
581 simple_mem_bitfield_p (rtx op0, poly_uint64 bitsize, poly_uint64 bitnum,
582 machine_mode mode, poly_uint64 *bytenum)
584 return (MEM_P (op0)
585 && multiple_p (bitnum, BITS_PER_UNIT, bytenum)
586 && known_eq (bitsize, GET_MODE_BITSIZE (mode))
587 && (!targetm.slow_unaligned_access (mode, MEM_ALIGN (op0))
588 || (multiple_p (bitnum, GET_MODE_ALIGNMENT (mode))
589 && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
592 /* Try to use instruction INSV to store VALUE into a field of OP0.
593 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is a
594 BLKmode MEM. VALUE_MODE is the mode of VALUE. BITSIZE and BITNUM
595 are as for store_bit_field. */
597 static bool
598 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
599 opt_scalar_int_mode op0_mode,
600 unsigned HOST_WIDE_INT bitsize,
601 unsigned HOST_WIDE_INT bitnum,
602 rtx value, scalar_int_mode value_mode)
604 class expand_operand ops[4];
605 rtx value1;
606 rtx xop0 = op0;
607 rtx_insn *last = get_last_insn ();
608 bool copy_back = false;
610 scalar_int_mode op_mode = insv->field_mode;
611 unsigned int unit = GET_MODE_BITSIZE (op_mode);
612 if (bitsize == 0 || bitsize > unit)
613 return false;
615 if (MEM_P (xop0))
616 /* Get a reference to the first byte of the field. */
617 xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
618 &bitnum);
619 else
621 /* Convert from counting within OP0 to counting in OP_MODE. */
622 if (BYTES_BIG_ENDIAN)
623 bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
625 /* If xop0 is a register, we need it in OP_MODE
626 to make it acceptable to the format of insv. */
627 if (GET_CODE (xop0) == SUBREG)
628 /* We can't just change the mode, because this might clobber op0,
629 and we will need the original value of op0 if insv fails. */
630 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
631 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
632 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
635 /* If the destination is a paradoxical subreg such that we need a
636 truncate to the inner mode, perform the insertion on a temporary and
637 truncate the result to the original destination. Note that we can't
638 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
639 X) 0)) is (reg:N X). */
640 if (GET_CODE (xop0) == SUBREG
641 && REG_P (SUBREG_REG (xop0))
642 && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
643 op_mode))
645 rtx tem = gen_reg_rtx (op_mode);
646 emit_move_insn (tem, xop0);
647 xop0 = tem;
648 copy_back = true;
651 /* There is a similar overflow check at the start of store_bit_field_1,
652 but it only handles the case where the field lies completely outside
653 the register. There are also cases where the field lies partially in
654 the register, and we need to adjust BITSIZE for that partial overflow.
655 Without this fix, pr48335-2.c would be broken on big-endian targets
656 that support bit-insert instructions, such as arm, aarch64,
657 etc. */
658 if (bitsize + bitnum > unit && bitnum < unit)
660 warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
661 "destination object, data truncated into %wu-bit",
662 bitsize, unit - bitnum);
663 bitsize = unit - bitnum;
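/* As a worked example, assuming UNIT == 32: a request to insert
   BITSIZE == 16 bits at BITNUM == 24 overlaps the end of the unit, so
   the code above warns and clamps BITSIZE to 32 - 24 == 8 bits before
   attempting the insertion.  */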
666 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
667 "backwards" from the size of the unit we are inserting into.
668 Otherwise, we count bits from the most significant on a
669 BYTES/BITS_BIG_ENDIAN machine. */
671 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
672 bitnum = unit - bitsize - bitnum;
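/* As a worked example, assuming UNIT == 32, BITSIZE == 8 and BITNUM == 4:
   the renumbering above gives 32 - 8 - 4 == 20, i.e. the same field
   position counted from the opposite end of the unit.  */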
674 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
675 value1 = value;
676 if (value_mode != op_mode)
678 if (GET_MODE_BITSIZE (value_mode) >= bitsize)
680 rtx tmp;
681 /* Optimization: Don't bother really extending VALUE
682 if it has all the bits we will actually use. However,
683 if we must narrow it, be sure we do it correctly. */
685 if (GET_MODE_SIZE (value_mode) < GET_MODE_SIZE (op_mode))
687 tmp = simplify_subreg (op_mode, value1, value_mode, 0);
688 if (! tmp)
689 tmp = simplify_gen_subreg (op_mode,
690 force_reg (value_mode, value1),
691 value_mode, 0);
693 else
695 tmp = gen_lowpart_if_possible (op_mode, value1);
696 if (! tmp)
697 tmp = gen_lowpart (op_mode, force_reg (value_mode, value1));
699 value1 = tmp;
701 else if (CONST_INT_P (value))
702 value1 = gen_int_mode (INTVAL (value), op_mode);
703 else
704 /* Parse phase is supposed to make VALUE's data type
705 match that of the component reference, which is a type
706 at least as wide as the field; so VALUE should have
707 a mode that corresponds to that type. */
708 gcc_assert (CONSTANT_P (value));
711 create_fixed_operand (&ops[0], xop0);
712 create_integer_operand (&ops[1], bitsize);
713 create_integer_operand (&ops[2], bitnum);
714 create_input_operand (&ops[3], value1, op_mode);
715 if (maybe_expand_insn (insv->icode, 4, ops))
717 if (copy_back)
718 convert_move (op0, xop0, true);
719 return true;
721 delete_insns_since (last);
722 return false;
725 /* A subroutine of store_bit_field, with the same arguments. Return true
726 if the operation could be implemented.
728 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
729 no other way of implementing the operation. If FALLBACK_P is false,
730 return false instead. */
732 static bool
733 store_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
734 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
735 machine_mode fieldmode,
736 rtx value, bool reverse, bool fallback_p)
738 rtx op0 = str_rtx;
740 while (GET_CODE (op0) == SUBREG)
742 bitnum += subreg_memory_offset (op0) * BITS_PER_UNIT;
743 op0 = SUBREG_REG (op0);
746 /* No action is needed if the target is a register and if the field
747 lies completely outside that register. This can occur if the source
748 code contains an out-of-bounds access to a small array. */
749 if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
750 return true;
752 /* Use vec_set patterns for inserting parts of vectors whenever
753 available. */
754 machine_mode outermode = GET_MODE (op0);
755 scalar_mode innermode = GET_MODE_INNER (outermode);
756 poly_uint64 pos;
757 if (VECTOR_MODE_P (outermode)
758 && !MEM_P (op0)
759 && optab_handler (vec_set_optab, outermode) != CODE_FOR_nothing
760 && fieldmode == innermode
761 && known_eq (bitsize, GET_MODE_BITSIZE (innermode))
762 && multiple_p (bitnum, GET_MODE_BITSIZE (innermode), &pos))
764 class expand_operand ops[3];
765 enum insn_code icode = optab_handler (vec_set_optab, outermode);
767 create_fixed_operand (&ops[0], op0);
768 create_input_operand (&ops[1], value, innermode);
769 create_integer_operand (&ops[2], pos);
770 if (maybe_expand_insn (icode, 3, ops))
771 return true;
774 /* If the target is a register, overwriting the entire object, or storing
775 a full-word or multi-word field can be done with just a SUBREG. */
776 if (!MEM_P (op0)
777 && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode)))
779 /* Use the subreg machinery either to narrow OP0 to the required
780 words or to cope with mode punning between equal-sized modes.
781 In the latter case, use subreg on the rhs side, not lhs. */
782 rtx sub;
783 HOST_WIDE_INT regnum;
784 poly_uint64 regsize = REGMODE_NATURAL_SIZE (GET_MODE (op0));
785 if (known_eq (bitnum, 0U)
786 && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
788 sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
789 if (sub)
791 if (reverse)
792 sub = flip_storage_order (GET_MODE (op0), sub);
793 emit_move_insn (op0, sub);
794 return true;
797 else if (constant_multiple_p (bitnum, regsize * BITS_PER_UNIT, &regnum)
798 && multiple_p (bitsize, regsize * BITS_PER_UNIT))
800 sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
801 regnum * regsize);
802 if (sub)
804 if (reverse)
805 value = flip_storage_order (fieldmode, value);
806 emit_move_insn (sub, value);
807 return true;
812 /* If the target is memory, storing any naturally aligned field can be
813 done with a simple store. For targets that support fast unaligned
814 memory, any naturally sized, unit aligned field can be done directly. */
815 poly_uint64 bytenum;
816 if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode, &bytenum))
818 op0 = adjust_bitfield_address (op0, fieldmode, bytenum);
819 if (reverse)
820 value = flip_storage_order (fieldmode, value);
821 emit_move_insn (op0, value);
822 return true;
825 /* It's possible we'll need to handle other cases here for
826 polynomial bitnum and bitsize. */
828 /* From here on we need to be looking at a fixed-size insertion. */
829 unsigned HOST_WIDE_INT ibitsize = bitsize.to_constant ();
830 unsigned HOST_WIDE_INT ibitnum = bitnum.to_constant ();
832 /* Make sure we are playing with integral modes. Pun with subregs
833 if we aren't. This must come after the entire register case above,
834 since that case is valid for any mode. The following cases are only
835 valid for integral modes. */
836 opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
837 scalar_int_mode imode;
838 if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
840 if (MEM_P (op0))
841 op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
842 0, MEM_SIZE (op0));
843 else if (!op0_mode.exists ())
845 if (ibitnum == 0
846 && known_eq (ibitsize, GET_MODE_BITSIZE (GET_MODE (op0)))
847 && MEM_P (value)
848 && !reverse)
850 value = adjust_address (value, GET_MODE (op0), 0);
851 emit_move_insn (op0, value);
852 return true;
854 if (!fallback_p)
855 return false;
856 rtx temp = assign_stack_temp (GET_MODE (op0),
857 GET_MODE_SIZE (GET_MODE (op0)));
858 emit_move_insn (temp, op0);
859 store_bit_field_1 (temp, bitsize, bitnum, 0, 0, fieldmode, value,
860 reverse, fallback_p);
861 emit_move_insn (op0, temp);
862 return true;
864 else
865 op0 = gen_lowpart (op0_mode.require (), op0);
868 return store_integral_bit_field (op0, op0_mode, ibitsize, ibitnum,
869 bitregion_start, bitregion_end,
870 fieldmode, value, reverse, fallback_p);
873 /* Subroutine of store_bit_field_1, with the same arguments, except
874 that BITSIZE and BITNUM are constant. Handle cases specific to
875 integral modes. If OP0_MODE is defined, it is the mode of OP0,
876 otherwise OP0 is a BLKmode MEM. */
878 static bool
879 store_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
880 unsigned HOST_WIDE_INT bitsize,
881 unsigned HOST_WIDE_INT bitnum,
882 poly_uint64 bitregion_start,
883 poly_uint64 bitregion_end,
884 machine_mode fieldmode,
885 rtx value, bool reverse, bool fallback_p)
887 /* Storing an lsb-aligned field in a register
888 can be done with a movstrict instruction. */
890 if (!MEM_P (op0)
891 && !reverse
892 && lowpart_bit_field_p (bitnum, bitsize, op0_mode.require ())
893 && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode))
894 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
896 class expand_operand ops[2];
897 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
898 rtx arg0 = op0;
899 unsigned HOST_WIDE_INT subreg_off;
901 if (GET_CODE (arg0) == SUBREG)
903 /* Else we've got some float mode source being extracted into
904 a different float mode destination -- this combination of
905 subregs results in Severe Tire Damage. */
906 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
907 || GET_MODE_CLASS (fieldmode) == MODE_INT
908 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
909 arg0 = SUBREG_REG (arg0);
912 subreg_off = bitnum / BITS_PER_UNIT;
913 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
915 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
917 create_fixed_operand (&ops[0], arg0);
918 /* Shrink the source operand to FIELDMODE. */
919 create_convert_operand_to (&ops[1], value, fieldmode, false);
920 if (maybe_expand_insn (icode, 2, ops))
921 return true;
925 /* Handle fields bigger than a word. */
927 if (bitsize > BITS_PER_WORD)
929 /* Here we transfer the words of the field
930 in the order least significant first.
931 This is because the most significant word is the one which may
932 be less than full.
933 However, only do that if the value is not BLKmode. */
935 const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
936 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
937 unsigned int i;
938 rtx_insn *last;
940 /* This is the mode we must force value to, so that there will be enough
941 subwords to extract. Note that fieldmode will often (always?) be
942 VOIDmode, because that is what store_field uses to indicate that this
943 is a bit field, but passing VOIDmode to operand_subword_force
944 is not allowed.
946 The mode must be fixed-size, since insertions into variable-sized
947 objects are meant to be handled before calling this function. */
948 fixed_size_mode value_mode = as_a <fixed_size_mode> (GET_MODE (value));
949 if (value_mode == VOIDmode)
950 value_mode = smallest_int_mode_for_size (nwords * BITS_PER_WORD);
952 last = get_last_insn ();
953 for (i = 0; i < nwords; i++)
955 /* If I is 0, use the low-order word in both field and target;
956 if I is 1, use the next to lowest word; and so on. */
957 unsigned int wordnum = (backwards
958 ? GET_MODE_SIZE (value_mode) / UNITS_PER_WORD
959 - i - 1
960 : i);
961 unsigned int bit_offset = (backwards ^ reverse
962 ? MAX ((int) bitsize - ((int) i + 1)
963 * BITS_PER_WORD,
964 0)
965 : (int) i * BITS_PER_WORD);
966 rtx value_word = operand_subword_force (value, wordnum, value_mode);
967 unsigned HOST_WIDE_INT new_bitsize =
968 MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
970 /* If the remaining chunk doesn't have full wordsize we have
971 to make sure that for big-endian machines the higher order
972 bits are used. */
973 if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
975 int shift = BITS_PER_WORD - new_bitsize;
976 rtx shift_rtx = gen_int_shift_amount (word_mode, shift);
977 value_word = simplify_expand_binop (word_mode, lshr_optab,
978 value_word, shift_rtx,
979 NULL_RTX, true,
980 OPTAB_LIB_WIDEN);
983 if (!store_bit_field_1 (op0, new_bitsize,
984 bitnum + bit_offset,
985 bitregion_start, bitregion_end,
986 word_mode,
987 value_word, reverse, fallback_p))
989 delete_insns_since (last);
990 return false;
993 return true;
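/* As a worked example, assuming a little-endian target with 32-bit words:
   storing a 48-bit field runs the loop above with nwords == 2.  Pass
   i == 0 stores word 0 of VALUE into bits BITNUM..BITNUM+31, and pass
   i == 1 stores the remaining MIN (32, 48 - 32) == 16 bits of word 1 at
   BITNUM + 32.  */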
996 /* If VALUE has a floating-point or complex mode, access it as an
997 integer of the corresponding size. This can occur on a machine
998 with 64 bit registers that uses SFmode for float. It can also
999 occur for unaligned float or complex fields. */
1000 rtx orig_value = value;
1001 scalar_int_mode value_mode;
1002 if (GET_MODE (value) == VOIDmode)
1003 /* By this point we've dealt with values that are bigger than a word,
1004 so word_mode is a conservatively correct choice. */
1005 value_mode = word_mode;
1006 else if (!is_a <scalar_int_mode> (GET_MODE (value), &value_mode))
1008 value_mode = int_mode_for_mode (GET_MODE (value)).require ();
1009 value = gen_reg_rtx (value_mode);
1010 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
1013 /* If OP0 is a multi-word register, narrow it to the affected word.
1014 If the region spans two words, defer to store_split_bit_field.
1015 Don't do this if op0 is a single hard register wider than word
1016 such as a float or vector register. */
1017 if (!MEM_P (op0)
1018 && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD
1019 && (!REG_P (op0)
1020 || !HARD_REGISTER_P (op0)
1021 || hard_regno_nregs (REGNO (op0), op0_mode.require ()) != 1))
1023 if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1025 if (!fallback_p)
1026 return false;
1028 store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1029 bitregion_start, bitregion_end,
1030 value, value_mode, reverse);
1031 return true;
1033 op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
1034 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1035 gcc_assert (op0);
1036 op0_mode = word_mode;
1037 bitnum %= BITS_PER_WORD;
1040 /* From here on we can assume that the field to be stored in fits
1041 within a word. If the destination is a register, it too fits
1042 in a word. */
1044 extraction_insn insv;
1045 if (!MEM_P (op0)
1046 && !reverse
1047 && get_best_reg_extraction_insn (&insv, EP_insv,
1048 GET_MODE_BITSIZE (op0_mode.require ()),
1049 fieldmode)
1050 && store_bit_field_using_insv (&insv, op0, op0_mode,
1051 bitsize, bitnum, value, value_mode))
1052 return true;
1054 /* If OP0 is a memory, try copying it to a register and seeing if a
1055 cheap register alternative is available. */
1056 if (MEM_P (op0) && !reverse)
1058 if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
1059 fieldmode)
1060 && store_bit_field_using_insv (&insv, op0, op0_mode,
1061 bitsize, bitnum, value, value_mode))
1062 return true;
1064 rtx_insn *last = get_last_insn ();
1066 /* Try loading part of OP0 into a register, inserting the bitfield
1067 into that, and then copying the result back to OP0. */
1068 unsigned HOST_WIDE_INT bitpos;
1069 rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1070 bitregion_start, bitregion_end,
1071 fieldmode, &bitpos);
1072 if (xop0)
1074 rtx tempreg = copy_to_reg (xop0);
1075 if (store_bit_field_1 (tempreg, bitsize, bitpos,
1076 bitregion_start, bitregion_end,
1077 fieldmode, orig_value, reverse, false))
1079 emit_move_insn (xop0, tempreg);
1080 return true;
1082 delete_insns_since (last);
1086 if (!fallback_p)
1087 return false;
1089 store_fixed_bit_field (op0, op0_mode, bitsize, bitnum, bitregion_start,
1090 bitregion_end, value, value_mode, reverse);
1091 return true;
1094 /* Generate code to store value from rtx VALUE
1095 into a bit-field within structure STR_RTX
1096 containing BITSIZE bits starting at bit BITNUM.
1098 BITREGION_START is bitpos of the first bitfield in this region.
1099 BITREGION_END is the bitpos of the ending bitfield in this region.
1100 These two fields are 0 if the C++ memory model does not apply,
1101 or we are not interested in keeping track of bitfield regions.
1103 FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1105 If REVERSE is true, the store is to be done in reverse order. */
1107 void
1108 store_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
1109 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1110 machine_mode fieldmode,
1111 rtx value, bool reverse)
1113 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1114 unsigned HOST_WIDE_INT ibitsize = 0, ibitnum = 0;
1115 scalar_int_mode int_mode;
1116 if (bitsize.is_constant (&ibitsize)
1117 && bitnum.is_constant (&ibitnum)
1118 && is_a <scalar_int_mode> (fieldmode, &int_mode)
1119 && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum, int_mode,
1120 bitregion_start, bitregion_end))
1122 /* Storing of a full word can be done with a simple store.
1123 We know here that the field can be accessed with one single
1124 instruction. For targets that support unaligned memory,
1125 an unaligned access may be necessary. */
1126 if (ibitsize == GET_MODE_BITSIZE (int_mode))
1128 str_rtx = adjust_bitfield_address (str_rtx, int_mode,
1129 ibitnum / BITS_PER_UNIT);
1130 if (reverse)
1131 value = flip_storage_order (int_mode, value);
1132 gcc_assert (ibitnum % BITS_PER_UNIT == 0);
1133 emit_move_insn (str_rtx, value);
1135 else
1137 rtx temp;
1139 str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize,
1140 ibitnum, &ibitnum);
1141 gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
1142 temp = copy_to_reg (str_rtx);
1143 if (!store_bit_field_1 (temp, ibitsize, ibitnum, 0, 0,
1144 int_mode, value, reverse, true))
1145 gcc_unreachable ();
1147 emit_move_insn (str_rtx, temp);
1150 return;
1153 /* Under the C++0x memory model, we must not touch bits outside the
1154 bit region. Adjust the address to start at the beginning of the
1155 bit region. */
1156 if (MEM_P (str_rtx) && maybe_ne (bitregion_start, 0U))
1158 scalar_int_mode best_mode;
1159 machine_mode addr_mode = VOIDmode;
1161 poly_uint64 offset = exact_div (bitregion_start, BITS_PER_UNIT);
1162 bitnum -= bitregion_start;
1163 poly_int64 size = bits_to_bytes_round_up (bitnum + bitsize);
1164 bitregion_end -= bitregion_start;
1165 bitregion_start = 0;
1166 if (bitsize.is_constant (&ibitsize)
1167 && bitnum.is_constant (&ibitnum)
1168 && get_best_mode (ibitsize, ibitnum,
1169 bitregion_start, bitregion_end,
1170 MEM_ALIGN (str_rtx), INT_MAX,
1171 MEM_VOLATILE_P (str_rtx), &best_mode))
1172 addr_mode = best_mode;
1173 str_rtx = adjust_bitfield_address_size (str_rtx, addr_mode,
1174 offset, size);
1177 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1178 bitregion_start, bitregion_end,
1179 fieldmode, value, reverse, true))
1180 gcc_unreachable ();
1183 /* Use shifts and boolean operations to store VALUE into a bit field of
1184 width BITSIZE in OP0, starting at bit BITNUM. If OP0_MODE is defined,
1185 it is the mode of OP0, otherwise OP0 is a BLKmode MEM. VALUE_MODE is
1186 the mode of VALUE.
1188 If REVERSE is true, the store is to be done in reverse order. */
1190 static void
1191 store_fixed_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1192 unsigned HOST_WIDE_INT bitsize,
1193 unsigned HOST_WIDE_INT bitnum,
1194 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1195 rtx value, scalar_int_mode value_mode, bool reverse)
1197 /* There is a case not handled here:
1198 a structure with a known alignment of just a halfword
1199 and a field split across two aligned halfwords within the structure.
1200 Or likewise a structure with a known alignment of just a byte
1201 and a field split across two bytes.
1202 Such cases are not supposed to be able to occur. */
1204 scalar_int_mode best_mode;
1205 if (MEM_P (op0))
1207 unsigned int max_bitsize = BITS_PER_WORD;
1208 scalar_int_mode imode;
1209 if (op0_mode.exists (&imode) && GET_MODE_BITSIZE (imode) < max_bitsize)
1210 max_bitsize = GET_MODE_BITSIZE (imode);
1212 if (!get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1213 MEM_ALIGN (op0), max_bitsize, MEM_VOLATILE_P (op0),
1214 &best_mode))
1216 /* The only way this should occur is if the field spans word
1217 boundaries. */
1218 store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1219 bitregion_start, bitregion_end,
1220 value, value_mode, reverse);
1221 return;
1224 op0 = narrow_bit_field_mem (op0, best_mode, bitsize, bitnum, &bitnum);
1226 else
1227 best_mode = op0_mode.require ();
1229 store_fixed_bit_field_1 (op0, best_mode, bitsize, bitnum,
1230 value, value_mode, reverse);
1233 /* Helper function for store_fixed_bit_field, stores
1234 the bit field always using MODE, which is the mode of OP0. The other
1235 arguments are as for store_fixed_bit_field. */
1237 static void
1238 store_fixed_bit_field_1 (rtx op0, scalar_int_mode mode,
1239 unsigned HOST_WIDE_INT bitsize,
1240 unsigned HOST_WIDE_INT bitnum,
1241 rtx value, scalar_int_mode value_mode, bool reverse)
1243 rtx temp;
1244 int all_zero = 0;
1245 int all_one = 0;
1247 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1248 for invalid input, such as f5 from gcc.dg/pr48335-2.c. */
1250 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1251 /* BITNUM is the distance between our msb
1252 and that of the containing datum.
1253 Convert it to the distance from the lsb. */
1254 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1256 /* Now BITNUM is always the distance between our lsb
1257 and that of OP0. */
1259 /* Shift VALUE left by BITNUM bits. If VALUE is not constant,
1260 we must first convert its mode to MODE. */
1262 if (CONST_INT_P (value))
1264 unsigned HOST_WIDE_INT v = UINTVAL (value);
1266 if (bitsize < HOST_BITS_PER_WIDE_INT)
1267 v &= (HOST_WIDE_INT_1U << bitsize) - 1;
1269 if (v == 0)
1270 all_zero = 1;
1271 else if ((bitsize < HOST_BITS_PER_WIDE_INT
1272 && v == (HOST_WIDE_INT_1U << bitsize) - 1)
1273 || (bitsize == HOST_BITS_PER_WIDE_INT
1274 && v == HOST_WIDE_INT_M1U))
1275 all_one = 1;
1277 value = lshift_value (mode, v, bitnum);
1279 else
1281 int must_and = (GET_MODE_BITSIZE (value_mode) != bitsize
1282 && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1284 if (value_mode != mode)
1285 value = convert_to_mode (mode, value, 1);
1287 if (must_and)
1288 value = expand_binop (mode, and_optab, value,
1289 mask_rtx (mode, 0, bitsize, 0),
1290 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1291 if (bitnum > 0)
1292 value = expand_shift (LSHIFT_EXPR, mode, value,
1293 bitnum, NULL_RTX, 1);
1296 if (reverse)
1297 value = flip_storage_order (mode, value);
1299 /* Now clear the chosen bits in OP0,
1300 except that if VALUE is -1 we need not bother. */
1301 /* We keep the intermediates in registers to allow CSE to combine
1302 consecutive bitfield assignments. */
1304 temp = force_reg (mode, op0);
1306 if (! all_one)
1308 rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
1309 if (reverse)
1310 mask = flip_storage_order (mode, mask);
1311 temp = expand_binop (mode, and_optab, temp, mask,
1312 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1313 temp = force_reg (mode, temp);
1316 /* Now logical-or VALUE into OP0, unless it is zero. */
1318 if (! all_zero)
1320 temp = expand_binop (mode, ior_optab, temp, value,
1321 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1322 temp = force_reg (mode, temp);
1325 if (op0 != temp)
1327 op0 = copy_rtx (op0);
1328 emit_move_insn (op0, temp);
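/* As a worked example, assuming a 32-bit MODE: storing the constant 5
   into a 4-bit field at BITNUM 8 reduces, after the shifts and masks
   above, to

     temp = (op0 & ~0x00000f00) | 0x00000500;

   i.e. the field is first cleared with the complemented mask and the
   shifted value is then ORed in; the ALL_ZERO and ALL_ONE checks let
   either step be skipped when it would be a no-op.  */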
1332 /* Store a bit field that is split across multiple accessible memory objects.
1334 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1335 BITSIZE is the field width; BITPOS the position of its first bit
1336 (within the word).
1337 VALUE is the value to store, which has mode VALUE_MODE.
1338 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
1339 a BLKmode MEM.
1341 If REVERSE is true, the store is to be done in reverse order.
1343 This does not yet handle fields wider than BITS_PER_WORD. */
1345 static void
1346 store_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1347 unsigned HOST_WIDE_INT bitsize,
1348 unsigned HOST_WIDE_INT bitpos,
1349 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1350 rtx value, scalar_int_mode value_mode, bool reverse)
1352 unsigned int unit, total_bits, bitsdone = 0;
1354 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1355 much at a time. */
1356 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1357 unit = BITS_PER_WORD;
1358 else
1359 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1361 /* If OP0 is a memory with a mode, then UNIT must not be larger than
1362 OP0's mode as well. Otherwise, store_fixed_bit_field will call us
1363 again, and we will mutually recurse forever. */
1364 if (MEM_P (op0) && op0_mode.exists ())
1365 unit = MIN (unit, GET_MODE_BITSIZE (op0_mode.require ()));
1367 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1368 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1369 that VALUE might be a floating-point constant. */
1370 if (CONSTANT_P (value) && !CONST_INT_P (value))
1372 rtx word = gen_lowpart_common (word_mode, value);
1374 if (word && (value != word))
1375 value = word;
1376 else
1377 value = gen_lowpart_common (word_mode, force_reg (value_mode, value));
1378 value_mode = word_mode;
1381 total_bits = GET_MODE_BITSIZE (value_mode);
1383 while (bitsdone < bitsize)
1385 unsigned HOST_WIDE_INT thissize;
1386 unsigned HOST_WIDE_INT thispos;
1387 unsigned HOST_WIDE_INT offset;
1388 rtx part;
1390 offset = (bitpos + bitsdone) / unit;
1391 thispos = (bitpos + bitsdone) % unit;
1393 /* When region of bytes we can touch is restricted, decrease
1394 UNIT close to the end of the region as needed. If op0 is a REG
1395 or SUBREG of REG, don't do this, as there can't be data races
1396 on a register and we can expand shorter code in some cases. */
1397 if (maybe_ne (bitregion_end, 0U)
1398 && unit > BITS_PER_UNIT
1399 && maybe_gt (bitpos + bitsdone - thispos + unit, bitregion_end + 1)
1400 && !REG_P (op0)
1401 && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1403 unit = unit / 2;
1404 continue;
1407 /* THISSIZE must not overrun a word boundary. Otherwise,
1408 store_fixed_bit_field will call us again, and we will mutually
1409 recurse forever. */
1410 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1411 thissize = MIN (thissize, unit - thispos);
1413 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1415 /* Fetch successively less significant portions. */
1416 if (CONST_INT_P (value))
1417 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1418 >> (bitsize - bitsdone - thissize))
1419 & ((HOST_WIDE_INT_1 << thissize) - 1));
1420 /* Likewise, but the source is little-endian. */
1421 else if (reverse)
1422 part = extract_fixed_bit_field (word_mode, value, value_mode,
1423 thissize,
1424 bitsize - bitsdone - thissize,
1425 NULL_RTX, 1, false);
1426 else
1427 /* The args are chosen so that the last part includes the
1428 lsb. Give extract_bit_field the value it needs (with
1429 endianness compensation) to fetch the piece we want. */
1430 part = extract_fixed_bit_field (word_mode, value, value_mode,
1431 thissize,
1432 total_bits - bitsize + bitsdone,
1433 NULL_RTX, 1, false);
1435 else
1437 /* Fetch successively more significant portions. */
1438 if (CONST_INT_P (value))
1439 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1440 >> bitsdone)
1441 & ((HOST_WIDE_INT_1 << thissize) - 1));
1442 /* Likewise, but the source is big-endian. */
1443 else if (reverse)
1444 part = extract_fixed_bit_field (word_mode, value, value_mode,
1445 thissize,
1446 total_bits - bitsdone - thissize,
1447 NULL_RTX, 1, false);
1448 else
1449 part = extract_fixed_bit_field (word_mode, value, value_mode,
1450 thissize, bitsdone, NULL_RTX,
1451 1, false);
1454 /* If OP0 is a register, then handle OFFSET here. */
1455 rtx op0_piece = op0;
1456 opt_scalar_int_mode op0_piece_mode = op0_mode;
1457 if (SUBREG_P (op0) || REG_P (op0))
1459 scalar_int_mode imode;
1460 if (op0_mode.exists (&imode)
1461 && GET_MODE_SIZE (imode) < UNITS_PER_WORD)
1463 if (offset)
1464 op0_piece = const0_rtx;
1466 else
1468 op0_piece = operand_subword_force (op0,
1469 offset * unit / BITS_PER_WORD,
1470 GET_MODE (op0));
1471 op0_piece_mode = word_mode;
1473 offset &= BITS_PER_WORD / unit - 1;
1476 /* OFFSET is in UNITs, and UNIT is in bits. If OP0_PIECE is const0_rtx,
1477 it is just an out-of-bounds access. Ignore it. */
1478 if (op0_piece != const0_rtx)
1479 store_fixed_bit_field (op0_piece, op0_piece_mode, thissize,
1480 offset * unit + thispos, bitregion_start,
1481 bitregion_end, part, word_mode, reverse);
1482 bitsdone += thissize;
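/* As a worked example, assuming UNIT == 32, BITPOS == 24 and
   BITSIZE == 16: the loop above performs two iterations.  The first has
   offset == 0, thispos == 24 and thissize == MIN (16, 32 - 24) == 8; the
   second has bitsdone == 8, offset == 1, thispos == 0 and thissize == 8.
   The field is thus stored as an 8-bit piece at bit 24 of the first unit
   and an 8-bit piece at bit 0 of the second.  */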
1486 /* A subroutine of extract_bit_field_1 that converts return value X
1487 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1488 to extract_bit_field. */
1490 static rtx
1491 convert_extracted_bit_field (rtx x, machine_mode mode,
1492 machine_mode tmode, bool unsignedp)
1494 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1495 return x;
1497 /* If the x mode is not a scalar integral, first convert to the
1498 integer mode of that size and then access it as a floating-point
1499 value via a SUBREG. */
1500 if (!SCALAR_INT_MODE_P (tmode))
1502 scalar_int_mode int_mode = int_mode_for_mode (tmode).require ();
1503 x = convert_to_mode (int_mode, x, unsignedp);
1504 x = force_reg (int_mode, x);
1505 return gen_lowpart (tmode, x);
1508 return convert_to_mode (tmode, x, unsignedp);
1511 /* Try to use an ext(z)v pattern to extract a field from OP0.
1512 Return the extracted value on success, otherwise return null.
1513 EXTV describes the extraction instruction to use. If OP0_MODE
1514 is defined, it is the mode of OP0, otherwise OP0 is a BLKmode MEM.
1515 The other arguments are as for extract_bit_field. */
1517 static rtx
1518 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1519 opt_scalar_int_mode op0_mode,
1520 unsigned HOST_WIDE_INT bitsize,
1521 unsigned HOST_WIDE_INT bitnum,
1522 int unsignedp, rtx target,
1523 machine_mode mode, machine_mode tmode)
1525 class expand_operand ops[4];
1526 rtx spec_target = target;
1527 rtx spec_target_subreg = 0;
1528 scalar_int_mode ext_mode = extv->field_mode;
1529 unsigned unit = GET_MODE_BITSIZE (ext_mode);
1531 if (bitsize == 0 || unit < bitsize)
1532 return NULL_RTX;
1534 if (MEM_P (op0))
1535 /* Get a reference to the first byte of the field. */
1536 op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1537 &bitnum);
1538 else
1540 /* Convert from counting within OP0 to counting in EXT_MODE. */
1541 if (BYTES_BIG_ENDIAN)
1542 bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
1544 /* If op0 is a register, we need it in EXT_MODE to make it
1545 acceptable to the format of ext(z)v. */
1546 if (GET_CODE (op0) == SUBREG && op0_mode.require () != ext_mode)
1547 return NULL_RTX;
1548 if (REG_P (op0) && op0_mode.require () != ext_mode)
1549 op0 = gen_lowpart_SUBREG (ext_mode, op0);
1552 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1553 "backwards" from the size of the unit we are extracting from.
1554 Otherwise, we count bits from the most significant on a
1555 BYTES/BITS_BIG_ENDIAN machine. */
1557 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1558 bitnum = unit - bitsize - bitnum;
1560 if (target == 0)
1561 target = spec_target = gen_reg_rtx (tmode);
1563 if (GET_MODE (target) != ext_mode)
1565 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1566 between the mode of the extraction (word_mode) and the target
1567 mode. Instead, create a temporary and use convert_move to set
1568 the target. */
1569 if (REG_P (target)
1570 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1572 target = gen_lowpart (ext_mode, target);
1573 if (partial_subreg_p (GET_MODE (spec_target), ext_mode))
1574 spec_target_subreg = target;
1576 else
1577 target = gen_reg_rtx (ext_mode);
1580 create_output_operand (&ops[0], target, ext_mode);
1581 create_fixed_operand (&ops[1], op0);
1582 create_integer_operand (&ops[2], bitsize);
1583 create_integer_operand (&ops[3], bitnum);
1584 if (maybe_expand_insn (extv->icode, 4, ops))
1586 target = ops[0].value;
1587 if (target == spec_target)
1588 return target;
1589 if (target == spec_target_subreg)
1590 return spec_target;
1591 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1593 return NULL_RTX;
1596 /* See whether it would be valid to extract the part of OP0 described
1597 by BITNUM and BITSIZE into a value of mode MODE using a subreg
1598 operation. Return the subreg if so, otherwise return null. */
1600 static rtx
1601 extract_bit_field_as_subreg (machine_mode mode, rtx op0,
1602 poly_uint64 bitsize, poly_uint64 bitnum)
1604 poly_uint64 bytenum;
1605 if (multiple_p (bitnum, BITS_PER_UNIT, &bytenum)
1606 && known_eq (bitsize, GET_MODE_BITSIZE (mode))
1607 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1608 && TRULY_NOOP_TRUNCATION_MODES_P (mode, GET_MODE (op0)))
1609 return simplify_gen_subreg (mode, op0, GET_MODE (op0), bytenum);
1610 return NULL_RTX;
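/* As an illustrative sketch, assuming a little-endian target: extracting
   bits 0..31 of a DImode pseudo as SImode passes all of the checks above,
   so the result is simply the lowpart subreg

     (subreg:SI (reg:DI x) 0)

   while a field that is not byte-aligned, or is not a lowpart of OP0,
   makes the function return NULL_RTX instead.  */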
1613 /* A subroutine of extract_bit_field, with the same arguments.
1614 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1615 if we can find no other means of implementing the operation.
1616 If FALLBACK_P is false, return NULL instead. */
1618 static rtx
1619 extract_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
1620 int unsignedp, rtx target, machine_mode mode,
1621 machine_mode tmode, bool reverse, bool fallback_p,
1622 rtx *alt_rtl)
1624 rtx op0 = str_rtx;
1625 machine_mode mode1;
1627 if (tmode == VOIDmode)
1628 tmode = mode;
1630 while (GET_CODE (op0) == SUBREG)
1632 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1633 op0 = SUBREG_REG (op0);
1636 /* If we have an out-of-bounds access to a register, just return an
1637 uninitialized register of the required mode. This can occur if the
1638 source code contains an out-of-bounds access to a small array. */
1639 if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
1640 return gen_reg_rtx (tmode);
1642 if (REG_P (op0)
1643 && mode == GET_MODE (op0)
1644 && known_eq (bitnum, 0U)
1645 && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
1647 if (reverse)
1648 op0 = flip_storage_order (mode, op0);
1649 /* We're trying to extract a full register from itself. */
1650 return op0;
1653 /* First try to check for vector from vector extractions. */
1654 if (VECTOR_MODE_P (GET_MODE (op0))
1655 && !MEM_P (op0)
1656 && VECTOR_MODE_P (tmode)
1657 && known_eq (bitsize, GET_MODE_BITSIZE (tmode))
1658 && maybe_gt (GET_MODE_SIZE (GET_MODE (op0)), GET_MODE_SIZE (tmode)))
1660 machine_mode new_mode = GET_MODE (op0);
1661 if (GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode))
1663 scalar_mode inner_mode = GET_MODE_INNER (tmode);
1664 poly_uint64 nunits;
1665 if (!multiple_p (GET_MODE_BITSIZE (GET_MODE (op0)),
1666 GET_MODE_UNIT_BITSIZE (tmode), &nunits)
1667 || !related_vector_mode (tmode, inner_mode,
1668 nunits).exists (&new_mode)
1669 || maybe_ne (GET_MODE_SIZE (new_mode),
1670 GET_MODE_SIZE (GET_MODE (op0))))
1671 new_mode = VOIDmode;
1673 poly_uint64 pos;
1674 if (new_mode != VOIDmode
1675 && (convert_optab_handler (vec_extract_optab, new_mode, tmode)
1676 != CODE_FOR_nothing)
1677 && multiple_p (bitnum, GET_MODE_BITSIZE (tmode), &pos))
1679 class expand_operand ops[3];
1680 machine_mode outermode = new_mode;
1681 machine_mode innermode = tmode;
1682 enum insn_code icode
1683 = convert_optab_handler (vec_extract_optab, outermode, innermode);
1685 if (new_mode != GET_MODE (op0))
1686 op0 = gen_lowpart (new_mode, op0);
1687 create_output_operand (&ops[0], target, innermode);
1688 ops[0].target = 1;
1689 create_input_operand (&ops[1], op0, outermode);
1690 create_integer_operand (&ops[2], pos);
1691 if (maybe_expand_insn (icode, 3, ops))
1693 if (alt_rtl && ops[0].target)
1694 *alt_rtl = target;
1695 target = ops[0].value;
1696 if (GET_MODE (target) != mode)
1697 return gen_lowpart (tmode, target);
1698 return target;
1703 /* See if we can get a better vector mode before extracting. */
1704 if (VECTOR_MODE_P (GET_MODE (op0))
1705 && !MEM_P (op0)
1706 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1708 machine_mode new_mode;
1710 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1711 new_mode = MIN_MODE_VECTOR_FLOAT;
1712 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1713 new_mode = MIN_MODE_VECTOR_FRACT;
1714 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1715 new_mode = MIN_MODE_VECTOR_UFRACT;
1716 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1717 new_mode = MIN_MODE_VECTOR_ACCUM;
1718 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1719 new_mode = MIN_MODE_VECTOR_UACCUM;
1720 else
1721 new_mode = MIN_MODE_VECTOR_INT;
1723 FOR_EACH_MODE_FROM (new_mode, new_mode)
1724 if (known_eq (GET_MODE_SIZE (new_mode), GET_MODE_SIZE (GET_MODE (op0)))
1725 && known_eq (GET_MODE_UNIT_SIZE (new_mode), GET_MODE_SIZE (tmode))
1726 && targetm.vector_mode_supported_p (new_mode))
1727 break;
1728 if (new_mode != VOIDmode)
1729 op0 = gen_lowpart (new_mode, op0);
1732 /* Use vec_extract patterns for extracting parts of vectors whenever
1733 available. If that fails, see whether the current modes and bitregion
1734 give a natural subreg. */
1735 machine_mode outermode = GET_MODE (op0);
1736 if (VECTOR_MODE_P (outermode) && !MEM_P (op0))
1738 scalar_mode innermode = GET_MODE_INNER (outermode);
1739 enum insn_code icode
1740 = convert_optab_handler (vec_extract_optab, outermode, innermode);
1741 poly_uint64 pos;
1742 if (icode != CODE_FOR_nothing
1743 && known_eq (bitsize, GET_MODE_BITSIZE (innermode))
1744 && multiple_p (bitnum, GET_MODE_BITSIZE (innermode), &pos))
1746 class expand_operand ops[3];
1748 create_output_operand (&ops[0], target, innermode);
1749 ops[0].target = 1;
1750 create_input_operand (&ops[1], op0, outermode);
1751 create_integer_operand (&ops[2], pos);
1752 if (maybe_expand_insn (icode, 3, ops))
1754 if (alt_rtl && ops[0].target)
1755 *alt_rtl = target;
1756 target = ops[0].value;
1757 if (GET_MODE (target) != mode)
1758 return gen_lowpart (tmode, target);
1759 return target;
1762 /* Using subregs is useful if we're extracting one register vector
1763 from a multi-register vector. extract_bit_field_as_subreg checks
1764 for valid bitsize and bitnum, so we don't need to do that here. */
1765 if (VECTOR_MODE_P (mode))
1767 rtx sub = extract_bit_field_as_subreg (mode, op0, bitsize, bitnum);
1768 if (sub)
1769 return sub;
1773 /* Make sure we are playing with integral modes. Pun with subregs
1774 if we aren't. */
1775 opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
1776 scalar_int_mode imode;
1777 if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
1779 if (MEM_P (op0))
1780 op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
1781 0, MEM_SIZE (op0));
1782 else if (op0_mode.exists (&imode))
1784 op0 = gen_lowpart (imode, op0);
1786 /* If we got a SUBREG, force it into a register since we
1787 aren't going to be able to do another SUBREG on it. */
1788 if (GET_CODE (op0) == SUBREG)
1789 op0 = force_reg (imode, op0);
1791 else
1793 poly_int64 size = GET_MODE_SIZE (GET_MODE (op0));
1794 rtx mem = assign_stack_temp (GET_MODE (op0), size);
1795 emit_move_insn (mem, op0);
1796 op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1800 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1801 If that's wrong, the solution is to test for it and set TARGET to 0
1802 if needed. */
1804 /* Get the mode of the field to use for atomic access or subreg
1805 conversion. */
1806 if (!SCALAR_INT_MODE_P (tmode)
1807 || !mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0).exists (&mode1))
1808 mode1 = mode;
1809 gcc_assert (mode1 != BLKmode);
1811 /* Extraction of a full MODE1 value can be done with a subreg as long
1812 as the least significant bit of the value is the least significant
1813 bit of either OP0 or a word of OP0. */
1814 if (!MEM_P (op0) && !reverse)
1816 rtx sub = extract_bit_field_as_subreg (mode1, op0, bitsize, bitnum);
1817 if (sub)
1818 return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1821 /* Extraction of a full MODE1 value can be done with a load as long as
1822 the field is on a byte boundary and is sufficiently aligned. */
1823 poly_uint64 bytenum;
1824 if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1, &bytenum))
1826 op0 = adjust_bitfield_address (op0, mode1, bytenum);
1827 if (reverse)
1828 op0 = flip_storage_order (mode1, op0);
1829 return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1832 /* If we have a memory source and a non-constant bit offset, restrict
1833 the memory to the referenced bytes. This is a worst-case fallback
1834 but is useful for things like vector booleans. */
1835 if (MEM_P (op0) && !bitnum.is_constant ())
1837 bytenum = bits_to_bytes_round_down (bitnum);
1838 bitnum = num_trailing_bits (bitnum);
1839 poly_uint64 bytesize = bits_to_bytes_round_up (bitnum + bitsize);
1840 op0 = adjust_bitfield_address_size (op0, BLKmode, bytenum, bytesize);
1841 op0_mode = opt_scalar_int_mode ();
1844 /* It's possible we'll need to handle other cases here for
1845 polynomial bitnum and bitsize. */
1847 /* From here on we need to be looking at a fixed-size extraction. */
1848 return extract_integral_bit_field (op0, op0_mode, bitsize.to_constant (),
1849 bitnum.to_constant (), unsignedp,
1850 target, mode, tmode, reverse, fallback_p);
1853 /* Subroutine of extract_bit_field_1, with the same arguments, except
1854 that BITSIZE and BITNUM are constant. Handle cases specific to
1855 integral modes. If OP0_MODE is defined, it is the mode of OP0,
1856 otherwise OP0 is a BLKmode MEM. */
1858 static rtx
1859 extract_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1860 unsigned HOST_WIDE_INT bitsize,
1861 unsigned HOST_WIDE_INT bitnum, int unsignedp,
1862 rtx target, machine_mode mode, machine_mode tmode,
1863 bool reverse, bool fallback_p)
1865 /* Handle fields bigger than a word. */
1867 if (bitsize > BITS_PER_WORD)
1869 /* Here we transfer the words of the field
1870 in the order least significant first.
1871 This is because the most significant word is the one which may
1872 be less than full. */
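/* For example, extracting a 72-bit field on a target with 32-bit words
   takes three word-sized moves; only 8 of the 32 bits in the most
   significant word belong to the field, and that word is handled last.  */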
1874 const bool backwards = WORDS_BIG_ENDIAN;
1875 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1876 unsigned int i;
1877 rtx_insn *last;
1879 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1880 target = gen_reg_rtx (mode);
1882 /* In case we're about to clobber a base register or something
1883 (see gcc.c-torture/execute/20040625-1.c). */
1884 if (reg_mentioned_p (target, op0))
1885 target = gen_reg_rtx (mode);
1887 /* Indicate for flow that the entire target reg is being set. */
1888 emit_clobber (target);
1890 /* The mode must be fixed-size, since extract_bit_field_1 handles
1891 extractions from variable-sized objects before calling this
1892 function. */
1893 unsigned int target_size
1894 = GET_MODE_SIZE (GET_MODE (target)).to_constant ();
1895 last = get_last_insn ();
1896 for (i = 0; i < nwords; i++)
1898 /* If I is 0, use the low-order word in both field and target;
1899 if I is 1, use the next to lowest word; and so on. */
1900 /* Word number in TARGET to use. */
1901 unsigned int wordnum
1902 = (backwards ? target_size / UNITS_PER_WORD - i - 1 : i);
1903 /* Offset from start of field in OP0. */
1904 unsigned int bit_offset = (backwards ^ reverse
1905 ? MAX ((int) bitsize - ((int) i + 1)
1906 * BITS_PER_WORD,
1907 0)
1908 : (int) i * BITS_PER_WORD);
1909 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1910 rtx result_part
1911 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1912 bitsize - i * BITS_PER_WORD),
1913 bitnum + bit_offset, 1, target_part,
1914 mode, word_mode, reverse, fallback_p, NULL);
1916 gcc_assert (target_part);
1917 if (!result_part)
1919 delete_insns_since (last);
1920 return NULL;
1923 if (result_part != target_part)
1924 emit_move_insn (target_part, result_part);
1927 if (unsignedp)
1929 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1930 need to be zero'd out. */
1931 if (target_size > nwords * UNITS_PER_WORD)
1933 unsigned int i, total_words;
1935 total_words = target_size / UNITS_PER_WORD;
1936 for (i = nwords; i < total_words; i++)
1937 emit_move_insn
1938 (operand_subword (target,
1939 backwards ? total_words - i - 1 : i,
1940 1, VOIDmode),
1941 const0_rtx);
1943 return target;
1946 /* Signed bit field: sign-extend with two arithmetic shifts. */
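/* E.g. a signed 40-bit field assembled into a 64-bit target register is
   shifted left by 24 and then arithmetically shifted right by 24, which
   replicates bit 39 of the field into the upper 24 bits.  */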
1947 target = expand_shift (LSHIFT_EXPR, mode, target,
1948 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1949 return expand_shift (RSHIFT_EXPR, mode, target,
1950 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1953 /* If OP0 is a multi-word register, narrow it to the affected word.
1954 If the region spans two words, defer to extract_split_bit_field. */
1955 if (!MEM_P (op0) && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD)
1957 if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1959 if (!fallback_p)
1960 return NULL_RTX;
1961 target = extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
1962 unsignedp, reverse);
1963 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1965 op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
1966 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1967 op0_mode = word_mode;
1968 bitnum %= BITS_PER_WORD;
1971 /* From here on we know the desired field is smaller than a word.
1972 If OP0 is a register, it too fits within a word. */
1973 enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1974 extraction_insn extv;
1975 if (!MEM_P (op0)
1976 && !reverse
1977 /* ??? We could limit the structure size to the part of OP0 that
1978 contains the field, with appropriate checks for endianness
1979 and TARGET_TRULY_NOOP_TRUNCATION. */
1980 && get_best_reg_extraction_insn (&extv, pattern,
1981 GET_MODE_BITSIZE (op0_mode.require ()),
1982 tmode))
1984 rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
1985 bitsize, bitnum,
1986 unsignedp, target, mode,
1987 tmode);
1988 if (result)
1989 return result;
1992 /* If OP0 is a memory, try copying it to a register and seeing if a
1993 cheap register alternative is available. */
1994 if (MEM_P (op0) && !reverse)
1996 if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1997 tmode))
1999 rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
2000 bitsize, bitnum,
2001 unsignedp, target, mode,
2002 tmode);
2003 if (result)
2004 return result;
2007 rtx_insn *last = get_last_insn ();
2009 /* Try loading part of OP0 into a register and extracting the
2010 bitfield from that. */
2011 unsigned HOST_WIDE_INT bitpos;
2012 rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
2013 0, 0, tmode, &bitpos);
2014 if (xop0)
2016 xop0 = copy_to_reg (xop0);
2017 rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
2018 unsignedp, target,
2019 mode, tmode, reverse, false, NULL);
2020 if (result)
2021 return result;
2022 delete_insns_since (last);
2026 if (!fallback_p)
2027 return NULL;
2029 /* Find a correspondingly-sized integer field, so we can apply
2030 shifts and masks to it. */
2031 scalar_int_mode int_mode;
2032 if (!int_mode_for_mode (tmode).exists (&int_mode))
2033 /* If this fails, we should probably push op0 out to memory and then
2034 do a load. */
2035 int_mode = int_mode_for_mode (mode).require ();
2037 target = extract_fixed_bit_field (int_mode, op0, op0_mode, bitsize,
2038 bitnum, target, unsignedp, reverse);
2040 /* Complex values must be reversed piecewise, so we need to undo the global
2041 reversal, convert to the complex mode and reverse again. */
2042 if (reverse && COMPLEX_MODE_P (tmode))
2044 target = flip_storage_order (int_mode, target);
2045 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2046 target = flip_storage_order (tmode, target);
2048 else
2049 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2051 return target;
2054 /* Generate code to extract a bit-field from STR_RTX
2055 containing BITSIZE bits, starting at BITNUM,
2056 and put it in TARGET if possible (if TARGET is nonzero).
2057 Regardless of TARGET, we return the rtx for where the value is placed.
2059 STR_RTX is the structure containing the byte (a REG or MEM).
2060 UNSIGNEDP is nonzero if this is an unsigned bit field.
2061 MODE is the natural mode of the field value once extracted.
2062 TMODE is the mode the caller would like the value to have;
2063 but the value may be returned with type MODE instead.
2065 If REVERSE is true, the extraction is to be done in reverse order.
2067 If a TARGET is specified and we can store in it at no extra cost,
2068 we do so, and return TARGET.
2069 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
2070 if they are equally easy.
2072 If the result can be stored at TARGET, and ALT_RTL is non-NULL,
2073 then *ALT_RTL is set to TARGET (before legitimization). */
2076 extract_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
2077 int unsignedp, rtx target, machine_mode mode,
2078 machine_mode tmode, bool reverse, rtx *alt_rtl)
2080 machine_mode mode1;
2082 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
2083 if (maybe_ne (GET_MODE_BITSIZE (GET_MODE (str_rtx)), 0))
2084 mode1 = GET_MODE (str_rtx);
2085 else if (target && maybe_ne (GET_MODE_BITSIZE (GET_MODE (target)), 0))
2086 mode1 = GET_MODE (target);
2087 else
2088 mode1 = tmode;
2090 unsigned HOST_WIDE_INT ibitsize, ibitnum;
2091 scalar_int_mode int_mode;
2092 if (bitsize.is_constant (&ibitsize)
2093 && bitnum.is_constant (&ibitnum)
2094 && is_a <scalar_int_mode> (mode1, &int_mode)
2095 && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum,
2096 int_mode, 0, 0))
2098 /* Extraction of a full INT_MODE value can be done with a simple load.
2099 We know here that the field can be accessed with one single
2100 instruction. For targets that support unaligned memory,
2101 an unaligned access may be necessary. */
2102 if (ibitsize == GET_MODE_BITSIZE (int_mode))
2104 rtx result = adjust_bitfield_address (str_rtx, int_mode,
2105 ibitnum / BITS_PER_UNIT);
2106 if (reverse)
2107 result = flip_storage_order (int_mode, result);
2108 gcc_assert (ibitnum % BITS_PER_UNIT == 0);
2109 return convert_extracted_bit_field (result, mode, tmode, unsignedp);
2112 str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize, ibitnum,
2113 &ibitnum);
2114 gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
2115 str_rtx = copy_to_reg (str_rtx);
2116 return extract_bit_field_1 (str_rtx, ibitsize, ibitnum, unsignedp,
2117 target, mode, tmode, reverse, true, alt_rtl);
2120 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
2121 target, mode, tmode, reverse, true, alt_rtl);
2124 /* Use shifts and boolean operations to extract a field of BITSIZE bits
2125 from bit BITNUM of OP0. If OP0_MODE is defined, it is the mode of OP0,
2126 otherwise OP0 is a BLKmode MEM.
2128 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
2129 If REVERSE is true, the extraction is to be done in reverse order.
2131 If TARGET is nonzero, attempts to store the value there
2132 and return TARGET, but this is not guaranteed.
2133 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
2135 static rtx
2136 extract_fixed_bit_field (machine_mode tmode, rtx op0,
2137 opt_scalar_int_mode op0_mode,
2138 unsigned HOST_WIDE_INT bitsize,
2139 unsigned HOST_WIDE_INT bitnum, rtx target,
2140 int unsignedp, bool reverse)
2142 scalar_int_mode mode;
2143 if (MEM_P (op0))
2145 if (!get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
2146 BITS_PER_WORD, MEM_VOLATILE_P (op0), &mode))
2147 /* The only way this should occur is if the field spans word
2148 boundaries. */
2149 return extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
2150 unsignedp, reverse);
2152 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
2154 else
2155 mode = op0_mode.require ();
2157 return extract_fixed_bit_field_1 (tmode, op0, mode, bitsize, bitnum,
2158 target, unsignedp, reverse);
2161 /* Helper function for extract_fixed_bit_field, extracts
2162 the bit field always using MODE, which is the mode of OP0.
2163 The other arguments are as for extract_fixed_bit_field. */
2165 static rtx
2166 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0, scalar_int_mode mode,
2167 unsigned HOST_WIDE_INT bitsize,
2168 unsigned HOST_WIDE_INT bitnum, rtx target,
2169 int unsignedp, bool reverse)
2171 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
2172 for invalid input, such as extract equivalent of f5 from
2173 gcc.dg/pr48335-2.c. */
2175 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2176 /* BITNUM is the distance between our msb and that of OP0.
2177 Convert it to the distance from the lsb. */
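/* For instance, a 5-bit field at big-endian BITNUM 3 in a 32-bit mode
   ends up at little-endian bit position 32 - 5 - 3 = 24.  */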
2178 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
2180 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
2181 We have reduced the big-endian case to the little-endian case. */
2182 if (reverse)
2183 op0 = flip_storage_order (mode, op0);
2185 if (unsignedp)
2187 if (bitnum)
2189 /* If the field does not already start at the lsb,
2190 shift it so it does. */
2191 /* Maybe propagate the target for the shift. */
2192 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2193 if (tmode != mode)
2194 subtarget = 0;
2195 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
2197 /* Convert the value to the desired mode. TMODE must also be a
2198 scalar integer for this conversion to make sense, since we
2199 shouldn't reinterpret the bits. */
2200 scalar_int_mode new_mode = as_a <scalar_int_mode> (tmode);
2201 if (mode != new_mode)
2202 op0 = convert_to_mode (new_mode, op0, 1);
2204 /* Unless the msb of the field used to be the msb when we shifted,
2205 mask out the upper bits. */
2207 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
2208 return expand_binop (new_mode, and_optab, op0,
2209 mask_rtx (new_mode, 0, bitsize, 0),
2210 target, 1, OPTAB_LIB_WIDEN);
2211 return op0;
2214 /* To extract a signed bit-field, first shift its msb to the msb of the word,
2215 then arithmetic-shift its lsb to the lsb of the word. */
2216 op0 = force_reg (mode, op0);
2218 /* Find the narrowest integer mode that contains the field. */
2220 opt_scalar_int_mode mode_iter;
2221 FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT)
2222 if (GET_MODE_BITSIZE (mode_iter.require ()) >= bitsize + bitnum)
2223 break;
2225 mode = mode_iter.require ();
2226 op0 = convert_to_mode (mode, op0, 0);
2228 if (mode != tmode)
2229 target = 0;
2231 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2233 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2234 /* Maybe propagate the target for the shift. */
2235 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2236 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2239 return expand_shift (RSHIFT_EXPR, mode, op0,
2240 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
2243 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2244 VALUE << BITPOS. */
2246 static rtx
2247 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2248 int bitpos)
2250 return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2253 /* Extract a bit field that is split across two words
2254 and return an RTX for the result.
2256 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2257 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2258 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2259 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
2260 a BLKmode MEM.
2262 If REVERSE is true, the extraction is to be done in reverse order. */
2264 static rtx
2265 extract_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
2266 unsigned HOST_WIDE_INT bitsize,
2267 unsigned HOST_WIDE_INT bitpos, int unsignedp,
2268 bool reverse)
2270 unsigned int unit;
2271 unsigned int bitsdone = 0;
2272 rtx result = NULL_RTX;
2273 int first = 1;
2275 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2276 much at a time. */
2277 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2278 unit = BITS_PER_WORD;
2279 else
2280 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2282 while (bitsdone < bitsize)
2284 unsigned HOST_WIDE_INT thissize;
2285 rtx part;
2286 unsigned HOST_WIDE_INT thispos;
2287 unsigned HOST_WIDE_INT offset;
2289 offset = (bitpos + bitsdone) / unit;
2290 thispos = (bitpos + bitsdone) % unit;
2292 /* THISSIZE must not overrun a word boundary. Otherwise,
2293 extract_fixed_bit_field will call us again, and we will mutually
2294 recurse forever. */
2295 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2296 thissize = MIN (thissize, unit - thispos);
2298 /* If OP0 is a register, then handle OFFSET here. */
2299 rtx op0_piece = op0;
2300 opt_scalar_int_mode op0_piece_mode = op0_mode;
2301 if (SUBREG_P (op0) || REG_P (op0))
2303 op0_piece = operand_subword_force (op0, offset, op0_mode.require ());
2304 op0_piece_mode = word_mode;
2305 offset = 0;
2308 /* Extract the parts in bit-counting order,
2309 whose meaning is determined by BYTES_PER_UNIT.
2310 OFFSET is in UNITs, and UNIT is in bits. */
2311 part = extract_fixed_bit_field (word_mode, op0_piece, op0_piece_mode,
2312 thissize, offset * unit + thispos,
2313 0, 1, reverse);
2314 bitsdone += thissize;
2316 /* Shift this part into place for the result. */
2317 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2319 if (bitsize != bitsdone)
2320 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2321 bitsize - bitsdone, 0, 1);
2323 else
2325 if (bitsdone != thissize)
2326 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2327 bitsdone - thissize, 0, 1);
2330 if (first)
2331 result = part;
2332 else
2333 /* Combine the parts with bitwise or. This works
2334 because we extracted each part as an unsigned bit field. */
2335 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2336 OPTAB_LIB_WIDEN);
2338 first = 0;
2341 /* Unsigned bit field: we are done. */
2342 if (unsignedp)
2343 return result;
2344 /* Signed bit field: sign-extend with two arithmetic shifts. */
2345 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2346 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2347 return expand_shift (RSHIFT_EXPR, word_mode, result,
2348 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2351 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2352 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2353 MODE, fill the upper bits with zeros. Fail if the layout of either
2354 mode is unknown (as for CC modes) or if the extraction would involve
2355 unprofitable mode punning. Return the value on success, otherwise
2356 return null.
2358 This is different from gen_lowpart* in these respects:
2360 - the returned value must always be considered an rvalue
2362 - when MODE is wider than SRC_MODE, the extraction involves
2363 a zero extension
2365 - when MODE is smaller than SRC_MODE, the extraction involves
2366 a truncation (and is thus subject to TARGET_TRULY_NOOP_TRUNCATION).
2368 In other words, this routine performs a computation, whereas the
2369 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2370 operations. */
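/* As an illustration, extracting the SImode low bits of a DImode SRC keeps
   only the low 32 bits (a truncation), while extracting DImode from an
   SImode SRC zero-extends the 32-bit value to 64 bits.  */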
2373 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2375 scalar_int_mode int_mode, src_int_mode;
2377 if (mode == src_mode)
2378 return src;
2380 if (CONSTANT_P (src))
2382 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2383 fails, it will happily create (subreg (symbol_ref)) or similar
2384 invalid SUBREGs. */
2385 poly_uint64 byte = subreg_lowpart_offset (mode, src_mode);
2386 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2387 if (ret)
2388 return ret;
2390 if (GET_MODE (src) == VOIDmode
2391 || !validate_subreg (mode, src_mode, src, byte))
2392 return NULL_RTX;
2394 src = force_reg (GET_MODE (src), src);
2395 return gen_rtx_SUBREG (mode, src, byte);
2398 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2399 return NULL_RTX;
2401 if (known_eq (GET_MODE_BITSIZE (mode), GET_MODE_BITSIZE (src_mode))
2402 && targetm.modes_tieable_p (mode, src_mode))
2404 rtx x = gen_lowpart_common (mode, src);
2405 if (x)
2406 return x;
2409 if (!int_mode_for_mode (src_mode).exists (&src_int_mode)
2410 || !int_mode_for_mode (mode).exists (&int_mode))
2411 return NULL_RTX;
2413 if (!targetm.modes_tieable_p (src_int_mode, src_mode))
2414 return NULL_RTX;
2415 if (!targetm.modes_tieable_p (int_mode, mode))
2416 return NULL_RTX;
2418 src = gen_lowpart (src_int_mode, src);
2419 if (!validate_subreg (int_mode, src_int_mode, src,
2420 subreg_lowpart_offset (int_mode, src_int_mode)))
2421 return NULL_RTX;
2423 src = convert_modes (int_mode, src_int_mode, src, true);
2424 src = gen_lowpart (mode, src);
2425 return src;
2428 /* Add INC into TARGET. */
2430 void
2431 expand_inc (rtx target, rtx inc)
2433 rtx value = expand_binop (GET_MODE (target), add_optab,
2434 target, inc,
2435 target, 0, OPTAB_LIB_WIDEN);
2436 if (value != target)
2437 emit_move_insn (target, value);
2440 /* Subtract DEC from TARGET. */
2442 void
2443 expand_dec (rtx target, rtx dec)
2445 rtx value = expand_binop (GET_MODE (target), sub_optab,
2446 target, dec,
2447 target, 0, OPTAB_LIB_WIDEN);
2448 if (value != target)
2449 emit_move_insn (target, value);
2452 /* Output a shift instruction for expression code CODE,
2453 with SHIFTED being the rtx for the value to shift,
2454 and AMOUNT the rtx for the amount to shift by.
2455 Store the result in the rtx TARGET, if that is convenient.
2456 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2457 Return the rtx for where the value is.
2458 If that cannot be done, abort the compilation unless MAY_FAIL is true,
2459 in which case 0 is returned. */
2461 static rtx
2462 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2463 rtx amount, rtx target, int unsignedp, bool may_fail = false)
2465 rtx op1, temp = 0;
2466 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2467 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2468 optab lshift_optab = ashl_optab;
2469 optab rshift_arith_optab = ashr_optab;
2470 optab rshift_uns_optab = lshr_optab;
2471 optab lrotate_optab = rotl_optab;
2472 optab rrotate_optab = rotr_optab;
2473 machine_mode op1_mode;
2474 scalar_mode scalar_mode = GET_MODE_INNER (mode);
2475 int attempt;
2476 bool speed = optimize_insn_for_speed_p ();
2478 op1 = amount;
2479 op1_mode = GET_MODE (op1);
2481 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2482 shift amount is a vector, use the vector/vector shift patterns. */
2483 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2485 lshift_optab = vashl_optab;
2486 rshift_arith_optab = vashr_optab;
2487 rshift_uns_optab = vlshr_optab;
2488 lrotate_optab = vrotl_optab;
2489 rrotate_optab = vrotr_optab;
2492 /* Previously detected shift-counts computed by NEGATE_EXPR
2493 and shifted in the other direction; but that does not work
2494 on all machines. */
2496 if (SHIFT_COUNT_TRUNCATED)
2498 if (CONST_INT_P (op1)
2499 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2500 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2501 op1 = gen_int_shift_amount (mode,
2502 (unsigned HOST_WIDE_INT) INTVAL (op1)
2503 % GET_MODE_BITSIZE (scalar_mode));
2504 else if (GET_CODE (op1) == SUBREG
2505 && subreg_lowpart_p (op1)
2506 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2507 && SCALAR_INT_MODE_P (GET_MODE (op1)))
2508 op1 = SUBREG_REG (op1);
2511 /* Canonicalize rotates by constant amount. If op1 is bitsize / 2,
2512 prefer left rotation, if op1 is from bitsize / 2 + 1 to
2513 bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2514 amount instead. */
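/* For example, in a 32-bit mode a rotate left by 24 is rewritten as a
   rotate right by 8, and vice versa.  */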
2515 if (rotate
2516 && CONST_INT_P (op1)
2517 && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2518 GET_MODE_BITSIZE (scalar_mode) - 1))
2520 op1 = gen_int_shift_amount (mode, (GET_MODE_BITSIZE (scalar_mode)
2521 - INTVAL (op1)));
2522 left = !left;
2523 code = left ? LROTATE_EXPR : RROTATE_EXPR;
2526 /* Rotation of 16-bit values by 8 bits is effectively equivalent to a bswaphi.
2527 Note that this is not the case for bigger values. For instance a rotation
2528 of 0x01020304 by 16 bits gives 0x03040102 which is different from
2529 0x04030201 (bswapsi). */
2530 if (rotate
2531 && CONST_INT_P (op1)
2532 && INTVAL (op1) == BITS_PER_UNIT
2533 && GET_MODE_SIZE (scalar_mode) == 2
2534 && optab_handler (bswap_optab, mode) != CODE_FOR_nothing)
2535 return expand_unop (mode, bswap_optab, shifted, NULL_RTX, unsignedp);
2537 if (op1 == const0_rtx)
2538 return shifted;
2540 /* Check whether it's cheaper to implement a left shift by a constant
2541 bit count by a sequence of additions. */
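/* E.g. X << 2 can be computed as two self-additions,
     t = x + x;  t = t + t;
   which wins when two adds cost less than one shift by 2.  */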
2542 if (code == LSHIFT_EXPR
2543 && CONST_INT_P (op1)
2544 && INTVAL (op1) > 0
2545 && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2546 && INTVAL (op1) < MAX_BITS_PER_WORD
2547 && (shift_cost (speed, mode, INTVAL (op1))
2548 > INTVAL (op1) * add_cost (speed, mode))
2549 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2551 int i;
2552 for (i = 0; i < INTVAL (op1); i++)
2554 temp = force_reg (mode, shifted);
2555 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2556 unsignedp, OPTAB_LIB_WIDEN);
2558 return shifted;
2561 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2563 enum optab_methods methods;
2565 if (attempt == 0)
2566 methods = OPTAB_DIRECT;
2567 else if (attempt == 1)
2568 methods = OPTAB_WIDEN;
2569 else
2570 methods = OPTAB_LIB_WIDEN;
2572 if (rotate)
2574 /* Widening does not work for rotation. */
2575 if (methods == OPTAB_WIDEN)
2576 continue;
2577 else if (methods == OPTAB_LIB_WIDEN)
2579 /* If we have been unable to open-code this by a rotation,
2580 do it as the IOR of two shifts. I.e., to rotate A
2581 by N bits, compute
2582 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2583 where C is the bitsize of A.
2585 It is theoretically possible that the target machine might
2586 not be able to perform either shift and hence we would
2587 be making two libcalls rather than just the one for the
2588 shift (similarly if IOR could not be done). We will allow
2589 this extremely unlikely lossage to avoid complicating the
2590 code below. */
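/* For instance, rotating a 32-bit A left by 8 becomes
     (A << 8) | ((unsigned) A >> 24)
   since ((-8) & (32 - 1)) == 24.  */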
2592 rtx subtarget = target == shifted ? 0 : target;
2593 rtx new_amount, other_amount;
2594 rtx temp1;
2596 new_amount = op1;
2597 if (op1 == const0_rtx)
2598 return shifted;
2599 else if (CONST_INT_P (op1))
2600 other_amount = gen_int_shift_amount
2601 (mode, GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2602 else
2604 other_amount
2605 = simplify_gen_unary (NEG, GET_MODE (op1),
2606 op1, GET_MODE (op1));
2607 HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2608 other_amount
2609 = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2610 gen_int_mode (mask, GET_MODE (op1)));
2613 shifted = force_reg (mode, shifted);
2615 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2616 mode, shifted, new_amount, 0, 1);
2617 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2618 mode, shifted, other_amount,
2619 subtarget, 1);
2620 return expand_binop (mode, ior_optab, temp, temp1, target,
2621 unsignedp, methods);
2624 temp = expand_binop (mode,
2625 left ? lrotate_optab : rrotate_optab,
2626 shifted, op1, target, unsignedp, methods);
2628 else if (unsignedp)
2629 temp = expand_binop (mode,
2630 left ? lshift_optab : rshift_uns_optab,
2631 shifted, op1, target, unsignedp, methods);
2633 /* Do arithmetic shifts.
2634 Also, if we are going to widen the operand, we can just as well
2635 use an arithmetic right-shift instead of a logical one. */
2636 if (temp == 0 && ! rotate
2637 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2639 enum optab_methods methods1 = methods;
2641 /* If trying to widen a log shift to an arithmetic shift,
2642 don't accept an arithmetic shift of the same size. */
2643 if (unsignedp)
2644 methods1 = OPTAB_MUST_WIDEN;
2646 /* Arithmetic shift */
2648 temp = expand_binop (mode,
2649 left ? lshift_optab : rshift_arith_optab,
2650 shifted, op1, target, unsignedp, methods1);
2653 /* We used to try extzv here for logical right shifts, but that was
2654 only useful for one machine, the VAX, and caused poor code
2655 generation there for lshrdi3, so the code was deleted and a
2656 define_expand for lshrsi3 was added to vax.md. */
2659 gcc_assert (temp != NULL_RTX || may_fail);
2660 return temp;
2663 /* Output a shift instruction for expression code CODE,
2664 with SHIFTED being the rtx for the value to shift,
2665 and AMOUNT the amount to shift by.
2666 Store the result in the rtx TARGET, if that is convenient.
2667 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2668 Return the rtx for where the value is. */
2671 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2672 poly_int64 amount, rtx target, int unsignedp)
2674 return expand_shift_1 (code, mode, shifted,
2675 gen_int_shift_amount (mode, amount),
2676 target, unsignedp);
2679 /* Likewise, but return 0 if that cannot be done. */
2681 static rtx
2682 maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2683 int amount, rtx target, int unsignedp)
2685 return expand_shift_1 (code, mode,
2686 shifted, GEN_INT (amount), target, unsignedp, true);
2689 /* Output a shift instruction for expression code CODE,
2690 with SHIFTED being the rtx for the value to shift,
2691 and AMOUNT the tree for the amount to shift by.
2692 Store the result in the rtx TARGET, if that is convenient.
2693 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2694 Return the rtx for where the value is. */
2697 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2698 tree amount, rtx target, int unsignedp)
2700 return expand_shift_1 (code, mode,
2701 shifted, expand_normal (amount), target, unsignedp);
2705 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2706 const struct mult_cost *, machine_mode mode);
2707 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2708 const struct algorithm *, enum mult_variant);
2709 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2710 static rtx extract_high_half (scalar_int_mode, rtx);
2711 static rtx expmed_mult_highpart (scalar_int_mode, rtx, rtx, rtx, int, int);
2712 static rtx expmed_mult_highpart_optab (scalar_int_mode, rtx, rtx, rtx,
2713 int, int);
2714 /* Compute and return the best algorithm for multiplying by T.
2715 The algorithm must cost less than cost_limit.
2716 If retval.cost >= COST_LIMIT, no algorithm was found and all
2717 other fields of the returned struct are undefined.
2718 MODE is the machine mode of the multiplication. */
2720 static void
2721 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2722 const struct mult_cost *cost_limit, machine_mode mode)
2724 int m;
2725 struct algorithm *alg_in, *best_alg;
2726 struct mult_cost best_cost;
2727 struct mult_cost new_limit;
2728 int op_cost, op_latency;
2729 unsigned HOST_WIDE_INT orig_t = t;
2730 unsigned HOST_WIDE_INT q;
2731 int maxm, hash_index;
2732 bool cache_hit = false;
2733 enum alg_code cache_alg = alg_zero;
2734 bool speed = optimize_insn_for_speed_p ();
2735 scalar_int_mode imode;
2736 struct alg_hash_entry *entry_ptr;
2738 /* Indicate that no algorithm is yet found. If no algorithm
2739 is found, this value will be returned and indicate failure. */
2740 alg_out->cost.cost = cost_limit->cost + 1;
2741 alg_out->cost.latency = cost_limit->latency + 1;
2743 if (cost_limit->cost < 0
2744 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2745 return;
2747 /* Be prepared for vector modes. */
2748 imode = as_a <scalar_int_mode> (GET_MODE_INNER (mode));
2750 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2752 /* Restrict the bits of "t" to the multiplication's mode. */
2753 t &= GET_MODE_MASK (imode);
2755 /* t == 1 can be done in zero cost. */
2756 if (t == 1)
2758 alg_out->ops = 1;
2759 alg_out->cost.cost = 0;
2760 alg_out->cost.latency = 0;
2761 alg_out->op[0] = alg_m;
2762 return;
2765 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2766 fail now. */
2767 if (t == 0)
2769 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2770 return;
2771 else
2773 alg_out->ops = 1;
2774 alg_out->cost.cost = zero_cost (speed);
2775 alg_out->cost.latency = zero_cost (speed);
2776 alg_out->op[0] = alg_zero;
2777 return;
2781 /* We'll be needing a couple extra algorithm structures now. */
2783 alg_in = XALLOCA (struct algorithm);
2784 best_alg = XALLOCA (struct algorithm);
2785 best_cost = *cost_limit;
2787 /* Compute the hash index. */
2788 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2790 /* See if we already know what to do for T. */
2791 entry_ptr = alg_hash_entry_ptr (hash_index);
2792 if (entry_ptr->t == t
2793 && entry_ptr->mode == mode
2794 && entry_ptr->speed == speed
2795 && entry_ptr->alg != alg_unknown)
2797 cache_alg = entry_ptr->alg;
2799 if (cache_alg == alg_impossible)
2801 /* The cache tells us that it's impossible to synthesize
2802 multiplication by T within entry_ptr->cost. */
2803 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2804 /* COST_LIMIT is at least as restrictive as the one
2805 recorded in the hash table, in which case we have no
2806 hope of synthesizing a multiplication. Just
2807 return. */
2808 return;
2810 /* If we get here, COST_LIMIT is less restrictive than the
2811 one recorded in the hash table, so we may be able to
2812 synthesize a multiplication. Proceed as if we didn't
2813 have the cache entry. */
2815 else
2817 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2818 /* The cached algorithm shows that this multiplication
2819 requires more cost than COST_LIMIT. Just return. This
2820 way, we don't clobber this cache entry with
2821 alg_impossible but retain useful information. */
2822 return;
2824 cache_hit = true;
2826 switch (cache_alg)
2828 case alg_shift:
2829 goto do_alg_shift;
2831 case alg_add_t_m2:
2832 case alg_sub_t_m2:
2833 goto do_alg_addsub_t_m2;
2835 case alg_add_factor:
2836 case alg_sub_factor:
2837 goto do_alg_addsub_factor;
2839 case alg_add_t2_m:
2840 goto do_alg_add_t2_m;
2842 case alg_sub_t2_m:
2843 goto do_alg_sub_t2_m;
2845 default:
2846 gcc_unreachable ();
2851 /* If we have a group of zero bits at the low-order part of T, try
2852 multiplying by the remaining bits and then doing a shift. */
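/* E.g. T == 40 (binary 101000) is handled as a multiplication by 5
   (say X + (X << 2)) followed by a left shift by 3.  */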
2854 if ((t & 1) == 0)
2856 do_alg_shift:
2857 m = ctz_or_zero (t); /* m = number of low zero bits */
2858 if (m < maxm)
2860 q = t >> m;
2861 /* The function expand_shift will choose between a shift and
2862 a sequence of additions, so the observed cost is given as
2863 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2864 op_cost = m * add_cost (speed, mode);
2865 if (shift_cost (speed, mode, m) < op_cost)
2866 op_cost = shift_cost (speed, mode, m);
2867 new_limit.cost = best_cost.cost - op_cost;
2868 new_limit.latency = best_cost.latency - op_cost;
2869 synth_mult (alg_in, q, &new_limit, mode);
2871 alg_in->cost.cost += op_cost;
2872 alg_in->cost.latency += op_cost;
2873 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2875 best_cost = alg_in->cost;
2876 std::swap (alg_in, best_alg);
2877 best_alg->log[best_alg->ops] = m;
2878 best_alg->op[best_alg->ops] = alg_shift;
2881 /* See if treating ORIG_T as a signed number yields a better
2882 sequence. Try this sequence only for a negative ORIG_T
2883 as it would be useless for a non-negative ORIG_T. */
2884 if ((HOST_WIDE_INT) orig_t < 0)
2886 /* Shift ORIG_T as follows because a right shift of a
2887 negative-valued signed type is implementation
2888 defined. */
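/* E.g. for ORIG_T == -24 and M == 3 this gives Q == -3, the value of
   ORIG_T >> 3, without ever right-shifting a negative value;
   and indeed (-3) << 3 == -24.  */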
2889 q = ~(~orig_t >> m);
2890 /* The function expand_shift will choose between a shift
2891 and a sequence of additions, so the observed cost is
2892 given as MIN (m * add_cost(speed, mode),
2893 shift_cost(speed, mode, m)). */
2894 op_cost = m * add_cost (speed, mode);
2895 if (shift_cost (speed, mode, m) < op_cost)
2896 op_cost = shift_cost (speed, mode, m);
2897 new_limit.cost = best_cost.cost - op_cost;
2898 new_limit.latency = best_cost.latency - op_cost;
2899 synth_mult (alg_in, q, &new_limit, mode);
2901 alg_in->cost.cost += op_cost;
2902 alg_in->cost.latency += op_cost;
2903 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2905 best_cost = alg_in->cost;
2906 std::swap (alg_in, best_alg);
2907 best_alg->log[best_alg->ops] = m;
2908 best_alg->op[best_alg->ops] = alg_shift;
2912 if (cache_hit)
2913 goto done;
2916 /* If we have an odd number, add or subtract one. */
2917 if ((t & 1) != 0)
2919 unsigned HOST_WIDE_INT w;
2921 do_alg_addsub_t_m2:
2922 for (w = 1; (w & t) != 0; w <<= 1)
2924 /* If T was -1, then W will be zero after the loop. This is another
2925 case where T ends with ...111. Handling this with (T + 1) and
2926 subtract 1 produces slightly better code and results in algorithm
2927 selection much faster than treating it like the ...0111 case
2928 below. */
2929 if (w == 0
2930 || (w > 2
2931 /* Reject the case where t is 3.
2932 Thus we prefer addition in that case. */
2933 && t != 3))
2935 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
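/* E.g. 7*X is computed as the sequence for 8*X, a left shift by 3,
   followed by one subtraction of X: (X << 3) - X.  */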
2937 op_cost = add_cost (speed, mode);
2938 new_limit.cost = best_cost.cost - op_cost;
2939 new_limit.latency = best_cost.latency - op_cost;
2940 synth_mult (alg_in, t + 1, &new_limit, mode);
2942 alg_in->cost.cost += op_cost;
2943 alg_in->cost.latency += op_cost;
2944 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2946 best_cost = alg_in->cost;
2947 std::swap (alg_in, best_alg);
2948 best_alg->log[best_alg->ops] = 0;
2949 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2952 else
2954 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2956 op_cost = add_cost (speed, mode);
2957 new_limit.cost = best_cost.cost - op_cost;
2958 new_limit.latency = best_cost.latency - op_cost;
2959 synth_mult (alg_in, t - 1, &new_limit, mode);
2961 alg_in->cost.cost += op_cost;
2962 alg_in->cost.latency += op_cost;
2963 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2965 best_cost = alg_in->cost;
2966 std::swap (alg_in, best_alg);
2967 best_alg->log[best_alg->ops] = 0;
2968 best_alg->op[best_alg->ops] = alg_add_t_m2;
2972 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2973 quickly with a - a * n for some appropriate constant n. */
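/* E.g. a * -7 == a - (a << 3); here -ORIG_T + 1 == 8, so M == 3.  */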
2974 m = exact_log2 (-orig_t + 1);
2975 if (m >= 0 && m < maxm)
2977 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2978 /* If the target has a cheap shift-and-subtract insn use
2979 that in preference to a shift insn followed by a sub insn.
2980 Assume that the shift-and-sub is "atomic" with a latency
2981 equal to its cost, otherwise assume that on superscalar
2982 hardware the shift may be executed concurrently with the
2983 earlier steps in the algorithm. */
2984 if (shiftsub1_cost (speed, mode, m) <= op_cost)
2986 op_cost = shiftsub1_cost (speed, mode, m);
2987 op_latency = op_cost;
2989 else
2990 op_latency = add_cost (speed, mode);
2992 new_limit.cost = best_cost.cost - op_cost;
2993 new_limit.latency = best_cost.latency - op_latency;
2994 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2995 &new_limit, mode);
2997 alg_in->cost.cost += op_cost;
2998 alg_in->cost.latency += op_latency;
2999 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3001 best_cost = alg_in->cost;
3002 std::swap (alg_in, best_alg);
3003 best_alg->log[best_alg->ops] = m;
3004 best_alg->op[best_alg->ops] = alg_sub_t_m2;
3008 if (cache_hit)
3009 goto done;
3012 /* Look for factors of t of the form
3013 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
3014 If we find such a factor, we can multiply by t using an algorithm that
3015 multiplies by q, shift the result by m and add/subtract it to itself.
3017 We search for large factors first and loop down, even if large factors
3018 are less probable than small; if we find a large factor we will find a
3019 good sequence quickly, and therefore be able to prune (by decreasing
3020 COST_LIMIT) the search. */
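/* E.g. T == 45 factors as 5 * (2**3 + 1): first synthesize Y = X * 5
   (say (X << 2) + X), then compute (Y << 3) + Y.  */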
3022 do_alg_addsub_factor:
3023 for (m = floor_log2 (t - 1); m >= 2; m--)
3025 unsigned HOST_WIDE_INT d;
3027 d = (HOST_WIDE_INT_1U << m) + 1;
3028 if (t % d == 0 && t > d && m < maxm
3029 && (!cache_hit || cache_alg == alg_add_factor))
3031 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3032 if (shiftadd_cost (speed, mode, m) <= op_cost)
3033 op_cost = shiftadd_cost (speed, mode, m);
3035 op_latency = op_cost;
3038 new_limit.cost = best_cost.cost - op_cost;
3039 new_limit.latency = best_cost.latency - op_latency;
3040 synth_mult (alg_in, t / d, &new_limit, mode);
3042 alg_in->cost.cost += op_cost;
3043 alg_in->cost.latency += op_latency;
3044 if (alg_in->cost.latency < op_cost)
3045 alg_in->cost.latency = op_cost;
3046 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3048 best_cost = alg_in->cost;
3049 std::swap (alg_in, best_alg);
3050 best_alg->log[best_alg->ops] = m;
3051 best_alg->op[best_alg->ops] = alg_add_factor;
3053 /* Other factors will have been taken care of in the recursion. */
3054 break;
3057 d = (HOST_WIDE_INT_1U << m) - 1;
3058 if (t % d == 0 && t > d && m < maxm
3059 && (!cache_hit || cache_alg == alg_sub_factor))
3061 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3062 if (shiftsub0_cost (speed, mode, m) <= op_cost)
3063 op_cost = shiftsub0_cost (speed, mode, m);
3065 op_latency = op_cost;
3067 new_limit.cost = best_cost.cost - op_cost;
3068 new_limit.latency = best_cost.latency - op_latency;
3069 synth_mult (alg_in, t / d, &new_limit, mode);
3071 alg_in->cost.cost += op_cost;
3072 alg_in->cost.latency += op_latency;
3073 if (alg_in->cost.latency < op_cost)
3074 alg_in->cost.latency = op_cost;
3075 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3077 best_cost = alg_in->cost;
3078 std::swap (alg_in, best_alg);
3079 best_alg->log[best_alg->ops] = m;
3080 best_alg->op[best_alg->ops] = alg_sub_factor;
3082 break;
3085 if (cache_hit)
3086 goto done;
3088 /* Try shift-and-add (load effective address) instructions,
3089 i.e. do a*3, a*5, a*9. */
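/* E.g. a*9 is a single shift-and-add, (a << 3) + a, which many targets
   can do in one instruction (an x86 lea, for instance).  */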
3090 if ((t & 1) != 0)
3092 do_alg_add_t2_m:
3093 q = t - 1;
3094 m = ctz_hwi (q);
3095 if (q && m < maxm)
3097 op_cost = shiftadd_cost (speed, mode, m);
3098 new_limit.cost = best_cost.cost - op_cost;
3099 new_limit.latency = best_cost.latency - op_cost;
3100 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
3102 alg_in->cost.cost += op_cost;
3103 alg_in->cost.latency += op_cost;
3104 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3106 best_cost = alg_in->cost;
3107 std::swap (alg_in, best_alg);
3108 best_alg->log[best_alg->ops] = m;
3109 best_alg->op[best_alg->ops] = alg_add_t2_m;
3112 if (cache_hit)
3113 goto done;
3115 do_alg_sub_t2_m:
3116 q = t + 1;
3117 m = ctz_hwi (q);
3118 if (q && m < maxm)
3120 op_cost = shiftsub0_cost (speed, mode, m);
3121 new_limit.cost = best_cost.cost - op_cost;
3122 new_limit.latency = best_cost.latency - op_cost;
3123 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
3125 alg_in->cost.cost += op_cost;
3126 alg_in->cost.latency += op_cost;
3127 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3129 best_cost = alg_in->cost;
3130 std::swap (alg_in, best_alg);
3131 best_alg->log[best_alg->ops] = m;
3132 best_alg->op[best_alg->ops] = alg_sub_t2_m;
3135 if (cache_hit)
3136 goto done;
3139 done:
3140 /* If best_cost has not decreased, we have not found any algorithm. */
3141 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
3143 /* We failed to find an algorithm. Record alg_impossible for
3144 this case (that is, <T, MODE, COST_LIMIT>) so that next time
3145 we are asked to find an algorithm for T within the same or
3146 lower COST_LIMIT, we can immediately return to the
3147 caller. */
3148 entry_ptr->t = t;
3149 entry_ptr->mode = mode;
3150 entry_ptr->speed = speed;
3151 entry_ptr->alg = alg_impossible;
3152 entry_ptr->cost = *cost_limit;
3153 return;
3156 /* Cache the result. */
3157 if (!cache_hit)
3159 entry_ptr->t = t;
3160 entry_ptr->mode = mode;
3161 entry_ptr->speed = speed;
3162 entry_ptr->alg = best_alg->op[best_alg->ops];
3163 entry_ptr->cost.cost = best_cost.cost;
3164 entry_ptr->cost.latency = best_cost.latency;
3167 /* If we are getting a too long sequence for `struct algorithm'
3168 to record, make this search fail. */
3169 if (best_alg->ops == MAX_BITS_PER_WORD)
3170 return;
3172 /* Copy the algorithm from temporary space to the space at alg_out.
3173 We avoid using structure assignment because the majority of
3174 best_alg is normally undefined, and this is a critical function. */
3175 alg_out->ops = best_alg->ops + 1;
3176 alg_out->cost = best_cost;
3177 memcpy (alg_out->op, best_alg->op,
3178 alg_out->ops * sizeof *alg_out->op);
3179 memcpy (alg_out->log, best_alg->log,
3180 alg_out->ops * sizeof *alg_out->log);
3183 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
3184 Try three variations:
3186 - a shift/add sequence based on VAL itself
3187 - a shift/add sequence based on -VAL, followed by a negation
3188 - a shift/add sequence based on VAL - 1, followed by an addition.
3190 Return true if the cheapest of these cost less than MULT_COST,
3191 describing the algorithm in *ALG and final fixup in *VARIANT. */
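/* For example, multiplying by -5 is typically cheapest as the sequence for
   5, (X << 2) + X, followed by a negation (negate_variant); the VAL - 1
   variant mainly helps with the large "magic" constants used for division
   by a constant.  */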
3193 bool
3194 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
3195 struct algorithm *alg, enum mult_variant *variant,
3196 int mult_cost)
3198 struct algorithm alg2;
3199 struct mult_cost limit;
3200 int op_cost;
3201 bool speed = optimize_insn_for_speed_p ();
3203 /* Fail quickly for impossible bounds. */
3204 if (mult_cost < 0)
3205 return false;
3207 /* Ensure that mult_cost provides a reasonable upper bound.
3208 Any constant multiplication can be performed with less
3209 than 2 * bits additions. */
3210 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
3211 if (mult_cost > op_cost)
3212 mult_cost = op_cost;
3214 *variant = basic_variant;
3215 limit.cost = mult_cost;
3216 limit.latency = mult_cost;
3217 synth_mult (alg, val, &limit, mode);
3219 /* This works only if the inverted value actually fits in an
3220 `unsigned int'. */
3221 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
3223 op_cost = neg_cost (speed, mode);
3224 if (MULT_COST_LESS (&alg->cost, mult_cost))
3226 limit.cost = alg->cost.cost - op_cost;
3227 limit.latency = alg->cost.latency - op_cost;
3229 else
3231 limit.cost = mult_cost - op_cost;
3232 limit.latency = mult_cost - op_cost;
3235 synth_mult (&alg2, -val, &limit, mode);
3236 alg2.cost.cost += op_cost;
3237 alg2.cost.latency += op_cost;
3238 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3239 *alg = alg2, *variant = negate_variant;
3242 /* This proves very useful for division-by-constant. */
3243 op_cost = add_cost (speed, mode);
3244 if (MULT_COST_LESS (&alg->cost, mult_cost))
3246 limit.cost = alg->cost.cost - op_cost;
3247 limit.latency = alg->cost.latency - op_cost;
3249 else
3251 limit.cost = mult_cost - op_cost;
3252 limit.latency = mult_cost - op_cost;
3255 synth_mult (&alg2, val - 1, &limit, mode);
3256 alg2.cost.cost += op_cost;
3257 alg2.cost.latency += op_cost;
3258 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3259 *alg = alg2, *variant = add_variant;
3261 return MULT_COST_LESS (&alg->cost, mult_cost);
3264 /* A subroutine of expand_mult, used for constant multiplications.
3265 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3266 convenient. Use the shift/add sequence described by ALG and apply
3267 the final fixup specified by VARIANT. */
3269 static rtx
3270 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
3271 rtx target, const struct algorithm *alg,
3272 enum mult_variant variant)
3274 unsigned HOST_WIDE_INT val_so_far;
3275 rtx_insn *insn;
3276 rtx accum, tem;
3277 int opno;
3278 machine_mode nmode;
3280 /* Avoid referencing memory over and over and invalid sharing
3281 on SUBREGs. */
3282 op0 = force_reg (mode, op0);
3284 /* ACCUM starts out either as OP0 or as a zero, depending on
3285 the first operation. */
3287 if (alg->op[0] == alg_zero)
3289 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3290 val_so_far = 0;
3292 else if (alg->op[0] == alg_m)
3294 accum = copy_to_mode_reg (mode, op0);
3295 val_so_far = 1;
3297 else
3298 gcc_unreachable ();
3300 for (opno = 1; opno < alg->ops; opno++)
3302 int log = alg->log[opno];
3303 rtx shift_subtarget = optimize ? 0 : accum;
3304 rtx add_target
3305 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3306 && !optimize)
3307 ? target : 0;
3308 rtx accum_target = optimize ? 0 : accum;
3309 rtx accum_inner;
3311 switch (alg->op[opno])
3313 case alg_shift:
3314 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3315 /* REG_EQUAL note will be attached to the following insn. */
3316 emit_move_insn (accum, tem);
3317 val_so_far <<= log;
3318 break;
3320 case alg_add_t_m2:
3321 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3322 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3323 add_target ? add_target : accum_target);
3324 val_so_far += HOST_WIDE_INT_1U << log;
3325 break;
3327 case alg_sub_t_m2:
3328 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3329 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3330 add_target ? add_target : accum_target);
3331 val_so_far -= HOST_WIDE_INT_1U << log;
3332 break;
3334 case alg_add_t2_m:
3335 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3336 log, shift_subtarget, 0);
3337 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3338 add_target ? add_target : accum_target);
3339 val_so_far = (val_so_far << log) + 1;
3340 break;
3342 case alg_sub_t2_m:
3343 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3344 log, shift_subtarget, 0);
3345 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3346 add_target ? add_target : accum_target);
3347 val_so_far = (val_so_far << log) - 1;
3348 break;
3350 case alg_add_factor:
3351 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3352 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3353 add_target ? add_target : accum_target);
3354 val_so_far += val_so_far << log;
3355 break;
3357 case alg_sub_factor:
3358 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3359 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3360 (add_target
3361 ? add_target : (optimize ? 0 : tem)));
3362 val_so_far = (val_so_far << log) - val_so_far;
3363 break;
3365 default:
3366 gcc_unreachable ();
3369 if (SCALAR_INT_MODE_P (mode))
3371 /* Write a REG_EQUAL note on the last insn so that we can cse
3372 multiplication sequences. Note that if ACCUM is a SUBREG,
3373 we've set the inner register and must properly indicate that. */
3374 tem = op0, nmode = mode;
3375 accum_inner = accum;
3376 if (GET_CODE (accum) == SUBREG)
3378 accum_inner = SUBREG_REG (accum);
3379 nmode = GET_MODE (accum_inner);
3380 tem = gen_lowpart (nmode, op0);
3383 /* Don't add a REG_EQUAL note if tem is a paradoxical SUBREG.
3384 In that case, only the low bits of accum would be guaranteed to
3385 be equal to the content of the REG_EQUAL note, the upper bits
3386 can be anything. */
3387 if (!paradoxical_subreg_p (tem))
3389 insn = get_last_insn ();
3390 wide_int wval_so_far
3391 = wi::uhwi (val_so_far,
3392 GET_MODE_PRECISION (as_a <scalar_mode> (nmode)));
3393 rtx c = immed_wide_int_const (wval_so_far, nmode);
3394 set_dst_reg_note (insn, REG_EQUAL, gen_rtx_MULT (nmode, tem, c),
3395 accum_inner);
3400 if (variant == negate_variant)
3402 val_so_far = -val_so_far;
3403 accum = expand_unop (mode, neg_optab, accum, target, 0);
3405 else if (variant == add_variant)
3407 val_so_far = val_so_far + 1;
3408 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3411 /* Compare only the bits of val and val_so_far that are significant
3412 in the result mode, to avoid sign-/zero-extension confusion. */
3413 nmode = GET_MODE_INNER (mode);
3414 val &= GET_MODE_MASK (nmode);
3415 val_so_far &= GET_MODE_MASK (nmode);
3416 gcc_assert (val == (HOST_WIDE_INT) val_so_far);
3418 return accum;
3421 /* Perform a multiplication and return an rtx for the result.
3422 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3423 TARGET is a suggestion for where to store the result (an rtx).
3425 We check specially for a constant integer as OP1.
3426 If you want this check for OP0 as well, then before calling
3427 you should swap the two operands if OP0 would be constant. */
3430 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3431 int unsignedp, bool no_libcall)
3433 enum mult_variant variant;
3434 struct algorithm algorithm;
3435 rtx scalar_op1;
3436 int max_cost;
3437 bool speed = optimize_insn_for_speed_p ();
3438 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3440 if (CONSTANT_P (op0))
3441 std::swap (op0, op1);
3443 /* For vectors, there are several simplifications that can be made if
3444 all elements of the vector constant are identical. */
3445 scalar_op1 = unwrap_const_vec_duplicate (op1);
3447 if (INTEGRAL_MODE_P (mode))
3449 rtx fake_reg;
3450 HOST_WIDE_INT coeff;
3451 bool is_neg;
3452 int mode_bitsize;
3454 if (op1 == CONST0_RTX (mode))
3455 return op1;
3456 if (op1 == CONST1_RTX (mode))
3457 return op0;
3458 if (op1 == CONSTM1_RTX (mode))
3459 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3460 op0, target, 0);
3462 if (do_trapv)
3463 goto skip_synth;
3465 /* If mode is an integer vector mode, check if the backend supports
3466 vector lshift (by scalar or vector) at all. If not, we can't use
3467 a synthesized multiply. */
3468 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3469 && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3470 && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3471 goto skip_synth;
3473 /* These are the operations that are potentially turned into
3474 a sequence of shifts and additions. */
3475 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3477 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3478 less than or equal in size to `unsigned int' this doesn't matter.
3479 If the mode is larger than `unsigned int', then synth_mult works
3480 only if the constant value exactly fits in an `unsigned int' without
3481 any truncation. This means that multiplying by negative values does
3482 not work; results are off by 2^32 on a 32 bit machine. */
3483 if (CONST_INT_P (scalar_op1))
3485 coeff = INTVAL (scalar_op1);
3486 is_neg = coeff < 0;
3488 #if TARGET_SUPPORTS_WIDE_INT
3489 else if (CONST_WIDE_INT_P (scalar_op1))
3490 #else
3491 else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3492 #endif
3494 int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode));
3495 /* Perfect power of 2 (other than 1, which is handled above). */
3496 if (shift > 0)
3497 return expand_shift (LSHIFT_EXPR, mode, op0,
3498 shift, target, unsignedp);
3499 else
3500 goto skip_synth;
3502 else
3503 goto skip_synth;
3505 /* We used to test optimize here, on the grounds that it's better to
3506 produce a smaller program when -O is not used. But this causes
3507 such a terrible slowdown sometimes that it seems better to always
3508 use synth_mult. */
3510 /* Special case powers of two. */
3511 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3512 && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3513 return expand_shift (LSHIFT_EXPR, mode, op0,
3514 floor_log2 (coeff), target, unsignedp);
3516 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3518 /* Attempt to handle multiplication of DImode values by negative
3519 coefficients, by performing the multiplication by a positive
3520 multiplier and then negating the result. */
3521 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3523 /* It's safe to use -coeff even for INT_MIN, as the
3524 result is interpreted as an unsigned coefficient.
3525 Exclude cost of op0 from max_cost to match the cost
3526 calculation of the synth_mult. */
3527 coeff = -(unsigned HOST_WIDE_INT) coeff;
3528 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3529 mode, speed)
3530 - neg_cost (speed, mode));
3531 if (max_cost <= 0)
3532 goto skip_synth;
3534 /* Special case powers of two. */
3535 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3537 rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3538 floor_log2 (coeff), target, unsignedp);
3539 return expand_unop (mode, neg_optab, temp, target, 0);
3542 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3543 max_cost))
3545 rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3546 &algorithm, variant);
3547 return expand_unop (mode, neg_optab, temp, target, 0);
3549 goto skip_synth;
3552 /* Exclude cost of op0 from max_cost to match the cost
3553 calculation of the synth_mult. */
3554 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3555 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3556 return expand_mult_const (mode, op0, coeff, target,
3557 &algorithm, variant);
3559 skip_synth:
3561 /* Expand x*2.0 as x+x. */
3562 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3563 && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
3565 op0 = force_reg (GET_MODE (op0), op0);
3566 return expand_binop (mode, add_optab, op0, op0,
3567 target, unsignedp,
3568 no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3571 /* This used to use umul_optab if unsigned, but for non-widening multiply
3572 there is no difference between signed and unsigned. */
3573 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3574 op0, op1, target, unsignedp,
3575 no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3576 gcc_assert (op0 || no_libcall);
3577 return op0;
3580 /* Return a cost estimate for multiplying a register by the given
3581 COEFFicient in the given MODE and SPEED. */
3584 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3586 int max_cost;
3587 struct algorithm algorithm;
3588 enum mult_variant variant;
3590 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3591 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3592 mode, speed);
3593 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3594 return algorithm.cost.cost;
3595 else
3596 return max_cost;
3599 /* Perform a widening multiplication and return an rtx for the result.
3600 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3601 TARGET is a suggestion for where to store the result (an rtx).
3602 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3603 or smul_widen_optab.
3605 We check specially for a constant integer as OP1, comparing the
3606 cost of a widening multiply against the cost of a sequence of shifts
3607 and adds. */
3610 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3611 int unsignedp, optab this_optab)
3613 bool speed = optimize_insn_for_speed_p ();
3614 rtx cop1;
3616 if (CONST_INT_P (op1)
3617 && GET_MODE (op0) != VOIDmode
3618 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3619 this_optab == umul_widen_optab))
3620 && CONST_INT_P (cop1)
3621 && (INTVAL (cop1) >= 0
3622 || HWI_COMPUTABLE_MODE_P (mode)))
3624 HOST_WIDE_INT coeff = INTVAL (cop1);
3625 int max_cost;
3626 enum mult_variant variant;
3627 struct algorithm algorithm;
3629 if (coeff == 0)
3630 return CONST0_RTX (mode);
3632 /* Special case powers of two. */
3633 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3635 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3636 return expand_shift (LSHIFT_EXPR, mode, op0,
3637 floor_log2 (coeff), target, unsignedp);
3640 /* Exclude cost of op0 from max_cost to match the cost
3641 calculation of the synth_mult. */
3642 max_cost = mul_widen_cost (speed, mode);
3643 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3644 max_cost))
3646 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3647 return expand_mult_const (mode, op0, coeff, target,
3648 &algorithm, variant);
3651 return expand_binop (mode, this_optab, op0, op1, target,
3652 unsignedp, OPTAB_LIB_WIDEN);
3655 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3656 replace division by D, and put the least significant N bits of the result
3657 in *MULTIPLIER_PTR and return the most significant bit.
3659 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT); the
3660 needed precision is in PRECISION (should be <= N).
3662 PRECISION should be as small as possible so this function can choose the
3663 multiplier more freely.
3665 The rounded-up logarithm of D is placed in *LGUP_PTR. A shift count that
3666 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3668 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3669 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
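/* For example, with D = 7 and N = PRECISION = 32: lgup = 3,
   mlow = 2^35/7 = 0x124924924 and mhigh = (2^35 + 2^3)/7 = 0x124924925.
   The reduction loop stops with *POST_SHIFT_PTR = 3, *MULTIPLIER_PTR
   = 0x24924925 and a return value of 1 (the full multiplier needs N + 1
   bits), so a caller such as expand_divmod below computes
   x / 7 = (((x - hi) >> 1) + hi) >> 2, where hi is the high half of
   x * 0x24924925.  */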
3671 unsigned HOST_WIDE_INT
3672 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3673 unsigned HOST_WIDE_INT *multiplier_ptr,
3674 int *post_shift_ptr, int *lgup_ptr)
3676 int lgup, post_shift;
3677 int pow, pow2;
3679 /* lgup = ceil(log2(divisor)); */
3680 lgup = ceil_log2 (d);
3682 gcc_assert (lgup <= n);
3684 pow = n + lgup;
3685 pow2 = n + lgup - precision;
3687 /* mlow = 2^(N + lgup)/d */
3688 wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3689 wide_int mlow = wi::udiv_trunc (val, d);
3691 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3692 val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3693 wide_int mhigh = wi::udiv_trunc (val, d);
3695 /* If precision == N, then mlow, mhigh exceed 2^N
3696 (but they do not exceed 2^(N+1)). */
3698 /* Reduce to lowest terms. */
3699 for (post_shift = lgup; post_shift > 0; post_shift--)
3701 unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3702 HOST_BITS_PER_WIDE_INT);
3703 unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3704 HOST_BITS_PER_WIDE_INT);
3705 if (ml_lo >= mh_lo)
3706 break;
3708 mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3709 mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3712 *post_shift_ptr = post_shift;
3713 *lgup_ptr = lgup;
3714 if (n < HOST_BITS_PER_WIDE_INT)
3716 unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;
3717 *multiplier_ptr = mhigh.to_uhwi () & mask;
3718 return mhigh.to_uhwi () > mask;
3720 else
3722 *multiplier_ptr = mhigh.to_uhwi ();
3723 return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3727 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3728 congruent to 1 (mod 2**N). */
3730 static unsigned HOST_WIDE_INT
3731 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3733 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3735 /* The algorithm notes that the choice y = x satisfies
3736 x*y == 1 mod 2^3, since x is assumed odd.
3737 Each iteration doubles the number of bits of significance in y. */
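/* For example, with x = 3 and n = 8: start with y = 3, which satisfies
   3*3 == 1 (mod 8).  The first iteration gives y = 3*(2 - 9) mod 256
   = 235, correct modulo 2^6; the second gives y = 235*(2 - 705) mod 256
   = 171, and indeed 3*171 = 513 == 1 (mod 256).  */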
3739 unsigned HOST_WIDE_INT mask;
3740 unsigned HOST_WIDE_INT y = x;
3741 int nbit = 3;
3743 mask = (n == HOST_BITS_PER_WIDE_INT
3744 ? HOST_WIDE_INT_M1U
3745 : (HOST_WIDE_INT_1U << n) - 1);
3747 while (nbit < n)
3749 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3750 nbit *= 2;
3752 return y;
3755 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3756 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3757 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3758 to become unsigned; if UNSIGNEDP is zero, adjust the unsigned product to
3759 become signed.
3761 The result is put in TARGET if that is convenient.
3763 MODE is the mode of operation. */
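/* The adjustment below uses the identity, taken modulo 2^N where N is
   the bit size of MODE:
   unsigned_high (OP0, OP1) = signed_high (OP0, OP1)
   + (OP0 < 0 ? OP1 : 0) + (OP1 < 0 ? OP0 : 0)
   applied in one direction or the other according to UNSIGNEDP.  */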
3766 expand_mult_highpart_adjust (scalar_int_mode mode, rtx adj_operand, rtx op0,
3767 rtx op1, rtx target, int unsignedp)
3769 rtx tem;
3770 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3772 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3773 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3774 tem = expand_and (mode, tem, op1, NULL_RTX);
3775 adj_operand
3776 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3777 adj_operand);
3779 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3780 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3781 tem = expand_and (mode, tem, op0, NULL_RTX);
3782 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3783 target);
3785 return target;
3788 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3790 static rtx
3791 extract_high_half (scalar_int_mode mode, rtx op)
3793 if (mode == word_mode)
3794 return gen_highpart (mode, op);
3796 scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3798 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3799 GET_MODE_BITSIZE (mode), 0, 1);
3800 return convert_modes (mode, wider_mode, op, 0);
3803 /* Like expmed_mult_highpart, but only consider using a multiplication
3804 optab. OP1 is an rtx for the constant operand. */
3806 static rtx
3807 expmed_mult_highpart_optab (scalar_int_mode mode, rtx op0, rtx op1,
3808 rtx target, int unsignedp, int max_cost)
3810 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3811 optab moptab;
3812 rtx tem;
3813 int size;
3814 bool speed = optimize_insn_for_speed_p ();
3816 scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3818 size = GET_MODE_BITSIZE (mode);
3820 /* Firstly, try using a multiplication insn that only generates the needed
3821 high part of the product, and in the sign flavor of unsignedp. */
3822 if (mul_highpart_cost (speed, mode) < max_cost)
3824 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3825 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3826 unsignedp, OPTAB_DIRECT);
3827 if (tem)
3828 return tem;
3831 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3832 Need to adjust the result after the multiplication. */
3833 if (size - 1 < BITS_PER_WORD
3834 && (mul_highpart_cost (speed, mode)
3835 + 2 * shift_cost (speed, mode, size-1)
3836 + 4 * add_cost (speed, mode) < max_cost))
3838 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3839 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3840 unsignedp, OPTAB_DIRECT);
3841 if (tem)
3842 /* We used the wrong signedness. Adjust the result. */
3843 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3844 tem, unsignedp);
3847 /* Try widening multiplication. */
3848 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3849 if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3850 && mul_widen_cost (speed, wider_mode) < max_cost)
3852 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3853 unsignedp, OPTAB_WIDEN);
3854 if (tem)
3855 return extract_high_half (mode, tem);
3858 /* Try widening the mode and performing a non-widening multiplication. */
3859 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3860 && size - 1 < BITS_PER_WORD
3861 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3862 < max_cost))
3864 rtx_insn *insns;
3865 rtx wop0, wop1;
3867 /* We need to widen the operands, for example to ensure the
3868 constant multiplier is correctly sign or zero extended.
3869 Use a sequence to clean up any instructions emitted by
3870 the conversions if things don't work out. */
3871 start_sequence ();
3872 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3873 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3874 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3875 unsignedp, OPTAB_WIDEN);
3876 insns = get_insns ();
3877 end_sequence ();
3879 if (tem)
3881 emit_insn (insns);
3882 return extract_high_half (mode, tem);
3886 /* Try widening multiplication of opposite signedness, and adjust. */
3887 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3888 if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3889 && size - 1 < BITS_PER_WORD
3890 && (mul_widen_cost (speed, wider_mode)
3891 + 2 * shift_cost (speed, mode, size-1)
3892 + 4 * add_cost (speed, mode) < max_cost))
3894 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3895 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3896 if (tem != 0)
3898 tem = extract_high_half (mode, tem);
3899 /* We used the wrong signedness. Adjust the result. */
3900 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3901 target, unsignedp);
3905 return 0;
3908 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3909 putting the high half of the result in TARGET if that is convenient,
3910 and return where the result is. If the operation cannot be performed,
3911 0 is returned.
3913 MODE is the mode of operation and result.
3915 UNSIGNEDP nonzero means unsigned multiply.
3917 MAX_COST is the total allowed cost for the expanded RTL. */
3919 static rtx
3920 expmed_mult_highpart (scalar_int_mode mode, rtx op0, rtx op1,
3921 rtx target, int unsignedp, int max_cost)
3923 unsigned HOST_WIDE_INT cnst1;
3924 int extra_cost;
3925 bool sign_adjust = false;
3926 enum mult_variant variant;
3927 struct algorithm alg;
3928 rtx tem;
3929 bool speed = optimize_insn_for_speed_p ();
3931 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3932 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3934 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3936 /* We can't optimize modes wider than BITS_PER_WORD.
3937 ??? We might be able to perform double-word arithmetic if
3938 mode == word_mode; however, all the cost calculations in
3939 synth_mult etc. assume single-word operations. */
3940 scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3941 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3942 return expmed_mult_highpart_optab (mode, op0, op1, target,
3943 unsignedp, max_cost);
3945 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3947 /* Check whether we try to multiply by a negative constant. */
3948 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3950 sign_adjust = true;
3951 extra_cost += add_cost (speed, mode);
3954 /* See whether shift/add multiplication is cheap enough. */
3955 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3956 max_cost - extra_cost))
3958 /* See whether the specialized multiplication optabs are
3959 cheaper than the shift/add version. */
3960 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3961 alg.cost.cost + extra_cost);
3962 if (tem)
3963 return tem;
3965 tem = convert_to_mode (wider_mode, op0, unsignedp);
3966 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3967 tem = extract_high_half (mode, tem);
3969 /* Adjust result for signedness. */
3970 if (sign_adjust)
3971 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3973 return tem;
3975 return expmed_mult_highpart_optab (mode, op0, op1, target,
3976 unsignedp, max_cost);
3980 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3982 static rtx
3983 expand_smod_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
3985 rtx result, temp, shift;
3986 rtx_code_label *label;
3987 int logd;
3988 int prec = GET_MODE_PRECISION (mode);
3990 logd = floor_log2 (d);
3991 result = gen_reg_rtx (mode);
3993 /* Avoid conditional branches when they're expensive. */
3994 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3995 && optimize_insn_for_speed_p ())
3997 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3998 mode, 0, -1);
3999 if (signmask)
4001 HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
4002 signmask = force_reg (mode, signmask);
4003 shift = gen_int_shift_amount (mode, GET_MODE_BITSIZE (mode) - logd);
4005 /* Use the rtx_cost of a LSHIFTRT instruction to determine
4006 which instruction sequence to use. If logical right shifts
4007 are expensive then use 2 XORs, 2 SUBs and an AND; otherwise
4008 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
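/* Both sequences produce the remainder with the sign of OP0, as in C.
   For example, with OP0 = -13 and D = 8, SIGNMASK is -1; the XOR
   variant computes ((-13 ^ -1) - -1) & 7 = 5 and then (5 ^ -1) - -1
   = -5, while the shift variant computes ((-13 + 7) & 7) - 7 = -5.  */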
4010 temp = gen_rtx_LSHIFTRT (mode, result, shift);
4011 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
4012 || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
4013 > COSTS_N_INSNS (2)))
4015 temp = expand_binop (mode, xor_optab, op0, signmask,
4016 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4017 temp = expand_binop (mode, sub_optab, temp, signmask,
4018 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4019 temp = expand_binop (mode, and_optab, temp,
4020 gen_int_mode (masklow, mode),
4021 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4022 temp = expand_binop (mode, xor_optab, temp, signmask,
4023 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4024 temp = expand_binop (mode, sub_optab, temp, signmask,
4025 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4027 else
4029 signmask = expand_binop (mode, lshr_optab, signmask, shift,
4030 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4031 signmask = force_reg (mode, signmask);
4033 temp = expand_binop (mode, add_optab, op0, signmask,
4034 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4035 temp = expand_binop (mode, and_optab, temp,
4036 gen_int_mode (masklow, mode),
4037 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4038 temp = expand_binop (mode, sub_optab, temp, signmask,
4039 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4041 return temp;
4045 /* Mask contains the mode's signbit and the significant bits of the
4046 modulus. By including the signbit in the operation, many targets
4047 can avoid an explicit compare operation in the following comparison
4048 against zero. */
4049 wide_int mask = wi::mask (logd, false, prec);
4050 mask = wi::set_bit (mask, prec - 1);
4052 temp = expand_binop (mode, and_optab, op0,
4053 immed_wide_int_const (mask, mode),
4054 result, 1, OPTAB_LIB_WIDEN);
4055 if (temp != result)
4056 emit_move_insn (result, temp);
4058 label = gen_label_rtx ();
4059 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
4061 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
4062 0, OPTAB_LIB_WIDEN);
4064 mask = wi::mask (logd, true, prec);
4065 temp = expand_binop (mode, ior_optab, temp,
4066 immed_wide_int_const (mask, mode),
4067 result, 1, OPTAB_LIB_WIDEN);
4068 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
4069 0, OPTAB_LIB_WIDEN);
4070 if (temp != result)
4071 emit_move_insn (result, temp);
4072 emit_label (label);
4073 return result;
4076 /* Expand signed division of OP0 by a power of two D in mode MODE.
4077 This routine is only called for positive values of D. */
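/* The sequences below rely on the identity
   x / 2^k == (x + (x < 0 ? 2^k - 1 : 0)) >> k (arithmetic shift);
   for example -13 / 4 is computed as (-13 + 3) >> 2 = -10 >> 2 = -3,
   matching C's truncating division.  The final fallback applies the
   same adjustment with an explicit compare and jump.  */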
4079 static rtx
4080 expand_sdiv_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
4082 rtx temp;
4083 rtx_code_label *label;
4084 int logd;
4086 logd = floor_log2 (d);
4088 if (d == 2
4089 && BRANCH_COST (optimize_insn_for_speed_p (),
4090 false) >= 1)
4092 temp = gen_reg_rtx (mode);
4093 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
4094 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4095 0, OPTAB_LIB_WIDEN);
4096 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4099 if (HAVE_conditional_move
4100 && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
4102 rtx temp2;
4104 start_sequence ();
4105 temp2 = copy_to_mode_reg (mode, op0);
4106 temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
4107 NULL_RTX, 0, OPTAB_LIB_WIDEN);
4108 temp = force_reg (mode, temp);
4110 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
4111 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
4112 mode, temp, temp2, mode, 0);
4113 if (temp2)
4115 rtx_insn *seq = get_insns ();
4116 end_sequence ();
4117 emit_insn (seq);
4118 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
4120 end_sequence ();
4123 if (BRANCH_COST (optimize_insn_for_speed_p (),
4124 false) >= 2)
4126 int ushift = GET_MODE_BITSIZE (mode) - logd;
4128 temp = gen_reg_rtx (mode);
4129 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
4130 if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
4131 || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
4132 > COSTS_N_INSNS (1))
4133 temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
4134 NULL_RTX, 0, OPTAB_LIB_WIDEN);
4135 else
4136 temp = expand_shift (RSHIFT_EXPR, mode, temp,
4137 ushift, NULL_RTX, 1);
4138 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4139 0, OPTAB_LIB_WIDEN);
4140 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4143 label = gen_label_rtx ();
4144 temp = copy_to_mode_reg (mode, op0);
4145 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
4146 expand_inc (temp, gen_int_mode (d - 1, mode));
4147 emit_label (label);
4148 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4151 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
4152 if that is convenient, and returning where the result is.
4153 You may request either the quotient or the remainder as the result;
4154 specify REM_FLAG nonzero to get the remainder.
4156 CODE is the expression code for which kind of division this is;
4157 it controls how rounding is done. MODE is the machine mode to use.
4158 UNSIGNEDP nonzero means do unsigned division. */
4160 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
4161 and then correct it by or'ing in missing high bits
4162 if result of ANDI is nonzero.
4163 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
4164 This could optimize to a bfexts instruction.
4165 But C doesn't use these operations, so their optimizations are
4166 left for later. */
4167 /* ??? For modulo, we don't actually need the highpart of the first product,
4168 the low part will do nicely. And for small divisors, the second multiply
4169 can also be a low-part only multiply or even be completely left out.
4170 E.g. to calculate the remainder of a division by 3 with a 32 bit
4171 multiply, multiply with 0x55555556 and extract the upper two bits;
4172 the result is exact for inputs up to 0x1fffffff.
4173 The input range can be reduced by using cross-sum rules.
4174 For odd divisors >= 3, the following table gives right shift counts
4175 so that if a number is shifted by an integer multiple of the given
4176 amount, the remainder stays the same:
4177 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
4178 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
4179 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
4180 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
4181 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
4183 Cross-sum rules for even numbers can be derived by leaving as many bits
4184 to the right alone as the divisor has zeros to the right.
4185 E.g. if x is an unsigned 32 bit number:
4186 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
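As a quick check of the mod-3 trick: for x = 100 the low 32 bits of
100 * 0x55555556 are 0x55555598, and 0x55555598 >> 30 = 1, which is
indeed 100 mod 3.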
4190 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
4191 rtx op0, rtx op1, rtx target, int unsignedp)
4193 machine_mode compute_mode;
4194 rtx tquotient;
4195 rtx quotient = 0, remainder = 0;
4196 rtx_insn *last;
4197 rtx_insn *insn;
4198 optab optab1, optab2;
4199 int op1_is_constant, op1_is_pow2 = 0;
4200 int max_cost, extra_cost;
4201 static HOST_WIDE_INT last_div_const = 0;
4202 bool speed = optimize_insn_for_speed_p ();
4204 op1_is_constant = CONST_INT_P (op1);
4205 if (op1_is_constant)
4207 wide_int ext_op1 = rtx_mode_t (op1, mode);
4208 op1_is_pow2 = (wi::popcount (ext_op1) == 1
4209 || (! unsignedp
4210 && wi::popcount (wi::neg (ext_op1)) == 1));
4214 This is the structure of expand_divmod:
4216 First comes code to fix up the operands so we can perform the operations
4217 correctly and efficiently.
4219 Second comes a switch statement with code specific for each rounding mode.
4220 For some special operands this code emits all RTL for the desired
4221 operation, for other cases, it generates only a quotient and stores it in
4222 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
4223 to indicate that it has not done anything.
4225 Last comes code that finishes the operation. If QUOTIENT is set and
4226 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
4227 QUOTIENT is not set, it is computed using trunc rounding.
4229 We try to generate special code for division and remainder when OP1 is a
4230 constant. If |OP1| = 2**n we can use shifts and some other fast
4231 operations. For other values of OP1, we compute a carefully selected
4232 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4233 by m.
4235 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4236 half of the product. Different strategies for generating the product are
4237 implemented in expmed_mult_highpart.
4239 If what we actually want is the remainder, we generate that by another
4240 by-constant multiplication and a subtraction. */
4242 /* We shouldn't be called with OP1 == const1_rtx, but some of the
4243 code below will malfunction if we are, so check here and handle
4244 the special case if so. */
4245 if (op1 == const1_rtx)
4246 return rem_flag ? const0_rtx : op0;
4248 /* When dividing by -1, we could get an overflow.
4249 negv_optab can handle overflows. */
4250 if (! unsignedp && op1 == constm1_rtx)
4252 if (rem_flag)
4253 return const0_rtx;
4254 return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4255 ? negv_optab : neg_optab, op0, target, 0);
4258 if (target
4259 /* Don't use the function value register as a target
4260 since we have to read it as well as write it,
4261 and function-inlining gets confused by this. */
4262 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4263 /* Don't clobber an operand while doing a multi-step calculation. */
4264 || ((rem_flag || op1_is_constant)
4265 && (reg_mentioned_p (target, op0)
4266 || (MEM_P (op0) && MEM_P (target))))
4267 || reg_mentioned_p (target, op1)
4268 || (MEM_P (op1) && MEM_P (target))))
4269 target = 0;
4271 /* Get the mode in which to perform this computation. Normally it will
4272 be MODE, but sometimes we can't do the desired operation in MODE.
4273 If so, pick a wider mode in which we can do the operation. Convert
4274 to that mode at the start to avoid repeated conversions.
4276 First see what operations we need. These depend on the expression
4277 we are evaluating. (We assume that divxx3 insns exist under the
4278 same conditions that modxx3 insns and that these insns don't normally
4279 fail. If these assumptions are not correct, we may generate less
4280 efficient code in some cases.)
4282 Then see if we find a mode in which we can open-code that operation
4283 (either a division, modulus, or shift). Finally, check for the smallest
4284 mode for which we can do the operation with a library call. */
4286 /* We might want to refine this now that we have division-by-constant
4287 optimization. Since expmed_mult_highpart tries so many variants, it is
4288 not straightforward to generalize this. Maybe we should make an array
4289 of possible modes in init_expmed? Save this for GCC 2.7. */
4291 optab1 = (op1_is_pow2
4292 ? (unsignedp ? lshr_optab : ashr_optab)
4293 : (unsignedp ? udiv_optab : sdiv_optab));
4294 optab2 = (op1_is_pow2 ? optab1
4295 : (unsignedp ? udivmod_optab : sdivmod_optab));
4297 FOR_EACH_MODE_FROM (compute_mode, mode)
4298 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4299 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4300 break;
4302 if (compute_mode == VOIDmode)
4303 FOR_EACH_MODE_FROM (compute_mode, mode)
4304 if (optab_libfunc (optab1, compute_mode)
4305 || optab_libfunc (optab2, compute_mode))
4306 break;
4308 /* If we still couldn't find a mode, use MODE, but expand_binop will
4309 probably die. */
4310 if (compute_mode == VOIDmode)
4311 compute_mode = mode;
4313 if (target && GET_MODE (target) == compute_mode)
4314 tquotient = target;
4315 else
4316 tquotient = gen_reg_rtx (compute_mode);
4318 #if 0
4319 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4320 (mode), and thereby get better code when OP1 is a constant. Do that
4321 later. It will require going over all usages of SIZE below. */
4322 size = GET_MODE_BITSIZE (mode);
4323 #endif
4325 /* Only deduct something for a REM if the last divide done was
4326 for a different constant. Then set the constant of the last
4327 divide. */
4328 max_cost = (unsignedp
4329 ? udiv_cost (speed, compute_mode)
4330 : sdiv_cost (speed, compute_mode));
4331 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4332 && INTVAL (op1) == last_div_const))
4333 max_cost -= (mul_cost (speed, compute_mode)
4334 + add_cost (speed, compute_mode));
4336 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4338 /* Now convert to the best mode to use. */
4339 if (compute_mode != mode)
4341 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4342 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4344 /* convert_modes may have placed op1 into a register, so we
4345 must recompute the following. */
4346 op1_is_constant = CONST_INT_P (op1);
4347 if (op1_is_constant)
4349 wide_int ext_op1 = rtx_mode_t (op1, compute_mode);
4350 op1_is_pow2 = (wi::popcount (ext_op1) == 1
4351 || (! unsignedp
4352 && wi::popcount (wi::neg (ext_op1)) == 1));
4354 else
4355 op1_is_pow2 = 0;
4358 /* If one of the operands is a volatile MEM, copy it into a register. */
4360 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4361 op0 = force_reg (compute_mode, op0);
4362 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4363 op1 = force_reg (compute_mode, op1);
4365 /* If we need the remainder or if OP1 is constant, we need to
4366 put OP0 in a register in case it has any queued subexpressions. */
4367 if (rem_flag || op1_is_constant)
4368 op0 = force_reg (compute_mode, op0);
4370 last = get_last_insn ();
4372 /* Promote floor rounding to trunc rounding for unsigned operations. */
4373 if (unsignedp)
4375 if (code == FLOOR_DIV_EXPR)
4376 code = TRUNC_DIV_EXPR;
4377 if (code == FLOOR_MOD_EXPR)
4378 code = TRUNC_MOD_EXPR;
4379 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4380 code = TRUNC_DIV_EXPR;
4383 if (op1 != const0_rtx)
4384 switch (code)
4386 case TRUNC_MOD_EXPR:
4387 case TRUNC_DIV_EXPR:
4388 if (op1_is_constant)
4390 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4391 int size = GET_MODE_BITSIZE (int_mode);
4392 if (unsignedp)
4394 unsigned HOST_WIDE_INT mh, ml;
4395 int pre_shift, post_shift;
4396 int dummy;
4397 wide_int wd = rtx_mode_t (op1, int_mode);
4398 unsigned HOST_WIDE_INT d = wd.to_uhwi ();
4400 if (wi::popcount (wd) == 1)
4402 pre_shift = floor_log2 (d);
4403 if (rem_flag)
4405 unsigned HOST_WIDE_INT mask
4406 = (HOST_WIDE_INT_1U << pre_shift) - 1;
4407 remainder
4408 = expand_binop (int_mode, and_optab, op0,
4409 gen_int_mode (mask, int_mode),
4410 remainder, 1,
4411 OPTAB_LIB_WIDEN);
4412 if (remainder)
4413 return gen_lowpart (mode, remainder);
4415 quotient = expand_shift (RSHIFT_EXPR, int_mode, op0,
4416 pre_shift, tquotient, 1);
4418 else if (size <= HOST_BITS_PER_WIDE_INT)
4420 if (d >= (HOST_WIDE_INT_1U << (size - 1)))
4422 /* Most significant bit of divisor is set; emit an scc
4423 insn. */
4424 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4425 int_mode, 1, 1);
4427 else
4429 /* Find a suitable multiplier and right shift count
4430 instead of multiplying with D. */
4432 mh = choose_multiplier (d, size, size,
4433 &ml, &post_shift, &dummy);
4435 /* If the suggested multiplier is more than SIZE bits,
4436 we can do better for even divisors, using an
4437 initial right shift. */
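/* For example, a 32-bit division by 14 would need a 33-bit
   multiplier, but after a right shift by 1 the divisor 7 needs
   only the 32-bit multiplier 0x92492493 with a post-shift of 2.  */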
4438 if (mh != 0 && (d & 1) == 0)
4440 pre_shift = ctz_or_zero (d);
4441 mh = choose_multiplier (d >> pre_shift, size,
4442 size - pre_shift,
4443 &ml, &post_shift, &dummy);
4444 gcc_assert (!mh);
4446 else
4447 pre_shift = 0;
4449 if (mh != 0)
4451 rtx t1, t2, t3, t4;
4453 if (post_shift - 1 >= BITS_PER_WORD)
4454 goto fail1;
4456 extra_cost
4457 = (shift_cost (speed, int_mode, post_shift - 1)
4458 + shift_cost (speed, int_mode, 1)
4459 + 2 * add_cost (speed, int_mode));
4460 t1 = expmed_mult_highpart
4461 (int_mode, op0, gen_int_mode (ml, int_mode),
4462 NULL_RTX, 1, max_cost - extra_cost);
4463 if (t1 == 0)
4464 goto fail1;
4465 t2 = force_operand (gen_rtx_MINUS (int_mode,
4466 op0, t1),
4467 NULL_RTX);
4468 t3 = expand_shift (RSHIFT_EXPR, int_mode,
4469 t2, 1, NULL_RTX, 1);
4470 t4 = force_operand (gen_rtx_PLUS (int_mode,
4471 t1, t3),
4472 NULL_RTX);
4473 quotient = expand_shift
4474 (RSHIFT_EXPR, int_mode, t4,
4475 post_shift - 1, tquotient, 1);
4477 else
4479 rtx t1, t2;
4481 if (pre_shift >= BITS_PER_WORD
4482 || post_shift >= BITS_PER_WORD)
4483 goto fail1;
4485 t1 = expand_shift
4486 (RSHIFT_EXPR, int_mode, op0,
4487 pre_shift, NULL_RTX, 1);
4488 extra_cost
4489 = (shift_cost (speed, int_mode, pre_shift)
4490 + shift_cost (speed, int_mode, post_shift));
4491 t2 = expmed_mult_highpart
4492 (int_mode, t1,
4493 gen_int_mode (ml, int_mode),
4494 NULL_RTX, 1, max_cost - extra_cost);
4495 if (t2 == 0)
4496 goto fail1;
4497 quotient = expand_shift
4498 (RSHIFT_EXPR, int_mode, t2,
4499 post_shift, tquotient, 1);
4503 else /* Too wide mode to use tricky code */
4504 break;
4506 insn = get_last_insn ();
4507 if (insn != last)
4508 set_dst_reg_note (insn, REG_EQUAL,
4509 gen_rtx_UDIV (int_mode, op0, op1),
4510 quotient);
4512 else /* TRUNC_DIV, signed */
4514 unsigned HOST_WIDE_INT ml;
4515 int lgup, post_shift;
4516 rtx mlr;
4517 HOST_WIDE_INT d = INTVAL (op1);
4518 unsigned HOST_WIDE_INT abs_d;
4520 /* Not prepared to handle division/remainder by
4521 0xffffffffffffffff8000000000000000 etc. */
4522 if (d == HOST_WIDE_INT_MIN && size > HOST_BITS_PER_WIDE_INT)
4523 break;
4525 /* Since d might be INT_MIN, we have to cast to
4526 unsigned HOST_WIDE_INT before negating to avoid
4527 undefined signed overflow. */
4528 abs_d = (d >= 0
4529 ? (unsigned HOST_WIDE_INT) d
4530 : - (unsigned HOST_WIDE_INT) d);
4532 /* n rem d = n rem -d */
4533 if (rem_flag && d < 0)
4535 d = abs_d;
4536 op1 = gen_int_mode (abs_d, int_mode);
4539 if (d == 1)
4540 quotient = op0;
4541 else if (d == -1)
4542 quotient = expand_unop (int_mode, neg_optab, op0,
4543 tquotient, 0);
4544 else if (size <= HOST_BITS_PER_WIDE_INT
4545 && abs_d == HOST_WIDE_INT_1U << (size - 1))
4547 /* This case is not handled correctly below. */
4548 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4549 int_mode, 1, 1);
4550 if (quotient == 0)
4551 goto fail1;
4553 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4554 && (size <= HOST_BITS_PER_WIDE_INT || d >= 0)
4555 && (rem_flag
4556 ? smod_pow2_cheap (speed, int_mode)
4557 : sdiv_pow2_cheap (speed, int_mode))
4558 /* We assume that the cheap metric is true if the
4559 optab has an expander for this mode. */
4560 && ((optab_handler ((rem_flag ? smod_optab
4561 : sdiv_optab),
4562 int_mode)
4563 != CODE_FOR_nothing)
4564 || (optab_handler (sdivmod_optab, int_mode)
4565 != CODE_FOR_nothing)))
4567 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4569 if (rem_flag)
4571 remainder = expand_smod_pow2 (int_mode, op0, d);
4572 if (remainder)
4573 return gen_lowpart (mode, remainder);
4576 if (sdiv_pow2_cheap (speed, int_mode)
4577 && ((optab_handler (sdiv_optab, int_mode)
4578 != CODE_FOR_nothing)
4579 || (optab_handler (sdivmod_optab, int_mode)
4580 != CODE_FOR_nothing)))
4581 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4582 int_mode, op0,
4583 gen_int_mode (abs_d,
4584 int_mode),
4585 NULL_RTX, 0);
4586 else
4587 quotient = expand_sdiv_pow2 (int_mode, op0, abs_d);
4589 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4590 negate the quotient. */
4591 if (d < 0)
4593 insn = get_last_insn ();
4594 if (insn != last
4595 && abs_d < (HOST_WIDE_INT_1U
4596 << (HOST_BITS_PER_WIDE_INT - 1)))
4597 set_dst_reg_note (insn, REG_EQUAL,
4598 gen_rtx_DIV (int_mode, op0,
4599 gen_int_mode
4600 (abs_d,
4601 int_mode)),
4602 quotient);
4604 quotient = expand_unop (int_mode, neg_optab,
4605 quotient, quotient, 0);
4608 else if (size <= HOST_BITS_PER_WIDE_INT)
4610 choose_multiplier (abs_d, size, size - 1,
4611 &ml, &post_shift, &lgup);
4612 if (ml < HOST_WIDE_INT_1U << (size - 1))
4614 rtx t1, t2, t3;
4616 if (post_shift >= BITS_PER_WORD
4617 || size - 1 >= BITS_PER_WORD)
4618 goto fail1;
4620 extra_cost = (shift_cost (speed, int_mode, post_shift)
4621 + shift_cost (speed, int_mode, size - 1)
4622 + add_cost (speed, int_mode));
4623 t1 = expmed_mult_highpart
4624 (int_mode, op0, gen_int_mode (ml, int_mode),
4625 NULL_RTX, 0, max_cost - extra_cost);
4626 if (t1 == 0)
4627 goto fail1;
4628 t2 = expand_shift
4629 (RSHIFT_EXPR, int_mode, t1,
4630 post_shift, NULL_RTX, 0);
4631 t3 = expand_shift
4632 (RSHIFT_EXPR, int_mode, op0,
4633 size - 1, NULL_RTX, 0);
4634 if (d < 0)
4635 quotient
4636 = force_operand (gen_rtx_MINUS (int_mode, t3, t2),
4637 tquotient);
4638 else
4639 quotient
4640 = force_operand (gen_rtx_MINUS (int_mode, t2, t3),
4641 tquotient);
4643 else
4645 rtx t1, t2, t3, t4;
4647 if (post_shift >= BITS_PER_WORD
4648 || size - 1 >= BITS_PER_WORD)
4649 goto fail1;
4651 ml |= HOST_WIDE_INT_M1U << (size - 1);
4652 mlr = gen_int_mode (ml, int_mode);
4653 extra_cost = (shift_cost (speed, int_mode, post_shift)
4654 + shift_cost (speed, int_mode, size - 1)
4655 + 2 * add_cost (speed, int_mode));
4656 t1 = expmed_mult_highpart (int_mode, op0, mlr,
4657 NULL_RTX, 0,
4658 max_cost - extra_cost);
4659 if (t1 == 0)
4660 goto fail1;
4661 t2 = force_operand (gen_rtx_PLUS (int_mode, t1, op0),
4662 NULL_RTX);
4663 t3 = expand_shift
4664 (RSHIFT_EXPR, int_mode, t2,
4665 post_shift, NULL_RTX, 0);
4666 t4 = expand_shift
4667 (RSHIFT_EXPR, int_mode, op0,
4668 size - 1, NULL_RTX, 0);
4669 if (d < 0)
4670 quotient
4671 = force_operand (gen_rtx_MINUS (int_mode, t4, t3),
4672 tquotient);
4673 else
4674 quotient
4675 = force_operand (gen_rtx_MINUS (int_mode, t3, t4),
4676 tquotient);
4679 else /* Too wide mode to use tricky code */
4680 break;
4682 insn = get_last_insn ();
4683 if (insn != last)
4684 set_dst_reg_note (insn, REG_EQUAL,
4685 gen_rtx_DIV (int_mode, op0, op1),
4686 quotient);
4688 break;
4690 fail1:
4691 delete_insns_since (last);
4692 break;
4694 case FLOOR_DIV_EXPR:
4695 case FLOOR_MOD_EXPR:
4696 /* We will come here only for signed operations. */
4697 if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
4699 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4700 int size = GET_MODE_BITSIZE (int_mode);
4701 unsigned HOST_WIDE_INT mh, ml;
4702 int pre_shift, lgup, post_shift;
4703 HOST_WIDE_INT d = INTVAL (op1);
4705 if (d > 0)
4707 /* We could just as easily deal with negative constants here,
4708 but it does not seem worth the trouble for GCC 2.6. */
4709 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4711 pre_shift = floor_log2 (d);
4712 if (rem_flag)
4714 unsigned HOST_WIDE_INT mask
4715 = (HOST_WIDE_INT_1U << pre_shift) - 1;
4716 remainder = expand_binop
4717 (int_mode, and_optab, op0,
4718 gen_int_mode (mask, int_mode),
4719 remainder, 0, OPTAB_LIB_WIDEN);
4720 if (remainder)
4721 return gen_lowpart (mode, remainder);
4723 quotient = expand_shift
4724 (RSHIFT_EXPR, int_mode, op0,
4725 pre_shift, tquotient, 0);
4727 else
4729 rtx t1, t2, t3, t4;
4731 mh = choose_multiplier (d, size, size - 1,
4732 &ml, &post_shift, &lgup);
4733 gcc_assert (!mh);
4735 if (post_shift < BITS_PER_WORD
4736 && size - 1 < BITS_PER_WORD)
4738 t1 = expand_shift
4739 (RSHIFT_EXPR, int_mode, op0,
4740 size - 1, NULL_RTX, 0);
4741 t2 = expand_binop (int_mode, xor_optab, op0, t1,
4742 NULL_RTX, 0, OPTAB_WIDEN);
4743 extra_cost = (shift_cost (speed, int_mode, post_shift)
4744 + shift_cost (speed, int_mode, size - 1)
4745 + 2 * add_cost (speed, int_mode));
4746 t3 = expmed_mult_highpart
4747 (int_mode, t2, gen_int_mode (ml, int_mode),
4748 NULL_RTX, 1, max_cost - extra_cost);
4749 if (t3 != 0)
4751 t4 = expand_shift
4752 (RSHIFT_EXPR, int_mode, t3,
4753 post_shift, NULL_RTX, 1);
4754 quotient = expand_binop (int_mode, xor_optab,
4755 t4, t1, tquotient, 0,
4756 OPTAB_WIDEN);
4761 else
4763 rtx nsign, t1, t2, t3, t4;
4764 t1 = force_operand (gen_rtx_PLUS (int_mode,
4765 op0, constm1_rtx), NULL_RTX);
4766 t2 = expand_binop (int_mode, ior_optab, op0, t1, NULL_RTX,
4767 0, OPTAB_WIDEN);
4768 nsign = expand_shift (RSHIFT_EXPR, int_mode, t2,
4769 size - 1, NULL_RTX, 0);
4770 t3 = force_operand (gen_rtx_MINUS (int_mode, t1, nsign),
4771 NULL_RTX);
4772 t4 = expand_divmod (0, TRUNC_DIV_EXPR, int_mode, t3, op1,
4773 NULL_RTX, 0);
4774 if (t4)
4776 rtx t5;
4777 t5 = expand_unop (int_mode, one_cmpl_optab, nsign,
4778 NULL_RTX, 0);
4779 quotient = force_operand (gen_rtx_PLUS (int_mode, t4, t5),
4780 tquotient);
4785 if (quotient != 0)
4786 break;
4787 delete_insns_since (last);
4789 /* Try using an instruction that produces both the quotient and
4790 remainder, using truncation. We can easily compensate the quotient
4791 or remainder to get floor rounding, once we have the remainder.
4792 Notice that we compute also the final remainder value here,
4793 and return the result right away. */
4794 if (target == 0 || GET_MODE (target) != compute_mode)
4795 target = gen_reg_rtx (compute_mode);
4797 if (rem_flag)
4799 remainder
4800 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4801 quotient = gen_reg_rtx (compute_mode);
4803 else
4805 quotient
4806 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4807 remainder = gen_reg_rtx (compute_mode);
4810 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4811 quotient, remainder, 0))
4813 /* This could be computed with a branch-less sequence.
4814 Save that for later. */
4815 rtx tem;
4816 rtx_code_label *label = gen_label_rtx ();
4817 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4818 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4819 NULL_RTX, 0, OPTAB_WIDEN);
4820 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4821 expand_dec (quotient, const1_rtx);
4822 expand_inc (remainder, op1);
4823 emit_label (label);
4824 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4827 /* No luck with division elimination or divmod. Have to do it
4828 by conditionally adjusting op0 *and* the result. */
4830 rtx_code_label *label1, *label2, *label3, *label4, *label5;
4831 rtx adjusted_op0;
4832 rtx tem;
4834 quotient = gen_reg_rtx (compute_mode);
4835 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4836 label1 = gen_label_rtx ();
4837 label2 = gen_label_rtx ();
4838 label3 = gen_label_rtx ();
4839 label4 = gen_label_rtx ();
4840 label5 = gen_label_rtx ();
4841 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4842 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4843 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4844 quotient, 0, OPTAB_LIB_WIDEN);
4845 if (tem != quotient)
4846 emit_move_insn (quotient, tem);
4847 emit_jump_insn (targetm.gen_jump (label5));
4848 emit_barrier ();
4849 emit_label (label1);
4850 expand_inc (adjusted_op0, const1_rtx);
4851 emit_jump_insn (targetm.gen_jump (label4));
4852 emit_barrier ();
4853 emit_label (label2);
4854 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4855 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4856 quotient, 0, OPTAB_LIB_WIDEN);
4857 if (tem != quotient)
4858 emit_move_insn (quotient, tem);
4859 emit_jump_insn (targetm.gen_jump (label5));
4860 emit_barrier ();
4861 emit_label (label3);
4862 expand_dec (adjusted_op0, const1_rtx);
4863 emit_label (label4);
4864 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4865 quotient, 0, OPTAB_LIB_WIDEN);
4866 if (tem != quotient)
4867 emit_move_insn (quotient, tem);
4868 expand_dec (quotient, const1_rtx);
4869 emit_label (label5);
4871 break;
4873 case CEIL_DIV_EXPR:
4874 case CEIL_MOD_EXPR:
4875 if (unsignedp)
4877 if (op1_is_constant
4878 && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4879 && (HWI_COMPUTABLE_MODE_P (compute_mode)
4880 || INTVAL (op1) >= 0))
4882 scalar_int_mode int_mode
4883 = as_a <scalar_int_mode> (compute_mode);
4884 rtx t1, t2, t3;
4885 unsigned HOST_WIDE_INT d = INTVAL (op1);
4886 t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
4887 floor_log2 (d), tquotient, 1);
4888 t2 = expand_binop (int_mode, and_optab, op0,
4889 gen_int_mode (d - 1, int_mode),
4890 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4891 t3 = gen_reg_rtx (int_mode);
4892 t3 = emit_store_flag (t3, NE, t2, const0_rtx, int_mode, 1, 1);
4893 if (t3 == 0)
4895 rtx_code_label *lab;
4896 lab = gen_label_rtx ();
4897 do_cmp_and_jump (t2, const0_rtx, EQ, int_mode, lab);
4898 expand_inc (t1, const1_rtx);
4899 emit_label (lab);
4900 quotient = t1;
4902 else
4903 quotient = force_operand (gen_rtx_PLUS (int_mode, t1, t3),
4904 tquotient);
4905 break;
4908 /* Try using an instruction that produces both the quotient and
4909 remainder, using truncation. We can easily compensate the
4910 quotient or remainder to get ceiling rounding, once we have the
4911 remainder. Notice that we compute also the final remainder
4912 value here, and return the result right away. */
4913 if (target == 0 || GET_MODE (target) != compute_mode)
4914 target = gen_reg_rtx (compute_mode);
4916 if (rem_flag)
4918 remainder = (REG_P (target)
4919 ? target : gen_reg_rtx (compute_mode));
4920 quotient = gen_reg_rtx (compute_mode);
4922 else
4924 quotient = (REG_P (target)
4925 ? target : gen_reg_rtx (compute_mode));
4926 remainder = gen_reg_rtx (compute_mode);
4929 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4930 remainder, 1))
4932 /* This could be computed with a branch-less sequence.
4933 Save that for later. */
4934 rtx_code_label *label = gen_label_rtx ();
4935 do_cmp_and_jump (remainder, const0_rtx, EQ,
4936 compute_mode, label);
4937 expand_inc (quotient, const1_rtx);
4938 expand_dec (remainder, op1);
4939 emit_label (label);
4940 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4943 /* No luck with division elimination or divmod. Have to do it
4944 by conditionally adjusting op0 *and* the result. */
4946 rtx_code_label *label1, *label2;
4947 rtx adjusted_op0, tem;
4949 quotient = gen_reg_rtx (compute_mode);
4950 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4951 label1 = gen_label_rtx ();
4952 label2 = gen_label_rtx ();
4953 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4954 compute_mode, label1);
4955 emit_move_insn (quotient, const0_rtx);
4956 emit_jump_insn (targetm.gen_jump (label2));
4957 emit_barrier ();
4958 emit_label (label1);
4959 expand_dec (adjusted_op0, const1_rtx);
4960 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4961 quotient, 1, OPTAB_LIB_WIDEN);
4962 if (tem != quotient)
4963 emit_move_insn (quotient, tem);
4964 expand_inc (quotient, const1_rtx);
4965 emit_label (label2);
4968 else /* signed */
4970 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4971 && INTVAL (op1) >= 0)
4973 /* This is extremely similar to the code for the unsigned case
4974 above. For 2.7 we should merge these variants, but for
4975 2.6.1 I don't want to touch the code for unsigned since that
4976 gets used in C. The signed case will only be used by other
4977 languages (Ada). */
4979 rtx t1, t2, t3;
4980 unsigned HOST_WIDE_INT d = INTVAL (op1);
4981 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4982 floor_log2 (d), tquotient, 0);
4983 t2 = expand_binop (compute_mode, and_optab, op0,
4984 gen_int_mode (d - 1, compute_mode),
4985 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4986 t3 = gen_reg_rtx (compute_mode);
4987 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4988 compute_mode, 1, 1);
4989 if (t3 == 0)
4991 rtx_code_label *lab;
4992 lab = gen_label_rtx ();
4993 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4994 expand_inc (t1, const1_rtx);
4995 emit_label (lab);
4996 quotient = t1;
4998 else
4999 quotient = force_operand (gen_rtx_PLUS (compute_mode,
5000 t1, t3),
5001 tquotient);
5002 break;
5005 /* Try using an instruction that produces both the quotient and
5006 remainder, using truncation. We can easily compensate the
5007 quotient or remainder to get ceiling rounding, once we have the
5008 remainder. Notice that we compute also the final remainder
5009 value here, and return the result right away. */
5010 if (target == 0 || GET_MODE (target) != compute_mode)
5011 target = gen_reg_rtx (compute_mode);
5012 if (rem_flag)
5014 remainder = (REG_P (target)
5015 ? target : gen_reg_rtx (compute_mode));
5016 quotient = gen_reg_rtx (compute_mode);
5018 else
5020 quotient = (REG_P (target)
5021 ? target : gen_reg_rtx (compute_mode));
5022 remainder = gen_reg_rtx (compute_mode);
5025 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
5026 remainder, 0))
5028 /* This could be computed with a branch-less sequence.
5029 Save that for later. */
5030 rtx tem;
5031 rtx_code_label *label = gen_label_rtx ();
5032 do_cmp_and_jump (remainder, const0_rtx, EQ,
5033 compute_mode, label);
5034 tem = expand_binop (compute_mode, xor_optab, op0, op1,
5035 NULL_RTX, 0, OPTAB_WIDEN);
5036 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
5037 expand_inc (quotient, const1_rtx);
5038 expand_dec (remainder, op1);
5039 emit_label (label);
5040 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5043 /* No luck with division elimination or divmod. Have to do it
5044 by conditionally adjusting op0 *and* the result. */
5046 rtx_code_label *label1, *label2, *label3, *label4, *label5;
5047 rtx adjusted_op0;
5048 rtx tem;
5050 quotient = gen_reg_rtx (compute_mode);
5051 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
5052 label1 = gen_label_rtx ();
5053 label2 = gen_label_rtx ();
5054 label3 = gen_label_rtx ();
5055 label4 = gen_label_rtx ();
5056 label5 = gen_label_rtx ();
5057 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
5058 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
5059 compute_mode, label1);
5060 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5061 quotient, 0, OPTAB_LIB_WIDEN);
5062 if (tem != quotient)
5063 emit_move_insn (quotient, tem);
5064 emit_jump_insn (targetm.gen_jump (label5));
5065 emit_barrier ();
5066 emit_label (label1);
5067 expand_dec (adjusted_op0, const1_rtx);
5068 emit_jump_insn (targetm.gen_jump (label4));
5069 emit_barrier ();
5070 emit_label (label2);
5071 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
5072 compute_mode, label3);
5073 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5074 quotient, 0, OPTAB_LIB_WIDEN);
5075 if (tem != quotient)
5076 emit_move_insn (quotient, tem);
5077 emit_jump_insn (targetm.gen_jump (label5));
5078 emit_barrier ();
5079 emit_label (label3);
5080 expand_inc (adjusted_op0, const1_rtx);
5081 emit_label (label4);
5082 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5083 quotient, 0, OPTAB_LIB_WIDEN);
5084 if (tem != quotient)
5085 emit_move_insn (quotient, tem);
5086 expand_inc (quotient, const1_rtx);
5087 emit_label (label5);
5090 break;
5092 case EXACT_DIV_EXPR:
5093 if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
5095 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5096 int size = GET_MODE_BITSIZE (int_mode);
5097 HOST_WIDE_INT d = INTVAL (op1);
5098 unsigned HOST_WIDE_INT ml;
5099 int pre_shift;
5100 rtx t1;
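/* Since OP0 is known to be a multiple of OP1, divide exactly by first
   shifting out OP1's trailing zero bits and then multiplying by the
   inverse of its odd part modulo 2^SIZE.  For example, in an 8-bit
   mode 21 / 3 becomes 21 * 171 mod 256 = 7, 171 being the inverse of 3
   computed by invert_mod2n.  */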
5102 pre_shift = ctz_or_zero (d);
5103 ml = invert_mod2n (d >> pre_shift, size);
5104 t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5105 pre_shift, NULL_RTX, unsignedp);
5106 quotient = expand_mult (int_mode, t1, gen_int_mode (ml, int_mode),
5107 NULL_RTX, 1);
5109 insn = get_last_insn ();
5110 set_dst_reg_note (insn, REG_EQUAL,
5111 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5112 int_mode, op0, op1),
5113 quotient);
5115 break;
5117 case ROUND_DIV_EXPR:
5118 case ROUND_MOD_EXPR:
5119 if (unsignedp)
5121 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5122 rtx tem;
5123 rtx_code_label *label;
5124 label = gen_label_rtx ();
5125 quotient = gen_reg_rtx (int_mode);
5126 remainder = gen_reg_rtx (int_mode);
5127 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
5129 rtx tem;
5130 quotient = expand_binop (int_mode, udiv_optab, op0, op1,
5131 quotient, 1, OPTAB_LIB_WIDEN);
5132 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 1);
5133 remainder = expand_binop (int_mode, sub_optab, op0, tem,
5134 remainder, 1, OPTAB_LIB_WIDEN);
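/* Round the quotient to nearest: bump it (and correct the remainder)
   whenever 2 * REMAINDER > OP1 - 1, i.e. whenever the remainder is
   closer to OP1 than to zero, with halfway cases rounded up.  */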
5136 tem = plus_constant (int_mode, op1, -1);
5137 tem = expand_shift (RSHIFT_EXPR, int_mode, tem, 1, NULL_RTX, 1);
5138 do_cmp_and_jump (remainder, tem, LEU, int_mode, label);
5139 expand_inc (quotient, const1_rtx);
5140 expand_dec (remainder, op1);
5141 emit_label (label);
5143 else
5145 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5146 int size = GET_MODE_BITSIZE (int_mode);
5147 rtx abs_rem, abs_op1, tem, mask;
5148 rtx_code_label *label;
5149 label = gen_label_rtx ();
5150 quotient = gen_reg_rtx (int_mode);
5151 remainder = gen_reg_rtx (int_mode);
5152 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
5154 rtx tem;
5155 quotient = expand_binop (int_mode, sdiv_optab, op0, op1,
5156 quotient, 0, OPTAB_LIB_WIDEN);
5157 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 0);
5158 remainder = expand_binop (int_mode, sub_optab, op0, tem,
5159 remainder, 0, OPTAB_LIB_WIDEN);
5161 abs_rem = expand_abs (int_mode, remainder, NULL_RTX, 1, 0);
5162 abs_op1 = expand_abs (int_mode, op1, NULL_RTX, 1, 0);
5163 tem = expand_shift (LSHIFT_EXPR, int_mode, abs_rem,
5164 1, NULL_RTX, 1);
5165 do_cmp_and_jump (tem, abs_op1, LTU, int_mode, label);
5166 tem = expand_binop (int_mode, xor_optab, op0, op1,
5167 NULL_RTX, 0, OPTAB_WIDEN);
5168 mask = expand_shift (RSHIFT_EXPR, int_mode, tem,
5169 size - 1, NULL_RTX, 0);
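/* MASK is 0 if OP0 and OP1 have the same sign and -1 otherwise, so
   (MASK ^ 1) - MASK below is +1 or -1 and (MASK ^ OP1) - MASK is OP1
   or -OP1: the quotient is moved one step away from zero and the
   remainder adjusted so that OP0 == QUOTIENT * OP1 + REMAINDER still
   holds.  */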
5170 tem = expand_binop (int_mode, xor_optab, mask, const1_rtx,
5171 NULL_RTX, 0, OPTAB_WIDEN);
5172 tem = expand_binop (int_mode, sub_optab, tem, mask,
5173 NULL_RTX, 0, OPTAB_WIDEN);
5174 expand_inc (quotient, tem);
5175 tem = expand_binop (int_mode, xor_optab, mask, op1,
5176 NULL_RTX, 0, OPTAB_WIDEN);
5177 tem = expand_binop (int_mode, sub_optab, tem, mask,
5178 NULL_RTX, 0, OPTAB_WIDEN);
5179 expand_dec (remainder, tem);
5180 emit_label (label);
5182 return gen_lowpart (mode, rem_flag ? remainder : quotient);
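/* Worked example of the round-to-nearest adjustment above.  Unsigned
   case: for 7 / 2 the truncating division gives quotient 3 and
   remainder 1; since 1 > (2 - 1) >> 1 == 0, the quotient is bumped to
   4 and the remainder becomes -1, i.e. 3.5 rounds to 4.  For 7 / 3 the
   remainder 1 is not greater than (3 - 1) >> 1 == 1, so 2.33 stays 2.
   Signed case: for -7 / 2, 2 * |rem| == 2 is not less than |2|, and the
   sign mask computed from op0 ^ op1 turns the adjustment into
   quotient -= 1 and remainder += 2, giving -4 and 1, i.e. -3.5 rounds
   away from zero. */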
5184 default:
5185 gcc_unreachable ();
5188 if (quotient == 0)
5190 if (target && GET_MODE (target) != compute_mode)
5191 target = 0;
5193 if (rem_flag)
5195 /* Try to produce the remainder without producing the quotient.
5196 If we seem to have a divmod pattern that does not require widening,
5197 don't try widening here. We should really have a WIDEN argument
5198 to expand_twoval_binop, since what we'd really like to do here is
5199 1) try a mod insn in compute_mode
5200 2) try a divmod insn in compute_mode
5201 3) try a div insn in compute_mode and multiply-subtract to get
5202 remainder
5203 4) try the same things with widening allowed. */
5204 remainder
5205 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5206 op0, op1, target,
5207 unsignedp,
5208 ((optab_handler (optab2, compute_mode)
5209 != CODE_FOR_nothing)
5210 ? OPTAB_DIRECT : OPTAB_WIDEN));
5211 if (remainder == 0)
5213 /* No luck there. Can we do remainder and divide at once
5214 without a library call? */
5215 remainder = gen_reg_rtx (compute_mode);
5216 if (! expand_twoval_binop ((unsignedp
5217 ? udivmod_optab
5218 : sdivmod_optab),
5219 op0, op1,
5220 NULL_RTX, remainder, unsignedp))
5221 remainder = 0;
5224 if (remainder)
5225 return gen_lowpart (mode, remainder);
5228 /* Produce the quotient. Try a quotient insn, but not a library call.
5229 If we have a divmod in this mode, use it in preference to widening
5230 the div (for this test we assume it will not fail). Note that optab2
5231 is set to the one of the two optabs that the call below will use. */
5232 quotient
5233 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5234 op0, op1, rem_flag ? NULL_RTX : target,
5235 unsignedp,
5236 ((optab_handler (optab2, compute_mode)
5237 != CODE_FOR_nothing)
5238 ? OPTAB_DIRECT : OPTAB_WIDEN));
5240 if (quotient == 0)
5242 /* No luck there. Try a quotient-and-remainder insn,
5243 keeping the quotient alone. */
5244 quotient = gen_reg_rtx (compute_mode);
5245 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5246 op0, op1,
5247 quotient, NULL_RTX, unsignedp))
5249 quotient = 0;
5250 if (! rem_flag)
5251 /* Still no luck. If we are not computing the remainder,
5252 use a library call for the quotient. */
5253 quotient = sign_expand_binop (compute_mode,
5254 udiv_optab, sdiv_optab,
5255 op0, op1, target,
5256 unsignedp, OPTAB_LIB_WIDEN);
5261 if (rem_flag)
5263 if (target && GET_MODE (target) != compute_mode)
5264 target = 0;
5266 if (quotient == 0)
5268 /* No divide instruction either. Use library for remainder. */
5269 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5270 op0, op1, target,
5271 unsignedp, OPTAB_LIB_WIDEN);
5272 /* No remainder function. Try a quotient-and-remainder
5273 function, keeping the remainder. */
5274 if (!remainder)
5276 remainder = gen_reg_rtx (compute_mode);
5277 if (!expand_twoval_binop_libfunc
5278 (unsignedp ? udivmod_optab : sdivmod_optab,
5279 op0, op1,
5280 NULL_RTX, remainder,
5281 unsignedp ? UMOD : MOD))
5282 remainder = NULL_RTX;
5285 else
5287 /* We divided. Now finish doing X - Y * (X / Y). */
5288 remainder = expand_mult (compute_mode, quotient, op1,
5289 NULL_RTX, unsignedp);
5290 remainder = expand_binop (compute_mode, sub_optab, op0,
5291 remainder, target, unsignedp,
5292 OPTAB_LIB_WIDEN);
5296 return gen_lowpart (mode, rem_flag ? remainder : quotient);
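/* Illustrative sketch, not used by the code above: when only a quotient
   instruction is available, the remainder is recovered with the
   multiply-subtract identity X - (X / Y) * Y, as in the final branch of
   expand_divmod.  The hypothetical helper below shows the identity for
   plain ints; it assumes Y != 0 and no INT_MIN / -1 overflow. */
static inline int
remainder_via_mulsub_sketch (int x, int y)
{
  /* Truncating division satisfies x == (x / y) * y + x % y,
     hence x % y == x - (x / y) * y. */
  return x - (x / y) * y;
}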
5299 /* Return a tree node with data type TYPE, describing the value of X.
5300 Usually this is a VAR_DECL, if there is no obvious better choice.
5301 X may be an expression; however, we only support those expressions
5302 generated by loop.c. */
5304 tree
5305 make_tree (tree type, rtx x)
5307 tree t;
5309 switch (GET_CODE (x))
5311 case CONST_INT:
5312 case CONST_WIDE_INT:
5313 t = wide_int_to_tree (type, rtx_mode_t (x, TYPE_MODE (type)));
5314 return t;
5316 case CONST_DOUBLE:
5317 STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5318 if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5319 t = wide_int_to_tree (type,
5320 wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5321 HOST_BITS_PER_WIDE_INT * 2));
5322 else
5323 t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5325 return t;
5327 case CONST_VECTOR:
5329 unsigned int npatterns = CONST_VECTOR_NPATTERNS (x);
5330 unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (x);
5331 tree itype = TREE_TYPE (type);
5333 /* Build a tree with vector elements. */
5334 tree_vector_builder elts (type, npatterns, nelts_per_pattern);
5335 unsigned int count = elts.encoded_nelts ();
5336 for (unsigned int i = 0; i < count; ++i)
5338 rtx elt = CONST_VECTOR_ELT (x, i);
5339 elts.quick_push (make_tree (itype, elt));
5342 return elts.build ();
5345 case PLUS:
5346 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5347 make_tree (type, XEXP (x, 1)));
5349 case MINUS:
5350 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5351 make_tree (type, XEXP (x, 1)));
5353 case NEG:
5354 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5356 case MULT:
5357 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5358 make_tree (type, XEXP (x, 1)));
5360 case ASHIFT:
5361 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5362 make_tree (type, XEXP (x, 1)));
5364 case LSHIFTRT:
5365 t = unsigned_type_for (type);
5366 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5367 make_tree (t, XEXP (x, 0)),
5368 make_tree (type, XEXP (x, 1))));
5370 case ASHIFTRT:
5371 t = signed_type_for (type);
5372 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5373 make_tree (t, XEXP (x, 0)),
5374 make_tree (type, XEXP (x, 1))));
5376 case DIV:
5377 if (TREE_CODE (type) != REAL_TYPE)
5378 t = signed_type_for (type);
5379 else
5380 t = type;
5382 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5383 make_tree (t, XEXP (x, 0)),
5384 make_tree (t, XEXP (x, 1))));
5385 case UDIV:
5386 t = unsigned_type_for (type);
5387 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5388 make_tree (t, XEXP (x, 0)),
5389 make_tree (t, XEXP (x, 1))));
5391 case SIGN_EXTEND:
5392 case ZERO_EXTEND:
5393 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5394 GET_CODE (x) == ZERO_EXTEND);
5395 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5397 case CONST:
5398 return make_tree (type, XEXP (x, 0));
5400 case SYMBOL_REF:
5401 t = SYMBOL_REF_DECL (x);
5402 if (t)
5403 return fold_convert (type, build_fold_addr_expr (t));
5404 /* fall through. */
5406 default:
5407 if (CONST_POLY_INT_P (x))
5408 return wide_int_to_tree (type, const_poly_int_value (x));
5410 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5412 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5413 address mode to pointer mode. */
5414 if (POINTER_TYPE_P (type))
5415 x = convert_memory_address_addr_space
5416 (SCALAR_INT_TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5418 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5419 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5420 t->decl_with_rtl.rtl = x;
5422 return t;
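/* Illustrative sketch, not part of make_tree: the LSHIFTRT and ASHIFTRT
   cases above model the two flavours of right shift by switching the
   signedness of the operand type, which in C corresponds to the
   hypothetical helpers below (assuming 32-bit int and the usual
   arithmetic behaviour of >> on signed operands). */
static inline int
lshiftrt_sketch (int x, int n)
{
  /* Logical shift: zero-fill, via an unsigned intermediate type. */
  return (int) ((unsigned int) x >> n);
}

static inline int
ashiftrt_sketch (int x, int n)
{
  /* Arithmetic shift: sign-fill, done in the signed type itself. */
  return x >> n;
}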
5426 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5427 and returning TARGET.
5429 If TARGET is 0, a pseudo-register or constant is returned. */
5432 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5434 rtx tem = 0;
5436 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5437 tem = simplify_binary_operation (AND, mode, op0, op1);
5438 if (tem == 0)
5439 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5441 if (target == 0)
5442 target = tem;
5443 else if (tem != target)
5444 emit_move_insn (target, tem);
5445 return target;
5448 /* Helper function for emit_store_flag. */
5450 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5451 machine_mode mode, machine_mode compare_mode,
5452 int unsignedp, rtx x, rtx y, int normalizep,
5453 machine_mode target_mode)
5455 class expand_operand ops[4];
5456 rtx op0, comparison, subtarget;
5457 rtx_insn *last;
5458 scalar_int_mode result_mode = targetm.cstore_mode (icode);
5459 scalar_int_mode int_target_mode;
5461 last = get_last_insn ();
5462 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5463 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5464 if (!x || !y)
5466 delete_insns_since (last);
5467 return NULL_RTX;
5470 if (target_mode == VOIDmode)
5471 int_target_mode = result_mode;
5472 else
5473 int_target_mode = as_a <scalar_int_mode> (target_mode);
5474 if (!target)
5475 target = gen_reg_rtx (int_target_mode);
5477 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5479 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5480 create_fixed_operand (&ops[1], comparison);
5481 create_fixed_operand (&ops[2], x);
5482 create_fixed_operand (&ops[3], y);
5483 if (!maybe_expand_insn (icode, 4, ops))
5485 delete_insns_since (last);
5486 return NULL_RTX;
5488 subtarget = ops[0].value;
5490 /* If we are converting to a wider mode, first convert to
5491 INT_TARGET_MODE, then normalize. This produces better combining
5492 opportunities on machines that have a SIGN_EXTRACT when we are
5493 testing a single bit. This mostly benefits the 68k.
5495 If STORE_FLAG_VALUE does not have the sign bit set when
5496 interpreted in MODE, we can do this conversion as unsigned, which
5497 is usually more efficient. */
5498 if (GET_MODE_PRECISION (int_target_mode) > GET_MODE_PRECISION (result_mode))
5500 gcc_assert (GET_MODE_PRECISION (result_mode) != 1
5501 || STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1);
5503 bool unsignedp = (STORE_FLAG_VALUE >= 0);
5504 convert_move (target, subtarget, unsignedp);
5506 op0 = target;
5507 result_mode = int_target_mode;
5509 else
5510 op0 = subtarget;
5512 /* If we want to keep subexpressions around, don't reuse our last
5513 target. */
5514 if (optimize)
5515 subtarget = 0;
5517 /* Now normalize to the proper value in MODE. Sometimes we don't
5518 have to do anything. */
5519 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5521 /* STORE_FLAG_VALUE might be the most negative number, so write
5522 the comparison this way to avoid a compile-time warning. */
5523 else if (- normalizep == STORE_FLAG_VALUE)
5524 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5526 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5527 it hard to use a value of just the sign bit due to ANSI integer
5528 constant typing rules. */
5529 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5530 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5531 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5532 normalizep == 1);
5533 else
5535 gcc_assert (STORE_FLAG_VALUE & 1);
5537 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5538 if (normalizep == -1)
5539 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5542 /* If we were converting to a smaller mode, do the conversion now. */
5543 if (int_target_mode != result_mode)
5545 convert_move (target, op0, 0);
5546 return target;
5548 else
5549 return op0;
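/* Worked example of the normalization above: suppose the cstore pattern
   produces STORE_FLAG_VALUE == -1 on success and 0 on failure.  For
   NORMALIZEP == 1 the raw value is simply negated; on a target whose
   STORE_FLAG_VALUE is the sign bit (e.g. 0x80000000 in SImode), a
   logical right shift by 31 yields 0/1 and an arithmetic shift yields
   0/-1; otherwise the value is ANDed with 1 and, for NORMALIZEP == -1,
   negated afterwards. */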
5553 /* A subroutine of emit_store_flag only including "tricks" that do not
5554 need a recursive call. These are kept separate to avoid infinite
5555 loops. */
5557 static rtx
5558 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5559 machine_mode mode, int unsignedp, int normalizep,
5560 machine_mode target_mode)
5562 rtx subtarget;
5563 enum insn_code icode;
5564 machine_mode compare_mode;
5565 enum mode_class mclass;
5566 enum rtx_code scode;
5568 if (unsignedp)
5569 code = unsigned_condition (code);
5570 scode = swap_condition (code);
5572 /* If one operand is constant, make it the second one. Only do this
5573 if the other operand is not constant as well. */
5575 if (swap_commutative_operands_p (op0, op1))
5577 std::swap (op0, op1);
5578 code = swap_condition (code);
5581 if (mode == VOIDmode)
5582 mode = GET_MODE (op0);
5584 if (CONST_SCALAR_INT_P (op1))
5585 canonicalize_comparison (mode, &code, &op1);
5587 /* For some comparisons with 1 and -1, we can convert this to
5588 comparisons with zero. This will often produce more opportunities for
5589 store-flag insns. */
5591 switch (code)
5593 case LT:
5594 if (op1 == const1_rtx)
5595 op1 = const0_rtx, code = LE;
5596 break;
5597 case LE:
5598 if (op1 == constm1_rtx)
5599 op1 = const0_rtx, code = LT;
5600 break;
5601 case GE:
5602 if (op1 == const1_rtx)
5603 op1 = const0_rtx, code = GT;
5604 break;
5605 case GT:
5606 if (op1 == constm1_rtx)
5607 op1 = const0_rtx, code = GE;
5608 break;
5609 case GEU:
5610 if (op1 == const1_rtx)
5611 op1 = const0_rtx, code = NE;
5612 break;
5613 case LTU:
5614 if (op1 == const1_rtx)
5615 op1 = const0_rtx, code = EQ;
5616 break;
5617 default:
5618 break;
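/* Spelled out, the rewrites above are: x < 1 becomes x <= 0,
   x <= -1 becomes x < 0, x >= 1 becomes x > 0, x > -1 becomes x >= 0,
   and for unsigned operands x >= 1 becomes x != 0 while x < 1 becomes
   x == 0, all of which compare against zero and therefore match more
   store-flag patterns. */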
5621 /* If we are comparing a double-word integer with zero or -1, we can
5622 convert the comparison into one involving a single word. */
5623 scalar_int_mode int_mode;
5624 if (is_int_mode (mode, &int_mode)
5625 && GET_MODE_BITSIZE (int_mode) == BITS_PER_WORD * 2
5626 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5628 rtx tem;
5629 if ((code == EQ || code == NE)
5630 && (op1 == const0_rtx || op1 == constm1_rtx))
5632 rtx op00, op01;
5634 /* Do a logical OR or AND of the two words and compare the
5635 result. */
5636 op00 = simplify_gen_subreg (word_mode, op0, int_mode, 0);
5637 op01 = simplify_gen_subreg (word_mode, op0, int_mode, UNITS_PER_WORD);
5638 tem = expand_binop (word_mode,
5639 op1 == const0_rtx ? ior_optab : and_optab,
5640 op00, op01, NULL_RTX, unsignedp,
5641 OPTAB_DIRECT);
5643 if (tem != 0)
5644 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5645 unsignedp, normalizep);
5647 else if ((code == LT || code == GE) && op1 == const0_rtx)
5649 rtx op0h;
5651 /* If testing the sign bit, can just test on high word. */
5652 op0h = simplify_gen_subreg (word_mode, op0, int_mode,
5653 subreg_highpart_offset (word_mode,
5654 int_mode));
5655 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5656 unsignedp, normalizep);
5658 else
5659 tem = NULL_RTX;
5661 if (tem)
5663 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5664 return tem;
5665 if (!target)
5666 target = gen_reg_rtx (target_mode);
5668 convert_move (target, tem,
5669 !val_signbit_known_set_p (word_mode,
5670 (normalizep ? normalizep
5671 : STORE_FLAG_VALUE)));
5672 return target;
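/* Worked example for the double-word case above, with 32-bit words and
   a 64-bit operand split into LO and HI: X == 0 is tested as
   (LO | HI) == 0, X == -1 as (LO & HI) == -1, and X < 0 (or X >= 0)
   only needs the sign of HI, so a single-word store-flag suffices. */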
5676 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5677 complement of A (for GE) and shifting the sign bit to the low bit. */
5678 if (op1 == const0_rtx && (code == LT || code == GE)
5679 && is_int_mode (mode, &int_mode)
5680 && (normalizep || STORE_FLAG_VALUE == 1
5681 || val_signbit_p (int_mode, STORE_FLAG_VALUE)))
5683 scalar_int_mode int_target_mode;
5684 subtarget = target;
5686 if (!target)
5687 int_target_mode = int_mode;
5688 else
5690 /* If the result is to be wider than OP0, it is best to convert it
5691 first. If it is to be narrower, it is *incorrect* to convert it
5692 first. */
5693 int_target_mode = as_a <scalar_int_mode> (target_mode);
5694 if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (int_mode))
5696 op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5697 int_mode = int_target_mode;
5701 if (int_target_mode != int_mode)
5702 subtarget = 0;
5704 if (code == GE)
5705 op0 = expand_unop (int_mode, one_cmpl_optab, op0,
5706 ((STORE_FLAG_VALUE == 1 || normalizep)
5707 ? 0 : subtarget), 0);
5709 if (STORE_FLAG_VALUE == 1 || normalizep)
5710 /* If we are supposed to produce a 0/1 value, we want to do
5711 a logical shift from the sign bit to the low-order bit; for
5712 a -1/0 value, we do an arithmetic shift. */
5713 op0 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5714 GET_MODE_BITSIZE (int_mode) - 1,
5715 subtarget, normalizep != -1);
5717 if (int_mode != int_target_mode)
5718 op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5720 return op0;
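/* Worked example for the sign-bit trick above, in 32-bit SImode:
   "x < 0" as a 0/1 value is (unsigned) x >> 31, and as a 0/-1 value is
   the arithmetic shift x >> 31; "x >= 0" first complements x, so it
   becomes (unsigned) ~x >> 31 (or ~x >> 31 arithmetically). */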
5723 mclass = GET_MODE_CLASS (mode);
5724 FOR_EACH_MODE_FROM (compare_mode, mode)
5726 machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5727 icode = optab_handler (cstore_optab, optab_mode);
5728 if (icode != CODE_FOR_nothing)
5730 do_pending_stack_adjust ();
5731 rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5732 unsignedp, op0, op1, normalizep, target_mode);
5733 if (tem)
5734 return tem;
5736 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5738 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5739 unsignedp, op1, op0, normalizep, target_mode);
5740 if (tem)
5741 return tem;
5743 break;
5747 return 0;
5750 /* Subroutine of emit_store_flag that handles cases in which the operands
5751 are scalar integers. SUBTARGET is the target to use for temporary
5752 operations and TRUEVAL is the value to store when the condition is
5753 true. All other arguments are as for emit_store_flag. */
5756 emit_store_flag_int (rtx target, rtx subtarget, enum rtx_code code, rtx op0,
5757 rtx op1, scalar_int_mode mode, int unsignedp,
5758 int normalizep, rtx trueval)
5760 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5761 rtx_insn *last = get_last_insn ();
5763 /* If this is an equality comparison of integers, we can try to exclusive-or
5764 (or subtract) the two operands and use a recursive call to try the
5765 comparison with zero. Don't do any of these cases if branches are
5766 very cheap. */
5768 if ((code == EQ || code == NE) && op1 != const0_rtx)
5770 rtx tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5771 OPTAB_WIDEN);
5773 if (tem == 0)
5774 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5775 OPTAB_WIDEN);
5776 if (tem != 0)
5777 tem = emit_store_flag (target, code, tem, const0_rtx,
5778 mode, unsignedp, normalizep);
5779 if (tem != 0)
5780 return tem;
5782 delete_insns_since (last);
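/* The rewrite above relies on the identity x == y iff (x ^ y) == 0
   (equivalently (x - y) == 0), so an EQ or NE against an arbitrary
   operand is reduced to the zero comparisons handled further down;
   e.g. 12 ^ 12 == 0 while 12 ^ 13 == 1. */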
5785 /* For integer comparisons, try the reverse comparison. However, for
5786 small X, and if we would have to extend it anyway, implementing "X != 0"
5787 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5788 rtx_code rcode = reverse_condition (code);
5789 if (can_compare_p (rcode, mode, ccp_store_flag)
5790 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5791 && code == NE
5792 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5793 && op1 == const0_rtx))
5795 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5796 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5798 /* Again, for the reverse comparison, use either an addition or a XOR. */
5799 if (want_add
5800 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5801 optimize_insn_for_speed_p ()) == 0)
5803 rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5804 STORE_FLAG_VALUE, target_mode);
5805 if (tem != 0)
5806 tem = expand_binop (target_mode, add_optab, tem,
5807 gen_int_mode (normalizep, target_mode),
5808 target, 0, OPTAB_WIDEN);
5809 if (tem != 0)
5810 return tem;
5812 else if (!want_add
5813 && rtx_cost (trueval, mode, XOR, 1,
5814 optimize_insn_for_speed_p ()) == 0)
5816 rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5817 normalizep, target_mode);
5818 if (tem != 0)
5819 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5820 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5821 if (tem != 0)
5822 return tem;
5825 delete_insns_since (last);
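/* Worked example of the reverse-comparison rewrite above: with
   STORE_FLAG_VALUE == 1, to get "x != y" normalized to 0/-1 we emit the
   cheaper reverse test "x == y" (giving 1/0) and add -1, yielding -1
   when x != y and 0 when x == y.  The XOR variant instead flips a 1/0
   result with the TRUEVAL constant when that constant is free. */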
5828 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5829 the constant zero. Reject all other comparisons at this point. Only
5830 do LE and GT if branches are expensive since they are expensive on
5831 2-operand machines. */
5833 if (op1 != const0_rtx
5834 || (code != EQ && code != NE
5835 && (BRANCH_COST (optimize_insn_for_speed_p (),
5836 false) <= 1 || (code != LE && code != GT))))
5837 return 0;
5839 /* Try to put the result of the comparison in the sign bit. Assume we can't
5840 do the necessary operation below. */
5842 rtx tem = 0;
5844 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5845 the sign bit set. */
5847 if (code == LE)
5849 /* This is destructive, so SUBTARGET can't be OP0. */
5850 if (rtx_equal_p (subtarget, op0))
5851 subtarget = 0;
5853 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5854 OPTAB_WIDEN);
5855 if (tem)
5856 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5857 OPTAB_WIDEN);
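/* Checking the claim above on a few values: for A == 5, 5 | 4 == 5 and
   the sign bit is clear; for A == 0, 0 | -1 == -1 and the sign bit is
   set; for A == -3, -3 | -4 == -3 and the sign bit is set.  So the sign
   bit of A | (A - 1) is exactly the value of A <= 0. */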
5860 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5861 number of bits in the mode of OP0, minus one. */
5863 if (code == GT)
5865 if (rtx_equal_p (subtarget, op0))
5866 subtarget = 0;
5868 tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
5869 GET_MODE_BITSIZE (mode) - 1,
5870 subtarget, 0);
5871 if (tem)
5872 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5873 OPTAB_WIDEN);
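/* Checking the claim above in 32-bit arithmetic: for A == 5,
   (5 >> 31) - 5 == -5 and the sign bit is set; for A == 0 the result is
   0; for A == -3, (-3 >> 31) - (-3) == -1 + 3 == 2 and the sign bit is
   clear.  So the sign bit of (A >> 31) - A is exactly A > 0. */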
5876 if (code == EQ || code == NE)
5878 /* For EQ or NE, one way to do the comparison is to apply an operation
5879 that converts the operand into a positive number if it is nonzero
5880 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5881 for NE we negate. This puts the result in the sign bit. Then we
5882 normalize with a shift, if needed.
5884 Two operations that can do the above actions are ABS and FFS, so try
5885 them. If that doesn't work, and MODE is smaller than a full word,
5886 we can use zero-extension to the wider mode (an unsigned conversion)
5887 as the operation. */
5889 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5890 that is compensated by the subsequent overflow when subtracting
5891 one / negating. */
5893 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5894 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5895 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5896 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5897 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5899 tem = convert_modes (word_mode, mode, op0, 1);
5900 mode = word_mode;
5903 if (tem != 0)
5905 if (code == EQ)
5906 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5907 0, OPTAB_WIDEN);
5908 else
5909 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5912 /* If we couldn't do it that way, for NE we can "or" the two's complement
5913 of the value with itself. For EQ, we take the one's complement of
5914 that "or", which is an extra insn, so we only handle EQ if branches
5915 are expensive. */
5917 if (tem == 0
5918 && (code == NE
5919 || BRANCH_COST (optimize_insn_for_speed_p (),
5920 false) > 1))
5922 if (rtx_equal_p (subtarget, op0))
5923 subtarget = 0;
5925 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5926 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5927 OPTAB_WIDEN);
5929 if (tem && code == EQ)
5930 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
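/* Checking the claim above: -A | A has its sign bit set exactly when
   A != 0, because for nonzero A at least one of A and -A is negative
   (both are, for A == INT_MIN), while for A == 0 the result is 0.
   Complementing that "or" therefore gives the EQ flavour, at the cost
   of one extra instruction. */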
5934 if (tem && normalizep)
5935 tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem,
5936 GET_MODE_BITSIZE (mode) - 1,
5937 subtarget, normalizep == 1);
5939 if (tem)
5941 if (!target)
5943 else if (GET_MODE (tem) != target_mode)
5945 convert_move (target, tem, 0);
5946 tem = target;
5948 else if (!subtarget)
5950 emit_move_insn (target, tem);
5951 tem = target;
5954 else
5955 delete_insns_since (last);
5957 return tem;
5960 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5961 and storing in TARGET. Normally return TARGET.
5962 Return 0 if that cannot be done.
5964 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5965 it is VOIDmode, they cannot both be CONST_INT.
5967 UNSIGNEDP is for the case where we have to widen the operands
5968 to perform the operation. It says to use zero-extension.
5970 NORMALIZEP is 1 if we should convert the result to be either zero
5971 or one. NORMALIZEP is -1 if we should convert the result to be
5972 either zero or -1. If NORMALIZEP is zero, the result will be left
5973 "raw" out of the scc insn. */
5976 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5977 machine_mode mode, int unsignedp, int normalizep)
5979 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5980 enum rtx_code rcode;
5981 rtx subtarget;
5982 rtx tem, trueval;
5983 rtx_insn *last;
5985 /* If we compare constants, we shouldn't use a store-flag operation,
5986 but a constant load. We can get there via the vanilla route that
5987 usually generates a compare-branch sequence, but will in this case
5988 fold the comparison to a constant, and thus elide the branch. */
5989 if (CONSTANT_P (op0) && CONSTANT_P (op1))
5990 return NULL_RTX;
5992 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5993 target_mode);
5994 if (tem)
5995 return tem;
5997 /* If we reached here, we can't do this with a scc insn, however there
5998 are some comparisons that can be done in other ways. Don't do any
5999 of these cases if branches are very cheap. */
6000 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
6001 return 0;
6003 /* See what we need to return. We can only return a 1, -1, or the
6004 sign bit. */
6006 if (normalizep == 0)
6008 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
6009 normalizep = STORE_FLAG_VALUE;
6011 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
6013 else
6014 return 0;
6017 last = get_last_insn ();
6019 /* If optimizing, use different pseudo registers for each insn, instead
6020 of reusing the same pseudo. This leads to better CSE, but slows
6021 down the compiler, since there are more pseudos. */
6022 subtarget = (!optimize
6023 && (target_mode == mode)) ? target : NULL_RTX;
6024 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
6026 /* For floating-point comparisons, try the reverse comparison or try
6027 changing the "orderedness" of the comparison. */
6028 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6030 enum rtx_code first_code;
6031 bool and_them;
6033 rcode = reverse_condition_maybe_unordered (code);
6034 if (can_compare_p (rcode, mode, ccp_store_flag)
6035 && (code == ORDERED || code == UNORDERED
6036 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6037 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6039 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
6040 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
6042 /* For the reverse comparison, use either an addition or a XOR. */
6043 if (want_add
6044 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
6045 optimize_insn_for_speed_p ()) == 0)
6047 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
6048 STORE_FLAG_VALUE, target_mode);
6049 if (tem)
6050 return expand_binop (target_mode, add_optab, tem,
6051 gen_int_mode (normalizep, target_mode),
6052 target, 0, OPTAB_WIDEN);
6054 else if (!want_add
6055 && rtx_cost (trueval, mode, XOR, 1,
6056 optimize_insn_for_speed_p ()) == 0)
6058 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
6059 normalizep, target_mode);
6060 if (tem)
6061 return expand_binop (target_mode, xor_optab, tem, trueval,
6062 target, INTVAL (trueval) >= 0,
6063 OPTAB_WIDEN);
6067 delete_insns_since (last);
6069 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
6070 if (code == ORDERED || code == UNORDERED)
6071 return 0;
6073 and_them = split_comparison (code, mode, &first_code, &code);
6075 /* If there are no NaNs, the first comparison should always fall through.
6076 Effectively change the comparison to the other one. */
6077 if (!HONOR_NANS (mode))
6079 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
6080 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
6081 target_mode);
6084 if (!HAVE_conditional_move)
6085 return 0;
6087 /* Do not turn a trapping comparison into a non-trapping one. */
6088 if ((code != EQ && code != NE && code != UNEQ && code != LTGT)
6089 && flag_trapping_math)
6090 return 0;
6092 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
6093 conditional move. */
6094 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
6095 normalizep, target_mode);
6096 if (tem == 0)
6097 return 0;
6099 if (and_them)
6100 tem = emit_conditional_move (target, code, op0, op1, mode,
6101 tem, const0_rtx, GET_MODE (tem), 0);
6102 else
6103 tem = emit_conditional_move (target, code, op0, op1, mode,
6104 trueval, tem, GET_MODE (tem), 0);
6106 if (tem == 0)
6107 delete_insns_since (last);
6108 return tem;
6111 /* The remaining tricks only apply to integer comparisons. */
6113 scalar_int_mode int_mode;
6114 if (is_int_mode (mode, &int_mode))
6115 return emit_store_flag_int (target, subtarget, code, op0, op1, int_mode,
6116 unsignedp, normalizep, trueval);
6118 return 0;
6121 /* Like emit_store_flag, but always succeeds. */
6124 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
6125 machine_mode mode, int unsignedp, int normalizep)
6127 rtx tem;
6128 rtx_code_label *label;
6129 rtx trueval, falseval;
6131 /* First see if emit_store_flag can do the job. */
6132 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
6133 if (tem != 0)
6134 return tem;
6136 /* If one operand is constant, make it the second one. Only do this
6137 if the other operand is not constant as well. */
6138 if (swap_commutative_operands_p (op0, op1))
6140 std::swap (op0, op1);
6141 code = swap_condition (code);
6144 if (mode == VOIDmode)
6145 mode = GET_MODE (op0);
6147 if (!target)
6148 target = gen_reg_rtx (word_mode);
6150 /* If this failed, we have to do this with set/compare/jump/set code.
6151 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
6152 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
6153 if (code == NE
6154 && GET_MODE_CLASS (mode) == MODE_INT
6155 && REG_P (target)
6156 && op0 == target
6157 && op1 == const0_rtx)
6159 label = gen_label_rtx ();
6160 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
6161 NULL_RTX, NULL, label,
6162 profile_probability::uninitialized ());
6163 emit_move_insn (target, trueval);
6164 emit_label (label);
6165 return target;
6168 if (!REG_P (target)
6169 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
6170 target = gen_reg_rtx (GET_MODE (target));
6172 /* Jump in the right direction if the target cannot implement CODE
6173 but can jump on its reverse condition. */
6174 falseval = const0_rtx;
6175 if (! can_compare_p (code, mode, ccp_jump)
6176 && (! FLOAT_MODE_P (mode)
6177 || code == ORDERED || code == UNORDERED
6178 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6179 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6181 enum rtx_code rcode;
6182 if (FLOAT_MODE_P (mode))
6183 rcode = reverse_condition_maybe_unordered (code);
6184 else
6185 rcode = reverse_condition (code);
6187 /* Canonicalize to UNORDERED for the libcall. */
6188 if (can_compare_p (rcode, mode, ccp_jump)
6189 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
6191 falseval = trueval;
6192 trueval = const0_rtx;
6193 code = rcode;
6197 emit_move_insn (target, trueval);
6198 label = gen_label_rtx ();
6199 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
6200 label, profile_probability::uninitialized ());
6202 emit_move_insn (target, falseval);
6203 emit_label (label);
6205 return target;
6208 /* Helper function for canonicalize_comparison. Swap between inclusive
6209 and exclusive ranges in order to create an equivalent comparison. See
6210 canonicalize_comparison for the possible cases. */
6212 static enum rtx_code
6213 equivalent_cmp_code (enum rtx_code code)
6215 switch (code)
6217 case GT:
6218 return GE;
6219 case GE:
6220 return GT;
6221 case LT:
6222 return LE;
6223 case LE:
6224 return LT;
6225 case GTU:
6226 return GEU;
6227 case GEU:
6228 return GTU;
6229 case LTU:
6230 return LEU;
6231 case LEU:
6232 return LTU;
6234 default:
6235 return code;
6239 /* Choose the more appropriate immediate in scalar integer comparisons. The
6240 purpose of this is to end up with an immediate which can be loaded into a
6241 register in fewer moves, if possible.
6243 For each integer comparison there exists an equivalent choice:
6244 i) a > b or a >= b + 1
6245 ii) a <= b or a < b + 1
6246 iii) a >= b or a > b - 1
6247 iv) a < b or a <= b - 1
6249 MODE is the mode of the first operand.
6250 CODE points to the comparison code.
6251 IMM points to the rtx containing the immediate. *IMM must satisfy
6252 CONST_SCALAR_INT_P on entry and continues to satisfy CONST_SCALAR_INT_P
6253 on exit. */
6255 void
6256 canonicalize_comparison (machine_mode mode, enum rtx_code *code, rtx *imm)
6258 if (!SCALAR_INT_MODE_P (mode))
6259 return;
6261 int to_add = 0;
6262 enum signop sgn = unsigned_condition_p (*code) ? UNSIGNED : SIGNED;
6264 /* Extract the immediate value from the rtx. */
6265 wide_int imm_val = rtx_mode_t (*imm, mode);
6267 if (*code == GT || *code == GTU || *code == LE || *code == LEU)
6268 to_add = 1;
6269 else if (*code == GE || *code == GEU || *code == LT || *code == LTU)
6270 to_add = -1;
6271 else
6272 return;
6274 /* Check for overflow/underflow in the case of signed values and
6275 wrapping around in the case of unsigned values. If any occur
6276 cancel the optimization. */
6277 wi::overflow_type overflow = wi::OVF_NONE;
6278 wide_int imm_modif;
6280 if (to_add == 1)
6281 imm_modif = wi::add (imm_val, 1, sgn, &overflow);
6282 else
6283 imm_modif = wi::sub (imm_val, 1, sgn, &overflow);
6285 if (overflow)
6286 return;
6288 /* The following creates a pseudo; if we cannot do that, bail out. */
6289 if (!can_create_pseudo_p ())
6290 return;
6292 rtx reg = gen_rtx_REG (mode, LAST_VIRTUAL_REGISTER + 1);
6293 rtx new_imm = immed_wide_int_const (imm_modif, mode);
6295 rtx_insn *old_rtx = gen_move_insn (reg, *imm);
6296 rtx_insn *new_rtx = gen_move_insn (reg, new_imm);
6298 /* Update the immediate and the code. */
6299 if (insn_cost (old_rtx, true) > insn_cost (new_rtx, true))
6301 *code = equivalent_cmp_code (*code);
6302 *imm = new_imm;
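/* Worked example of the canonicalization above: "a > 7" and "a >= 8"
   are equivalent (case i with b == 7), as are "a < 16" and "a <= 15"
   (case iv), so whichever of the two immediates is cheaper to
   materialize is chosen, provided the +1/-1 adjustment neither
   overflows a signed immediate nor wraps an unsigned one. */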
6308 /* Perform possibly multi-word comparison and conditional jump to LABEL
6309 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
6310 now a thin wrapper around do_compare_rtx_and_jump. */
6312 static void
6313 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
6314 rtx_code_label *label)
6316 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
6317 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
6318 NULL, label, profile_probability::uninitialized ());