1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987-2024 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* Work around tree-optimization/91825. */
22 #pragma GCC diagnostic warning "-Wmaybe-uninitialized"
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "predict.h"
32 #include "memmodel.h"
33 #include "tm_p.h"
34 #include "optabs.h"
35 #include "expmed.h"
36 #include "regs.h"
37 #include "emit-rtl.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
41 #include "dojump.h"
42 #include "explow.h"
43 #include "expr.h"
44 #include "langhooks.h"
45 #include "tree-vector-builder.h"
46 #include "recog.h"
48 struct target_expmed default_target_expmed;
49 #if SWITCHABLE_TARGET
50 struct target_expmed *this_target_expmed = &default_target_expmed;
51 #endif
53 static bool store_integral_bit_field (rtx, opt_scalar_int_mode,
54 unsigned HOST_WIDE_INT,
55 unsigned HOST_WIDE_INT,
56 poly_uint64, poly_uint64,
57 machine_mode, rtx, bool, bool);
58 static void store_fixed_bit_field (rtx, opt_scalar_int_mode,
59 unsigned HOST_WIDE_INT,
60 unsigned HOST_WIDE_INT,
61 poly_uint64, poly_uint64,
62 rtx, scalar_int_mode, bool);
63 static void store_fixed_bit_field_1 (rtx, scalar_int_mode,
64 unsigned HOST_WIDE_INT,
65 unsigned HOST_WIDE_INT,
66 rtx, scalar_int_mode, bool);
67 static void store_split_bit_field (rtx, opt_scalar_int_mode,
68 unsigned HOST_WIDE_INT,
69 unsigned HOST_WIDE_INT,
70 poly_uint64, poly_uint64,
71 rtx, scalar_int_mode, bool);
72 static rtx extract_integral_bit_field (rtx, opt_scalar_int_mode,
73 unsigned HOST_WIDE_INT,
74 unsigned HOST_WIDE_INT, int, rtx,
75 machine_mode, machine_mode, bool, bool);
76 static rtx extract_fixed_bit_field (machine_mode, rtx, opt_scalar_int_mode,
77 unsigned HOST_WIDE_INT,
78 unsigned HOST_WIDE_INT, rtx, int, bool);
79 static rtx extract_fixed_bit_field_1 (machine_mode, rtx, scalar_int_mode,
80 unsigned HOST_WIDE_INT,
81 unsigned HOST_WIDE_INT, rtx, int, bool);
82 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
83 static rtx extract_split_bit_field (rtx, opt_scalar_int_mode,
84 unsigned HOST_WIDE_INT,
85 unsigned HOST_WIDE_INT, int, bool);
86 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
87 static rtx expand_smod_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
88 static rtx expand_sdiv_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
90 /* Return a constant integer mask value of mode MODE with BITSIZE ones
91 followed by BITPOS zeros, or the complement of that if COMPLEMENT.
92 The mask is truncated if necessary to the width of mode MODE. The
93 mask is zero-extended if BITSIZE+BITPOS is too small for MODE. */
95 static inline rtx
96 mask_rtx (scalar_int_mode mode, int bitpos, int bitsize, bool complement)
98 return immed_wide_int_const
99 (wi::shifted_mask (bitpos, bitsize, complement,
100 GET_MODE_PRECISION (mode)), mode);
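/* Illustrative example (added commentary, not part of the original source):
   with the semantics documented above, mask_rtx (SImode, 8, 4, false) yields
   the SImode constant 0xf00 -- four one bits sitting above eight zero bits --
   while passing COMPLEMENT = true yields the 32-bit complement 0xfffff0ff.  */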
 103 /* Test whether a value is zero or a power of two. */
104 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
105 (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
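/* Illustrative examples (not part of the original source): the test
   (x & (x - 1)) == 0 accepts x == 0 (0 & ~0 == 0) and exact powers of two
   such as x == 8 (8 & 7 == 0), but rejects values like x == 6, since
   6 & 5 == 4.  */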
107 struct init_expmed_rtl
109 rtx reg;
110 rtx plus;
111 rtx neg;
112 rtx mult;
113 rtx sdiv;
114 rtx udiv;
115 rtx sdiv_32;
116 rtx smod_32;
117 rtx wide_mult;
118 rtx wide_lshr;
119 rtx wide_trunc;
120 rtx shift;
121 rtx shift_mult;
122 rtx shift_add;
123 rtx shift_sub0;
124 rtx shift_sub1;
125 rtx zext;
126 rtx trunc;
128 rtx pow2[MAX_BITS_PER_WORD];
129 rtx cint[MAX_BITS_PER_WORD];
132 static void
133 init_expmed_one_conv (struct init_expmed_rtl *all, scalar_int_mode to_mode,
134 scalar_int_mode from_mode, bool speed)
136 int to_size, from_size;
137 rtx which;
139 to_size = GET_MODE_PRECISION (to_mode);
140 from_size = GET_MODE_PRECISION (from_mode);
 142 /* Most partial integers have a precision less than that of the
 143 "full" integer they require for storage.  In case one doesn't,
 144 reduce its bit size by one for comparison purposes
 145 here. */
146 if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
147 && pow2p_hwi (to_size))
148 to_size --;
149 if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
150 && pow2p_hwi (from_size))
151 from_size --;
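/* For instance (an illustrative reading, not from the original source):
   if a partial mode such as PSImode reported a full 32-bit precision, the
   decrement above makes a conversion from SImode to that partial mode count
   as a truncation, and the reverse direction as an extension, when choosing
   which rtx to cost below.  */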
153 /* Assume cost of zero-extend and sign-extend is the same. */
154 which = (to_size < from_size ? all->trunc : all->zext);
156 PUT_MODE (all->reg, from_mode);
157 set_convert_cost (to_mode, from_mode, speed,
158 set_src_cost (which, to_mode, speed));
159 /* Restore all->reg's mode. */
160 PUT_MODE (all->reg, to_mode);
163 static void
164 init_expmed_one_mode (struct init_expmed_rtl *all,
165 machine_mode mode, int speed)
167 int m, n, mode_bitsize;
168 machine_mode mode_from;
170 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
172 PUT_MODE (all->reg, mode);
173 PUT_MODE (all->plus, mode);
174 PUT_MODE (all->neg, mode);
175 PUT_MODE (all->mult, mode);
176 PUT_MODE (all->sdiv, mode);
177 PUT_MODE (all->udiv, mode);
178 PUT_MODE (all->sdiv_32, mode);
179 PUT_MODE (all->smod_32, mode);
180 PUT_MODE (all->wide_trunc, mode);
181 PUT_MODE (all->shift, mode);
182 PUT_MODE (all->shift_mult, mode);
183 PUT_MODE (all->shift_add, mode);
184 PUT_MODE (all->shift_sub0, mode);
185 PUT_MODE (all->shift_sub1, mode);
186 PUT_MODE (all->zext, mode);
187 PUT_MODE (all->trunc, mode);
189 set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
190 set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
191 set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
192 set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
193 set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
195 set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
196 <= 2 * add_cost (speed, mode)));
197 set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
198 <= 4 * add_cost (speed, mode)));
200 set_shift_cost (speed, mode, 0, 0);
202 int cost = add_cost (speed, mode);
203 set_shiftadd_cost (speed, mode, 0, cost);
204 set_shiftsub0_cost (speed, mode, 0, cost);
205 set_shiftsub1_cost (speed, mode, 0, cost);
208 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
209 for (m = 1; m < n; m++)
211 XEXP (all->shift, 1) = all->cint[m];
212 XEXP (all->shift_mult, 1) = all->pow2[m];
214 set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
215 set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
216 speed));
217 set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
218 speed));
219 set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
220 speed));
223 scalar_int_mode int_mode_to;
224 if (is_a <scalar_int_mode> (mode, &int_mode_to))
226 for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
227 mode_from = (machine_mode)(mode_from + 1))
228 init_expmed_one_conv (all, int_mode_to,
229 as_a <scalar_int_mode> (mode_from), speed);
231 scalar_int_mode wider_mode;
232 if (GET_MODE_CLASS (int_mode_to) == MODE_INT
233 && GET_MODE_WIDER_MODE (int_mode_to).exists (&wider_mode))
235 PUT_MODE (all->reg, mode);
236 PUT_MODE (all->zext, wider_mode);
237 PUT_MODE (all->wide_mult, wider_mode);
238 PUT_MODE (all->wide_lshr, wider_mode);
239 XEXP (all->wide_lshr, 1)
240 = gen_int_shift_amount (wider_mode, mode_bitsize);
242 set_mul_widen_cost (speed, wider_mode,
243 set_src_cost (all->wide_mult, wider_mode, speed));
244 set_mul_highpart_cost (speed, int_mode_to,
245 set_src_cost (all->wide_trunc,
246 int_mode_to, speed));
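/* Note (added commentary, not from the original source): the rtxes costed
   just above model a widening multiply and a highpart multiply.  WIDE_MULT
   is (mult (zero_extend X) (zero_extend Y)) in the wider mode, and
   WIDE_TRUNC truncates that product shifted right by the narrow mode's bit
   size, which is the usual expansion of a highpart multiply.  */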
251 void
252 init_expmed (void)
254 struct init_expmed_rtl all;
255 machine_mode mode = QImode;
256 int m, speed;
258 memset (&all, 0, sizeof all);
259 for (m = 1; m < MAX_BITS_PER_WORD; m++)
261 all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
262 all.cint[m] = GEN_INT (m);
265 /* Avoid using hard regs in ways which may be unsupported. */
266 all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
267 all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
268 all.neg = gen_rtx_NEG (mode, all.reg);
269 all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
270 all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
271 all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
272 all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
273 all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
274 all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
275 all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
276 all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
277 all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
278 all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
279 all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
280 all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
281 all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
282 all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
283 all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
285 for (speed = 0; speed < 2; speed++)
287 crtl->maybe_hot_insn_p = speed;
288 set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
290 for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
291 mode = (machine_mode)(mode + 1))
292 init_expmed_one_mode (&all, mode, speed);
294 if (MIN_MODE_PARTIAL_INT != VOIDmode)
295 for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
296 mode = (machine_mode)(mode + 1))
297 init_expmed_one_mode (&all, mode, speed);
299 if (MIN_MODE_VECTOR_INT != VOIDmode)
300 for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
301 mode = (machine_mode)(mode + 1))
302 init_expmed_one_mode (&all, mode, speed);
305 if (alg_hash_used_p ())
307 struct alg_hash_entry *p = alg_hash_entry_ptr (0);
308 memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
310 else
311 set_alg_hash_used_p (true);
312 default_rtl_profile ();
314 ggc_free (all.trunc);
315 ggc_free (all.shift_sub1);
316 ggc_free (all.shift_sub0);
317 ggc_free (all.shift_add);
318 ggc_free (all.shift_mult);
319 ggc_free (all.shift);
320 ggc_free (all.wide_trunc);
321 ggc_free (all.wide_lshr);
322 ggc_free (all.wide_mult);
323 ggc_free (all.zext);
324 ggc_free (all.smod_32);
325 ggc_free (all.sdiv_32);
326 ggc_free (all.udiv);
327 ggc_free (all.sdiv);
328 ggc_free (all.mult);
329 ggc_free (all.neg);
330 ggc_free (all.plus);
331 ggc_free (all.reg);
334 /* Return an rtx representing minus the value of X.
335 MODE is the intended mode of the result,
336 useful if X is a CONST_INT. */
339 negate_rtx (machine_mode mode, rtx x)
341 rtx result = simplify_unary_operation (NEG, mode, x, mode);
343 if (result == 0)
344 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
346 return result;
349 /* Whether reverse storage order is supported on the target. */
350 static int reverse_storage_order_supported = -1;
352 /* Check whether reverse storage order is supported on the target. */
354 static void
355 check_reverse_storage_order_support (void)
357 if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
359 reverse_storage_order_supported = 0;
360 sorry ("reverse scalar storage order");
362 else
363 reverse_storage_order_supported = 1;
366 /* Whether reverse FP storage order is supported on the target. */
367 static int reverse_float_storage_order_supported = -1;
369 /* Check whether reverse FP storage order is supported on the target. */
371 static void
372 check_reverse_float_storage_order_support (void)
374 if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
376 reverse_float_storage_order_supported = 0;
377 sorry ("reverse floating-point scalar storage order");
379 else
380 reverse_float_storage_order_supported = 1;
383 /* Return an rtx representing value of X with reverse storage order.
384 MODE is the intended mode of the result,
385 useful if X is a CONST_INT. */
388 flip_storage_order (machine_mode mode, rtx x)
390 scalar_int_mode int_mode;
391 rtx result;
393 if (mode == QImode)
394 return x;
396 if (COMPLEX_MODE_P (mode))
398 rtx real = read_complex_part (x, false);
399 rtx imag = read_complex_part (x, true);
401 real = flip_storage_order (GET_MODE_INNER (mode), real);
402 imag = flip_storage_order (GET_MODE_INNER (mode), imag);
404 return gen_rtx_CONCAT (mode, real, imag);
407 if (UNLIKELY (reverse_storage_order_supported < 0))
408 check_reverse_storage_order_support ();
410 if (!is_a <scalar_int_mode> (mode, &int_mode))
412 if (FLOAT_MODE_P (mode)
413 && UNLIKELY (reverse_float_storage_order_supported < 0))
414 check_reverse_float_storage_order_support ();
416 if (!int_mode_for_size (GET_MODE_PRECISION (mode), 0).exists (&int_mode)
417 || !targetm.scalar_mode_supported_p (int_mode))
419 sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
420 return x;
422 x = gen_lowpart (int_mode, x);
425 result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
426 if (result == 0)
427 result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
429 if (int_mode != mode)
430 result = gen_lowpart (mode, result);
432 return result;
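/* Illustrative example (not part of the original source): for a scalar
   integer mode the flip is a byte swap, so an SImode value 0x12345678
   becomes 0x78563412, while QImode values are returned unchanged by the
   early exit above.  */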
435 /* If MODE is set, adjust bitfield memory MEM so that it points to the
436 first unit of mode MODE that contains a bitfield of size BITSIZE at
437 bit position BITNUM. If MODE is not set, return a BLKmode reference
438 to every byte in the bitfield. Set *NEW_BITNUM to the bit position
439 of the field within the new memory. */
441 static rtx
442 narrow_bit_field_mem (rtx mem, opt_scalar_int_mode mode,
443 unsigned HOST_WIDE_INT bitsize,
444 unsigned HOST_WIDE_INT bitnum,
445 unsigned HOST_WIDE_INT *new_bitnum)
447 scalar_int_mode imode;
448 if (mode.exists (&imode))
450 unsigned int unit = GET_MODE_BITSIZE (imode);
451 *new_bitnum = bitnum % unit;
452 HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
453 return adjust_bitfield_address (mem, imode, offset);
455 else
457 *new_bitnum = bitnum % BITS_PER_UNIT;
458 HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
459 HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
460 / BITS_PER_UNIT);
461 return adjust_bitfield_address_size (mem, BLKmode, offset, size);
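/* Worked example (added for illustration, not from the original source):
   with MODE = SImode (32 bits) and BITNUM = 37, the code above sets
   *NEW_BITNUM to 37 % 32 = 5 and advances the address by
   (37 - 5) / BITS_PER_UNIT = 4 bytes.  */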
465 /* The caller wants to perform insertion or extraction PATTERN on a
466 bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
467 BITREGION_START and BITREGION_END are as for store_bit_field
468 and FIELDMODE is the natural mode of the field.
470 Search for a mode that is compatible with the memory access
471 restrictions and (where applicable) with a register insertion or
472 extraction. Return the new memory on success, storing the adjusted
473 bit position in *NEW_BITNUM. Return null otherwise. */
475 static rtx
476 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
477 rtx op0, HOST_WIDE_INT bitsize,
478 HOST_WIDE_INT bitnum,
479 poly_uint64 bitregion_start,
480 poly_uint64 bitregion_end,
481 machine_mode fieldmode,
482 unsigned HOST_WIDE_INT *new_bitnum)
484 bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
485 bitregion_end, MEM_ALIGN (op0),
486 MEM_VOLATILE_P (op0));
487 scalar_int_mode best_mode;
488 if (iter.next_mode (&best_mode))
490 /* We can use a memory in BEST_MODE. See whether this is true for
491 any wider modes. All other things being equal, we prefer to
492 use the widest mode possible because it tends to expose more
493 CSE opportunities. */
494 if (!iter.prefer_smaller_modes ())
496 /* Limit the search to the mode required by the corresponding
497 register insertion or extraction instruction, if any. */
498 scalar_int_mode limit_mode = word_mode;
499 extraction_insn insn;
500 if (get_best_reg_extraction_insn (&insn, pattern,
501 GET_MODE_BITSIZE (best_mode),
502 fieldmode))
503 limit_mode = insn.field_mode;
505 scalar_int_mode wider_mode;
506 while (iter.next_mode (&wider_mode)
507 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
508 best_mode = wider_mode;
510 return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
511 new_bitnum);
513 return NULL_RTX;
516 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
517 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
518 offset is then BITNUM / BITS_PER_UNIT. */
520 static bool
521 lowpart_bit_field_p (poly_uint64 bitnum, poly_uint64 bitsize,
522 machine_mode struct_mode)
524 poly_uint64 regsize = REGMODE_NATURAL_SIZE (struct_mode);
525 if (BYTES_BIG_ENDIAN)
526 return (multiple_p (bitnum, BITS_PER_UNIT)
527 && (known_eq (bitnum + bitsize, GET_MODE_BITSIZE (struct_mode))
528 || multiple_p (bitnum + bitsize,
529 regsize * BITS_PER_UNIT)));
530 else
531 return multiple_p (bitnum, regsize * BITS_PER_UNIT);
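/* Example (illustrative, assuming a little-endian target whose
   REGMODE_NATURAL_SIZE is 8 bytes): a field starting at bit 64 is a lowpart
   because 64 is a multiple of 8 * BITS_PER_UNIT, whereas a field starting
   at bit 32 is not.  */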
534 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
535 containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
536 Return false if the access would touch memory outside the range
537 BITREGION_START to BITREGION_END for conformance to the C++ memory
538 model. */
540 static bool
541 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
542 unsigned HOST_WIDE_INT bitnum,
543 scalar_int_mode fieldmode,
544 poly_uint64 bitregion_start,
545 poly_uint64 bitregion_end)
547 unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
549 /* -fstrict-volatile-bitfields must be enabled and we must have a
550 volatile MEM. */
551 if (!MEM_P (op0)
552 || !MEM_VOLATILE_P (op0)
553 || flag_strict_volatile_bitfields <= 0)
554 return false;
556 /* The bit size must not be larger than the field mode, and
557 the field mode must not be larger than a word. */
558 if (bitsize > modesize || modesize > BITS_PER_WORD)
559 return false;
561 /* Check for cases of unaligned fields that must be split. */
562 if (bitnum % modesize + bitsize > modesize)
563 return false;
565 /* The memory must be sufficiently aligned for a MODESIZE access.
 566 This condition guarantees that the memory access will not
567 touch anything after the end of the structure. */
568 if (MEM_ALIGN (op0) < modesize)
569 return false;
571 /* Check for cases where the C++ memory model applies. */
572 if (maybe_ne (bitregion_end, 0U)
573 && (maybe_lt (bitnum - bitnum % modesize, bitregion_start)
574 || maybe_gt (bitnum - bitnum % modesize + modesize - 1,
575 bitregion_end)))
576 return false;
578 return true;
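/* Example (illustrative, not from the original source): with
   FIELDMODE = SImode, a volatile 8-bit field starting at bit 28 is rejected
   by the splitting check above because 28 % 32 + 8 > 32, while the same
   field starting at bit 8 passes it.  */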
581 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
582 bit number BITNUM can be treated as a simple value of mode MODE.
583 Store the byte offset in *BYTENUM if so. */
585 static bool
586 simple_mem_bitfield_p (rtx op0, poly_uint64 bitsize, poly_uint64 bitnum,
587 machine_mode mode, poly_uint64 *bytenum)
589 return (MEM_P (op0)
590 && multiple_p (bitnum, BITS_PER_UNIT, bytenum)
591 && known_eq (bitsize, GET_MODE_BITSIZE (mode))
592 && (!targetm.slow_unaligned_access (mode, MEM_ALIGN (op0))
593 || (multiple_p (bitnum, GET_MODE_ALIGNMENT (mode))
594 && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
597 /* Try to use instruction INSV to store VALUE into a field of OP0.
598 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is a
599 BLKmode MEM. VALUE_MODE is the mode of VALUE. BITSIZE and BITNUM
600 are as for store_bit_field. */
602 static bool
603 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
604 opt_scalar_int_mode op0_mode,
605 unsigned HOST_WIDE_INT bitsize,
606 unsigned HOST_WIDE_INT bitnum,
607 rtx value, scalar_int_mode value_mode)
609 class expand_operand ops[4];
610 rtx value1;
611 rtx xop0 = op0;
612 rtx_insn *last = get_last_insn ();
613 bool copy_back = false;
615 scalar_int_mode op_mode = insv->field_mode;
616 unsigned int unit = GET_MODE_BITSIZE (op_mode);
617 if (bitsize == 0 || bitsize > unit)
618 return false;
620 if (MEM_P (xop0))
621 /* Get a reference to the first byte of the field. */
622 xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
623 &bitnum);
624 else
626 /* Convert from counting within OP0 to counting in OP_MODE. */
627 if (BYTES_BIG_ENDIAN)
628 bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
630 /* If xop0 is a register, we need it in OP_MODE
631 to make it acceptable to the format of insv. */
632 if (GET_CODE (xop0) == SUBREG)
634 /* If such a SUBREG can't be created, give up. */
635 if (!validate_subreg (op_mode, GET_MODE (SUBREG_REG (xop0)),
636 SUBREG_REG (xop0), SUBREG_BYTE (xop0)))
637 return false;
638 /* We can't just change the mode, because this might clobber op0,
639 and we will need the original value of op0 if insv fails. */
640 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0),
641 SUBREG_BYTE (xop0));
643 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
644 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
647 /* If the destination is a paradoxical subreg such that we need a
648 truncate to the inner mode, perform the insertion on a temporary and
649 truncate the result to the original destination. Note that we can't
650 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
651 X) 0)) is (reg:N X). */
652 if (GET_CODE (xop0) == SUBREG
653 && REG_P (SUBREG_REG (xop0))
654 && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
655 op_mode))
657 rtx tem = gen_reg_rtx (op_mode);
658 emit_move_insn (tem, xop0);
659 xop0 = tem;
660 copy_back = true;
 663 /* There is a similar overflow check at the start of store_bit_field_1,
 664 but it only handles the case where the field lies completely outside
 665 the register.  There are also cases where the field lies only partially
 666 in the register, and we need to adjust BITSIZE for that partial
 667 overflow.  Without this fix, pr48335-2.c would be broken on big-endian
 668 targets that have a bit-insert instruction, such as arm and
 669 aarch64. */
670 if (bitsize + bitnum > unit && bitnum < unit)
672 warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
673 "destination object, data truncated into %wu-bit",
674 bitsize, unit - bitnum);
675 bitsize = unit - bitnum;
678 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
679 "backwards" from the size of the unit we are inserting into.
680 Otherwise, we count bits from the most significant on a
681 BYTES/BITS_BIG_ENDIAN machine. */
683 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
684 bitnum = unit - bitsize - bitnum;
686 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
687 value1 = value;
688 if (value_mode != op_mode)
690 if (GET_MODE_BITSIZE (value_mode) >= bitsize)
692 rtx tmp;
693 /* Optimization: Don't bother really extending VALUE
694 if it has all the bits we will actually use. However,
695 if we must narrow it, be sure we do it correctly. */
697 if (GET_MODE_SIZE (value_mode) < GET_MODE_SIZE (op_mode))
699 tmp = simplify_subreg (op_mode, value1, value_mode, 0);
700 if (! tmp)
701 tmp = simplify_gen_subreg (op_mode,
702 force_reg (value_mode, value1),
703 value_mode, 0);
705 else
707 tmp = gen_lowpart_if_possible (op_mode, value1);
708 if (! tmp)
709 tmp = gen_lowpart (op_mode, force_reg (value_mode, value1));
711 value1 = tmp;
713 else if (CONST_INT_P (value))
714 value1 = gen_int_mode (INTVAL (value), op_mode);
715 else
 716 /* The parse phase is supposed to make VALUE's data type
717 match that of the component reference, which is a type
718 at least as wide as the field; so VALUE should have
719 a mode that corresponds to that type. */
720 gcc_assert (CONSTANT_P (value));
723 create_fixed_operand (&ops[0], xop0);
724 create_integer_operand (&ops[1], bitsize);
725 create_integer_operand (&ops[2], bitnum);
726 create_input_operand (&ops[3], value1, op_mode);
727 if (maybe_expand_insn (insv->icode, 4, ops))
729 if (copy_back)
730 convert_move (op0, xop0, true);
731 return true;
733 delete_insns_since (last);
734 return false;
737 /* A subroutine of store_bit_field, with the same arguments. Return true
738 if the operation could be implemented.
740 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
741 no other way of implementing the operation. If FALLBACK_P is false,
742 return false instead.
 744 If UNDEFINED_P is true then STR_RTX is undefined and may be set using
745 a subreg instead. */
747 static bool
748 store_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
749 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
750 machine_mode fieldmode,
751 rtx value, bool reverse, bool fallback_p, bool undefined_p)
753 rtx op0 = str_rtx;
755 while (GET_CODE (op0) == SUBREG)
757 bitnum += subreg_memory_offset (op0) * BITS_PER_UNIT;
758 op0 = SUBREG_REG (op0);
761 /* No action is needed if the target is a register and if the field
762 lies completely outside that register. This can occur if the source
763 code contains an out-of-bounds access to a small array. */
764 if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
765 return true;
767 /* Use vec_set patterns for inserting parts of vectors whenever
768 available. */
769 machine_mode outermode = GET_MODE (op0);
770 scalar_mode innermode = GET_MODE_INNER (outermode);
771 poly_uint64 pos;
772 if (VECTOR_MODE_P (outermode)
773 && !MEM_P (op0)
774 && optab_handler (vec_set_optab, outermode) != CODE_FOR_nothing
775 && fieldmode == innermode
776 && known_eq (bitsize, GET_MODE_PRECISION (innermode))
777 && multiple_p (bitnum, GET_MODE_PRECISION (innermode), &pos))
779 class expand_operand ops[3];
780 enum insn_code icode = optab_handler (vec_set_optab, outermode);
782 create_fixed_operand (&ops[0], op0);
783 create_input_operand (&ops[1], value, innermode);
784 create_integer_operand (&ops[2], pos);
785 if (maybe_expand_insn (icode, 3, ops))
786 return true;
789 /* If the target is a register, overwriting the entire object, or storing
790 a full-word or multi-word field can be done with just a SUBREG. */
791 if (!MEM_P (op0)
792 && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode)))
794 /* Use the subreg machinery either to narrow OP0 to the required
795 words or to cope with mode punning between equal-sized modes.
796 In the latter case, use subreg on the rhs side, not lhs. */
797 rtx sub;
798 poly_uint64 bytenum;
799 poly_uint64 regsize = REGMODE_NATURAL_SIZE (GET_MODE (op0));
800 if (known_eq (bitnum, 0U)
801 && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
803 sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
804 if (sub)
806 if (reverse)
807 sub = flip_storage_order (GET_MODE (op0), sub);
808 emit_move_insn (op0, sub);
809 return true;
812 else if (multiple_p (bitnum, BITS_PER_UNIT, &bytenum)
813 && (undefined_p
814 || (multiple_p (bitnum, regsize * BITS_PER_UNIT)
815 && multiple_p (bitsize, regsize * BITS_PER_UNIT)))
816 && known_ge (GET_MODE_BITSIZE (GET_MODE (op0)), bitsize))
818 sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0), bytenum);
819 if (sub)
821 if (reverse)
822 value = flip_storage_order (fieldmode, value);
823 emit_move_insn (sub, value);
824 return true;
829 /* If the target is memory, storing any naturally aligned field can be
830 done with a simple store. For targets that support fast unaligned
831 memory, any naturally sized, unit aligned field can be done directly. */
832 poly_uint64 bytenum;
833 if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode, &bytenum))
835 op0 = adjust_bitfield_address (op0, fieldmode, bytenum);
836 if (reverse)
837 value = flip_storage_order (fieldmode, value);
838 emit_move_insn (op0, value);
839 return true;
842 /* It's possible we'll need to handle other cases here for
843 polynomial bitnum and bitsize. */
845 /* From here on we need to be looking at a fixed-size insertion. */
846 unsigned HOST_WIDE_INT ibitsize = bitsize.to_constant ();
847 unsigned HOST_WIDE_INT ibitnum = bitnum.to_constant ();
849 /* Make sure we are playing with integral modes. Pun with subregs
850 if we aren't. This must come after the entire register case above,
851 since that case is valid for any mode. The following cases are only
852 valid for integral modes. */
853 opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
854 scalar_int_mode imode;
855 if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
857 if (MEM_P (op0))
858 op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
859 0, MEM_SIZE (op0));
860 else if (!op0_mode.exists ())
862 if (ibitnum == 0
863 && known_eq (ibitsize, GET_MODE_BITSIZE (GET_MODE (op0)))
864 && MEM_P (value)
865 && !reverse)
867 value = adjust_address (value, GET_MODE (op0), 0);
868 emit_move_insn (op0, value);
869 return true;
871 if (!fallback_p)
872 return false;
873 rtx temp = assign_stack_temp (GET_MODE (op0),
874 GET_MODE_SIZE (GET_MODE (op0)));
875 emit_move_insn (temp, op0);
876 store_bit_field_1 (temp, bitsize, bitnum, 0, 0, fieldmode, value,
877 reverse, fallback_p, undefined_p);
878 emit_move_insn (op0, temp);
879 return true;
881 else
882 op0 = gen_lowpart (op0_mode.require (), op0);
885 return store_integral_bit_field (op0, op0_mode, ibitsize, ibitnum,
886 bitregion_start, bitregion_end,
887 fieldmode, value, reverse, fallback_p);
890 /* Subroutine of store_bit_field_1, with the same arguments, except
891 that BITSIZE and BITNUM are constant. Handle cases specific to
892 integral modes. If OP0_MODE is defined, it is the mode of OP0,
893 otherwise OP0 is a BLKmode MEM. */
895 static bool
896 store_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
897 unsigned HOST_WIDE_INT bitsize,
898 unsigned HOST_WIDE_INT bitnum,
899 poly_uint64 bitregion_start,
900 poly_uint64 bitregion_end,
901 machine_mode fieldmode,
902 rtx value, bool reverse, bool fallback_p)
904 /* Storing an lsb-aligned field in a register
905 can be done with a movstrict instruction. */
907 if (!MEM_P (op0)
908 && !reverse
909 && lowpart_bit_field_p (bitnum, bitsize, op0_mode.require ())
910 && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode))
911 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
913 class expand_operand ops[2];
914 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
915 rtx arg0 = op0;
916 unsigned HOST_WIDE_INT subreg_off;
918 if (GET_CODE (arg0) == SUBREG)
920 /* Else we've got some float mode source being extracted into
921 a different float mode destination -- this combination of
922 subregs results in Severe Tire Damage. */
923 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
924 || GET_MODE_CLASS (fieldmode) == MODE_INT
925 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
926 arg0 = SUBREG_REG (arg0);
929 subreg_off = bitnum / BITS_PER_UNIT;
930 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off)
931 /* STRICT_LOW_PART must have a non-paradoxical subreg as
932 operand. */
933 && !paradoxical_subreg_p (fieldmode, GET_MODE (arg0)))
935 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
937 create_fixed_operand (&ops[0], arg0);
938 /* Shrink the source operand to FIELDMODE. */
939 create_convert_operand_to (&ops[1], value, fieldmode, false);
940 if (maybe_expand_insn (icode, 2, ops))
941 return true;
945 /* Handle fields bigger than a word. */
947 if (bitsize > BITS_PER_WORD)
949 /* Here we transfer the words of the field
950 in the order least significant first.
951 This is because the most significant word is the one which may
952 be less than full.
953 However, only do that if the value is not BLKmode. */
955 const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
956 const int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
957 rtx_insn *last;
959 /* This is the mode we must force value to, so that there will be enough
960 subwords to extract. Note that fieldmode will often (always?) be
961 VOIDmode, because that is what store_field uses to indicate that this
962 is a bit field, but passing VOIDmode to operand_subword_force
963 is not allowed.
965 The mode must be fixed-size, since insertions into variable-sized
966 objects are meant to be handled before calling this function. */
967 fixed_size_mode value_mode = as_a <fixed_size_mode> (GET_MODE (value));
968 if (value_mode == VOIDmode)
969 value_mode = smallest_int_mode_for_size (nwords * BITS_PER_WORD);
971 last = get_last_insn ();
972 for (int i = 0; i < nwords; i++)
974 /* Number of bits to be stored in this iteration, i.e. BITS_PER_WORD
975 except maybe for the last iteration. */
976 const unsigned HOST_WIDE_INT new_bitsize
977 = MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
978 /* Bit offset from the starting bit number in the target. */
979 const unsigned int bit_offset
980 = backwards ^ reverse
981 ? MAX ((int) bitsize - (i + 1) * BITS_PER_WORD, 0)
982 : i * BITS_PER_WORD;
983 /* Starting word number in the value. */
984 const unsigned int wordnum
985 = backwards
986 ? GET_MODE_SIZE (value_mode) / UNITS_PER_WORD - (i + 1)
987 : i;
988 /* The chunk of the value in word_mode. We use bit-field extraction
989 in BLKmode to handle unaligned memory references and to shift the
990 last chunk right on big-endian machines if need be. */
991 rtx value_word
992 = fieldmode == BLKmode
993 ? extract_bit_field (value, new_bitsize, wordnum * BITS_PER_WORD,
994 1, NULL_RTX, word_mode, word_mode, false,
995 NULL)
996 : operand_subword_force (value, wordnum, value_mode);
998 if (!store_bit_field_1 (op0, new_bitsize,
999 bitnum + bit_offset,
1000 bitregion_start, bitregion_end,
1001 word_mode,
1002 value_word, reverse, fallback_p, false))
1004 delete_insns_since (last);
1005 return false;
1008 return true;
1011 /* If VALUE has a floating-point or complex mode, access it as an
1012 integer of the corresponding size. This can occur on a machine
1013 with 64 bit registers that uses SFmode for float. It can also
1014 occur for unaligned float or complex fields. */
1015 rtx orig_value = value;
1016 scalar_int_mode value_mode;
1017 if (GET_MODE (value) == VOIDmode)
1018 /* By this point we've dealt with values that are bigger than a word,
1019 so word_mode is a conservatively correct choice. */
1020 value_mode = word_mode;
1021 else if (!is_a <scalar_int_mode> (GET_MODE (value), &value_mode))
1023 value_mode = int_mode_for_mode (GET_MODE (value)).require ();
1024 value = gen_reg_rtx (value_mode);
1025 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
1028 /* If OP0 is a multi-word register, narrow it to the affected word.
1029 If the region spans two words, defer to store_split_bit_field.
1030 Don't do this if op0 is a single hard register wider than word
1031 such as a float or vector register. */
1032 if (!MEM_P (op0)
1033 && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD
1034 && (!REG_P (op0)
1035 || !HARD_REGISTER_P (op0)
1036 || hard_regno_nregs (REGNO (op0), op0_mode.require ()) != 1))
1038 if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1040 if (!fallback_p)
1041 return false;
1043 store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1044 bitregion_start, bitregion_end,
1045 value, value_mode, reverse);
1046 return true;
1048 op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
1049 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1050 gcc_assert (op0);
1051 op0_mode = word_mode;
1052 bitnum %= BITS_PER_WORD;
1055 /* From here on we can assume that the field to be stored in fits
1056 within a word. If the destination is a register, it too fits
1057 in a word. */
1059 extraction_insn insv;
1060 if (!MEM_P (op0)
1061 && !reverse
1062 && get_best_reg_extraction_insn (&insv, EP_insv,
1063 GET_MODE_BITSIZE (op0_mode.require ()),
1064 fieldmode)
1065 && store_bit_field_using_insv (&insv, op0, op0_mode,
1066 bitsize, bitnum, value, value_mode))
1067 return true;
1069 /* If OP0 is a memory, try copying it to a register and seeing if a
1070 cheap register alternative is available. */
1071 if (MEM_P (op0) && !reverse)
1073 if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
1074 fieldmode)
1075 && store_bit_field_using_insv (&insv, op0, op0_mode,
1076 bitsize, bitnum, value, value_mode))
1077 return true;
1079 rtx_insn *last = get_last_insn ();
1081 /* Try loading part of OP0 into a register, inserting the bitfield
1082 into that, and then copying the result back to OP0. */
1083 unsigned HOST_WIDE_INT bitpos;
1084 rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1085 bitregion_start, bitregion_end,
1086 fieldmode, &bitpos);
1087 if (xop0)
1089 rtx tempreg = copy_to_reg (xop0);
1090 if (store_bit_field_1 (tempreg, bitsize, bitpos,
1091 bitregion_start, bitregion_end,
1092 fieldmode, orig_value, reverse, false, false))
1094 emit_move_insn (xop0, tempreg);
1095 return true;
1097 delete_insns_since (last);
1101 if (!fallback_p)
1102 return false;
1104 store_fixed_bit_field (op0, op0_mode, bitsize, bitnum, bitregion_start,
1105 bitregion_end, value, value_mode, reverse);
1106 return true;
1109 /* Generate code to store value from rtx VALUE
1110 into a bit-field within structure STR_RTX
1111 containing BITSIZE bits starting at bit BITNUM.
1113 BITREGION_START is bitpos of the first bitfield in this region.
1114 BITREGION_END is the bitpos of the ending bitfield in this region.
1115 These two fields are 0, if the C++ memory model does not apply,
1116 or we are not interested in keeping track of bitfield regions.
1118 FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1120 If REVERSE is true, the store is to be done in reverse order.
1122 If UNDEFINED_P is true then STR_RTX is currently undefined. */
1124 void
1125 store_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
1126 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1127 machine_mode fieldmode,
1128 rtx value, bool reverse, bool undefined_p)
1130 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1131 unsigned HOST_WIDE_INT ibitsize = 0, ibitnum = 0;
1132 scalar_int_mode int_mode;
1133 if (bitsize.is_constant (&ibitsize)
1134 && bitnum.is_constant (&ibitnum)
1135 && is_a <scalar_int_mode> (fieldmode, &int_mode)
1136 && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum, int_mode,
1137 bitregion_start, bitregion_end))
1139 /* Storing of a full word can be done with a simple store.
1140 We know here that the field can be accessed with one single
1141 instruction. For targets that support unaligned memory,
1142 an unaligned access may be necessary. */
1143 if (ibitsize == GET_MODE_BITSIZE (int_mode))
1145 str_rtx = adjust_bitfield_address (str_rtx, int_mode,
1146 ibitnum / BITS_PER_UNIT);
1147 if (reverse)
1148 value = flip_storage_order (int_mode, value);
1149 gcc_assert (ibitnum % BITS_PER_UNIT == 0);
1150 emit_move_insn (str_rtx, value);
1152 else
1154 rtx temp;
1156 str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize,
1157 ibitnum, &ibitnum);
1158 gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
1159 temp = copy_to_reg (str_rtx);
1160 if (!store_bit_field_1 (temp, ibitsize, ibitnum, 0, 0,
1161 int_mode, value, reverse, true, undefined_p))
1162 gcc_unreachable ();
1164 emit_move_insn (str_rtx, temp);
1167 return;
1170 /* Under the C++0x memory model, we must not touch bits outside the
1171 bit region. Adjust the address to start at the beginning of the
1172 bit region. */
1173 if (MEM_P (str_rtx) && maybe_ne (bitregion_start, 0U))
1175 scalar_int_mode best_mode;
1176 machine_mode addr_mode = VOIDmode;
1178 poly_uint64 offset = exact_div (bitregion_start, BITS_PER_UNIT);
1179 bitnum -= bitregion_start;
1180 poly_int64 size = bits_to_bytes_round_up (bitnum + bitsize);
1181 bitregion_end -= bitregion_start;
1182 bitregion_start = 0;
1183 if (bitsize.is_constant (&ibitsize)
1184 && bitnum.is_constant (&ibitnum)
1185 && get_best_mode (ibitsize, ibitnum,
1186 bitregion_start, bitregion_end,
1187 MEM_ALIGN (str_rtx), INT_MAX,
1188 MEM_VOLATILE_P (str_rtx), &best_mode))
1189 addr_mode = best_mode;
1190 str_rtx = adjust_bitfield_address_size (str_rtx, addr_mode,
1191 offset, size);
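/* Worked example (added for illustration): if BITREGION_START is 32 and
   BITNUM is 40, the MEM is advanced by 32 / BITS_PER_UNIT = 4 bytes and
   BITNUM becomes 8, so the store can no longer touch bytes in front of the
   bit region.  */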
1194 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1195 bitregion_start, bitregion_end,
1196 fieldmode, value, reverse, true, undefined_p))
1197 gcc_unreachable ();
1200 /* Use shifts and boolean operations to store VALUE into a bit field of
1201 width BITSIZE in OP0, starting at bit BITNUM. If OP0_MODE is defined,
1202 it is the mode of OP0, otherwise OP0 is a BLKmode MEM. VALUE_MODE is
1203 the mode of VALUE.
1205 If REVERSE is true, the store is to be done in reverse order. */
1207 static void
1208 store_fixed_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1209 unsigned HOST_WIDE_INT bitsize,
1210 unsigned HOST_WIDE_INT bitnum,
1211 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1212 rtx value, scalar_int_mode value_mode, bool reverse)
1214 /* There is a case not handled here:
1215 a structure with a known alignment of just a halfword
1216 and a field split across two aligned halfwords within the structure.
1217 Or likewise a structure with a known alignment of just a byte
1218 and a field split across two bytes.
1219 Such cases are not supposed to be able to occur. */
1221 scalar_int_mode best_mode;
1222 if (MEM_P (op0))
1224 unsigned int max_bitsize = BITS_PER_WORD;
1225 scalar_int_mode imode;
1226 if (op0_mode.exists (&imode) && GET_MODE_BITSIZE (imode) < max_bitsize)
1227 max_bitsize = GET_MODE_BITSIZE (imode);
1229 if (!get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1230 MEM_ALIGN (op0), max_bitsize, MEM_VOLATILE_P (op0),
1231 &best_mode))
1233 /* The only way this should occur is if the field spans word
1234 boundaries. */
1235 store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1236 bitregion_start, bitregion_end,
1237 value, value_mode, reverse);
1238 return;
1241 op0 = narrow_bit_field_mem (op0, best_mode, bitsize, bitnum, &bitnum);
1243 else
1244 best_mode = op0_mode.require ();
1246 store_fixed_bit_field_1 (op0, best_mode, bitsize, bitnum,
1247 value, value_mode, reverse);
1250 /* Helper function for store_fixed_bit_field, stores
1251 the bit field always using MODE, which is the mode of OP0. The other
1252 arguments are as for store_fixed_bit_field. */
1254 static void
1255 store_fixed_bit_field_1 (rtx op0, scalar_int_mode mode,
1256 unsigned HOST_WIDE_INT bitsize,
1257 unsigned HOST_WIDE_INT bitnum,
1258 rtx value, scalar_int_mode value_mode, bool reverse)
1260 rtx temp;
1261 int all_zero = 0;
1262 int all_one = 0;
1264 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1265 for invalid input, such as f5 from gcc.dg/pr48335-2.c. */
1267 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1268 /* BITNUM is the distance between our msb
1269 and that of the containing datum.
1270 Convert it to the distance from the lsb. */
1271 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1273 /* Now BITNUM is always the distance between our lsb
1274 and that of OP0. */
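/* Example (illustrative, not from the original source): with MODE = SImode,
   BITSIZE = 5 and a big-endian BITNUM of 3 measured from the msb, the
   conversion above gives an lsb-relative position of 32 - 5 - 3 = 24.  */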
1276 /* Shift VALUE left by BITNUM bits. If VALUE is not constant,
1277 we must first convert its mode to MODE. */
1279 if (CONST_INT_P (value))
1281 unsigned HOST_WIDE_INT v = UINTVAL (value);
1283 if (bitsize < HOST_BITS_PER_WIDE_INT)
1284 v &= (HOST_WIDE_INT_1U << bitsize) - 1;
1286 if (v == 0)
1287 all_zero = 1;
1288 else if ((bitsize < HOST_BITS_PER_WIDE_INT
1289 && v == (HOST_WIDE_INT_1U << bitsize) - 1)
1290 || (bitsize == HOST_BITS_PER_WIDE_INT
1291 && v == HOST_WIDE_INT_M1U))
1292 all_one = 1;
1294 value = lshift_value (mode, v, bitnum);
1296 else
1298 int must_and = (GET_MODE_BITSIZE (value_mode) != bitsize
1299 && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1301 if (value_mode != mode)
1302 value = convert_to_mode (mode, value, 1);
1304 if (must_and)
1305 value = expand_binop (mode, and_optab, value,
1306 mask_rtx (mode, 0, bitsize, 0),
1307 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1308 if (bitnum > 0)
1309 value = expand_shift (LSHIFT_EXPR, mode, value,
1310 bitnum, NULL_RTX, 1);
1313 if (reverse)
1314 value = flip_storage_order (mode, value);
1316 /* Now clear the chosen bits in OP0,
1317 except that if VALUE is -1 we need not bother. */
1318 /* We keep the intermediates in registers to allow CSE to combine
1319 consecutive bitfield assignments. */
1321 temp = force_reg (mode, op0);
1323 if (! all_one)
1325 rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
1326 if (reverse)
1327 mask = flip_storage_order (mode, mask);
1328 temp = expand_binop (mode, and_optab, temp, mask,
1329 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1330 temp = force_reg (mode, temp);
1333 /* Now logical-or VALUE into OP0, unless it is zero. */
1335 if (! all_zero)
1337 temp = expand_binop (mode, ior_optab, temp, value,
1338 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1339 temp = force_reg (mode, temp);
1342 if (op0 != temp)
1344 op0 = copy_rtx (op0);
1345 emit_move_insn (op0, temp);
1349 /* Store a bit field that is split across multiple accessible memory objects.
1351 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1352 BITSIZE is the field width; BITPOS the position of its first bit
1353 (within the word).
1354 VALUE is the value to store, which has mode VALUE_MODE.
1355 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
1356 a BLKmode MEM.
1358 If REVERSE is true, the store is to be done in reverse order.
1360 This does not yet handle fields wider than BITS_PER_WORD. */
1362 static void
1363 store_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1364 unsigned HOST_WIDE_INT bitsize,
1365 unsigned HOST_WIDE_INT bitpos,
1366 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1367 rtx value, scalar_int_mode value_mode, bool reverse)
1369 unsigned int unit, total_bits, bitsdone = 0;
1371 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1372 much at a time. */
1373 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1374 unit = BITS_PER_WORD;
1375 else
1376 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1378 /* If OP0 is a memory with a mode, then UNIT must not be larger than
1379 OP0's mode as well. Otherwise, store_fixed_bit_field will call us
1380 again, and we will mutually recurse forever. */
1381 if (MEM_P (op0) && op0_mode.exists ())
1382 unit = MIN (unit, GET_MODE_BITSIZE (op0_mode.require ()));
1384 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1385 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1386 that VALUE might be a floating-point constant. */
1387 if (CONSTANT_P (value) && !CONST_INT_P (value))
1389 rtx word = gen_lowpart_common (word_mode, value);
1391 if (word && (value != word))
1392 value = word;
1393 else
1394 value = gen_lowpart_common (word_mode, force_reg (value_mode, value));
1395 value_mode = word_mode;
1398 total_bits = GET_MODE_BITSIZE (value_mode);
1400 while (bitsdone < bitsize)
1402 unsigned HOST_WIDE_INT thissize;
1403 unsigned HOST_WIDE_INT thispos;
1404 unsigned HOST_WIDE_INT offset;
1405 rtx part;
1407 offset = (bitpos + bitsdone) / unit;
1408 thispos = (bitpos + bitsdone) % unit;
 1410 /* When the region of bytes we can touch is restricted, decrease
1411 UNIT close to the end of the region as needed. If op0 is a REG
1412 or SUBREG of REG, don't do this, as there can't be data races
1413 on a register and we can expand shorter code in some cases. */
1414 if (maybe_ne (bitregion_end, 0U)
1415 && unit > BITS_PER_UNIT
1416 && maybe_gt (bitpos + bitsdone - thispos + unit, bitregion_end + 1)
1417 && !REG_P (op0)
1418 && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1420 unit = unit / 2;
1421 continue;
1424 /* THISSIZE must not overrun a word boundary. Otherwise,
1425 store_fixed_bit_field will call us again, and we will mutually
1426 recurse forever. */
1427 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1428 thissize = MIN (thissize, unit - thispos);
1430 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1432 /* Fetch successively less significant portions. */
1433 if (CONST_INT_P (value))
1434 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1435 >> (bitsize - bitsdone - thissize))
1436 & ((HOST_WIDE_INT_1 << thissize) - 1));
1437 /* Likewise, but the source is little-endian. */
1438 else if (reverse)
1439 part = extract_fixed_bit_field (word_mode, value, value_mode,
1440 thissize,
1441 bitsize - bitsdone - thissize,
1442 NULL_RTX, 1, false);
1443 else
1444 /* The args are chosen so that the last part includes the
1445 lsb. Give extract_bit_field the value it needs (with
1446 endianness compensation) to fetch the piece we want. */
1447 part = extract_fixed_bit_field (word_mode, value, value_mode,
1448 thissize,
1449 total_bits - bitsize + bitsdone,
1450 NULL_RTX, 1, false);
1452 else
1454 /* Fetch successively more significant portions. */
1455 if (CONST_INT_P (value))
1456 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1457 >> bitsdone)
1458 & ((HOST_WIDE_INT_1 << thissize) - 1));
1459 /* Likewise, but the source is big-endian. */
1460 else if (reverse)
1461 part = extract_fixed_bit_field (word_mode, value, value_mode,
1462 thissize,
1463 total_bits - bitsdone - thissize,
1464 NULL_RTX, 1, false);
1465 else
1466 part = extract_fixed_bit_field (word_mode, value, value_mode,
1467 thissize, bitsdone, NULL_RTX,
1468 1, false);
1471 /* If OP0 is a register, then handle OFFSET here. */
1472 rtx op0_piece = op0;
1473 opt_scalar_int_mode op0_piece_mode = op0_mode;
1474 if (SUBREG_P (op0) || REG_P (op0))
1476 scalar_int_mode imode;
1477 if (op0_mode.exists (&imode)
1478 && GET_MODE_SIZE (imode) < UNITS_PER_WORD)
1480 if (offset)
1481 op0_piece = const0_rtx;
1483 else
1485 op0_piece = operand_subword_force (op0,
1486 offset * unit / BITS_PER_WORD,
1487 GET_MODE (op0));
1488 op0_piece_mode = word_mode;
1490 offset &= BITS_PER_WORD / unit - 1;
 1493 /* OFFSET is in UNITs, and UNIT is in bits. If OP0_PIECE is const0_rtx,
1494 it is just an out-of-bounds access. Ignore it. */
1495 if (op0_piece != const0_rtx)
1496 store_fixed_bit_field (op0_piece, op0_piece_mode, thissize,
1497 offset * unit + thispos, bitregion_start,
1498 bitregion_end, part, word_mode, reverse);
1499 bitsdone += thissize;
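/* Worked example (added for illustration): storing a 10-bit field at
   BITPOS = 28 with UNIT = 32 performs two iterations of the loop above:
   first a 4-bit piece at bit 28 of word 0 (THISSIZE is limited by
   UNIT - THISPOS), then a 6-bit piece at bit 0 of word 1.  */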
1503 /* A subroutine of extract_bit_field_1 that converts return value X
1504 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1505 to extract_bit_field. */
1507 static rtx
1508 convert_extracted_bit_field (rtx x, machine_mode mode,
1509 machine_mode tmode, bool unsignedp)
1511 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1512 return x;
 1514 /* If TMODE is not a scalar integral mode (e.g. it is a float mode),
 1515 first convert X to the integer mode of that size and then access
 1516 the result as TMODE via a SUBREG. */
1517 if (!SCALAR_INT_MODE_P (tmode))
1519 scalar_int_mode int_mode = int_mode_for_mode (tmode).require ();
1520 x = convert_to_mode (int_mode, x, unsignedp);
1521 x = force_reg (int_mode, x);
1522 return gen_lowpart (tmode, x);
1525 return convert_to_mode (tmode, x, unsignedp);
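/* Example (illustrative, not from the original source): when TMODE is
   SFmode, the path above first converts X to the 32-bit integer mode of
   that size and then reinterprets the bits as SFmode via gen_lowpart,
   rather than performing a numeric integer-to-float conversion.  */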
1528 /* Try to use an ext(z)v pattern to extract a field from OP0.
1529 Return the extracted value on success, otherwise return null.
1530 EXTV describes the extraction instruction to use. If OP0_MODE
1531 is defined, it is the mode of OP0, otherwise OP0 is a BLKmode MEM.
1532 The other arguments are as for extract_bit_field. */
1534 static rtx
1535 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1536 opt_scalar_int_mode op0_mode,
1537 unsigned HOST_WIDE_INT bitsize,
1538 unsigned HOST_WIDE_INT bitnum,
1539 int unsignedp, rtx target,
1540 machine_mode mode, machine_mode tmode)
1542 class expand_operand ops[4];
1543 rtx spec_target = target;
1544 rtx spec_target_subreg = 0;
1545 scalar_int_mode ext_mode = extv->field_mode;
1546 unsigned unit = GET_MODE_BITSIZE (ext_mode);
1548 if (bitsize == 0 || unit < bitsize)
1549 return NULL_RTX;
1551 if (MEM_P (op0))
1552 /* Get a reference to the first byte of the field. */
1553 op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1554 &bitnum);
1555 else
1557 /* Convert from counting within OP0 to counting in EXT_MODE. */
1558 if (BYTES_BIG_ENDIAN)
1559 bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
1561 /* If op0 is a register, we need it in EXT_MODE to make it
1562 acceptable to the format of ext(z)v. */
1563 if (GET_CODE (op0) == SUBREG && op0_mode.require () != ext_mode)
1564 return NULL_RTX;
1565 if (REG_P (op0) && op0_mode.require () != ext_mode)
1566 op0 = gen_lowpart_SUBREG (ext_mode, op0);
1569 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1570 "backwards" from the size of the unit we are extracting from.
1571 Otherwise, we count bits from the most significant on a
1572 BYTES/BITS_BIG_ENDIAN machine. */
1574 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1575 bitnum = unit - bitsize - bitnum;
1577 if (target == 0)
1578 target = spec_target = gen_reg_rtx (tmode);
1580 if (GET_MODE (target) != ext_mode)
1582 rtx temp;
1583 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1584 between the mode of the extraction (word_mode) and the target
1585 mode. Instead, create a temporary and use convert_move to set
1586 the target. */
1587 if (REG_P (target)
1588 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode)
1589 && (temp = gen_lowpart_if_possible (ext_mode, target)))
1591 target = temp;
1592 if (partial_subreg_p (GET_MODE (spec_target), ext_mode))
1593 spec_target_subreg = target;
1595 else
1596 target = gen_reg_rtx (ext_mode);
1599 create_output_operand (&ops[0], target, ext_mode);
1600 create_fixed_operand (&ops[1], op0);
1601 create_integer_operand (&ops[2], bitsize);
1602 create_integer_operand (&ops[3], bitnum);
1603 if (maybe_expand_insn (extv->icode, 4, ops))
1605 target = ops[0].value;
1606 if (target == spec_target)
1607 return target;
1608 if (target == spec_target_subreg)
1609 return spec_target;
1610 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1612 return NULL_RTX;
1615 /* See whether it would be valid to extract the part of OP0 with
1616 mode OP0_MODE described by BITNUM and BITSIZE into a value of
1617 mode MODE using a subreg operation.
1618 Return the subreg if so, otherwise return null. */
1620 static rtx
1621 extract_bit_field_as_subreg (machine_mode mode, rtx op0,
1622 machine_mode op0_mode,
1623 poly_uint64 bitsize, poly_uint64 bitnum)
1625 poly_uint64 bytenum;
1626 if (multiple_p (bitnum, BITS_PER_UNIT, &bytenum)
1627 && known_eq (bitsize, GET_MODE_BITSIZE (mode))
1628 && lowpart_bit_field_p (bitnum, bitsize, op0_mode)
1629 && TRULY_NOOP_TRUNCATION_MODES_P (mode, op0_mode))
1630 return simplify_gen_subreg (mode, op0, op0_mode, bytenum);
1631 return NULL_RTX;
1634 /* A subroutine of extract_bit_field, with the same arguments.
1635 If UNSIGNEDP is -1, the result need not be sign or zero extended.
1636 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1637 if we can find no other means of implementing the operation.
1638 if FALLBACK_P is false, return NULL instead. */
1640 static rtx
1641 extract_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
1642 int unsignedp, rtx target, machine_mode mode,
1643 machine_mode tmode, bool reverse, bool fallback_p,
1644 rtx *alt_rtl)
1646 rtx op0 = str_rtx;
1647 machine_mode mode1;
1649 if (tmode == VOIDmode)
1650 tmode = mode;
1652 while (GET_CODE (op0) == SUBREG)
1654 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1655 op0 = SUBREG_REG (op0);
1658 /* If we have an out-of-bounds access to a register, just return an
1659 uninitialized register of the required mode. This can occur if the
1660 source code contains an out-of-bounds access to a small array. */
1661 if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
1662 return gen_reg_rtx (tmode);
1664 if (REG_P (op0)
1665 && mode == GET_MODE (op0)
1666 && known_eq (bitnum, 0U)
1667 && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
1669 if (reverse)
1670 op0 = flip_storage_order (mode, op0);
1671 /* We're trying to extract a full register from itself. */
1672 return op0;
1675 /* First try to check for vector from vector extractions. */
1676 if (VECTOR_MODE_P (GET_MODE (op0))
1677 && !MEM_P (op0)
1678 && VECTOR_MODE_P (tmode)
1679 && known_eq (bitsize, GET_MODE_PRECISION (tmode))
1680 && maybe_gt (GET_MODE_SIZE (GET_MODE (op0)), GET_MODE_SIZE (tmode)))
1682 machine_mode new_mode = GET_MODE (op0);
1683 if (GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode))
1685 scalar_mode inner_mode = GET_MODE_INNER (tmode);
1686 poly_uint64 nunits;
1687 if (!multiple_p (GET_MODE_BITSIZE (GET_MODE (op0)),
1688 GET_MODE_UNIT_BITSIZE (tmode), &nunits)
1689 || !related_vector_mode (tmode, inner_mode,
1690 nunits).exists (&new_mode)
1691 || maybe_ne (GET_MODE_SIZE (new_mode),
1692 GET_MODE_SIZE (GET_MODE (op0))))
1693 new_mode = VOIDmode;
1695 poly_uint64 pos;
1696 if (new_mode != VOIDmode
1697 && (convert_optab_handler (vec_extract_optab, new_mode, tmode)
1698 != CODE_FOR_nothing)
1699 && multiple_p (bitnum, GET_MODE_BITSIZE (tmode), &pos))
1701 class expand_operand ops[3];
1702 machine_mode outermode = new_mode;
1703 machine_mode innermode = tmode;
1704 enum insn_code icode
1705 = convert_optab_handler (vec_extract_optab, outermode, innermode);
1707 if (new_mode != GET_MODE (op0))
1708 op0 = gen_lowpart (new_mode, op0);
1709 create_output_operand (&ops[0], target, innermode);
1710 ops[0].target = 1;
1711 create_input_operand (&ops[1], op0, outermode);
1712 create_integer_operand (&ops[2], pos);
1713 if (maybe_expand_insn (icode, 3, ops))
1715 if (alt_rtl && ops[0].target)
1716 *alt_rtl = target;
1717 target = ops[0].value;
1718 if (GET_MODE (target) != mode)
1719 return gen_lowpart (tmode, target);
1720 return target;
1725 /* See if we can get a better vector mode before extracting. */
1726 if (VECTOR_MODE_P (GET_MODE (op0))
1727 && !MEM_P (op0)
1728 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1730 machine_mode new_mode;
1732 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1733 new_mode = MIN_MODE_VECTOR_FLOAT;
1734 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1735 new_mode = MIN_MODE_VECTOR_FRACT;
1736 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1737 new_mode = MIN_MODE_VECTOR_UFRACT;
1738 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1739 new_mode = MIN_MODE_VECTOR_ACCUM;
1740 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1741 new_mode = MIN_MODE_VECTOR_UACCUM;
1742 else
1743 new_mode = MIN_MODE_VECTOR_INT;
1745 FOR_EACH_MODE_FROM (new_mode, new_mode)
1746 if (known_eq (GET_MODE_SIZE (new_mode), GET_MODE_SIZE (GET_MODE (op0)))
1747 && known_eq (GET_MODE_UNIT_SIZE (new_mode), GET_MODE_SIZE (tmode))
1748 && known_eq (bitsize, GET_MODE_UNIT_PRECISION (new_mode))
1749 && multiple_p (bitnum, GET_MODE_UNIT_PRECISION (new_mode))
1750 && targetm.vector_mode_supported_p (new_mode)
1751 && targetm.modes_tieable_p (GET_MODE (op0), new_mode))
1752 break;
1753 if (new_mode != VOIDmode)
1754 op0 = gen_lowpart (new_mode, op0);
1757 /* Use vec_extract patterns for extracting parts of vectors whenever
1758 available. If that fails, see whether the current modes and bitregion
1759 give a natural subreg. */
1760 machine_mode outermode = GET_MODE (op0);
1761 if (VECTOR_MODE_P (outermode) && !MEM_P (op0))
1763 scalar_mode innermode = GET_MODE_INNER (outermode);
1765 enum insn_code icode
1766 = convert_optab_handler (vec_extract_optab, outermode, innermode);
1768 poly_uint64 pos;
1769 if (icode != CODE_FOR_nothing
1770 && known_eq (bitsize, GET_MODE_PRECISION (innermode))
1771 && multiple_p (bitnum, GET_MODE_PRECISION (innermode), &pos))
1773 class expand_operand ops[3];
1775 create_output_operand (&ops[0], target,
1776 insn_data[icode].operand[0].mode);
1777 ops[0].target = 1;
1778 create_input_operand (&ops[1], op0, outermode);
1779 create_integer_operand (&ops[2], pos);
1780 if (maybe_expand_insn (icode, 3, ops))
1782 if (alt_rtl && ops[0].target)
1783 *alt_rtl = target;
1784 target = ops[0].value;
1785 if (GET_MODE (target) != mode)
1786 return gen_lowpart (tmode, target);
1787 return target;
1790 /* Using subregs is useful if we're extracting one register vector
1791 from a multi-register vector. extract_bit_field_as_subreg checks
1792 for valid bitsize and bitnum, so we don't need to do that here. */
1793 if (VECTOR_MODE_P (mode))
1795 rtx sub = extract_bit_field_as_subreg (mode, op0, outermode,
1796 bitsize, bitnum);
1797 if (sub)
1798 return sub;
1802 /* Make sure we are playing with integral modes. Pun with subregs
1803 if we aren't. */
1804 opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
1805 scalar_int_mode imode;
1806 if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
1808 if (MEM_P (op0))
1809 op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
1810 0, MEM_SIZE (op0));
1811 else if (op0_mode.exists (&imode))
1813 op0 = gen_lowpart (imode, op0);
1815 /* If we got a SUBREG, force it into a register since we
1816 aren't going to be able to do another SUBREG on it. */
1817 if (GET_CODE (op0) == SUBREG)
1818 op0 = force_reg (imode, op0);
1820 else
1822 poly_int64 size = GET_MODE_SIZE (GET_MODE (op0));
1823 rtx mem = assign_stack_temp (GET_MODE (op0), size);
1824 emit_move_insn (mem, op0);
1825 op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1829 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1830 If that's wrong, the solution is to test for it and set TARGET to 0
1831 if needed. */
1833 /* Get the mode of the field to use for atomic access or subreg
1834 conversion. */
1835 if (!SCALAR_INT_MODE_P (tmode)
1836 || !mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0).exists (&mode1))
1837 mode1 = mode;
1838 gcc_assert (mode1 != BLKmode);
1840 /* Extraction of a full MODE1 value can be done with a subreg as long
1841 as the least significant bit of the value is the least significant
1842 bit of either OP0 or a word of OP0. */
1843 if (!MEM_P (op0) && !reverse && op0_mode.exists (&imode))
1845 rtx sub = extract_bit_field_as_subreg (mode1, op0, imode,
1846 bitsize, bitnum);
1847 if (sub)
1848 return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1851 /* Extraction of a full MODE1 value can be done with a load as long as
1852 the field is on a byte boundary and is sufficiently aligned. */
1853 poly_uint64 bytenum;
1854 if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1, &bytenum))
1856 op0 = adjust_bitfield_address (op0, mode1, bytenum);
1857 if (reverse)
1858 op0 = flip_storage_order (mode1, op0);
1859 return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1862 /* If we have a memory source and a non-constant bit offset, restrict
1863 the memory to the referenced bytes. This is a worst-case fallback
1864 but is useful for things like vector booleans. */
1865 if (MEM_P (op0) && !bitnum.is_constant ())
1867 bytenum = bits_to_bytes_round_down (bitnum);
1868 bitnum = num_trailing_bits (bitnum);
1869 poly_uint64 bytesize = bits_to_bytes_round_up (bitnum + bitsize);
1870 op0 = adjust_bitfield_address_size (op0, BLKmode, bytenum, bytesize);
1871 op0_mode = opt_scalar_int_mode ();
1874 /* It's possible we'll need to handle other cases here for
1875 polynomial bitnum and bitsize. */
1877   /* From here on we need to be looking at a fixed-size extraction.  */
1878 return extract_integral_bit_field (op0, op0_mode, bitsize.to_constant (),
1879 bitnum.to_constant (), unsignedp,
1880 target, mode, tmode, reverse, fallback_p);
1883 /* Subroutine of extract_bit_field_1, with the same arguments, except
1884 that BITSIZE and BITNUM are constant. Handle cases specific to
1885 integral modes. If OP0_MODE is defined, it is the mode of OP0,
1886 otherwise OP0 is a BLKmode MEM. */
1888 static rtx
1889 extract_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1890 unsigned HOST_WIDE_INT bitsize,
1891 unsigned HOST_WIDE_INT bitnum, int unsignedp,
1892 rtx target, machine_mode mode, machine_mode tmode,
1893 bool reverse, bool fallback_p)
1895 /* Handle fields bigger than a word. */
1897 if (bitsize > BITS_PER_WORD)
1899 /* Here we transfer the words of the field
1900 in the order least significant first.
1901 This is because the most significant word is the one which may
1902 be less than full. */
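      /* Worked example: with 32-bit words, a 40-bit field is transferred as
	 two pieces: the full low-order 32 bits first, then the remaining
	 8 bits of the partial high-order word.  */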
1904 const bool backwards = WORDS_BIG_ENDIAN;
1905 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1906 unsigned int i;
1907 rtx_insn *last;
1909 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1910 target = gen_reg_rtx (mode);
1912 /* In case we're about to clobber a base register or something
1913 (see gcc.c-torture/execute/20040625-1.c). */
1914 if (reg_mentioned_p (target, op0))
1915 target = gen_reg_rtx (mode);
1917 /* Indicate for flow that the entire target reg is being set. */
1918 emit_clobber (target);
1920 /* The mode must be fixed-size, since extract_bit_field_1 handles
1921 extractions from variable-sized objects before calling this
1922 function. */
1923 unsigned int target_size
1924 = GET_MODE_SIZE (GET_MODE (target)).to_constant ();
1925 last = get_last_insn ();
1926 for (i = 0; i < nwords; i++)
1928 /* If I is 0, use the low-order word in both field and target;
1929 if I is 1, use the next to lowest word; and so on. */
1930 /* Word number in TARGET to use. */
1931 unsigned int wordnum
1932 = (backwards ? target_size / UNITS_PER_WORD - i - 1 : i);
1933 /* Offset from start of field in OP0. */
1934 unsigned int bit_offset = (backwards ^ reverse
1935 ? MAX ((int) bitsize - ((int) i + 1)
1936 * BITS_PER_WORD,
1938 : (int) i * BITS_PER_WORD);
1939 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1940 rtx result_part
1941 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1942 bitsize - i * BITS_PER_WORD),
1943 bitnum + bit_offset,
1944 (unsignedp ? 1 : -1), target_part,
1945 mode, word_mode, reverse, fallback_p, NULL);
1947 gcc_assert (target_part);
1948 if (!result_part)
1950 delete_insns_since (last);
1951 return NULL;
1954 if (result_part != target_part)
1955 emit_move_insn (target_part, result_part);
1958 if (unsignedp)
1960 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1961 need to be zero'd out. */
1962 if (target_size > nwords * UNITS_PER_WORD)
1964 unsigned int i, total_words;
1966 total_words = target_size / UNITS_PER_WORD;
1967 for (i = nwords; i < total_words; i++)
1968 emit_move_insn
1969 (operand_subword (target,
1970 backwards ? total_words - i - 1 : i,
1971 1, VOIDmode),
1972 const0_rtx);
1974 return target;
1977 /* Signed bit field: sign-extend with two arithmetic shifts. */
1978 target = expand_shift (LSHIFT_EXPR, mode, target,
1979 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1980 return expand_shift (RSHIFT_EXPR, mode, target,
1981 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1984 /* If OP0 is a multi-word register, narrow it to the affected word.
1985 If the region spans two words, defer to extract_split_bit_field. */
1986 if (!MEM_P (op0) && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD)
1988 if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1990 if (!fallback_p)
1991 return NULL_RTX;
1992 target = extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
1993 unsignedp, reverse);
1994 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1996 /* If OP0 is a hard register, copy it to a pseudo before calling
1997 simplify_gen_subreg. */
1998 if (REG_P (op0) && HARD_REGISTER_P (op0))
1999 op0 = copy_to_reg (op0);
2000 op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
2001 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
2002 op0_mode = word_mode;
2003 bitnum %= BITS_PER_WORD;
2006 /* From here on we know the desired field is smaller than a word.
2007 If OP0 is a register, it too fits within a word. */
2008 enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
2009 extraction_insn extv;
2010 if (!MEM_P (op0)
2011 && !reverse
2012 /* ??? We could limit the structure size to the part of OP0 that
2013 contains the field, with appropriate checks for endianness
2014 and TARGET_TRULY_NOOP_TRUNCATION. */
2015 && get_best_reg_extraction_insn (&extv, pattern,
2016 GET_MODE_BITSIZE (op0_mode.require ()),
2017 tmode))
2019 rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
2020 bitsize, bitnum,
2021 unsignedp, target, mode,
2022 tmode);
2023 if (result)
2024 return result;
2027 /* If OP0 is a memory, try copying it to a register and seeing if a
2028 cheap register alternative is available. */
2029   if (MEM_P (op0) && !reverse)
2031 if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
2032 tmode))
2034 rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
2035 bitsize, bitnum,
2036 unsignedp, target, mode,
2037 tmode);
2038 if (result)
2039 return result;
2042 rtx_insn *last = get_last_insn ();
2044 /* Try loading part of OP0 into a register and extracting the
2045 bitfield from that. */
2046 unsigned HOST_WIDE_INT bitpos;
2047 rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
2048 0, 0, tmode, &bitpos);
2049 if (xop0)
2051 xop0 = copy_to_reg (xop0);
2052 rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
2053 unsignedp, target,
2054 mode, tmode, reverse, false, NULL);
2055 if (result)
2056 return result;
2057 delete_insns_since (last);
2061 if (!fallback_p)
2062 return NULL;
2064 /* Find a correspondingly-sized integer field, so we can apply
2065 shifts and masks to it. */
2066 scalar_int_mode int_mode;
2067 if (!int_mode_for_mode (tmode).exists (&int_mode))
2068 /* If this fails, we should probably push op0 out to memory and then
2069 do a load. */
2070 int_mode = int_mode_for_mode (mode).require ();
2072 target = extract_fixed_bit_field (int_mode, op0, op0_mode, bitsize,
2073 bitnum, target, unsignedp, reverse);
2075 /* Complex values must be reversed piecewise, so we need to undo the global
2076 reversal, convert to the complex mode and reverse again. */
2077 if (reverse && COMPLEX_MODE_P (tmode))
2079 target = flip_storage_order (int_mode, target);
2080 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2081 target = flip_storage_order (tmode, target);
2083 else
2084 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2086 return target;
2089 /* Generate code to extract a bit-field from STR_RTX
2090 containing BITSIZE bits, starting at BITNUM,
2091 and put it in TARGET if possible (if TARGET is nonzero).
2092 Regardless of TARGET, we return the rtx for where the value is placed.
2094 STR_RTX is the structure containing the byte (a REG or MEM).
2095 UNSIGNEDP is nonzero if this is an unsigned bit field.
2096 MODE is the natural mode of the field value once extracted.
2097 TMODE is the mode the caller would like the value to have;
2098 but the value may be returned with type MODE instead.
2100 If REVERSE is true, the extraction is to be done in reverse order.
2102 If a TARGET is specified and we can store in it at no extra cost,
2103 we do so, and return TARGET.
2104 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
2105 if they are equally easy.
2107 If the result can be stored at TARGET, and ALT_RTL is non-NULL,
2108    then *ALT_RTL is set to TARGET (before legitimization).  */
2111 extract_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
2112 int unsignedp, rtx target, machine_mode mode,
2113 machine_mode tmode, bool reverse, rtx *alt_rtl)
2115 machine_mode mode1;
2117 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
2118 if (maybe_ne (GET_MODE_BITSIZE (GET_MODE (str_rtx)), 0))
2119 mode1 = GET_MODE (str_rtx);
2120 else if (target && maybe_ne (GET_MODE_BITSIZE (GET_MODE (target)), 0))
2121 mode1 = GET_MODE (target);
2122 else
2123 mode1 = tmode;
2125 unsigned HOST_WIDE_INT ibitsize, ibitnum;
2126 scalar_int_mode int_mode;
2127 if (bitsize.is_constant (&ibitsize)
2128 && bitnum.is_constant (&ibitnum)
2129 && is_a <scalar_int_mode> (mode1, &int_mode)
2130 && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum,
2131 int_mode, 0, 0))
2133 /* Extraction of a full INT_MODE value can be done with a simple load.
2134 We know here that the field can be accessed with one single
2135 instruction. For targets that support unaligned memory,
2136 an unaligned access may be necessary. */
2137 if (ibitsize == GET_MODE_BITSIZE (int_mode))
2139 rtx result = adjust_bitfield_address (str_rtx, int_mode,
2140 ibitnum / BITS_PER_UNIT);
2141 if (reverse)
2142 result = flip_storage_order (int_mode, result);
2143 gcc_assert (ibitnum % BITS_PER_UNIT == 0);
2144 return convert_extracted_bit_field (result, mode, tmode, unsignedp);
2147 str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize, ibitnum,
2148 &ibitnum);
2149 gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
2150 str_rtx = copy_to_reg (str_rtx);
2151 return extract_bit_field_1 (str_rtx, ibitsize, ibitnum, unsignedp,
2152 target, mode, tmode, reverse, true, alt_rtl);
2155 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
2156 target, mode, tmode, reverse, true, alt_rtl);
2159 /* Use shifts and boolean operations to extract a field of BITSIZE bits
2160 from bit BITNUM of OP0. If OP0_MODE is defined, it is the mode of OP0,
2161 otherwise OP0 is a BLKmode MEM.
2163 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
2164 If REVERSE is true, the extraction is to be done in reverse order.
2166 If TARGET is nonzero, attempts to store the value there
2167 and return TARGET, but this is not guaranteed.
2168 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
2170 static rtx
2171 extract_fixed_bit_field (machine_mode tmode, rtx op0,
2172 opt_scalar_int_mode op0_mode,
2173 unsigned HOST_WIDE_INT bitsize,
2174 unsigned HOST_WIDE_INT bitnum, rtx target,
2175 int unsignedp, bool reverse)
2177 scalar_int_mode mode;
2178 if (MEM_P (op0))
2180 if (!get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
2181 BITS_PER_WORD, MEM_VOLATILE_P (op0), &mode))
2182 /* The only way this should occur is if the field spans word
2183 boundaries. */
2184 return extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
2185 unsignedp, reverse);
2187 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
2189 else
2190 mode = op0_mode.require ();
2192 return extract_fixed_bit_field_1 (tmode, op0, mode, bitsize, bitnum,
2193 target, unsignedp, reverse);
2196 /* Helper function for extract_fixed_bit_field, extracts
2197 the bit field always using MODE, which is the mode of OP0.
2198 If UNSIGNEDP is -1, the result need not be sign or zero extended.
2199 The other arguments are as for extract_fixed_bit_field. */
2201 static rtx
2202 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0, scalar_int_mode mode,
2203 unsigned HOST_WIDE_INT bitsize,
2204 unsigned HOST_WIDE_INT bitnum, rtx target,
2205 int unsignedp, bool reverse)
2207 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
2208 for invalid input, such as extract equivalent of f5 from
2209 gcc.dg/pr48335-2.c. */
2211 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2212 /* BITNUM is the distance between our msb and that of OP0.
2213 Convert it to the distance from the lsb. */
2214 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
2216 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
2217 We have reduced the big-endian case to the little-endian case. */
2218 if (reverse)
2219 op0 = flip_storage_order (mode, op0);
2221 if (unsignedp)
2223 if (bitnum)
2225 /* If the field does not already start at the lsb,
2226 shift it so it does. */
2227 /* Maybe propagate the target for the shift. */
2228 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2229 if (tmode != mode)
2230 subtarget = 0;
2231 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
2233 /* Convert the value to the desired mode. TMODE must also be a
2234 scalar integer for this conversion to make sense, since we
2235 shouldn't reinterpret the bits. */
2236 scalar_int_mode new_mode = as_a <scalar_int_mode> (tmode);
2237 if (mode != new_mode)
2238 op0 = convert_to_mode (new_mode, op0, 1);
2240 /* Unless the msb of the field used to be the msb when we shifted,
2241 mask out the upper bits. */
2243 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize
2244 && unsignedp != -1)
2245 return expand_binop (new_mode, and_optab, op0,
2246 mask_rtx (new_mode, 0, bitsize, 0),
2247 target, 1, OPTAB_LIB_WIDEN);
2248 return op0;
2251 /* To extract a signed bit-field, first shift its msb to the msb of the word,
2252 then arithmetic-shift its lsb to the lsb of the word. */
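  /* For example, with BITSIZE = 5 and BITNUM = 1 the narrowest containing
     integer mode found below is the 8-bit one: the left shift by
     8 - (5 + 1) = 2 puts the field's msb in bit 7, and the arithmetic right
     shift by 8 - 5 = 3 leaves the sign-extended field in bits 0..4.  */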
2253 op0 = force_reg (mode, op0);
2255 /* Find the narrowest integer mode that contains the field. */
2257 opt_scalar_int_mode mode_iter;
2258 FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT)
2259 if (GET_MODE_BITSIZE (mode_iter.require ()) >= bitsize + bitnum)
2260 break;
2262 mode = mode_iter.require ();
2263 op0 = convert_to_mode (mode, op0, 0);
2265 if (mode != tmode)
2266 target = 0;
2268 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2270 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2271 /* Maybe propagate the target for the shift. */
2272 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2273 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2276 return expand_shift (RSHIFT_EXPR, mode, op0,
2277 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
2280 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2281 VALUE << BITPOS. */
2283 static rtx
2284 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2285 int bitpos)
2287 return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2290 /* Extract a bit field that is split across two words
2291 and return an RTX for the result.
2293 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2294 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2295    UNSIGNEDP is 1 if we should zero-extend the contents; else sign-extend.
2296 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
2297 a BLKmode MEM.
2299 If REVERSE is true, the extraction is to be done in reverse order. */
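/* For example, with 32-bit words a 10-bit field starting at BITPOS 28 is
   read as 4 bits from the first word and 6 bits from the second; when
   !BYTES_BIG_ENDIAN and !REVERSE the second part is shifted left by 4
   before the two parts are IORed together.  */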
2301 static rtx
2302 extract_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
2303 unsigned HOST_WIDE_INT bitsize,
2304 unsigned HOST_WIDE_INT bitpos, int unsignedp,
2305 bool reverse)
2307 unsigned int unit;
2308 unsigned int bitsdone = 0;
2309 rtx result = NULL_RTX;
2310 int first = 1;
2312   /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
2313      much at a time.  */
2314 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2315 unit = BITS_PER_WORD;
2316 else
2317 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2319 while (bitsdone < bitsize)
2321 unsigned HOST_WIDE_INT thissize;
2322 rtx part;
2323 unsigned HOST_WIDE_INT thispos;
2324 unsigned HOST_WIDE_INT offset;
2326 offset = (bitpos + bitsdone) / unit;
2327 thispos = (bitpos + bitsdone) % unit;
2329 /* THISSIZE must not overrun a word boundary. Otherwise,
2330 extract_fixed_bit_field will call us again, and we will mutually
2331 recurse forever. */
2332 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2333 thissize = MIN (thissize, unit - thispos);
2335 /* If OP0 is a register, then handle OFFSET here. */
2336 rtx op0_piece = op0;
2337 opt_scalar_int_mode op0_piece_mode = op0_mode;
2338 if (SUBREG_P (op0) || REG_P (op0))
2340 op0_piece = operand_subword_force (op0, offset, op0_mode.require ());
2341 op0_piece_mode = word_mode;
2342 offset = 0;
2345 /* Extract the parts in bit-counting order,
2346 whose meaning is determined by BYTES_PER_UNIT.
2347 OFFSET is in UNITs, and UNIT is in bits. */
2348 part = extract_fixed_bit_field (word_mode, op0_piece, op0_piece_mode,
2349 thissize, offset * unit + thispos,
2350 0, 1, reverse);
2351 bitsdone += thissize;
2353 /* Shift this part into place for the result. */
2354 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2356 if (bitsize != bitsdone)
2357 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2358 bitsize - bitsdone, 0, 1);
2360 else
2362 if (bitsdone != thissize)
2363 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2364 bitsdone - thissize, 0, 1);
2367 if (first)
2368 result = part;
2369 else
2370 /* Combine the parts with bitwise or. This works
2371 because we extracted each part as an unsigned bit field. */
2372 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2373 OPTAB_LIB_WIDEN);
2375 first = 0;
2378 /* Unsigned bit field: we are done. */
2379 if (unsignedp)
2380 return result;
2381 /* Signed bit field: sign-extend with two arithmetic shifts. */
2382 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2383 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2384 return expand_shift (RSHIFT_EXPR, word_mode, result,
2385 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2388 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2389 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2390 MODE, fill the upper bits with zeros. Fail if the layout of either
2391 mode is unknown (as for CC modes) or if the extraction would involve
2392 unprofitable mode punning. Return the value on success, otherwise
2393 return null.
2395 This is different from gen_lowpart* in these respects:
2397 - the returned value must always be considered an rvalue
2399 - when MODE is wider than SRC_MODE, the extraction involves
2400 a zero extension
2402 - when MODE is smaller than SRC_MODE, the extraction involves
2403 a truncation (and is thus subject to TARGET_TRULY_NOOP_TRUNCATION).
2405 In other words, this routine performs a computation, whereas the
2406 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2407 operations. */
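/* For example, on a target where SImode is 32 bits and DImode is 64 bits,
   extract_low_bits (DImode, SImode, x) zero-extends the 32-bit pattern to
   64 bits, while extract_low_bits (SImode, DImode, x) keeps only the low
   32 bits and is therefore subject to TARGET_TRULY_NOOP_TRUNCATION.  */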
2410 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2412 scalar_int_mode int_mode, src_int_mode;
2414 if (mode == src_mode)
2415 return src;
2417 if (CONSTANT_P (src))
2419 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2420 fails, it will happily create (subreg (symbol_ref)) or similar
2421 invalid SUBREGs. */
2422 poly_uint64 byte = subreg_lowpart_offset (mode, src_mode);
2423 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2424 if (ret)
2425 return ret;
2427 if (GET_MODE (src) == VOIDmode
2428 || !validate_subreg (mode, src_mode, src, byte))
2429 return NULL_RTX;
2431 src = force_reg (GET_MODE (src), src);
2432 return gen_rtx_SUBREG (mode, src, byte);
2435 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2436 return NULL_RTX;
2438 if (known_eq (GET_MODE_BITSIZE (mode), GET_MODE_BITSIZE (src_mode))
2439 && targetm.modes_tieable_p (mode, src_mode))
2441 rtx x = gen_lowpart_common (mode, src);
2442 if (x)
2443 return x;
2446 if (!int_mode_for_mode (src_mode).exists (&src_int_mode)
2447 || !int_mode_for_mode (mode).exists (&int_mode))
2448 return NULL_RTX;
2450 if (!targetm.modes_tieable_p (src_int_mode, src_mode))
2451 return NULL_RTX;
2452 if (!targetm.modes_tieable_p (int_mode, mode))
2453 return NULL_RTX;
2455 src = gen_lowpart (src_int_mode, src);
2456 if (!validate_subreg (int_mode, src_int_mode, src,
2457 subreg_lowpart_offset (int_mode, src_int_mode)))
2458 return NULL_RTX;
2460 src = convert_modes (int_mode, src_int_mode, src, true);
2461 src = gen_lowpart (mode, src);
2462 return src;
2465 /* Add INC into TARGET. */
2467 void
2468 expand_inc (rtx target, rtx inc)
2470 rtx value = expand_binop (GET_MODE (target), add_optab,
2471 target, inc,
2472 target, 0, OPTAB_LIB_WIDEN);
2473 if (value != target)
2474 emit_move_insn (target, value);
2477 /* Subtract DEC from TARGET. */
2479 void
2480 expand_dec (rtx target, rtx dec)
2482 rtx value = expand_binop (GET_MODE (target), sub_optab,
2483 target, dec,
2484 target, 0, OPTAB_LIB_WIDEN);
2485 if (value != target)
2486 emit_move_insn (target, value);
2489 /* Output a shift instruction for expression code CODE,
2490 with SHIFTED being the rtx for the value to shift,
2491 and AMOUNT the rtx for the amount to shift by.
2492 Store the result in the rtx TARGET, if that is convenient.
2493 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2494 Return the rtx for where the value is.
2495 If that cannot be done, abort the compilation unless MAY_FAIL is true,
2496 in which case 0 is returned. */
2498 static rtx
2499 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2500 rtx amount, rtx target, int unsignedp, bool may_fail = false)
2502 rtx op1, temp = 0;
2503 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2504 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2505 optab lshift_optab = ashl_optab;
2506 optab rshift_arith_optab = ashr_optab;
2507 optab rshift_uns_optab = lshr_optab;
2508 optab lrotate_optab = rotl_optab;
2509 optab rrotate_optab = rotr_optab;
2510 machine_mode op1_mode;
2511 scalar_mode scalar_mode = GET_MODE_INNER (mode);
2512 int attempt;
2513 bool speed = optimize_insn_for_speed_p ();
2515 op1 = amount;
2516 op1_mode = GET_MODE (op1);
2518   /* Determine whether the shift/rotate amount is a vector or a scalar.  If the
2519 shift amount is a vector, use the vector/vector shift patterns. */
2520 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2522 lshift_optab = vashl_optab;
2523 rshift_arith_optab = vashr_optab;
2524 rshift_uns_optab = vlshr_optab;
2525 lrotate_optab = vrotl_optab;
2526 rrotate_optab = vrotr_optab;
2529 /* Previously detected shift-counts computed by NEGATE_EXPR
2530 and shifted in the other direction; but that does not work
2531 on all machines. */
2533 if (SHIFT_COUNT_TRUNCATED)
2535 if (CONST_INT_P (op1)
2536 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2537 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2538 op1 = gen_int_shift_amount (mode,
2539 (unsigned HOST_WIDE_INT) INTVAL (op1)
2540 % GET_MODE_BITSIZE (scalar_mode));
2541 else if (GET_CODE (op1) == SUBREG
2542 && subreg_lowpart_p (op1)
2543 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2544 && SCALAR_INT_MODE_P (GET_MODE (op1)))
2545 op1 = SUBREG_REG (op1);
2548   /* Canonicalize rotates by a constant amount.  We may canonicalize
2549      to reduce the immediate or if the ISA can rotate by constants
2550      in only one direction.  */
2551 if (rotate && reverse_rotate_by_imm_p (scalar_mode, left, op1))
2553 op1 = gen_int_shift_amount (mode, (GET_MODE_BITSIZE (scalar_mode)
2554 - INTVAL (op1)));
2555 left = !left;
2556 code = left ? LROTATE_EXPR : RROTATE_EXPR;
2559   /* Rotation of 16-bit values by 8 bits is effectively equivalent to a bswaphi.
2560 Note that this is not the case for bigger values. For instance a rotation
2561 of 0x01020304 by 16 bits gives 0x03040102 which is different from
2562 0x04030201 (bswapsi). */
2563 if (rotate
2564 && CONST_INT_P (op1)
2565 && INTVAL (op1) == BITS_PER_UNIT
2566 && GET_MODE_SIZE (scalar_mode) == 2
2567 && optab_handler (bswap_optab, mode) != CODE_FOR_nothing)
2568 return expand_unop (mode, bswap_optab, shifted, NULL_RTX, unsignedp);
2570 if (op1 == const0_rtx)
2571 return shifted;
2573   /* Check whether it's cheaper to implement a left shift by a constant
2574 bit count by a sequence of additions. */
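  /* For instance, x << 2 can be emitted as two self-additions
     (t = x + x; t = t + t); the loop below does this when
     shift_cost (speed, mode, 2) exceeds 2 * add_cost (speed, mode).  */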
2575 if (code == LSHIFT_EXPR
2576 && CONST_INT_P (op1)
2577 && INTVAL (op1) > 0
2578 && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2579 && INTVAL (op1) < MAX_BITS_PER_WORD
2580 && (shift_cost (speed, mode, INTVAL (op1))
2581 > INTVAL (op1) * add_cost (speed, mode))
2582 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2584 int i;
2585 for (i = 0; i < INTVAL (op1); i++)
2587 temp = force_reg (mode, shifted);
2588 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2589 unsignedp, OPTAB_LIB_WIDEN);
2591 return shifted;
2594 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2596 enum optab_methods methods;
2598 if (attempt == 0)
2599 methods = OPTAB_DIRECT;
2600 else if (attempt == 1)
2601 methods = OPTAB_WIDEN;
2602 else
2603 methods = OPTAB_LIB_WIDEN;
2605 if (rotate)
2607 /* Widening does not work for rotation. */
2608 if (methods == OPTAB_WIDEN)
2609 continue;
2610 else if (methods == OPTAB_LIB_WIDEN)
2612 /* If we have been unable to open-code this by a rotation,
2613 do it as the IOR of two shifts. I.e., to rotate A
2614 by N bits, compute
2615 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2616 where C is the bitsize of A.
2618 It is theoretically possible that the target machine might
2619 not be able to perform either shift and hence we would
2620 be making two libcalls rather than just the one for the
2621 shift (similarly if IOR could not be done). We will allow
2622 this extremely unlikely lossage to avoid complicating the
2623 code below. */
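	     /* For example, with a 32-bit mode and N = 8 this computes
		(A << 8) | ((unsigned) A >> 24), since (-8) & 31 == 24; for a
		variable N the NEG/AND sequence built below produces the same
		complementary shift count.  */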
2625 rtx subtarget = target == shifted ? 0 : target;
2626 rtx new_amount, other_amount;
2627 rtx temp1;
2629 new_amount = op1;
2630 if (op1 == const0_rtx)
2631 return shifted;
2632 else if (CONST_INT_P (op1))
2633 other_amount = gen_int_shift_amount
2634 (mode, GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2635 else
2637 other_amount
2638 = simplify_gen_unary (NEG, GET_MODE (op1),
2639 op1, GET_MODE (op1));
2640 HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2641 other_amount
2642 = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2643 gen_int_mode (mask, GET_MODE (op1)));
2646 shifted = force_reg (mode, shifted);
2648 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2649 mode, shifted, new_amount, 0, 1);
2650 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2651 mode, shifted, other_amount,
2652 subtarget, 1);
2653 return expand_binop (mode, ior_optab, temp, temp1, target,
2654 unsignedp, methods);
2657 temp = expand_binop (mode,
2658 left ? lrotate_optab : rrotate_optab,
2659 shifted, op1, target, unsignedp, methods);
2661 else if (unsignedp)
2662 temp = expand_binop (mode,
2663 left ? lshift_optab : rshift_uns_optab,
2664 shifted, op1, target, unsignedp, methods);
2666 /* Do arithmetic shifts.
2667 Also, if we are going to widen the operand, we can just as well
2668 use an arithmetic right-shift instead of a logical one. */
2669 if (temp == 0 && ! rotate
2670 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2672 enum optab_methods methods1 = methods;
2674 /* If trying to widen a log shift to an arithmetic shift,
2675 don't accept an arithmetic shift of the same size. */
2676 if (unsignedp)
2677 methods1 = OPTAB_MUST_WIDEN;
2679 /* Arithmetic shift */
2681 temp = expand_binop (mode,
2682 left ? lshift_optab : rshift_arith_optab,
2683 shifted, op1, target, unsignedp, methods1);
2686 /* We used to try extzv here for logical right shifts, but that was
2687 only useful for one machine, the VAX, and caused poor code
2688 generation there for lshrdi3, so the code was deleted and a
2689 define_expand for lshrsi3 was added to vax.md. */
2692 gcc_assert (temp != NULL_RTX || may_fail);
2693 return temp;
2696 /* Output a shift instruction for expression code CODE,
2697 with SHIFTED being the rtx for the value to shift,
2698 and AMOUNT the amount to shift by.
2699 Store the result in the rtx TARGET, if that is convenient.
2700 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2701 Return the rtx for where the value is. */
2704 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2705 poly_int64 amount, rtx target, int unsignedp)
2707 return expand_shift_1 (code, mode, shifted,
2708 gen_int_shift_amount (mode, amount),
2709 target, unsignedp);
2712 /* Likewise, but return 0 if that cannot be done. */
2715 maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2716 int amount, rtx target, int unsignedp)
2718 return expand_shift_1 (code, mode,
2719 shifted, GEN_INT (amount), target, unsignedp, true);
2722 /* Output a shift instruction for expression code CODE,
2723 with SHIFTED being the rtx for the value to shift,
2724 and AMOUNT the tree for the amount to shift by.
2725 Store the result in the rtx TARGET, if that is convenient.
2726 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2727 Return the rtx for where the value is. */
2730 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2731 tree amount, rtx target, int unsignedp)
2733 return expand_shift_1 (code, mode,
2734 shifted, expand_normal (amount), target, unsignedp);
2738 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2739 const struct mult_cost *, machine_mode mode);
2740 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2741 const struct algorithm *, enum mult_variant);
2742 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2743 static rtx extract_high_half (scalar_int_mode, rtx);
2744 static rtx expmed_mult_highpart (scalar_int_mode, rtx, rtx, rtx, int, int);
2745 static rtx expmed_mult_highpart_optab (scalar_int_mode, rtx, rtx, rtx,
2746 int, int);
2747 /* Compute and return the best algorithm for multiplying by T.
2748    The algorithm must cost less than COST_LIMIT.
2749    If retval.cost >= COST_LIMIT, no algorithm was found and all
2750    other fields of the returned struct are undefined.
2751 MODE is the machine mode of the multiplication. */
2753 static void
2754 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2755 const struct mult_cost *cost_limit, machine_mode mode)
2757 int m;
2758 struct algorithm *alg_in, *best_alg;
2759 struct mult_cost best_cost;
2760 struct mult_cost new_limit;
2761 int op_cost, op_latency;
2762 unsigned HOST_WIDE_INT orig_t = t;
2763 unsigned HOST_WIDE_INT q;
2764 int maxm, hash_index;
2765 bool cache_hit = false;
2766 enum alg_code cache_alg = alg_zero;
2767 bool speed = optimize_insn_for_speed_p ();
2768 scalar_int_mode imode;
2769 struct alg_hash_entry *entry_ptr;
2771 /* Indicate that no algorithm is yet found. If no algorithm
2772 is found, this value will be returned and indicate failure. */
2773 alg_out->cost.cost = cost_limit->cost + 1;
2774 alg_out->cost.latency = cost_limit->latency + 1;
2776 if (cost_limit->cost < 0
2777 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2778 return;
2780 /* Be prepared for vector modes. */
2781 imode = as_a <scalar_int_mode> (GET_MODE_INNER (mode));
2783 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2785 /* Restrict the bits of "t" to the multiplication's mode. */
2786 t &= GET_MODE_MASK (imode);
2788 /* t == 1 can be done in zero cost. */
2789 if (t == 1)
2791 alg_out->ops = 1;
2792 alg_out->cost.cost = 0;
2793 alg_out->cost.latency = 0;
2794 alg_out->op[0] = alg_m;
2795 return;
2798 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2799 fail now. */
2800 if (t == 0)
2802 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2803 return;
2804 else
2806 alg_out->ops = 1;
2807 alg_out->cost.cost = zero_cost (speed);
2808 alg_out->cost.latency = zero_cost (speed);
2809 alg_out->op[0] = alg_zero;
2810 return;
2814 /* We'll be needing a couple extra algorithm structures now. */
2816 alg_in = XALLOCA (struct algorithm);
2817 best_alg = XALLOCA (struct algorithm);
2818 best_cost = *cost_limit;
2820 /* Compute the hash index. */
2821 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2823 /* See if we already know what to do for T. */
2824 entry_ptr = alg_hash_entry_ptr (hash_index);
2825 if (entry_ptr->t == t
2826 && entry_ptr->mode == mode
2827 && entry_ptr->speed == speed
2828 && entry_ptr->alg != alg_unknown)
2830 cache_alg = entry_ptr->alg;
2832 if (cache_alg == alg_impossible)
2834 /* The cache tells us that it's impossible to synthesize
2835 multiplication by T within entry_ptr->cost. */
2836 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2837 /* COST_LIMIT is at least as restrictive as the one
2838 recorded in the hash table, in which case we have no
2839 hope of synthesizing a multiplication. Just
2840 return. */
2841 return;
2843 /* If we get here, COST_LIMIT is less restrictive than the
2844 one recorded in the hash table, so we may be able to
2845 synthesize a multiplication. Proceed as if we didn't
2846 have the cache entry. */
2848 else
2850 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2851 /* The cached algorithm shows that this multiplication
2852 requires more cost than COST_LIMIT. Just return. This
2853 way, we don't clobber this cache entry with
2854 alg_impossible but retain useful information. */
2855 return;
2857 cache_hit = true;
2859 switch (cache_alg)
2861 case alg_shift:
2862 goto do_alg_shift;
2864 case alg_add_t_m2:
2865 case alg_sub_t_m2:
2866 goto do_alg_addsub_t_m2;
2868 case alg_add_factor:
2869 case alg_sub_factor:
2870 goto do_alg_addsub_factor;
2872 case alg_add_t2_m:
2873 goto do_alg_add_t2_m;
2875 case alg_sub_t2_m:
2876 goto do_alg_sub_t2_m;
2878 default:
2879 gcc_unreachable ();
2884 /* If we have a group of zero bits at the low-order part of T, try
2885 multiplying by the remaining bits and then doing a shift. */
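  /* For example, T = 40 (binary 101000) has m = 3 trailing zero bits and
     q = 5, so we synthesize the multiplication by 5 (e.g. (x << 2) + x)
     and then shift the result left by 3.  */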
2887 if ((t & 1) == 0)
2889 do_alg_shift:
2890 m = ctz_or_zero (t); /* m = number of low zero bits */
2891 if (m < maxm)
2893 q = t >> m;
2894 /* The function expand_shift will choose between a shift and
2895 a sequence of additions, so the observed cost is given as
2896 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2897 op_cost = m * add_cost (speed, mode);
2898 if (shift_cost (speed, mode, m) < op_cost)
2899 op_cost = shift_cost (speed, mode, m);
2900 new_limit.cost = best_cost.cost - op_cost;
2901 new_limit.latency = best_cost.latency - op_cost;
2902 synth_mult (alg_in, q, &new_limit, mode);
2904 alg_in->cost.cost += op_cost;
2905 alg_in->cost.latency += op_cost;
2906 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2908 best_cost = alg_in->cost;
2909 std::swap (alg_in, best_alg);
2910 best_alg->log[best_alg->ops] = m;
2911 best_alg->op[best_alg->ops] = alg_shift;
2914 /* See if treating ORIG_T as a signed number yields a better
2915 sequence. Try this sequence only for a negative ORIG_T
2916 as it would be useless for a non-negative ORIG_T. */
2917 if ((HOST_WIDE_INT) orig_t < 0)
2919 /* Shift ORIG_T as follows because a right shift of a
2920 negative-valued signed type is implementation
2921 defined. */
2922 q = ~(~orig_t >> m);
2923 /* The function expand_shift will choose between a shift
2924 and a sequence of additions, so the observed cost is
2925 given as MIN (m * add_cost(speed, mode),
2926 shift_cost(speed, mode, m)). */
2927 op_cost = m * add_cost (speed, mode);
2928 if (shift_cost (speed, mode, m) < op_cost)
2929 op_cost = shift_cost (speed, mode, m);
2930 new_limit.cost = best_cost.cost - op_cost;
2931 new_limit.latency = best_cost.latency - op_cost;
2932 synth_mult (alg_in, q, &new_limit, mode);
2934 alg_in->cost.cost += op_cost;
2935 alg_in->cost.latency += op_cost;
2936 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2938 best_cost = alg_in->cost;
2939 std::swap (alg_in, best_alg);
2940 best_alg->log[best_alg->ops] = m;
2941 best_alg->op[best_alg->ops] = alg_shift;
2945 if (cache_hit)
2946 goto done;
2949 /* If we have an odd number, add or subtract one. */
2950 if ((t & 1) != 0)
2952 unsigned HOST_WIDE_INT w;
2954 do_alg_addsub_t_m2:
2955 for (w = 1; (w & t) != 0; w <<= 1)
2957       /* If T was -1, then W will be zero after the loop.  This is another
2958	  case where T ends with ...111.  Handling it via (T + 1) followed by
2959	  a subtraction produces slightly better code and makes algorithm
2960	  selection much faster than treating it like the ...0111 case
2961	  below.  */
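      /* For example, T = 7 ends in three 1 bits, so W becomes 8; the branch
	 below then synthesizes T + 1 = 8 (a single shift) and subtracts the
	 operand once, i.e. x * 7 = (x << 3) - x.  */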
2962 if (w == 0
2963 || (w > 2
2964 /* Reject the case where t is 3.
2965 Thus we prefer addition in that case. */
2966 && t != 3))
2968 /* T ends with ...111. Multiply by (T + 1) and subtract T. */
2970 op_cost = add_cost (speed, mode);
2971 new_limit.cost = best_cost.cost - op_cost;
2972 new_limit.latency = best_cost.latency - op_cost;
2973 synth_mult (alg_in, t + 1, &new_limit, mode);
2975 alg_in->cost.cost += op_cost;
2976 alg_in->cost.latency += op_cost;
2977 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2979 best_cost = alg_in->cost;
2980 std::swap (alg_in, best_alg);
2981 best_alg->log[best_alg->ops] = 0;
2982 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2985 else
2987 /* T ends with ...01 or ...011. Multiply by (T - 1) and add T. */
2989 op_cost = add_cost (speed, mode);
2990 new_limit.cost = best_cost.cost - op_cost;
2991 new_limit.latency = best_cost.latency - op_cost;
2992 synth_mult (alg_in, t - 1, &new_limit, mode);
2994 alg_in->cost.cost += op_cost;
2995 alg_in->cost.latency += op_cost;
2996 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2998 best_cost = alg_in->cost;
2999 std::swap (alg_in, best_alg);
3000 best_alg->log[best_alg->ops] = 0;
3001 best_alg->op[best_alg->ops] = alg_add_t_m2;
3005 /* We may be able to calculate a * -7, a * -15, a * -31, etc
3006 quickly with a - a * n for some appropriate constant n. */
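      /* For example, for ORIG_T = -7 we have -ORIG_T + 1 = 8, so m = 3 and
	 the resulting sequence is a - (a << 3).  */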
3007 m = exact_log2 (-orig_t + 1);
3008 if (m >= 0 && m < maxm)
3010 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3011 /* If the target has a cheap shift-and-subtract insn use
3012 that in preference to a shift insn followed by a sub insn.
3013 Assume that the shift-and-sub is "atomic" with a latency
3014	  equal to its cost; otherwise assume that on superscalar
3015 hardware the shift may be executed concurrently with the
3016 earlier steps in the algorithm. */
3017 if (shiftsub1_cost (speed, mode, m) <= op_cost)
3019 op_cost = shiftsub1_cost (speed, mode, m);
3020 op_latency = op_cost;
3022 else
3023 op_latency = add_cost (speed, mode);
3025 new_limit.cost = best_cost.cost - op_cost;
3026 new_limit.latency = best_cost.latency - op_latency;
3027 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
3028 &new_limit, mode);
3030 alg_in->cost.cost += op_cost;
3031 alg_in->cost.latency += op_latency;
3032 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3034 best_cost = alg_in->cost;
3035 std::swap (alg_in, best_alg);
3036 best_alg->log[best_alg->ops] = m;
3037 best_alg->op[best_alg->ops] = alg_sub_t_m2;
3041 if (cache_hit)
3042 goto done;
3045 /* Look for factors of t of the form
3046 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
3047    If we find such a factor, we can multiply by t using an algorithm that
3048    multiplies by q, shifts the result left by m, and adds or subtracts it to or from itself.
3050 We search for large factors first and loop down, even if large factors
3051 are less probable than small; if we find a large factor we will find a
3052 good sequence quickly, and therefore be able to prune (by decreasing
3053 COST_LIMIT) the search. */
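  /* For example, t = 45 factors as 3 * (2**4 - 1), so one candidate is
     u = (x << 1) + x (i.e. x * 3) followed by (u << 4) - u; whether it is
     chosen depends on the target's shift and add cost tables.  */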
3055 do_alg_addsub_factor:
3056 for (m = floor_log2 (t - 1); m >= 2; m--)
3058 unsigned HOST_WIDE_INT d;
3060 d = (HOST_WIDE_INT_1U << m) + 1;
3061 if (t % d == 0 && t > d && m < maxm
3062 && (!cache_hit || cache_alg == alg_add_factor))
3064 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3065 if (shiftadd_cost (speed, mode, m) <= op_cost)
3066 op_cost = shiftadd_cost (speed, mode, m);
3068 op_latency = op_cost;
3071 new_limit.cost = best_cost.cost - op_cost;
3072 new_limit.latency = best_cost.latency - op_latency;
3073 synth_mult (alg_in, t / d, &new_limit, mode);
3075 alg_in->cost.cost += op_cost;
3076 alg_in->cost.latency += op_latency;
3077 if (alg_in->cost.latency < op_cost)
3078 alg_in->cost.latency = op_cost;
3079 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3081 best_cost = alg_in->cost;
3082 std::swap (alg_in, best_alg);
3083 best_alg->log[best_alg->ops] = m;
3084 best_alg->op[best_alg->ops] = alg_add_factor;
3086 /* Other factors will have been taken care of in the recursion. */
3087 break;
3090 d = (HOST_WIDE_INT_1U << m) - 1;
3091 if (t % d == 0 && t > d && m < maxm
3092 && (!cache_hit || cache_alg == alg_sub_factor))
3094 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3095 if (shiftsub0_cost (speed, mode, m) <= op_cost)
3096 op_cost = shiftsub0_cost (speed, mode, m);
3098 op_latency = op_cost;
3100 new_limit.cost = best_cost.cost - op_cost;
3101 new_limit.latency = best_cost.latency - op_latency;
3102 synth_mult (alg_in, t / d, &new_limit, mode);
3104 alg_in->cost.cost += op_cost;
3105 alg_in->cost.latency += op_latency;
3106 if (alg_in->cost.latency < op_cost)
3107 alg_in->cost.latency = op_cost;
3108 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3110 best_cost = alg_in->cost;
3111 std::swap (alg_in, best_alg);
3112 best_alg->log[best_alg->ops] = m;
3113 best_alg->op[best_alg->ops] = alg_sub_factor;
3115 break;
3118 if (cache_hit)
3119 goto done;
3121 /* Try shift-and-add (load effective address) instructions,
3122 i.e. do a*3, a*5, a*9. */
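  /* For example, a * 9 becomes (a << 3) + a, which many targets can do in a
     single scaled-index address calculation.  */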
3123 if ((t & 1) != 0)
3125 do_alg_add_t2_m:
3126 q = t - 1;
3127 m = ctz_hwi (q);
3128 if (q && m < maxm)
3130 op_cost = shiftadd_cost (speed, mode, m);
3131 new_limit.cost = best_cost.cost - op_cost;
3132 new_limit.latency = best_cost.latency - op_cost;
3133 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
3135 alg_in->cost.cost += op_cost;
3136 alg_in->cost.latency += op_cost;
3137 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3139 best_cost = alg_in->cost;
3140 std::swap (alg_in, best_alg);
3141 best_alg->log[best_alg->ops] = m;
3142 best_alg->op[best_alg->ops] = alg_add_t2_m;
3145 if (cache_hit)
3146 goto done;
3148 do_alg_sub_t2_m:
3149 q = t + 1;
3150 m = ctz_hwi (q);
3151 if (q && m < maxm)
3153 op_cost = shiftsub0_cost (speed, mode, m);
3154 new_limit.cost = best_cost.cost - op_cost;
3155 new_limit.latency = best_cost.latency - op_cost;
3156 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
3158 alg_in->cost.cost += op_cost;
3159 alg_in->cost.latency += op_cost;
3160 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3162 best_cost = alg_in->cost;
3163 std::swap (alg_in, best_alg);
3164 best_alg->log[best_alg->ops] = m;
3165 best_alg->op[best_alg->ops] = alg_sub_t2_m;
3168 if (cache_hit)
3169 goto done;
3172 done:
3173 /* If best_cost has not decreased, we have not found any algorithm. */
3174 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
3176 /* We failed to find an algorithm. Record alg_impossible for
3177 this case (that is, <T, MODE, COST_LIMIT>) so that next time
3178 we are asked to find an algorithm for T within the same or
3179 lower COST_LIMIT, we can immediately return to the
3180 caller. */
3181 entry_ptr->t = t;
3182 entry_ptr->mode = mode;
3183 entry_ptr->speed = speed;
3184 entry_ptr->alg = alg_impossible;
3185 entry_ptr->cost = *cost_limit;
3186 return;
3189 /* Cache the result. */
3190 if (!cache_hit)
3192 entry_ptr->t = t;
3193 entry_ptr->mode = mode;
3194 entry_ptr->speed = speed;
3195 entry_ptr->alg = best_alg->op[best_alg->ops];
3196 entry_ptr->cost.cost = best_cost.cost;
3197 entry_ptr->cost.latency = best_cost.latency;
3200   /* If we are getting too long a sequence for `struct algorithm'
3201      to record, make this search fail.  */
3202 if (best_alg->ops == MAX_BITS_PER_WORD)
3203 return;
3205 /* Copy the algorithm from temporary space to the space at alg_out.
3206 We avoid using structure assignment because the majority of
3207 best_alg is normally undefined, and this is a critical function. */
3208 alg_out->ops = best_alg->ops + 1;
3209 alg_out->cost = best_cost;
3210 memcpy (alg_out->op, best_alg->op,
3211 alg_out->ops * sizeof *alg_out->op);
3212 memcpy (alg_out->log, best_alg->log,
3213 alg_out->ops * sizeof *alg_out->log);
3216 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
3217 Try three variations:
3219 - a shift/add sequence based on VAL itself
3220 - a shift/add sequence based on -VAL, followed by a negation
3221 - a shift/add sequence based on VAL - 1, followed by an addition.
3223    Return true if the cheapest of these costs less than MULT_COST,
3224 describing the algorithm in *ALG and final fixup in *VARIANT. */
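/* For example, for VAL = -5 the candidates are a direct sequence for -5,
   (x << 2) + x followed by a negation (the -VAL variant), and a sequence
   for -6 followed by an addition; which one wins depends on the target's
   shift, add and negate costs.  */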
3226 bool
3227 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
3228 struct algorithm *alg, enum mult_variant *variant,
3229 int mult_cost)
3231 struct algorithm alg2;
3232 struct mult_cost limit;
3233 int op_cost;
3234 bool speed = optimize_insn_for_speed_p ();
3236 /* Fail quickly for impossible bounds. */
3237 if (mult_cost < 0)
3238 return false;
3240 /* Ensure that mult_cost provides a reasonable upper bound.
3241 Any constant multiplication can be performed with less
3242 than 2 * bits additions. */
3243 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
3244 if (mult_cost > op_cost)
3245 mult_cost = op_cost;
3247 *variant = basic_variant;
3248 limit.cost = mult_cost;
3249 limit.latency = mult_cost;
3250 synth_mult (alg, val, &limit, mode);
3252 /* This works only if the inverted value actually fits in an
3253      `unsigned int'.  */
3254 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
3256 op_cost = neg_cost (speed, mode);
3257 if (MULT_COST_LESS (&alg->cost, mult_cost))
3259 limit.cost = alg->cost.cost - op_cost;
3260 limit.latency = alg->cost.latency - op_cost;
3262 else
3264 limit.cost = mult_cost - op_cost;
3265 limit.latency = mult_cost - op_cost;
3268 synth_mult (&alg2, -val, &limit, mode);
3269 alg2.cost.cost += op_cost;
3270 alg2.cost.latency += op_cost;
3271 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3272 *alg = alg2, *variant = negate_variant;
3275 /* This proves very useful for division-by-constant. */
3276 op_cost = add_cost (speed, mode);
3277 if (MULT_COST_LESS (&alg->cost, mult_cost))
3279 limit.cost = alg->cost.cost - op_cost;
3280 limit.latency = alg->cost.latency - op_cost;
3282 else
3284 limit.cost = mult_cost - op_cost;
3285 limit.latency = mult_cost - op_cost;
3288 if (val != HOST_WIDE_INT_MIN
3289 || GET_MODE_UNIT_PRECISION (mode) == HOST_BITS_PER_WIDE_INT)
3291 synth_mult (&alg2, val - HOST_WIDE_INT_1U, &limit, mode);
3292 alg2.cost.cost += op_cost;
3293 alg2.cost.latency += op_cost;
3294 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3295 *alg = alg2, *variant = add_variant;
3298 return MULT_COST_LESS (&alg->cost, mult_cost);
3301 /* A subroutine of expand_mult, used for constant multiplications.
3302 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3303 convenient. Use the shift/add sequence described by ALG and apply
3304 the final fixup specified by VARIANT. */
3306 static rtx
3307 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
3308 rtx target, const struct algorithm *alg,
3309 enum mult_variant variant)
3311 unsigned HOST_WIDE_INT val_so_far;
3312 rtx_insn *insn;
3313 rtx accum, tem;
3314 int opno;
3315 machine_mode nmode;
3317 /* Avoid referencing memory over and over and invalid sharing
3318 on SUBREGs. */
3319 op0 = force_reg (mode, op0);
3321 /* ACCUM starts out either as OP0 or as a zero, depending on
3322 the first operation. */
3324 if (alg->op[0] == alg_zero)
3326 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3327 val_so_far = 0;
3329 else if (alg->op[0] == alg_m)
3331 accum = copy_to_mode_reg (mode, op0);
3332 val_so_far = 1;
3334 else
3335 gcc_unreachable ();
3337 for (opno = 1; opno < alg->ops; opno++)
3339 int log = alg->log[opno];
3340 rtx shift_subtarget = optimize ? 0 : accum;
3341 rtx add_target
3342 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3343 && !optimize)
3344 ? target : 0;
3345 rtx accum_target = optimize ? 0 : accum;
3346 rtx accum_inner;
3348 switch (alg->op[opno])
3350 case alg_shift:
3351 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3352 /* REG_EQUAL note will be attached to the following insn. */
3353 emit_move_insn (accum, tem);
3354 val_so_far <<= log;
3355 break;
3357 case alg_add_t_m2:
3358 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3359 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3360 add_target ? add_target : accum_target);
3361 val_so_far += HOST_WIDE_INT_1U << log;
3362 break;
3364 case alg_sub_t_m2:
3365 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3366 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3367 add_target ? add_target : accum_target);
3368 val_so_far -= HOST_WIDE_INT_1U << log;
3369 break;
3371 case alg_add_t2_m:
3372 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3373 log, shift_subtarget, 0);
3374 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3375 add_target ? add_target : accum_target);
3376 val_so_far = (val_so_far << log) + 1;
3377 break;
3379 case alg_sub_t2_m:
3380 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3381 log, shift_subtarget, 0);
3382 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3383 add_target ? add_target : accum_target);
3384 val_so_far = (val_so_far << log) - 1;
3385 break;
3387 case alg_add_factor:
3388 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3389 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3390 add_target ? add_target : accum_target);
3391 val_so_far += val_so_far << log;
3392 break;
3394 case alg_sub_factor:
3395 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3396 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3397 (add_target
3398 ? add_target : (optimize ? 0 : tem)));
3399 val_so_far = (val_so_far << log) - val_so_far;
3400 break;
3402 default:
3403 gcc_unreachable ();
3406 if (SCALAR_INT_MODE_P (mode))
3408 /* Write a REG_EQUAL note on the last insn so that we can cse
3409 multiplication sequences. Note that if ACCUM is a SUBREG,
3410 we've set the inner register and must properly indicate that. */
3411 tem = op0, nmode = mode;
3412 accum_inner = accum;
3413 if (GET_CODE (accum) == SUBREG)
3415 accum_inner = SUBREG_REG (accum);
3416 nmode = GET_MODE (accum_inner);
3417 tem = gen_lowpart (nmode, op0);
3420 /* Don't add a REG_EQUAL note if tem is a paradoxical SUBREG.
3421 In that case, only the low bits of accum would be guaranteed to
3422	     be equal to the content of the REG_EQUAL note; the upper bits
3423 can be anything. */
3424 if (!paradoxical_subreg_p (tem))
3426 insn = get_last_insn ();
3427 wide_int wval_so_far
3428 = wi::uhwi (val_so_far,
3429 GET_MODE_PRECISION (as_a <scalar_mode> (nmode)));
3430 rtx c = immed_wide_int_const (wval_so_far, nmode);
3431 set_dst_reg_note (insn, REG_EQUAL, gen_rtx_MULT (nmode, tem, c),
3432 accum_inner);
3437 if (variant == negate_variant)
3439 val_so_far = -val_so_far;
3440 accum = expand_unop (mode, neg_optab, accum, target, 0);
3442 else if (variant == add_variant)
3444 val_so_far = val_so_far + 1;
3445 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3448 /* Compare only the bits of val and val_so_far that are significant
3449 in the result mode, to avoid sign-/zero-extension confusion. */
3450 nmode = GET_MODE_INNER (mode);
3451 val &= GET_MODE_MASK (nmode);
3452 val_so_far &= GET_MODE_MASK (nmode);
3453 gcc_assert (val == (HOST_WIDE_INT) val_so_far);
3455 return accum;
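/* Illustrative sketch, not part of the expansion itself: one chain
   synth_mult can produce for a constant such as 10 is alg_m,
   alg_add_t2_m (log 2), alg_shift (log 1), which the loop above turns
   into straight-line RTL equivalent to

       accum = x;                  alg_m,         val_so_far = 1
       accum = (accum << 2) + x;   alg_add_t2_m,  val_so_far = 5
       accum = accum << 1;         alg_shift,     val_so_far = 10

   The REG_EQUAL note written above records accum == x * 10 so that
   later passes can CSE the whole sequence, and the final assertion
   checks that VAL_SO_FAR really reached VAL.  */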
3458 /* Perform a multiplication and return an rtx for the result.
3459 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3460 TARGET is a suggestion for where to store the result (an rtx).
3462 We check specially for a constant integer as OP1.
3463 If you want this check for OP0 as well, then before calling
3464 you should swap the two operands if OP0 would be constant. */
3467 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3468 int unsignedp, bool no_libcall)
3470 enum mult_variant variant;
3471 struct algorithm algorithm;
3472 rtx scalar_op1;
3473 int max_cost;
3474 bool speed = optimize_insn_for_speed_p ();
3475 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3477 if (CONSTANT_P (op0))
3478 std::swap (op0, op1);
3480 /* For vectors, there are several simplifications that can be made if
3481 all elements of the vector constant are identical. */
3482 scalar_op1 = unwrap_const_vec_duplicate (op1);
3484 if (INTEGRAL_MODE_P (mode))
3486 rtx fake_reg;
3487 HOST_WIDE_INT coeff;
3488 bool is_neg;
3489 int mode_bitsize;
3491 if (op1 == CONST0_RTX (mode))
3492 return op1;
3493 if (op1 == CONST1_RTX (mode))
3494 return op0;
3495 if (op1 == CONSTM1_RTX (mode))
3496 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3497 op0, target, 0);
3499 if (do_trapv)
3500 goto skip_synth;
3502 /* If mode is an integer vector mode, check if the backend supports
3503 vector lshift (by scalar or vector) at all. If not, we can't use
3504 a synthesized multiply. */
3505 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3506 && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3507 && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3508 goto skip_synth;
3510 /* These are the operations that are potentially turned into
3511 a sequence of shifts and additions. */
3512 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3514 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3515 less than or equal in size to `unsigned int' this doesn't matter.
3516 If the mode is larger than `unsigned int', then synth_mult works
3517 only if the constant value exactly fits in an `unsigned int' without
3518 any truncation. This means that multiplying by negative values does
3519 not work; results are off by 2^32 on a 32 bit machine. */
3520 if (CONST_INT_P (scalar_op1))
3522 coeff = INTVAL (scalar_op1);
3523 is_neg = coeff < 0;
3525 #if TARGET_SUPPORTS_WIDE_INT
3526 else if (CONST_WIDE_INT_P (scalar_op1))
3527 #else
3528 else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3529 #endif
3531 int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode));
3532 /* Perfect power of 2 (other than 1, which is handled above). */
3533 if (shift > 0)
3534 return expand_shift (LSHIFT_EXPR, mode, op0,
3535 shift, target, unsignedp);
3536 else
3537 goto skip_synth;
3539 else
3540 goto skip_synth;
3542 /* We used to test optimize here, on the grounds that it's better to
3543 produce a smaller program when -O is not used. But this causes
3544 such a terrible slowdown sometimes that it seems better to always
3545 use synth_mult. */
3547 /* Special case powers of two. */
3548 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3549 && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3550 return expand_shift (LSHIFT_EXPR, mode, op0,
3551 floor_log2 (coeff), target, unsignedp);
3553 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3555 /* Attempt to handle multiplication of DImode values by negative
3556 coefficients, by performing the multiplication by a positive
3557 multiplier and then inverting the result. */
3558 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3560 /* It's safe to use -coeff even for INT_MIN, as the
3561 result is interpreted as an unsigned coefficient.
3562 Exclude cost of op0 from max_cost to match the cost
3563 calculation of the synth_mult. */
3564 coeff = -(unsigned HOST_WIDE_INT) coeff;
3565 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3566 mode, speed)
3567 - neg_cost (speed, mode));
3568 if (max_cost <= 0)
3569 goto skip_synth;
3571 /* Special case powers of two. */
3572 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3574 rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3575 floor_log2 (coeff), target, unsignedp);
3576 return expand_unop (mode, neg_optab, temp, target, 0);
3579 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3580 max_cost))
3582 rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3583 &algorithm, variant);
3584 return expand_unop (mode, neg_optab, temp, target, 0);
3586 goto skip_synth;
3589 /* Exclude cost of op0 from max_cost to match the cost
3590 calculation of the synth_mult. */
3591 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3592 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3593 return expand_mult_const (mode, op0, coeff, target,
3594 &algorithm, variant);
3596 skip_synth:
3598 /* Expand x*2.0 as x+x. */
3599 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3600 && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
3602 op0 = force_reg (GET_MODE (op0), op0);
3603 return expand_binop (mode, add_optab, op0, op0,
3604 target, unsignedp,
3605 no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3608 /* This used to use umul_optab if unsigned, but for non-widening multiply
3609 there is no difference between signed and unsigned. */
3610 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3611 op0, op1, target, unsignedp,
3612 no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3613 gcc_assert (op0 || no_libcall);
3614 return op0;
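/* Illustrative sketch: for negative coefficients in modes wider than a
   HOST_WIDE_INT, the path above rewrites, say, x * -6 as -(x * 6),
   keeping the synthesized part in the unsigned domain that synth_mult
   reasons about:

       t      = (x << 1) + x;       x * 3
       t      = t << 1;             x * 6
       result = -t;                 x * -6

   and only does so when neg_cost plus the cost of the shift/add chain
   still beats the cost of a real multiply instruction.  */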
3617 /* Return a cost estimate for multiplying a register by the given
3618 COEFFicient in the given MODE and SPEED. */
3621 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3623 int max_cost;
3624 struct algorithm algorithm;
3625 enum mult_variant variant;
3627 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3628 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3629 mode, speed);
3630 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3631 return algorithm.cost.cost;
3632 else
3633 return max_cost;
3636 /* Perform a widening multiplication and return an rtx for the result.
3637 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3638 TARGET is a suggestion for where to store the result (an rtx).
3639 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3640 or smul_widen_optab.
3642 We check specially for a constant integer as OP1, comparing the
3643 cost of a widening multiply against the cost of a sequence of shifts
3644 and adds. */
3647 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3648 int unsignedp, optab this_optab)
3650 bool speed = optimize_insn_for_speed_p ();
3651 rtx cop1;
3653 if (CONST_INT_P (op1)
3654 && GET_MODE (op0) != VOIDmode
3655 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3656 this_optab == umul_widen_optab))
3657 && CONST_INT_P (cop1)
3658 && (INTVAL (cop1) >= 0
3659 || HWI_COMPUTABLE_MODE_P (mode)))
3661 HOST_WIDE_INT coeff = INTVAL (cop1);
3662 int max_cost;
3663 enum mult_variant variant;
3664 struct algorithm algorithm;
3666 if (coeff == 0)
3667 return CONST0_RTX (mode);
3669 /* Special case powers of two. */
3670 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3672 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3673 return expand_shift (LSHIFT_EXPR, mode, op0,
3674 floor_log2 (coeff), target, unsignedp);
3677 /* Exclude cost of op0 from max_cost to match the cost
3678 calculation of the synth_mult. */
3679 max_cost = mul_widen_cost (speed, mode);
3680 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3681 max_cost))
3683 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3684 return expand_mult_const (mode, op0, coeff, target,
3685 &algorithm, variant);
3688 return expand_binop (mode, this_optab, op0, op1, target,
3689 unsignedp, OPTAB_LIB_WIDEN);
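/* Illustrative sketch: a widening multiply by a power-of-two constant
   reduces, per the code above, to an extension followed by a shift.
   For an unsigned 32-bit to 64-bit widening multiply by 8 this is
   roughly

       uint64_t wide = (uint64_t) x;     convert_to_mode, zero-extend
       uint64_t res  = wide << 3;        expand_shift, floor_log2 (8)

   Other constants go through choose_mult_variant and the same
   shift/add machinery as expand_mult_const, applied to the widened
   operand, provided the synthesized sequence is cheaper than
   mul_widen_cost.  */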
3692 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3693 replace division by D, and put the least significant N bits of the result
3694 in *MULTIPLIER_PTR and return the most significant bit.
3696 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3697 needed precision is in PRECISION (should be <= N).
3699 PRECISION should be as small as possible so that this function can
3700 choose the multiplier more freely.
3702 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3703 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3705 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3706 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
3708 unsigned HOST_WIDE_INT
3709 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3710 unsigned HOST_WIDE_INT *multiplier_ptr,
3711 int *post_shift_ptr, int *lgup_ptr)
3713 int lgup, post_shift;
3714 int pow, pow2;
3716 /* lgup = ceil(log2(divisor)); */
3717 lgup = ceil_log2 (d);
3719 gcc_assert (lgup <= n);
3721 pow = n + lgup;
3722 pow2 = n + lgup - precision;
3724 /* mlow = 2^(N + lgup)/d */
3725 wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3726 wide_int mlow = wi::udiv_trunc (val, d);
3728 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3729 val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3730 wide_int mhigh = wi::udiv_trunc (val, d);
3732 /* If precision == N, then mlow, mhigh exceed 2^N
3733 (but they do not exceed 2^(N+1)). */
3735 /* Reduce to lowest terms. */
3736 for (post_shift = lgup; post_shift > 0; post_shift--)
3738 unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3739 HOST_BITS_PER_WIDE_INT);
3740 unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3741 HOST_BITS_PER_WIDE_INT);
3742 if (ml_lo >= mh_lo)
3743 break;
3745 mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3746 mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3749 *post_shift_ptr = post_shift;
3750 *lgup_ptr = lgup;
3751 if (n < HOST_BITS_PER_WIDE_INT)
3753 unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;
3754 *multiplier_ptr = mhigh.to_uhwi () & mask;
3755 return mhigh.to_uhwi () > mask;
3757 else
3759 *multiplier_ptr = mhigh.to_uhwi ();
3760 return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
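/* Worked example (illustrative): for unsigned division by 7 in a
   32-bit mode, choose_multiplier (7, 32, 32, &ml, &post_shift, &lg)
   computes lgup = 3 and a 33-bit multiplier 0x124924925; it stores
   ml = 0x24924925, post_shift = 3, and returns 1 to flag the extra
   high bit.  The TRUNC_DIV code below then uses the N+1-bit recipe

       t1 = (uint32_t) (((uint64_t) x * 0x24924925u) >> 32);
       q  = (((x - t1) >> 1) + t1) >> 2;            q == x / 7

   whereas a return value of 0 means the multiplier fits in N bits and
   the quotient is simply the high half of x * ml shifted right by
   post_shift.  */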
3764 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3765 congruent to 1 (mod 2**N). */
3767 static unsigned HOST_WIDE_INT
3768 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3770 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3772 /* The algorithm notes that the choice y = x satisfies
3773 x*y == 1 mod 2^3, since x is assumed odd.
3774 Each iteration doubles the number of bits of significance in y. */
3776 unsigned HOST_WIDE_INT mask;
3777 unsigned HOST_WIDE_INT y = x;
3778 int nbit = 3;
3780 mask = (n == HOST_BITS_PER_WIDE_INT
3781 ? HOST_WIDE_INT_M1U
3782 : (HOST_WIDE_INT_1U << n) - 1);
3784 while (nbit < n)
3786 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3787 nbit *= 2;
3789 return y;
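/* Worked example (illustrative): invert_mod2n (3, 8) starts from
   y = 3, which already satisfies 3*3 == 9 == 1 (mod 2^3), and each
   iteration doubles the number of correct low bits:

       y = 3   * (2 - 3*3)   & 0xff  ==  235    3*235 == 1 (mod 2^6)
       y = 235 * (2 - 3*235) & 0xff  ==  171    3*171 == 513 == 1 (mod 2^8)

   so 171 is the inverse of 3 modulo 256.  EXACT_DIV_EXPR below relies
   on this to replace an exact division by an odd constant with a
   plain multiplication by the inverse.  */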
3792 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3793 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3794 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3795 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3796 become signed.
3798 The result is put in TARGET if that is convenient.
3800 MODE is the mode of operation. */
3803 expand_mult_highpart_adjust (scalar_int_mode mode, rtx adj_operand, rtx op0,
3804 rtx op1, rtx target, int unsignedp)
3806 rtx tem;
3807 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3809 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3810 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3811 tem = expand_and (mode, tem, op1, NULL_RTX);
3812 adj_operand
3813 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3814 adj_operand);
3816 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3817 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3818 tem = expand_and (mode, tem, op0, NULL_RTX);
3819 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3820 target);
3822 return target;
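/* Illustrative identity behind the adjustment above, for N-bit signed
   values A and B, writing uhi/shi for the high halves of the unsigned
   and signed N x N -> 2N products:

       shi (a, b) == uhi (a, b) - (a < 0 ? b : 0) - (b < 0 ? a : 0)
       uhi (a, b) == shi (a, b) + (a < 0 ? b : 0) + (b < 0 ? a : 0)

   The arithmetic right shift by N-1 yields an all-ones mask exactly
   when its operand is negative, so ANDing it with the other operand
   produces each conditional term without a branch.  */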
3825 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3827 static rtx
3828 extract_high_half (scalar_int_mode mode, rtx op)
3830 if (mode == word_mode)
3831 return gen_highpart (mode, op);
3833 scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3835 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3836 GET_MODE_BITSIZE (mode), 0, 1);
3837 return convert_modes (mode, wider_mode, op, 0);
3840 /* Like expmed_mult_highpart, but only consider using a multiplication
3841 optab. OP1 is an rtx for the constant operand. */
3843 static rtx
3844 expmed_mult_highpart_optab (scalar_int_mode mode, rtx op0, rtx op1,
3845 rtx target, int unsignedp, int max_cost)
3847 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3848 optab moptab;
3849 rtx tem;
3850 int size;
3851 bool speed = optimize_insn_for_speed_p ();
3853 scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3855 size = GET_MODE_BITSIZE (mode);
3857 /* Firstly, try using a multiplication insn that only generates the needed
3858 high part of the product, and in the sign flavor of unsignedp. */
3859 if (mul_highpart_cost (speed, mode) < max_cost)
3861 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3862 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3863 unsignedp, OPTAB_DIRECT);
3864 if (tem)
3865 return tem;
3868 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3869 Need to adjust the result after the multiplication. */
3870 if (size - 1 < BITS_PER_WORD
3871 && (mul_highpart_cost (speed, mode)
3872 + 2 * shift_cost (speed, mode, size-1)
3873 + 4 * add_cost (speed, mode) < max_cost))
3875 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3876 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3877 unsignedp, OPTAB_DIRECT);
3878 if (tem)
3879 /* We used the wrong signedness. Adjust the result. */
3880 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3881 tem, unsignedp);
3884 /* Try widening multiplication. */
3885 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3886 if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3887 && mul_widen_cost (speed, wider_mode) < max_cost)
3889 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3890 unsignedp, OPTAB_WIDEN);
3891 if (tem)
3892 return extract_high_half (mode, tem);
3895 /* Try widening the mode and perform a non-widening multiplication. */
3896 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3897 && size - 1 < BITS_PER_WORD
3898 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3899 < max_cost))
3901 rtx_insn *insns;
3902 rtx wop0, wop1;
3904 /* We need to widen the operands, for example to ensure the
3905 constant multiplier is correctly sign or zero extended.
3906 Use a sequence to clean up any instructions emitted by
3907 the conversions if things don't work out. */
3908 start_sequence ();
3909 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3910 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3911 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3912 unsignedp, OPTAB_WIDEN);
3913 insns = get_insns ();
3914 end_sequence ();
3916 if (tem)
3918 emit_insn (insns);
3919 return extract_high_half (mode, tem);
3923 /* Try widening multiplication of opposite signedness, and adjust. */
3924 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3925 if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3926 && size - 1 < BITS_PER_WORD
3927 && (mul_widen_cost (speed, wider_mode)
3928 + 2 * shift_cost (speed, mode, size-1)
3929 + 4 * add_cost (speed, mode) < max_cost))
3931 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3932 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3933 if (tem != 0)
3935 tem = extract_high_half (mode, tem);
3936 /* We used the wrong signedness. Adjust the result. */
3937 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3938 target, unsignedp);
3942 return 0;
3945 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3946 putting the high half of the result in TARGET if that is convenient,
3947 and return where the result is. If the operation cannot be performed,
3948 0 is returned.
3950 MODE is the mode of operation and result.
3952 UNSIGNEDP nonzero means unsigned multiply.
3954 MAX_COST is the total allowed cost for the expanded RTL. */
3956 static rtx
3957 expmed_mult_highpart (scalar_int_mode mode, rtx op0, rtx op1,
3958 rtx target, int unsignedp, int max_cost)
3960 unsigned HOST_WIDE_INT cnst1;
3961 int extra_cost;
3962 bool sign_adjust = false;
3963 enum mult_variant variant;
3964 struct algorithm alg;
3965 rtx tem;
3966 bool speed = optimize_insn_for_speed_p ();
3968 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3969 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3971 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3973 /* We can't optimize modes wider than BITS_PER_WORD.
3974 ??? We might be able to perform double-word arithmetic if
3975 mode == word_mode, however all the cost calculations in
3976 synth_mult etc. assume single-word operations. */
3977 scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3978 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3979 return expmed_mult_highpart_optab (mode, op0, op1, target,
3980 unsignedp, max_cost);
3982 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3984 /* Check whether we try to multiply by a negative constant. */
3985 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3987 sign_adjust = true;
3988 extra_cost += add_cost (speed, mode);
3991 /* See whether shift/add multiplication is cheap enough. */
3992 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3993 max_cost - extra_cost))
3995 /* See whether the specialized multiplication optabs are
3996 cheaper than the shift/add version. */
3997 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3998 alg.cost.cost + extra_cost);
3999 if (tem)
4000 return tem;
4002 tem = convert_to_mode (wider_mode, op0, unsignedp);
4003 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
4004 tem = extract_high_half (mode, tem);
4006 /* Adjust result for signedness. */
4007 if (sign_adjust)
4008 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
4010 return tem;
4012 return expmed_mult_highpart_optab (mode, op0, op1, target,
4013 unsignedp, max_cost);
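/* Illustrative sketch of the strategies above in C terms, for a
   32-bit mode where 64-bit arithmetic is available: the widening
   fallbacks all amount to

       uint32_t hi = (uint32_t) (((uint64_t) a * b) >> 32);

   i.e. widen, multiply, and keep the upper word (extract_high_half),
   while the shift/add path synthesizes a * CNST1 in the wider mode
   via expand_mult_const and then takes the high half, subtracting OP0
   once more when the constant is negative in a signed multiplication
   (the sign_adjust case).  */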
4017 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
4019 static rtx
4020 expand_smod_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
4022 rtx result, temp, shift;
4023 rtx_code_label *label;
4024 int logd;
4025 int prec = GET_MODE_PRECISION (mode);
4027 logd = floor_log2 (d);
4028 result = gen_reg_rtx (mode);
4030 /* Avoid conditional branches when they're expensive. */
4031 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
4032 && optimize_insn_for_speed_p ())
4034 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
4035 mode, 0, -1);
4036 if (signmask)
4038 HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
4039 signmask = force_reg (mode, signmask);
4040 shift = gen_int_shift_amount (mode, GET_MODE_BITSIZE (mode) - logd);
4042 /* Use the rtx_cost of a LSHIFTRT instruction to determine
4043 which instruction sequence to use. If logical right shifts
4044 are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
4045 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
4047 temp = gen_rtx_LSHIFTRT (mode, result, shift);
4048 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
4049 || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
4050 > COSTS_N_INSNS (2)))
4052 temp = expand_binop (mode, xor_optab, op0, signmask,
4053 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4054 temp = expand_binop (mode, sub_optab, temp, signmask,
4055 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4056 temp = expand_binop (mode, and_optab, temp,
4057 gen_int_mode (masklow, mode),
4058 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4059 temp = expand_binop (mode, xor_optab, temp, signmask,
4060 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4061 temp = expand_binop (mode, sub_optab, temp, signmask,
4062 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4064 else
4066 signmask = expand_binop (mode, lshr_optab, signmask, shift,
4067 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4068 signmask = force_reg (mode, signmask);
4070 temp = expand_binop (mode, add_optab, op0, signmask,
4071 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4072 temp = expand_binop (mode, and_optab, temp,
4073 gen_int_mode (masklow, mode),
4074 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4075 temp = expand_binop (mode, sub_optab, temp, signmask,
4076 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4078 return temp;
4082 /* Mask contains the mode's signbit and the significant bits of the
4083 modulus. By including the signbit in the operation, many targets
4084 can avoid an explicit compare operation in the following comparison
4085 against zero. */
4086 wide_int mask = wi::mask (logd, false, prec);
4087 mask = wi::set_bit (mask, prec - 1);
4089 temp = expand_binop (mode, and_optab, op0,
4090 immed_wide_int_const (mask, mode),
4091 result, 1, OPTAB_LIB_WIDEN);
4092 if (temp != result)
4093 emit_move_insn (result, temp);
4095 label = gen_label_rtx ();
4096 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
4098 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
4099 0, OPTAB_LIB_WIDEN);
4101 mask = wi::mask (logd, true, prec);
4102 temp = expand_binop (mode, ior_optab, temp,
4103 immed_wide_int_const (mask, mode),
4104 result, 1, OPTAB_LIB_WIDEN);
4105 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
4106 0, OPTAB_LIB_WIDEN);
4107 if (temp != result)
4108 emit_move_insn (result, temp);
4109 emit_label (label);
4110 return result;
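/* Illustrative sketch of the branch-free sequences above, for x % 4
   on a 32-bit value (logd == 2, masklow == 3):

       sign = -(x < 0);                        emit_store_flag, LT, -1/0

       r = ((x ^ sign) - sign) & 3;            2 XORs, 2 SUBs, 1 AND
       r = (r ^ sign) - sign;

   or, when a logical right shift is cheap,

       bias = (unsigned) sign >> 30;           3 if x < 0, else 0
       r = ((x + bias) & 3) - bias;            LSHIFTRT, ADD, AND, SUB

   Both produce the C remainder, which takes the sign of X.  */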
4113 /* Expand signed division of OP0 by a power of two D in mode MODE.
4114 This routine is only called for positive values of D. */
4116 static rtx
4117 expand_sdiv_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
4119 rtx temp;
4120 rtx_code_label *label;
4121 int logd;
4123 logd = floor_log2 (d);
4125 if (d == 2
4126 && BRANCH_COST (optimize_insn_for_speed_p (),
4127 false) >= 1)
4129 temp = gen_reg_rtx (mode);
4130 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
4131 if (temp != NULL_RTX)
4133 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4134 0, OPTAB_LIB_WIDEN);
4135 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4139 if (HAVE_conditional_move
4140 && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
4142 rtx temp2;
4144 start_sequence ();
4145 temp2 = copy_to_mode_reg (mode, op0);
4146 temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
4147 NULL_RTX, 0, OPTAB_LIB_WIDEN);
4148 temp = force_reg (mode, temp);
4150 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
4151 temp2 = emit_conditional_move (temp2, { LT, temp2, const0_rtx, mode },
4152 temp, temp2, mode, 0);
4153 if (temp2)
4155 rtx_insn *seq = get_insns ();
4156 end_sequence ();
4157 emit_insn (seq);
4158 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
4160 end_sequence ();
4163 if (BRANCH_COST (optimize_insn_for_speed_p (),
4164 false) >= 2)
4166 int ushift = GET_MODE_BITSIZE (mode) - logd;
4168 temp = gen_reg_rtx (mode);
4169 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
4170 if (temp != NULL_RTX)
4172 if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
4173 || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
4174 > COSTS_N_INSNS (1))
4175 temp = expand_binop (mode, and_optab, temp,
4176 gen_int_mode (d - 1, mode),
4177 NULL_RTX, 0, OPTAB_LIB_WIDEN);
4178 else
4179 temp = expand_shift (RSHIFT_EXPR, mode, temp,
4180 ushift, NULL_RTX, 1);
4181 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4182 0, OPTAB_LIB_WIDEN);
4183 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4187 label = gen_label_rtx ();
4188 temp = copy_to_mode_reg (mode, op0);
4189 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
4190 expand_inc (temp, gen_int_mode (d - 1, mode));
4191 emit_label (label);
4192 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
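/* Illustrative identity behind the sequences above: truncating signed
   division by 2^k is an arithmetic shift applied after biasing
   negative values by 2^k - 1.  For x / 8 on a 32-bit value:

       bias = (x >> 31) & 7;                   d - 1 if x < 0, else 0
       q = (x + bias) >> 3;                    rounds toward zero

   The variants above differ only in how the bias is materialized: a
   store-flag result added directly (d == 2), a conditional move, a
   mask or logical shift of the store-flag value, or a plain
   compare-and-jump around the increment.  */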
4195 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
4196 if that is convenient, and returning where the result is.
4197 You may request either the quotient or the remainder as the result;
4198 specify REM_FLAG nonzero to get the remainder.
4200 CODE is the expression code for which kind of division this is;
4201 it controls how rounding is done. MODE is the machine mode to use.
4202 UNSIGNEDP nonzero means do unsigned division. */
4204 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
4205 and then correct it by or'ing in missing high bits
4206 if result of ANDI is nonzero.
4207 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
4208 This could optimize to a bfexts instruction.
4209 But C doesn't use these operations, so their optimizations are
4210 left for later. */
4211 /* ??? For modulo, we don't actually need the highpart of the first product,
4212 the low part will do nicely. And for small divisors, the second multiply
4213 can also be a low-part only multiply or even be completely left out.
4214 E.g. to calculate the remainder of a division by 3 with a 32 bit
4215 multiply, multiply with 0x55555556 and extract the upper two bits;
4216 the result is exact for inputs up to 0x1fffffff.
4217 The input range can be reduced by using cross-sum rules.
4218 For odd divisors >= 3, the following table gives right shift counts
4219 so that if a number is shifted by an integer multiple of the given
4220 amount, the remainder stays the same:
4221 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
4222 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
4223 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
4224 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
4225 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
4227 Cross-sum rules for even numbers can be derived by leaving as many bits
4228 to the right alone as the divisor has zeros to the right.
4229 E.g. if x is an unsigned 32 bit number:
4230 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
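/* Illustrative check of the divide-by-3 example above, in 32-bit
   unsigned arithmetic: writing x = 3*q + r with x <= 0x1fffffff,

       x * 0x55555556 == q * 0x100000002 + r * 0x55555556
                      == 2*q + r * 0x55555556        (mod 2^32)

   and r * 0x55555556 is 0, 0x55555556 or 0xAAAAAAAC for r = 0, 1, 2,
   while 2*q (at most 0x15555554 here) is never large enough to push
   the sum across the next multiple of 0x40000000, so the top two bits
   of the low 32-bit product are exactly r.  */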
4234 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
4235 rtx op0, rtx op1, rtx target, int unsignedp,
4236 enum optab_methods methods)
4238 machine_mode compute_mode;
4239 rtx tquotient;
4240 rtx quotient = 0, remainder = 0;
4241 rtx_insn *last;
4242 rtx_insn *insn;
4243 optab optab1, optab2;
4244 int op1_is_constant, op1_is_pow2 = 0;
4245 int max_cost, extra_cost;
4246 static HOST_WIDE_INT last_div_const = 0;
4247 bool speed = optimize_insn_for_speed_p ();
4249 op1_is_constant = CONST_INT_P (op1);
4250 if (op1_is_constant)
4252 wide_int ext_op1 = rtx_mode_t (op1, mode);
4253 op1_is_pow2 = (wi::popcount (ext_op1) == 1
4254 || (! unsignedp
4255 && wi::popcount (wi::neg (ext_op1)) == 1));
4259 This is the structure of expand_divmod:
4261 First comes code to fix up the operands so we can perform the operations
4262 correctly and efficiently.
4264 Second comes a switch statement with code specific for each rounding mode.
4265 For some special operands this code emits all RTL for the desired
4266 operation, for other cases, it generates only a quotient and stores it in
4267 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
4268 to indicate that it has not done anything.
4270 Last comes code that finishes the operation. If QUOTIENT is set and
4271 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
4272 QUOTIENT is not set, it is computed using trunc rounding.
4274 We try to generate special code for division and remainder when OP1 is a
4275 constant. If |OP1| = 2**n we can use shifts and some other fast
4276 operations. For other values of OP1, we compute a carefully selected
4277 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4278 by m.
4280 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4281 half of the product. Different strategies for generating the product are
4282 implemented in expmed_mult_highpart.
4284 If what we actually want is the remainder, we generate that by another
4285 by-constant multiplication and a subtraction. */
4287 /* We shouldn't be called with OP1 == const1_rtx, but some of the
4288 code below will malfunction if we are, so check here and handle
4289 the special case if so. */
4290 if (op1 == const1_rtx)
4291 return rem_flag ? const0_rtx : op0;
4293 /* When dividing by -1, we could get an overflow.
4294 negv_optab can handle overflows. */
4295 if (! unsignedp && op1 == constm1_rtx)
4297 if (rem_flag)
4298 return const0_rtx;
4299 return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4300 ? negv_optab : neg_optab, op0, target, 0);
4303 if (target
4304 /* Don't use the function value register as a target
4305 since we have to read it as well as write it,
4306 and function-inlining gets confused by this. */
4307 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4308 /* Don't clobber an operand while doing a multi-step calculation. */
4309 || ((rem_flag || op1_is_constant)
4310 && (reg_mentioned_p (target, op0)
4311 || (MEM_P (op0) && MEM_P (target))))
4312 || reg_mentioned_p (target, op1)
4313 || (MEM_P (op1) && MEM_P (target))))
4314 target = 0;
4316 /* Get the mode in which to perform this computation. Normally it will
4317 be MODE, but sometimes we can't do the desired operation in MODE.
4318 If so, pick a wider mode in which we can do the operation. Convert
4319 to that mode at the start to avoid repeated conversions.
4321 First see what operations we need. These depend on the expression
4322 we are evaluating. (We assume that divxx3 insns exist under the
4323 same conditions that modxx3 insns and that these insns don't normally
4324 fail. If these assumptions are not correct, we may generate less
4325 efficient code in some cases.)
4327 Then see if we find a mode in which we can open-code that operation
4328 (either a division, modulus, or shift). Finally, check for the smallest
4329 mode for which we can do the operation with a library call. */
4331 /* We might want to refine this now that we have division-by-constant
4332 optimization. Since expmed_mult_highpart tries so many variants, it is
4333 not straightforward to generalize this. Maybe we should make an array
4334 of possible modes in init_expmed? Save this for GCC 2.7. */
4336 optab1 = (op1_is_pow2
4337 ? (unsignedp ? lshr_optab : ashr_optab)
4338 : (unsignedp ? udiv_optab : sdiv_optab));
4339 optab2 = (op1_is_pow2 ? optab1
4340 : (unsignedp ? udivmod_optab : sdivmod_optab));
4342 if (methods == OPTAB_WIDEN || methods == OPTAB_LIB_WIDEN)
4344 FOR_EACH_MODE_FROM (compute_mode, mode)
4345 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4346 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4347 break;
4349 if (compute_mode == VOIDmode && methods == OPTAB_LIB_WIDEN)
4350 FOR_EACH_MODE_FROM (compute_mode, mode)
4351 if (optab_libfunc (optab1, compute_mode)
4352 || optab_libfunc (optab2, compute_mode))
4353 break;
4355 else
4356 compute_mode = mode;
4358 /* If we still couldn't find a mode, use MODE, but expand_binop will
4359 probably die. */
4360 if (compute_mode == VOIDmode)
4361 compute_mode = mode;
4363 if (target && GET_MODE (target) == compute_mode)
4364 tquotient = target;
4365 else
4366 tquotient = gen_reg_rtx (compute_mode);
4368 #if 0
4369 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4370 (mode), and thereby get better code when OP1 is a constant. Do that
4371 later. It will require going over all usages of SIZE below. */
4372 size = GET_MODE_BITSIZE (mode);
4373 #endif
4375 /* Only deduct something for a REM if the last divide done was
4376 for a different constant. Then set the constant of the last
4377 divide. */
4378 max_cost = (unsignedp
4379 ? udiv_cost (speed, compute_mode)
4380 : sdiv_cost (speed, compute_mode));
4381 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4382 && INTVAL (op1) == last_div_const))
4383 max_cost -= (mul_cost (speed, compute_mode)
4384 + add_cost (speed, compute_mode));
4386 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4388 /* Now convert to the best mode to use. */
4389 if (compute_mode != mode)
4391 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4392 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4394 /* convert_modes may have placed op1 into a register, so we
4395 must recompute the following. */
4396 op1_is_constant = CONST_INT_P (op1);
4397 if (op1_is_constant)
4399 wide_int ext_op1 = rtx_mode_t (op1, compute_mode);
4400 op1_is_pow2 = (wi::popcount (ext_op1) == 1
4401 || (! unsignedp
4402 && wi::popcount (wi::neg (ext_op1)) == 1));
4404 else
4405 op1_is_pow2 = 0;
4408 /* If one of the operands is a volatile MEM, copy it into a register. */
4410 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4411 op0 = force_reg (compute_mode, op0);
4412 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4413 op1 = force_reg (compute_mode, op1);
4415 /* If we need the remainder or if OP1 is constant, we need to
4416 put OP0 in a register in case it has any queued subexpressions. */
4417 if (rem_flag || op1_is_constant)
4418 op0 = force_reg (compute_mode, op0);
4420 last = get_last_insn ();
4422 /* Promote floor rounding to trunc rounding for unsigned operations. */
4423 if (unsignedp)
4425 if (code == FLOOR_DIV_EXPR)
4426 code = TRUNC_DIV_EXPR;
4427 if (code == FLOOR_MOD_EXPR)
4428 code = TRUNC_MOD_EXPR;
4429 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4430 code = TRUNC_DIV_EXPR;
4433 if (op1 != const0_rtx)
4434 switch (code)
4436 case TRUNC_MOD_EXPR:
4437 case TRUNC_DIV_EXPR:
4438 if (op1_is_constant)
4440 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4441 int size = GET_MODE_BITSIZE (int_mode);
4442 if (unsignedp)
4444 unsigned HOST_WIDE_INT mh, ml;
4445 int pre_shift, post_shift;
4446 int dummy;
4447 wide_int wd = rtx_mode_t (op1, int_mode);
4448 unsigned HOST_WIDE_INT d = wd.to_uhwi ();
4450 if (wi::popcount (wd) == 1)
4452 pre_shift = floor_log2 (d);
4453 if (rem_flag)
4455 unsigned HOST_WIDE_INT mask
4456 = (HOST_WIDE_INT_1U << pre_shift) - 1;
4457 remainder
4458 = expand_binop (int_mode, and_optab, op0,
4459 gen_int_mode (mask, int_mode),
4460 remainder, 1, methods);
4461 if (remainder)
4462 return gen_lowpart (mode, remainder);
4464 quotient = expand_shift (RSHIFT_EXPR, int_mode, op0,
4465 pre_shift, tquotient, 1);
4467 else if (size <= HOST_BITS_PER_WIDE_INT)
4469 if (d >= (HOST_WIDE_INT_1U << (size - 1)))
4471 /* Most significant bit of divisor is set; emit an scc
4472 insn. */
4473 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4474 int_mode, 1, 1);
4476 else
4478 /* Find a suitable multiplier and right shift count
4479 instead of multiplying with D. */
4481 mh = choose_multiplier (d, size, size,
4482 &ml, &post_shift, &dummy);
4484 /* If the suggested multiplier is more than SIZE bits,
4485 we can do better for even divisors, using an
4486 initial right shift. */
4487 if (mh != 0 && (d & 1) == 0)
4489 pre_shift = ctz_or_zero (d);
4490 mh = choose_multiplier (d >> pre_shift, size,
4491 size - pre_shift,
4492 &ml, &post_shift, &dummy);
4493 gcc_assert (!mh);
4495 else
4496 pre_shift = 0;
4498 if (mh != 0)
4500 rtx t1, t2, t3, t4;
4502 if (post_shift - 1 >= BITS_PER_WORD)
4503 goto fail1;
4505 extra_cost
4506 = (shift_cost (speed, int_mode, post_shift - 1)
4507 + shift_cost (speed, int_mode, 1)
4508 + 2 * add_cost (speed, int_mode));
4509 t1 = expmed_mult_highpart
4510 (int_mode, op0, gen_int_mode (ml, int_mode),
4511 NULL_RTX, 1, max_cost - extra_cost);
4512 if (t1 == 0)
4513 goto fail1;
4514 t2 = force_operand (gen_rtx_MINUS (int_mode,
4515 op0, t1),
4516 NULL_RTX);
4517 t3 = expand_shift (RSHIFT_EXPR, int_mode,
4518 t2, 1, NULL_RTX, 1);
4519 t4 = force_operand (gen_rtx_PLUS (int_mode,
4520 t1, t3),
4521 NULL_RTX);
4522 quotient = expand_shift
4523 (RSHIFT_EXPR, int_mode, t4,
4524 post_shift - 1, tquotient, 1);
4526 else
4528 rtx t1, t2;
4530 if (pre_shift >= BITS_PER_WORD
4531 || post_shift >= BITS_PER_WORD)
4532 goto fail1;
4534 t1 = expand_shift
4535 (RSHIFT_EXPR, int_mode, op0,
4536 pre_shift, NULL_RTX, 1);
4537 extra_cost
4538 = (shift_cost (speed, int_mode, pre_shift)
4539 + shift_cost (speed, int_mode, post_shift));
4540 t2 = expmed_mult_highpart
4541 (int_mode, t1,
4542 gen_int_mode (ml, int_mode),
4543 NULL_RTX, 1, max_cost - extra_cost);
4544 if (t2 == 0)
4545 goto fail1;
4546 quotient = expand_shift
4547 (RSHIFT_EXPR, int_mode, t2,
4548 post_shift, tquotient, 1);
4552 else /* Too wide mode to use tricky code */
4553 break;
4555 insn = get_last_insn ();
4556 if (insn != last)
4557 set_dst_reg_note (insn, REG_EQUAL,
4558 gen_rtx_UDIV (int_mode, op0, op1),
4559 quotient);
4561 else /* TRUNC_DIV, signed */
4563 unsigned HOST_WIDE_INT ml;
4564 int lgup, post_shift;
4565 rtx mlr;
4566 HOST_WIDE_INT d = INTVAL (op1);
4567 unsigned HOST_WIDE_INT abs_d;
4569 /* Not prepared to handle division/remainder by
4570 0xffffffffffffffff8000000000000000 etc. */
4571 if (d == HOST_WIDE_INT_MIN && size > HOST_BITS_PER_WIDE_INT)
4572 break;
4574 /* Since d might be INT_MIN, we have to cast to
4575 unsigned HOST_WIDE_INT before negating to avoid
4576 undefined signed overflow. */
4577 abs_d = (d >= 0
4578 ? (unsigned HOST_WIDE_INT) d
4579 : - (unsigned HOST_WIDE_INT) d);
4581 /* n rem d = n rem -d */
4582 if (rem_flag && d < 0)
4584 d = abs_d;
4585 op1 = gen_int_mode (abs_d, int_mode);
4588 if (d == 1)
4589 quotient = op0;
4590 else if (d == -1)
4591 quotient = expand_unop (int_mode, neg_optab, op0,
4592 tquotient, 0);
4593 else if (size <= HOST_BITS_PER_WIDE_INT
4594 && abs_d == HOST_WIDE_INT_1U << (size - 1))
4596 /* This case is not handled correctly below. */
4597 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4598 int_mode, 1, 1);
4599 if (quotient == 0)
4600 goto fail1;
4602 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4603 && (size <= HOST_BITS_PER_WIDE_INT || d >= 0)
4604 && (rem_flag
4605 ? smod_pow2_cheap (speed, int_mode)
4606 : sdiv_pow2_cheap (speed, int_mode))
4607 /* We assume that cheap metric is true if the
4608 optab has an expander for this mode. */
4609 && ((optab_handler ((rem_flag ? smod_optab
4610 : sdiv_optab),
4611 int_mode)
4612 != CODE_FOR_nothing)
4613 || (optab_handler (sdivmod_optab, int_mode)
4614 != CODE_FOR_nothing)))
4616 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4618 if (rem_flag)
4620 remainder = expand_smod_pow2 (int_mode, op0, d);
4621 if (remainder)
4622 return gen_lowpart (mode, remainder);
4625 if (sdiv_pow2_cheap (speed, int_mode)
4626 && ((optab_handler (sdiv_optab, int_mode)
4627 != CODE_FOR_nothing)
4628 || (optab_handler (sdivmod_optab, int_mode)
4629 != CODE_FOR_nothing)))
4630 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4631 int_mode, op0,
4632 gen_int_mode (abs_d,
4633 int_mode),
4634 NULL_RTX, 0);
4635 else
4636 quotient = expand_sdiv_pow2 (int_mode, op0, abs_d);
4638 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4639 negate the quotient. */
4640 if (d < 0)
4642 insn = get_last_insn ();
4643 if (insn != last
4644 && abs_d < (HOST_WIDE_INT_1U
4645 << (HOST_BITS_PER_WIDE_INT - 1)))
4646 set_dst_reg_note (insn, REG_EQUAL,
4647 gen_rtx_DIV (int_mode, op0,
4648 gen_int_mode
4649 (abs_d,
4650 int_mode)),
4651 quotient);
4653 quotient = expand_unop (int_mode, neg_optab,
4654 quotient, quotient, 0);
4657 else if (size <= HOST_BITS_PER_WIDE_INT)
4659 choose_multiplier (abs_d, size, size - 1,
4660 &ml, &post_shift, &lgup);
4661 if (ml < HOST_WIDE_INT_1U << (size - 1))
4663 rtx t1, t2, t3;
4665 if (post_shift >= BITS_PER_WORD
4666 || size - 1 >= BITS_PER_WORD)
4667 goto fail1;
4669 extra_cost = (shift_cost (speed, int_mode, post_shift)
4670 + shift_cost (speed, int_mode, size - 1)
4671 + add_cost (speed, int_mode));
4672 t1 = expmed_mult_highpart
4673 (int_mode, op0, gen_int_mode (ml, int_mode),
4674 NULL_RTX, 0, max_cost - extra_cost);
4675 if (t1 == 0)
4676 goto fail1;
4677 t2 = expand_shift
4678 (RSHIFT_EXPR, int_mode, t1,
4679 post_shift, NULL_RTX, 0);
4680 t3 = expand_shift
4681 (RSHIFT_EXPR, int_mode, op0,
4682 size - 1, NULL_RTX, 0);
4683 if (d < 0)
4684 quotient
4685 = force_operand (gen_rtx_MINUS (int_mode, t3, t2),
4686 tquotient);
4687 else
4688 quotient
4689 = force_operand (gen_rtx_MINUS (int_mode, t2, t3),
4690 tquotient);
4692 else
4694 rtx t1, t2, t3, t4;
4696 if (post_shift >= BITS_PER_WORD
4697 || size - 1 >= BITS_PER_WORD)
4698 goto fail1;
4700 ml |= HOST_WIDE_INT_M1U << (size - 1);
4701 mlr = gen_int_mode (ml, int_mode);
4702 extra_cost = (shift_cost (speed, int_mode, post_shift)
4703 + shift_cost (speed, int_mode, size - 1)
4704 + 2 * add_cost (speed, int_mode));
4705 t1 = expmed_mult_highpart (int_mode, op0, mlr,
4706 NULL_RTX, 0,
4707 max_cost - extra_cost);
4708 if (t1 == 0)
4709 goto fail1;
4710 t2 = force_operand (gen_rtx_PLUS (int_mode, t1, op0),
4711 NULL_RTX);
4712 t3 = expand_shift
4713 (RSHIFT_EXPR, int_mode, t2,
4714 post_shift, NULL_RTX, 0);
4715 t4 = expand_shift
4716 (RSHIFT_EXPR, int_mode, op0,
4717 size - 1, NULL_RTX, 0);
4718 if (d < 0)
4719 quotient
4720 = force_operand (gen_rtx_MINUS (int_mode, t4, t3),
4721 tquotient);
4722 else
4723 quotient
4724 = force_operand (gen_rtx_MINUS (int_mode, t3, t4),
4725 tquotient);
4728 else /* Too wide mode to use tricky code */
4729 break;
4731 insn = get_last_insn ();
4732 if (insn != last)
4733 set_dst_reg_note (insn, REG_EQUAL,
4734 gen_rtx_DIV (int_mode, op0, op1),
4735 quotient);
4737 break;
4739 fail1:
4740 delete_insns_since (last);
4741 break;
4743 case FLOOR_DIV_EXPR:
4744 case FLOOR_MOD_EXPR:
4745 /* We will come here only for signed operations. */
4746 if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
4748 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4749 int size = GET_MODE_BITSIZE (int_mode);
4750 unsigned HOST_WIDE_INT mh, ml;
4751 int pre_shift, lgup, post_shift;
4752 HOST_WIDE_INT d = INTVAL (op1);
4754 if (d > 0)
4756 /* We could just as easily deal with negative constants here,
4757 but it does not seem worth the trouble for GCC 2.6. */
4758 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4760 pre_shift = floor_log2 (d);
4761 if (rem_flag)
4763 unsigned HOST_WIDE_INT mask
4764 = (HOST_WIDE_INT_1U << pre_shift) - 1;
4765 remainder = expand_binop
4766 (int_mode, and_optab, op0,
4767 gen_int_mode (mask, int_mode),
4768 remainder, 0, methods);
4769 if (remainder)
4770 return gen_lowpart (mode, remainder);
4772 quotient = expand_shift
4773 (RSHIFT_EXPR, int_mode, op0,
4774 pre_shift, tquotient, 0);
4776 else
4778 rtx t1, t2, t3, t4;
4780 mh = choose_multiplier (d, size, size - 1,
4781 &ml, &post_shift, &lgup);
4782 gcc_assert (!mh);
4784 if (post_shift < BITS_PER_WORD
4785 && size - 1 < BITS_PER_WORD)
4787 t1 = expand_shift
4788 (RSHIFT_EXPR, int_mode, op0,
4789 size - 1, NULL_RTX, 0);
4790 t2 = expand_binop (int_mode, xor_optab, op0, t1,
4791 NULL_RTX, 0, OPTAB_WIDEN);
4792 extra_cost = (shift_cost (speed, int_mode, post_shift)
4793 + shift_cost (speed, int_mode, size - 1)
4794 + 2 * add_cost (speed, int_mode));
4795 t3 = expmed_mult_highpart
4796 (int_mode, t2, gen_int_mode (ml, int_mode),
4797 NULL_RTX, 1, max_cost - extra_cost);
4798 if (t3 != 0)
4800 t4 = expand_shift
4801 (RSHIFT_EXPR, int_mode, t3,
4802 post_shift, NULL_RTX, 1);
4803 quotient = expand_binop (int_mode, xor_optab,
4804 t4, t1, tquotient, 0,
4805 OPTAB_WIDEN);
4810 else
4812 rtx nsign, t1, t2, t3, t4;
4813 t1 = force_operand (gen_rtx_PLUS (int_mode,
4814 op0, constm1_rtx), NULL_RTX);
4815 t2 = expand_binop (int_mode, ior_optab, op0, t1, NULL_RTX,
4816 0, OPTAB_WIDEN);
4817 nsign = expand_shift (RSHIFT_EXPR, int_mode, t2,
4818 size - 1, NULL_RTX, 0);
4819 t3 = force_operand (gen_rtx_MINUS (int_mode, t1, nsign),
4820 NULL_RTX);
4821 t4 = expand_divmod (0, TRUNC_DIV_EXPR, int_mode, t3, op1,
4822 NULL_RTX, 0);
4823 if (t4)
4825 rtx t5;
4826 t5 = expand_unop (int_mode, one_cmpl_optab, nsign,
4827 NULL_RTX, 0);
4828 quotient = force_operand (gen_rtx_PLUS (int_mode, t4, t5),
4829 tquotient);
4834 if (quotient != 0)
4835 break;
4836 delete_insns_since (last);
4838 /* Try using an instruction that produces both the quotient and
4839 remainder, using truncation. We can easily compensate the quotient
4840 or remainder to get floor rounding, once we have the remainder.
4841 Notice that we compute also the final remainder value here,
4842 and return the result right away. */
4843 if (target == 0 || GET_MODE (target) != compute_mode)
4844 target = gen_reg_rtx (compute_mode);
4846 if (rem_flag)
4848 remainder
4849 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4850 quotient = gen_reg_rtx (compute_mode);
4852 else
4854 quotient
4855 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4856 remainder = gen_reg_rtx (compute_mode);
4859 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4860 quotient, remainder, 0))
4862 /* This could be computed with a branch-less sequence.
4863 Save that for later. */
4864 rtx tem;
4865 rtx_code_label *label = gen_label_rtx ();
4866 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4867 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4868 NULL_RTX, 0, OPTAB_WIDEN);
4869 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4870 expand_dec (quotient, const1_rtx);
4871 expand_inc (remainder, op1);
4872 emit_label (label);
4873 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4876 /* No luck with division elimination or divmod. Have to do it
4877 by conditionally adjusting op0 *and* the result. */
4879 rtx_code_label *label1, *label2, *label3, *label4, *label5;
4880 rtx adjusted_op0;
4881 rtx tem;
4883 quotient = gen_reg_rtx (compute_mode);
4884 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4885 label1 = gen_label_rtx ();
4886 label2 = gen_label_rtx ();
4887 label3 = gen_label_rtx ();
4888 label4 = gen_label_rtx ();
4889 label5 = gen_label_rtx ();
4890 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4891 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4892 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4893 quotient, 0, methods);
4894 if (tem != quotient)
4895 emit_move_insn (quotient, tem);
4896 emit_jump_insn (targetm.gen_jump (label5));
4897 emit_barrier ();
4898 emit_label (label1);
4899 expand_inc (adjusted_op0, const1_rtx);
4900 emit_jump_insn (targetm.gen_jump (label4));
4901 emit_barrier ();
4902 emit_label (label2);
4903 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4904 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4905 quotient, 0, methods);
4906 if (tem != quotient)
4907 emit_move_insn (quotient, tem);
4908 emit_jump_insn (targetm.gen_jump (label5));
4909 emit_barrier ();
4910 emit_label (label3);
4911 expand_dec (adjusted_op0, const1_rtx);
4912 emit_label (label4);
4913 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4914 quotient, 0, methods);
4915 if (tem != quotient)
4916 emit_move_insn (quotient, tem);
4917 expand_dec (quotient, const1_rtx);
4918 emit_label (label5);
4920 break;
4922 case CEIL_DIV_EXPR:
4923 case CEIL_MOD_EXPR:
4924 if (unsignedp)
4926 if (op1_is_constant
4927 && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4928 && (HWI_COMPUTABLE_MODE_P (compute_mode)
4929 || INTVAL (op1) >= 0))
4931 scalar_int_mode int_mode
4932 = as_a <scalar_int_mode> (compute_mode);
4933 rtx t1, t2, t3;
4934 unsigned HOST_WIDE_INT d = INTVAL (op1);
4935 t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
4936 floor_log2 (d), tquotient, 1);
4937 t2 = expand_binop (int_mode, and_optab, op0,
4938 gen_int_mode (d - 1, int_mode),
4939 NULL_RTX, 1, methods);
4940 t3 = gen_reg_rtx (int_mode);
4941 t3 = emit_store_flag (t3, NE, t2, const0_rtx, int_mode, 1, 1);
4942 if (t3 == 0)
4944 rtx_code_label *lab;
4945 lab = gen_label_rtx ();
4946 do_cmp_and_jump (t2, const0_rtx, EQ, int_mode, lab);
4947 expand_inc (t1, const1_rtx);
4948 emit_label (lab);
4949 quotient = t1;
4951 else
4952 quotient = force_operand (gen_rtx_PLUS (int_mode, t1, t3),
4953 tquotient);
4954 break;
4957 /* Try using an instruction that produces both the quotient and
4958 remainder, using truncation. We can easily compensate the
4959 quotient or remainder to get ceiling rounding, once we have the
4960 remainder. Notice that we compute also the final remainder
4961 value here, and return the result right away. */
4962 if (target == 0 || GET_MODE (target) != compute_mode)
4963 target = gen_reg_rtx (compute_mode);
4965 if (rem_flag)
4967 remainder = (REG_P (target)
4968 ? target : gen_reg_rtx (compute_mode));
4969 quotient = gen_reg_rtx (compute_mode);
4971 else
4973 quotient = (REG_P (target)
4974 ? target : gen_reg_rtx (compute_mode));
4975 remainder = gen_reg_rtx (compute_mode);
4978 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4979 remainder, 1))
4981 /* This could be computed with a branch-less sequence.
4982 Save that for later. */
4983 rtx_code_label *label = gen_label_rtx ();
4984 do_cmp_and_jump (remainder, const0_rtx, EQ,
4985 compute_mode, label);
4986 expand_inc (quotient, const1_rtx);
4987 expand_dec (remainder, op1);
4988 emit_label (label);
4989 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4992 /* No luck with division elimination or divmod. Have to do it
4993 by conditionally adjusting op0 *and* the result. */
4995 rtx_code_label *label1, *label2;
4996 rtx adjusted_op0, tem;
4998 quotient = gen_reg_rtx (compute_mode);
4999 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
5000 label1 = gen_label_rtx ();
5001 label2 = gen_label_rtx ();
5002 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
5003 compute_mode, label1);
5004 emit_move_insn (quotient, const0_rtx);
5005 emit_jump_insn (targetm.gen_jump (label2));
5006 emit_barrier ();
5007 emit_label (label1);
5008 expand_dec (adjusted_op0, const1_rtx);
5009 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
5010 quotient, 1, methods);
5011 if (tem != quotient)
5012 emit_move_insn (quotient, tem);
5013 expand_inc (quotient, const1_rtx);
5014 emit_label (label2);
5017 else /* signed */
5019 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
5020 && INTVAL (op1) >= 0)
5022 /* This is extremely similar to the code for the unsigned case
5023 above. For 2.7 we should merge these variants, but for
5024 2.6.1 I don't want to touch the code for unsigned since that
5025 gets used in C. The signed case will only be used by other
5026 languages (Ada). */
5028 rtx t1, t2, t3;
5029 unsigned HOST_WIDE_INT d = INTVAL (op1);
5030 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
5031 floor_log2 (d), tquotient, 0);
5032 t2 = expand_binop (compute_mode, and_optab, op0,
5033 gen_int_mode (d - 1, compute_mode),
5034 NULL_RTX, 1, methods);
5035 t3 = gen_reg_rtx (compute_mode);
5036 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
5037 compute_mode, 1, 1);
5038 if (t3 == 0)
5040 rtx_code_label *lab;
5041 lab = gen_label_rtx ();
5042 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
5043 expand_inc (t1, const1_rtx);
5044 emit_label (lab);
5045 quotient = t1;
5047 else
5048 quotient = force_operand (gen_rtx_PLUS (compute_mode,
5049 t1, t3),
5050 tquotient);
5051 break;
5054 /* Try using an instruction that produces both the quotient and
5055 remainder, using truncation. We can easily compensate the
5056 quotient or remainder to get ceiling rounding, once we have the
5057 remainder. Notice that we compute also the final remainder
5058 value here, and return the result right away. */
5059 if (target == 0 || GET_MODE (target) != compute_mode)
5060 target = gen_reg_rtx (compute_mode);
5061 if (rem_flag)
5063 remainder = (REG_P (target)
5064 ? target : gen_reg_rtx (compute_mode));
5065 quotient = gen_reg_rtx (compute_mode);
5067 else
5069 quotient = (REG_P (target)
5070 ? target : gen_reg_rtx (compute_mode));
5071 remainder = gen_reg_rtx (compute_mode);
5074 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
5075 remainder, 0))
5077 /* This could be computed with a branch-less sequence.
5078 Save that for later. */
5079 rtx tem;
5080 rtx_code_label *label = gen_label_rtx ();
5081 do_cmp_and_jump (remainder, const0_rtx, EQ,
5082 compute_mode, label);
5083 tem = expand_binop (compute_mode, xor_optab, op0, op1,
5084 NULL_RTX, 0, OPTAB_WIDEN);
5085 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
5086 expand_inc (quotient, const1_rtx);
5087 expand_dec (remainder, op1);
5088 emit_label (label);
5089 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5092 /* No luck with division elimination or divmod. Have to do it
5093 by conditionally adjusting op0 *and* the result. */
5095 rtx_code_label *label1, *label2, *label3, *label4, *label5;
5096 rtx adjusted_op0;
5097 rtx tem;
5099 quotient = gen_reg_rtx (compute_mode);
5100 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
5101 label1 = gen_label_rtx ();
5102 label2 = gen_label_rtx ();
5103 label3 = gen_label_rtx ();
5104 label4 = gen_label_rtx ();
5105 label5 = gen_label_rtx ();
5106 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
5107 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
5108 compute_mode, label1);
5109 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5110 quotient, 0, methods);
5111 if (tem != quotient)
5112 emit_move_insn (quotient, tem);
5113 emit_jump_insn (targetm.gen_jump (label5));
5114 emit_barrier ();
5115 emit_label (label1);
5116 expand_dec (adjusted_op0, const1_rtx);
5117 emit_jump_insn (targetm.gen_jump (label4));
5118 emit_barrier ();
5119 emit_label (label2);
5120 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
5121 compute_mode, label3);
5122 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5123 quotient, 0, methods);
5124 if (tem != quotient)
5125 emit_move_insn (quotient, tem);
5126 emit_jump_insn (targetm.gen_jump (label5));
5127 emit_barrier ();
5128 emit_label (label3);
5129 expand_inc (adjusted_op0, const1_rtx);
5130 emit_label (label4);
5131 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5132 quotient, 0, methods);
5133 if (tem != quotient)
5134 emit_move_insn (quotient, tem);
5135 expand_inc (quotient, const1_rtx);
5136 emit_label (label5);
5139 break;
5141 case EXACT_DIV_EXPR:
5142 if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
5144 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5145 int size = GET_MODE_BITSIZE (int_mode);
5146 HOST_WIDE_INT d = INTVAL (op1);
5147 unsigned HOST_WIDE_INT ml;
5148 int pre_shift;
5149 rtx t1;
5151 pre_shift = ctz_or_zero (d);
5152 ml = invert_mod2n (d >> pre_shift, size);
5153 t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5154 pre_shift, NULL_RTX, unsignedp);
5155 quotient = expand_mult (int_mode, t1, gen_int_mode (ml, int_mode),
5156 NULL_RTX, 1);
5158 insn = get_last_insn ();
5159 set_dst_reg_note (insn, REG_EQUAL,
5160 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5161 int_mode, op0, op1),
5162 quotient);
5164 break;
5166 case ROUND_DIV_EXPR:
5167 case ROUND_MOD_EXPR:
5168 if (unsignedp)
5170 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5171 rtx tem;
5172 rtx_code_label *label;
5173 label = gen_label_rtx ();
5174 quotient = gen_reg_rtx (int_mode);
5175 remainder = gen_reg_rtx (int_mode);
5176 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
5178 rtx tem;
5179 quotient = expand_binop (int_mode, udiv_optab, op0, op1,
5180 quotient, 1, methods);
5181 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 1);
5182 remainder = expand_binop (int_mode, sub_optab, op0, tem,
5183 remainder, 1, methods);
5185 tem = plus_constant (int_mode, op1, -1);
5186 tem = expand_shift (RSHIFT_EXPR, int_mode, tem, 1, NULL_RTX, 1);
5187 do_cmp_and_jump (remainder, tem, LEU, int_mode, label);
5188 expand_inc (quotient, const1_rtx);
5189 expand_dec (remainder, op1);
5190 emit_label (label);
5192 else
5194 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5195 int size = GET_MODE_BITSIZE (int_mode);
5196 rtx abs_rem, abs_op1, tem, mask;
5197 rtx_code_label *label;
5198 label = gen_label_rtx ();
5199 quotient = gen_reg_rtx (int_mode);
5200 remainder = gen_reg_rtx (int_mode);
5201 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
5203 rtx tem;
5204 quotient = expand_binop (int_mode, sdiv_optab, op0, op1,
5205 quotient, 0, methods);
5206 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 0);
5207 remainder = expand_binop (int_mode, sub_optab, op0, tem,
5208 remainder, 0, methods);
5210 abs_rem = expand_abs (int_mode, remainder, NULL_RTX, 1, 0);
5211 abs_op1 = expand_abs (int_mode, op1, NULL_RTX, 1, 0);
5212 tem = expand_shift (LSHIFT_EXPR, int_mode, abs_rem,
5213 1, NULL_RTX, 1);
5214 do_cmp_and_jump (tem, abs_op1, LTU, int_mode, label);
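/* The branch above skips the adjustment when 2 * |remainder| < |op1|,
   so the quotient is rounded away from zero exactly when the remainder
   is at least half the divisor.  The code below then applies the
   correct sign without further branches: mask = (op0 ^ op1) >> (size - 1)
   is -1 when the operands have opposite signs and 0 otherwise, so
   (mask ^ 1) - mask evaluates to -1 or +1 respectively and is added to
   the quotient, while (mask ^ op1) - mask evaluates to -op1 or op1 and
   is subtracted from the remainder.  */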
5215 tem = expand_binop (int_mode, xor_optab, op0, op1,
5216 NULL_RTX, 0, OPTAB_WIDEN);
5217 mask = expand_shift (RSHIFT_EXPR, int_mode, tem,
5218 size - 1, NULL_RTX, 0);
5219 tem = expand_binop (int_mode, xor_optab, mask, const1_rtx,
5220 NULL_RTX, 0, OPTAB_WIDEN);
5221 tem = expand_binop (int_mode, sub_optab, tem, mask,
5222 NULL_RTX, 0, OPTAB_WIDEN);
5223 expand_inc (quotient, tem);
5224 tem = expand_binop (int_mode, xor_optab, mask, op1,
5225 NULL_RTX, 0, OPTAB_WIDEN);
5226 tem = expand_binop (int_mode, sub_optab, tem, mask,
5227 NULL_RTX, 0, OPTAB_WIDEN);
5228 expand_dec (remainder, tem);
5229 emit_label (label);
5231 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5233 default:
5234 gcc_unreachable ();
5237 if (quotient == 0)
5239 if (target && GET_MODE (target) != compute_mode)
5240 target = 0;
5242 if (rem_flag)
5244 /* Try to produce the remainder without producing the quotient.
5245 If we seem to have a divmod pattern that does not require widening,
5246 don't try widening here. We should really have a WIDEN argument
5247 to expand_twoval_binop, since what we'd really like to do here is
5248 1) try a mod insn in compute_mode
5249 2) try a divmod insn in compute_mode
5250 3) try a div insn in compute_mode and multiply-subtract to get
5251 remainder
5252 4) try the same things with widening allowed. */
5253 remainder
5254 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5255 op0, op1, target,
5256 unsignedp,
5257 ((optab_handler (optab2, compute_mode)
5258 != CODE_FOR_nothing)
5259 ? OPTAB_DIRECT : OPTAB_WIDEN));
5260 if (remainder == 0)
5262 /* No luck there. Can we do remainder and divide at once
5263 without a library call? */
5264 remainder = gen_reg_rtx (compute_mode);
5265 if (! expand_twoval_binop ((unsignedp
5266 ? udivmod_optab
5267 : sdivmod_optab),
5268 op0, op1,
5269 NULL_RTX, remainder, unsignedp))
5270 remainder = 0;
5273 if (remainder)
5274 return gen_lowpart (mode, remainder);
5277 /* Produce the quotient. Try a quotient insn, but not a library call.
5278 If we have a divmod in this mode, use it in preference to widening
5279 the div (for this test we assume it will not fail). Note that optab2
5280 is set to the one of the two optabs that the call below will use. */
5281 quotient
5282 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5283 op0, op1, rem_flag ? NULL_RTX : target,
5284 unsignedp,
5285 ((optab_handler (optab2, compute_mode)
5286 != CODE_FOR_nothing)
5287 ? OPTAB_DIRECT : OPTAB_WIDEN));
5289 if (quotient == 0)
5291 /* No luck there. Try a quotient-and-remainder insn,
5292 keeping the quotient alone. */
5293 quotient = gen_reg_rtx (compute_mode);
5294 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5295 op0, op1,
5296 quotient, NULL_RTX, unsignedp))
5298 quotient = 0;
5299 if (! rem_flag)
5300 /* Still no luck. If we are not computing the remainder,
5301 use a library call for the quotient. */
5302 quotient = sign_expand_binop (compute_mode,
5303 udiv_optab, sdiv_optab,
5304 op0, op1, target,
5305 unsignedp, methods);
5310 if (rem_flag)
5312 if (target && GET_MODE (target) != compute_mode)
5313 target = 0;
5315 if (quotient == 0)
5317 /* No divide instruction either. Use library for remainder. */
5318 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5319 op0, op1, target,
5320 unsignedp, methods);
5321 /* No remainder function. Try a quotient-and-remainder
5322 function, keeping the remainder. */
5323 if (!remainder
5324 && (methods == OPTAB_LIB || methods == OPTAB_LIB_WIDEN))
5326 remainder = gen_reg_rtx (compute_mode);
5327 if (!expand_twoval_binop_libfunc
5328 (unsignedp ? udivmod_optab : sdivmod_optab,
5329 op0, op1,
5330 NULL_RTX, remainder,
5331 unsignedp ? UMOD : MOD))
5332 remainder = NULL_RTX;
5335 else
5337 /* We divided. Now finish doing X - Y * (X / Y). */
5338 remainder = expand_mult (compute_mode, quotient, op1,
5339 NULL_RTX, unsignedp);
5340 remainder = expand_binop (compute_mode, sub_optab, op0,
5341 remainder, target, unsignedp,
5342 methods);
5346 if (methods != OPTAB_LIB_WIDEN
5347 && (rem_flag ? remainder : quotient) == NULL_RTX)
5348 return NULL_RTX;
5350 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5353 /* Return a tree node with data type TYPE, describing the value of X.
5354 Usually this is a VAR_DECL, if there is no obvious better choice.
5355 X may be an expression; however, we only support those expressions
5356 generated by loop.c. */
5358 tree
5359 make_tree (tree type, rtx x)
5361 tree t;
5363 switch (GET_CODE (x))
5365 case CONST_INT:
5366 case CONST_WIDE_INT:
5367 t = wide_int_to_tree (type, rtx_mode_t (x, TYPE_MODE (type)));
5368 return t;
5370 case CONST_DOUBLE:
5371 STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5372 if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5373 t = wide_int_to_tree (type,
5374 wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5375 HOST_BITS_PER_WIDE_INT * 2));
5376 else
5377 t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5379 return t;
5381 case CONST_VECTOR:
5383 unsigned int npatterns = CONST_VECTOR_NPATTERNS (x);
5384 unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (x);
5385 tree itype = TREE_TYPE (type);
5387 /* Build a tree with vector elements. */
5388 tree_vector_builder elts (type, npatterns, nelts_per_pattern);
5389 unsigned int count = elts.encoded_nelts ();
5390 for (unsigned int i = 0; i < count; ++i)
5392 rtx elt = CONST_VECTOR_ELT (x, i);
5393 elts.quick_push (make_tree (itype, elt));
5396 return elts.build ();
5399 case PLUS:
5400 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5401 make_tree (type, XEXP (x, 1)));
5403 case MINUS:
5404 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5405 make_tree (type, XEXP (x, 1)));
5407 case NEG:
5408 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5410 case MULT:
5411 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5412 make_tree (type, XEXP (x, 1)));
5414 case ASHIFT:
5415 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5416 make_tree (type, XEXP (x, 1)));
5418 case LSHIFTRT:
5419 t = unsigned_type_for (type);
5420 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5421 make_tree (t, XEXP (x, 0)),
5422 make_tree (type, XEXP (x, 1))));
5424 case ASHIFTRT:
5425 t = signed_type_for (type);
5426 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5427 make_tree (t, XEXP (x, 0)),
5428 make_tree (type, XEXP (x, 1))));
5430 case DIV:
5431 if (TREE_CODE (type) != REAL_TYPE)
5432 t = signed_type_for (type);
5433 else
5434 t = type;
5436 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5437 make_tree (t, XEXP (x, 0)),
5438 make_tree (t, XEXP (x, 1))));
5439 case UDIV:
5440 t = unsigned_type_for (type);
5441 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5442 make_tree (t, XEXP (x, 0)),
5443 make_tree (t, XEXP (x, 1))));
5445 case SIGN_EXTEND:
5446 case ZERO_EXTEND:
5447 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5448 GET_CODE (x) == ZERO_EXTEND);
5449 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5451 case CONST:
5452 return make_tree (type, XEXP (x, 0));
5454 case SYMBOL_REF:
5455 t = SYMBOL_REF_DECL (x);
5456 if (t)
5457 return fold_convert (type, build_fold_addr_expr (t));
5458 /* fall through. */
5460 default:
5461 if (CONST_POLY_INT_P (x))
5462 return wide_int_to_tree (type, const_poly_int_value (x));
5464 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5466 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5467 address mode to pointer mode. */
5468 if (POINTER_TYPE_P (type))
5469 x = convert_memory_address_addr_space
5470 (SCALAR_INT_TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5472 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5473 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5474 t->decl_with_rtl.rtl = x;
5476 return t;
5480 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5481 and returning TARGET.
5483 If TARGET is 0, a pseudo-register or constant is returned. */
5486 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5488 rtx tem = 0;
5490 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5491 tem = simplify_binary_operation (AND, mode, op0, op1);
5492 if (tem == 0)
5493 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5495 if (target == 0)
5496 target = tem;
5497 else if (tem != target)
5498 emit_move_insn (target, tem);
5499 return target;
5502 /* Helper function for emit_store_flag. */
5504 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5505 machine_mode mode, machine_mode compare_mode,
5506 int unsignedp, rtx x, rtx y, int normalizep,
5507 machine_mode target_mode)
5509 class expand_operand ops[4];
5510 rtx op0, comparison, subtarget;
5511 rtx_insn *last;
5512 scalar_int_mode result_mode = targetm.cstore_mode (icode);
5513 scalar_int_mode int_target_mode;
5515 last = get_last_insn ();
5516 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5517 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5518 if (!x || !y)
5520 delete_insns_since (last);
5521 return NULL_RTX;
5524 if (target_mode == VOIDmode)
5525 int_target_mode = result_mode;
5526 else
5527 int_target_mode = as_a <scalar_int_mode> (target_mode);
5528 if (!target)
5529 target = gen_reg_rtx (int_target_mode);
5531 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5533 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5534 create_fixed_operand (&ops[1], comparison);
5535 create_fixed_operand (&ops[2], x);
5536 create_fixed_operand (&ops[3], y);
5537 if (!maybe_expand_insn (icode, 4, ops))
5539 delete_insns_since (last);
5540 return NULL_RTX;
5542 subtarget = ops[0].value;
5544 /* If we are converting to a wider mode, first convert to
5545 INT_TARGET_MODE, then normalize. This produces better combining
5546 opportunities on machines that have a SIGN_EXTRACT when we are
5547 testing a single bit. This mostly benefits the 68k.
5549 If STORE_FLAG_VALUE does not have the sign bit set when
5550 interpreted in MODE, we can do this conversion as unsigned, which
5551 is usually more efficient. */
5552 if (GET_MODE_PRECISION (int_target_mode) > GET_MODE_PRECISION (result_mode))
5554 gcc_assert (GET_MODE_PRECISION (result_mode) != 1
5555 || STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1);
5557 bool unsignedp = (STORE_FLAG_VALUE >= 0);
5558 convert_move (target, subtarget, unsignedp);
5560 op0 = target;
5561 result_mode = int_target_mode;
5563 else
5564 op0 = subtarget;
5566 /* If we want to keep subexpressions around, don't reuse our last
5567 target. */
5568 if (optimize)
5569 subtarget = 0;
5571 /* Now normalize to the proper value in MODE. Sometimes we don't
5572 have to do anything. */
5573 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5575 /* STORE_FLAG_VALUE might be the most negative number, so write
5576 the comparison this way to avoid a compile-time warning. */
5577 else if (- normalizep == STORE_FLAG_VALUE)
5578 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5580 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5581 it hard to use a value of just the sign bit due to ANSI integer
5582 constant typing rules. */
5583 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5584 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5585 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5586 normalizep == 1);
5587 else
5589 gcc_assert (STORE_FLAG_VALUE & 1);
5591 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5592 if (normalizep == -1)
5593 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5596 /* If we were converting to a smaller mode, do the conversion now. */
5597 if (int_target_mode != result_mode)
5599 convert_move (target, op0, 0);
5600 return target;
5602 else
5603 return op0;
5607 /* A subroutine of emit_store_flag only including "tricks" that do not
5608 need a recursive call. These are kept separate to avoid infinite
5609 loops. */
5611 static rtx
5612 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5613 machine_mode mode, int unsignedp, int normalizep,
5614 machine_mode target_mode)
5616 rtx subtarget;
5617 enum insn_code icode;
5618 machine_mode compare_mode;
5619 enum mode_class mclass;
5620 enum rtx_code scode;
5622 if (unsignedp)
5623 code = unsigned_condition (code);
5624 scode = swap_condition (code);
5626 /* If one operand is constant, make it the second one. Only do this
5627 if the other operand is not constant as well. */
5629 if (swap_commutative_operands_p (op0, op1))
5631 std::swap (op0, op1);
5632 code = swap_condition (code);
5635 if (mode == VOIDmode)
5636 mode = GET_MODE (op0);
5638 if (CONST_SCALAR_INT_P (op1))
5639 canonicalize_comparison (mode, &code, &op1);
5641 /* For some comparisons with 1 and -1, we can convert this to
5642 comparisons with zero. This will often produce more opportunities for
5643 store-flag insns. */
5645 switch (code)
5647 case LT:
5648 if (op1 == const1_rtx)
5649 op1 = const0_rtx, code = LE;
5650 break;
5651 case LE:
5652 if (op1 == constm1_rtx)
5653 op1 = const0_rtx, code = LT;
5654 break;
5655 case GE:
5656 if (op1 == const1_rtx)
5657 op1 = const0_rtx, code = GT;
5658 break;
5659 case GT:
5660 if (op1 == constm1_rtx)
5661 op1 = const0_rtx, code = GE;
5662 break;
5663 case GEU:
5664 if (op1 == const1_rtx)
5665 op1 = const0_rtx, code = NE;
5666 break;
5667 case LTU:
5668 if (op1 == const1_rtx)
5669 op1 = const0_rtx, code = EQ;
5670 break;
5671 default:
5672 break;
5675 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5676 complement of A (for GE) and shifting the sign bit to the low bit. */
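/* For example, in a 32-bit mode a 0/1-normalized "A < 0" becomes the
   logical shift (unsigned) A >> 31 and "A >= 0" becomes (~A) >> 31,
   while for normalizep == -1 an arithmetic shift by 31 produces the
   0/-1 result directly.  */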
5677 scalar_int_mode int_mode;
5678 if (op1 == const0_rtx && (code == LT || code == GE)
5679 && is_int_mode (mode, &int_mode)
5680 && (normalizep || STORE_FLAG_VALUE == 1
5681 || val_signbit_p (int_mode, STORE_FLAG_VALUE)))
5683 scalar_int_mode int_target_mode;
5684 subtarget = target;
5686 if (!target)
5687 int_target_mode = int_mode;
5688 else
5690 /* If the result is to be wider than OP0, it is best to convert it
5691 first. If it is to be narrower, it is *incorrect* to convert it
5692 first. */
5693 int_target_mode = as_a <scalar_int_mode> (target_mode);
5694 if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (int_mode))
5696 op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5697 int_mode = int_target_mode;
5701 if (int_target_mode != int_mode)
5702 subtarget = 0;
5704 if (code == GE)
5705 op0 = expand_unop (int_mode, one_cmpl_optab, op0,
5706 ((STORE_FLAG_VALUE == 1 || normalizep)
5707 ? 0 : subtarget), 0);
5709 if (STORE_FLAG_VALUE == 1 || normalizep)
5710 /* If we are supposed to produce a 0/1 value, we want to do
5711 a logical shift from the sign bit to the low-order bit; for
5712 a -1/0 value, we do an arithmetic shift. */
5713 op0 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5714 GET_MODE_BITSIZE (int_mode) - 1,
5715 subtarget, normalizep != -1);
5717 if (int_mode != int_target_mode)
5718 op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5720 return op0;
5723 /* Next try expanding this via the backend's cstore<mode>4. */
5724 mclass = GET_MODE_CLASS (mode);
5725 FOR_EACH_WIDER_MODE_FROM (compare_mode, mode)
5727 machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5728 icode = optab_handler (cstore_optab, optab_mode);
5729 if (icode != CODE_FOR_nothing)
5731 do_pending_stack_adjust ();
5732 rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5733 unsignedp, op0, op1, normalizep, target_mode);
5734 if (tem)
5735 return tem;
5737 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5739 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5740 unsignedp, op1, op0, normalizep, target_mode);
5741 if (tem)
5742 return tem;
5744 break;
5748 /* If we are comparing a double-word integer with zero or -1, we can
5749 convert the comparison into one involving a single word. */
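/* For example, with 32-bit words and a 64-bit operand X split into
   words HI and LO: "X == 0" becomes "(LO | HI) == 0", "X == -1"
   becomes "(LO & HI) == -1", and "X < 0" or "X >= 0" only needs to
   test the sign of HI.  */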
5750 if (is_int_mode (mode, &int_mode)
5751 && GET_MODE_BITSIZE (int_mode) == BITS_PER_WORD * 2
5752 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5754 rtx tem;
5755 if ((code == EQ || code == NE)
5756 && (op1 == const0_rtx || op1 == constm1_rtx))
5758 rtx op00, op01;
5760 /* Do a logical OR or AND of the two words and compare the
5761 result. */
5762 op00 = simplify_gen_subreg (word_mode, op0, int_mode, 0);
5763 op01 = simplify_gen_subreg (word_mode, op0, int_mode, UNITS_PER_WORD);
5764 tem = expand_binop (word_mode,
5765 op1 == const0_rtx ? ior_optab : and_optab,
5766 op00, op01, NULL_RTX, unsignedp,
5767 OPTAB_DIRECT);
5769 if (tem != 0)
5770 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5771 unsignedp, normalizep);
5773 else if ((code == LT || code == GE) && op1 == const0_rtx)
5775 rtx op0h;
5777 /* If testing the sign bit, can just test on high word. */
5778 op0h = simplify_gen_subreg (word_mode, op0, int_mode,
5779 subreg_highpart_offset (word_mode,
5780 int_mode));
5781 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5782 unsignedp, normalizep);
5784 else
5785 tem = NULL_RTX;
5787 if (tem)
5789 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5790 return tem;
5791 if (!target)
5792 target = gen_reg_rtx (target_mode);
5794 convert_move (target, tem,
5795 !val_signbit_known_set_p (word_mode,
5796 (normalizep ? normalizep
5797 : STORE_FLAG_VALUE)));
5798 return target;
5802 return 0;
5805 /* Subroutine of emit_store_flag that handles cases in which the operands
5806 are scalar integers. SUBTARGET is the target to use for temporary
5807 operations and TRUEVAL is the value to store when the condition is
5808 true. All other arguments are as for emit_store_flag. */
5811 emit_store_flag_int (rtx target, rtx subtarget, enum rtx_code code, rtx op0,
5812 rtx op1, scalar_int_mode mode, int unsignedp,
5813 int normalizep, rtx trueval)
5815 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5816 rtx_insn *last = get_last_insn ();
5818 /* If this is an equality comparison of integers, we can try to exclusive-or
5819 (or subtract) the two operands and use a recursive call to try the
5820 comparison with zero. Don't do any of these cases if branches are
5821 very cheap. */
5823 if ((code == EQ || code == NE) && op1 != const0_rtx)
5825 rtx tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5826 OPTAB_WIDEN);
5828 if (tem == 0)
5829 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5830 OPTAB_WIDEN);
5831 if (tem != 0)
5832 tem = emit_store_flag (target, code, tem, const0_rtx,
5833 mode, unsignedp, normalizep);
5834 if (tem != 0)
5835 return tem;
5837 delete_insns_since (last);
5840 /* For integer comparisons, try the reverse comparison. However, for
5841 small X, if we would have to extend anyway, implementing "X != 0"
5842 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5843 rtx_code rcode = reverse_condition (code);
5844 if (can_compare_p (rcode, mode, ccp_store_flag)
5845 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5846 && code == NE
5847 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5848 && op1 == const0_rtx))
5850 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5851 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5853 /* Again, for the reverse comparison, use either an addition or a XOR. */
5854 if (want_add
5855 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5856 optimize_insn_for_speed_p ()) == 0)
5858 rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5859 STORE_FLAG_VALUE, target_mode);
5860 if (tem != 0)
5861 tem = expand_binop (target_mode, add_optab, tem,
5862 gen_int_mode (normalizep, target_mode),
5863 target, 0, OPTAB_WIDEN);
5864 if (tem != 0)
5865 return tem;
5867 else if (!want_add
5868 && rtx_cost (trueval, mode, XOR, 1,
5869 optimize_insn_for_speed_p ()) == 0)
5871 rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5872 normalizep, target_mode);
5873 if (tem != 0)
5874 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5875 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5876 if (tem != 0)
5877 return tem;
5880 delete_insns_since (last);
5883 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5884 the constant zero. Reject all other comparisons at this point. Only
5885 do LE and GT if branches are expensive since they are expensive on
5886 2-operand machines. */
5888 if (op1 != const0_rtx
5889 || (code != EQ && code != NE
5890 && (BRANCH_COST (optimize_insn_for_speed_p (),
5891 false) <= 1 || (code != LE && code != GT))))
5892 return 0;
5894 /* Try to put the result of the comparison in the sign bit. Assume we can't
5895 do the necessary operation below. */
5897 rtx tem = 0;
5899 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5900 the sign bit set. */
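/* A quick check of the identity: A == 0 gives 0 | -1 = -1 (sign bit
   set), A > 0 ORs two non-negative values (sign bit clear), and A < 0
   keeps its own sign bit (set).  */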
5902 if (code == LE)
5904 /* This is destructive, so SUBTARGET can't be OP0. */
5905 if (rtx_equal_p (subtarget, op0))
5906 subtarget = 0;
5908 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5909 OPTAB_WIDEN);
5910 if (tem)
5911 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5912 OPTAB_WIDEN);
5915 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5916 number of bits in the mode of OP0, minus one. */
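/* That is, subtract A from its own sign mask: for A > 0 the result is
   0 - A, which is negative (sign bit set); for A == 0 it is 0; and for
   A < 0 it is -1 - A, which is non-negative (sign bit clear).  */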
5918 if (code == GT)
5920 if (rtx_equal_p (subtarget, op0))
5921 subtarget = 0;
5923 tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
5924 GET_MODE_BITSIZE (mode) - 1,
5925 subtarget, 0);
5926 if (tem)
5927 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5928 OPTAB_WIDEN);
5931 if (code == EQ || code == NE)
5933 /* For EQ or NE, one way to do the comparison is to apply an operation
5934 that converts the operand into a positive number if it is nonzero
5935 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5936 for NE we negate. This puts the result in the sign bit. Then we
5937 normalize with a shift, if needed.
5939 Two operations that can do the above actions are ABS and FFS, so try
5940 them. If that doesn't work, and MODE is smaller than a full word,
5941 we can use zero-extension to the wider mode (an unsigned conversion)
5942 as the operation. */
5944 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5945 that is compensated by the subsequent overflow when subtracting
5946 one / negating. */
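/* Concretely, for NE the value -abs (A) has the sign bit set exactly
   when A != 0, and for EQ the value abs (A) - 1 has the sign bit set
   exactly when A == 0.  The INT_MIN case still comes out right:
   abs (INT_MIN) wraps to INT_MIN, so negating it keeps the sign bit
   set (correct for NE) and subtracting one clears it (correct for
   EQ).  */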
5948 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5949 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5950 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5951 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5952 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5954 tem = convert_modes (word_mode, mode, op0, 1);
5955 mode = word_mode;
5958 if (tem != 0)
5960 if (code == EQ)
5961 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5962 0, OPTAB_WIDEN);
5963 else
5964 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5967 /* If we couldn't do it that way, for NE we can "or" the two's complement
5968 of the value with itself. For EQ, we take the one's complement of
5969 that "or", which is an extra insn, so we only handle EQ if branches
5970 are expensive. */
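/* (-A | A) has the sign bit set for every nonzero A: if A > 0 then -A
   is negative, if A < 0 then A itself is, and INT_MIN is its own
   negation; only A == 0 yields 0 | 0 = 0.  */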
5972 if (tem == 0
5973 && (code == NE
5974 || BRANCH_COST (optimize_insn_for_speed_p (),
5975 false) > 1))
5977 if (rtx_equal_p (subtarget, op0))
5978 subtarget = 0;
5980 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5981 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5982 OPTAB_WIDEN);
5984 if (tem && code == EQ)
5985 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5989 if (tem && normalizep)
5990 tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem,
5991 GET_MODE_BITSIZE (mode) - 1,
5992 subtarget, normalizep == 1);
5994 if (tem)
5996 if (!target)
5998 else if (GET_MODE (tem) != target_mode)
6000 convert_move (target, tem, 0);
6001 tem = target;
6003 else if (!subtarget)
6005 emit_move_insn (target, tem);
6006 tem = target;
6009 else
6010 delete_insns_since (last);
6012 return tem;
6015 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
6016 and storing in TARGET. Normally return TARGET.
6017 Return 0 if that cannot be done.
6019 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
6020 it is VOIDmode, they cannot both be CONST_INT.
6022 UNSIGNEDP is for the case where we have to widen the operands
6023 to perform the operation. It says to use zero-extension.
6025 NORMALIZEP is 1 if we should convert the result to be either zero
6026 or one. NORMALIZEP is -1 if we should convert the result to be
6027 either zero or -1. If NORMALIZEP is zero, the result will be left
6028 "raw" out of the scc insn. */
6031 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
6032 machine_mode mode, int unsignedp, int normalizep)
6034 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
6035 enum rtx_code rcode;
6036 rtx subtarget;
6037 rtx tem, trueval;
6038 rtx_insn *last;
6040 /* If we compare constants, we shouldn't use a store-flag operation,
6041 but a constant load. We can get there via the vanilla route that
6042 usually generates a compare-branch sequence, but will in this case
6043 fold the comparison to a constant, and thus elide the branch. */
6044 if (CONSTANT_P (op0) && CONSTANT_P (op1))
6045 return NULL_RTX;
6047 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
6048 target_mode);
6049 if (tem)
6050 return tem;
6052 /* If we reached here, we can't do this with a scc insn; however, there
6053 are some comparisons that can be done in other ways. Don't do any
6054 of these cases if branches are very cheap. */
6055 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
6056 return 0;
6058 /* See what we need to return. We can only return a 1, -1, or the
6059 sign bit. */
6061 if (normalizep == 0)
6063 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
6064 normalizep = STORE_FLAG_VALUE;
6066 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
6068 else
6069 return 0;
6072 last = get_last_insn ();
6074 /* If optimizing, use different pseudo registers for each insn, instead
6075 of reusing the same pseudo. This leads to better CSE, but slows
6076 down the compiler, since there are more pseudos. */
6077 subtarget = (!optimize
6078 && (target_mode == mode)) ? target : NULL_RTX;
6079 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
6081 /* For floating-point comparisons, try the reverse comparison or try
6082 changing the "orderedness" of the comparison. */
6083 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6085 enum rtx_code first_code;
6086 bool and_them;
6088 rcode = reverse_condition_maybe_unordered (code);
6089 if (can_compare_p (rcode, mode, ccp_store_flag)
6090 && (code == ORDERED || code == UNORDERED
6091 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6092 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6094 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
6095 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
6097 /* For the reverse comparison, use either an addition or a XOR. */
6098 if (want_add
6099 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
6100 optimize_insn_for_speed_p ()) == 0)
6102 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
6103 STORE_FLAG_VALUE, target_mode);
6104 if (tem)
6105 return expand_binop (target_mode, add_optab, tem,
6106 gen_int_mode (normalizep, target_mode),
6107 target, 0, OPTAB_WIDEN);
6109 else if (!want_add
6110 && rtx_cost (trueval, mode, XOR, 1,
6111 optimize_insn_for_speed_p ()) == 0)
6113 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
6114 normalizep, target_mode);
6115 if (tem)
6116 return expand_binop (target_mode, xor_optab, tem, trueval,
6117 target, INTVAL (trueval) >= 0,
6118 OPTAB_WIDEN);
6122 delete_insns_since (last);
6124 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
6125 if (code == ORDERED || code == UNORDERED)
6126 return 0;
6128 and_them = split_comparison (code, mode, &first_code, &code);
6130 /* If there are no NaNs, the first comparison should always fall through.
6131 Effectively change the comparison to the other one. */
6132 if (!HONOR_NANS (mode))
6134 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
6135 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
6136 target_mode);
6139 if (!HAVE_conditional_move)
6140 return 0;
6142 /* Do not turn a trapping comparison into a non-trapping one. */
6143 if ((code != EQ && code != NE && code != UNEQ && code != LTGT)
6144 && flag_trapping_math)
6145 return 0;
6147 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
6148 conditional move. */
6149 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
6150 normalizep, target_mode);
6151 if (tem == 0)
6152 return 0;
6154 if (and_them)
6155 tem = emit_conditional_move (target, { code, op0, op1, mode },
6156 tem, const0_rtx, GET_MODE (tem), 0);
6157 else
6158 tem = emit_conditional_move (target, { code, op0, op1, mode },
6159 trueval, tem, GET_MODE (tem), 0);
6161 if (tem == 0)
6162 delete_insns_since (last);
6163 return tem;
6166 /* The remaining tricks only apply to integer comparisons. */
6168 scalar_int_mode int_mode;
6169 if (is_int_mode (mode, &int_mode))
6170 return emit_store_flag_int (target, subtarget, code, op0, op1, int_mode,
6171 unsignedp, normalizep, trueval);
6173 return 0;
6176 /* Like emit_store_flag, but always succeeds. */
6179 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
6180 machine_mode mode, int unsignedp, int normalizep)
6182 rtx tem;
6183 rtx_code_label *label;
6184 rtx trueval, falseval;
6186 /* First see if emit_store_flag can do the job. */
6187 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
6188 if (tem != 0)
6189 return tem;
6191 /* If one operand is constant, make it the second one. Only do this
6192 if the other operand is not constant as well. */
6193 if (swap_commutative_operands_p (op0, op1))
6195 std::swap (op0, op1);
6196 code = swap_condition (code);
6199 if (mode == VOIDmode)
6200 mode = GET_MODE (op0);
6202 if (!target)
6203 target = gen_reg_rtx (word_mode);
6205 /* If this failed, we have to do this with set/compare/jump/set code.
6206 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
6207 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
6208 if (code == NE
6209 && GET_MODE_CLASS (mode) == MODE_INT
6210 && REG_P (target)
6211 && op0 == target
6212 && op1 == const0_rtx)
6214 label = gen_label_rtx ();
6215 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
6216 NULL_RTX, NULL, label,
6217 profile_probability::uninitialized ());
6218 emit_move_insn (target, trueval);
6219 emit_label (label);
6220 return target;
6223 if (!REG_P (target)
6224 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
6225 target = gen_reg_rtx (GET_MODE (target));
6227 /* Jump in the right direction if the target cannot implement CODE
6228 but can jump on its reverse condition. */
6229 falseval = const0_rtx;
6230 if (! can_compare_p (code, mode, ccp_jump)
6231 && (! FLOAT_MODE_P (mode)
6232 || code == ORDERED || code == UNORDERED
6233 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6234 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6236 enum rtx_code rcode;
6237 if (FLOAT_MODE_P (mode))
6238 rcode = reverse_condition_maybe_unordered (code);
6239 else
6240 rcode = reverse_condition (code);
6242 /* Canonicalize to UNORDERED for the libcall. */
6243 if (can_compare_p (rcode, mode, ccp_jump)
6244 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
6246 falseval = trueval;
6247 trueval = const0_rtx;
6248 code = rcode;
6252 emit_move_insn (target, trueval);
6253 label = gen_label_rtx ();
6254 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
6255 label, profile_probability::uninitialized ());
6257 emit_move_insn (target, falseval);
6258 emit_label (label);
6260 return target;
6263 /* Helper function for canonicalize_cmp_for_target. Swap between inclusive
6264 and exclusive ranges in order to create an equivalent comparison. See
6265 canonicalize_cmp_for_target for the possible cases. */
6267 static enum rtx_code
6268 equivalent_cmp_code (enum rtx_code code)
6270 switch (code)
6272 case GT:
6273 return GE;
6274 case GE:
6275 return GT;
6276 case LT:
6277 return LE;
6278 case LE:
6279 return LT;
6280 case GTU:
6281 return GEU;
6282 case GEU:
6283 return GTU;
6284 case LTU:
6285 return LEU;
6286 case LEU:
6287 return LTU;
6289 default:
6290 return code;
6294 /* Choose the more appropriate immediate in scalar integer comparisons. The
6295 purpose of this is to end up with an immediate which can be loaded into a
6296 register in fewer moves, if possible.
6298 For each integer comparison there exists an equivalent choice:
6299 i) a > b or a >= b + 1
6300 ii) a <= b or a < b + 1
6301 iii) a >= b or a > b - 1
6302 iv) a < b or a <= b - 1
6304 MODE is the mode of the first operand.
6305 CODE points to the comparison code.
6306 IMM points to the rtx containing the immediate. *IMM must satisfy
6307 CONST_SCALAR_INT_P on entry and continues to satisfy CONST_SCALAR_INT_P
6308 on exit. */
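/* A hypothetical example: on a target whose compare or move-immediate
   patterns can encode 0x1000 directly but need an extra instruction to
   materialize 0xfff, this rewrites "x > 0xfff" as the equivalent
   "x >= 0x1000", because the insn_cost comparison below finds the
   adjusted constant cheaper to load.  */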
6310 void
6311 canonicalize_comparison (machine_mode mode, enum rtx_code *code, rtx *imm)
6313 if (!SCALAR_INT_MODE_P (mode))
6314 return;
6316 int to_add = 0;
6317 enum signop sgn = unsigned_condition_p (*code) ? UNSIGNED : SIGNED;
6319 /* Extract the immediate value from the rtx. */
6320 wide_int imm_val = rtx_mode_t (*imm, mode);
6322 if (*code == GT || *code == GTU || *code == LE || *code == LEU)
6323 to_add = 1;
6324 else if (*code == GE || *code == GEU || *code == LT || *code == LTU)
6325 to_add = -1;
6326 else
6327 return;
6329 /* Check for overflow/underflow in the case of signed values and
6330 wrapping around in the case of unsigned values. If any occur,
6331 cancel the optimization. */
6332 wi::overflow_type overflow = wi::OVF_NONE;
6333 wide_int imm_modif;
6335 if (to_add == 1)
6336 imm_modif = wi::add (imm_val, 1, sgn, &overflow);
6337 else
6338 imm_modif = wi::sub (imm_val, 1, sgn, &overflow);
6340 if (overflow)
6341 return;
6343 /* The following creates a pseudo; if we cannot do that, bail out. */
6344 if (!can_create_pseudo_p ())
6345 return;
6347 rtx reg = gen_rtx_REG (mode, LAST_VIRTUAL_REGISTER + 1);
6348 rtx new_imm = immed_wide_int_const (imm_modif, mode);
6350 rtx_insn *old_rtx = gen_move_insn (reg, *imm);
6351 rtx_insn *new_rtx = gen_move_insn (reg, new_imm);
6353 /* Update the immediate and the code. */
6354 if (insn_cost (old_rtx, true) > insn_cost (new_rtx, true))
6356 *code = equivalent_cmp_code (*code);
6357 *imm = new_imm;
6363 /* Perform possibly multi-word comparison and conditional jump to LABEL
6364 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
6365 now a thin wrapper around do_compare_rtx_and_jump. */
6367 static void
6368 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
6369 rtx_code_label *label)
6371 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
6372 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
6373 NULL, label, profile_probability::uninitialized ());