[4/77] Add FOR_EACH iterators for modes
[official-gcc.git] / gcc / expmed.c
blob0219e0dee7692f752f60f77c1021ceecb44ef123
1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987-2017 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "predict.h"
30 #include "memmodel.h"
31 #include "tm_p.h"
32 #include "expmed.h"
33 #include "optabs.h"
34 #include "emit-rtl.h"
35 #include "diagnostic-core.h"
36 #include "fold-const.h"
37 #include "stor-layout.h"
38 #include "dojump.h"
39 #include "explow.h"
40 #include "expr.h"
41 #include "langhooks.h"
43 struct target_expmed default_target_expmed;
44 #if SWITCHABLE_TARGET
45 struct target_expmed *this_target_expmed = &default_target_expmed;
46 #endif
48 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
49 unsigned HOST_WIDE_INT,
50 unsigned HOST_WIDE_INT,
51 unsigned HOST_WIDE_INT,
52 rtx, bool);
53 static void store_fixed_bit_field_1 (rtx, unsigned HOST_WIDE_INT,
54 unsigned HOST_WIDE_INT,
55 rtx, bool);
56 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
57 unsigned HOST_WIDE_INT,
58 unsigned HOST_WIDE_INT,
59 unsigned HOST_WIDE_INT,
60 rtx, bool);
61 static rtx extract_fixed_bit_field (machine_mode, rtx,
62 unsigned HOST_WIDE_INT,
63 unsigned HOST_WIDE_INT, rtx, int, bool);
64 static rtx extract_fixed_bit_field_1 (machine_mode, rtx,
65 unsigned HOST_WIDE_INT,
66 unsigned HOST_WIDE_INT, rtx, int, bool);
67 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
68 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
69 unsigned HOST_WIDE_INT, int, bool);
70 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
71 static rtx expand_smod_pow2 (machine_mode, rtx, HOST_WIDE_INT);
72 static rtx expand_sdiv_pow2 (machine_mode, rtx, HOST_WIDE_INT);
74 /* Return a constant integer mask value of mode MODE with BITSIZE ones
75 followed by BITPOS zeros, or the complement of that if COMPLEMENT.
76 The mask is truncated if necessary to the width of mode MODE. The
77 mask is zero-extended if BITSIZE+BITPOS is too small for MODE. */
79 static inline rtx
80 mask_rtx (machine_mode mode, int bitpos, int bitsize, bool complement)
82 return immed_wide_int_const
83 (wi::shifted_mask (bitpos, bitsize, complement,
84 GET_MODE_PRECISION (mode)), mode);
87 /* Test whether a value is zero of a power of two. */
88 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
89 (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
91 struct init_expmed_rtl
93 rtx reg;
94 rtx plus;
95 rtx neg;
96 rtx mult;
97 rtx sdiv;
98 rtx udiv;
99 rtx sdiv_32;
100 rtx smod_32;
101 rtx wide_mult;
102 rtx wide_lshr;
103 rtx wide_trunc;
104 rtx shift;
105 rtx shift_mult;
106 rtx shift_add;
107 rtx shift_sub0;
108 rtx shift_sub1;
109 rtx zext;
110 rtx trunc;
112 rtx pow2[MAX_BITS_PER_WORD];
113 rtx cint[MAX_BITS_PER_WORD];
116 static void
117 init_expmed_one_conv (struct init_expmed_rtl *all, machine_mode to_mode,
118 machine_mode from_mode, bool speed)
120 int to_size, from_size;
121 rtx which;
123 to_size = GET_MODE_PRECISION (to_mode);
124 from_size = GET_MODE_PRECISION (from_mode);
126 /* Most partial integers have a precision less than the "full"
127 integer it requires for storage. In case one doesn't, for
128 comparison purposes here, reduce the bit size by one in that
129 case. */
130 if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
131 && pow2p_hwi (to_size))
132 to_size --;
133 if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
134 && pow2p_hwi (from_size))
135 from_size --;
137 /* Assume cost of zero-extend and sign-extend is the same. */
138 which = (to_size < from_size ? all->trunc : all->zext);
140 PUT_MODE (all->reg, from_mode);
141 set_convert_cost (to_mode, from_mode, speed,
142 set_src_cost (which, to_mode, speed));
145 static void
146 init_expmed_one_mode (struct init_expmed_rtl *all,
147 machine_mode mode, int speed)
149 int m, n, mode_bitsize;
150 machine_mode mode_from;
152 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
154 PUT_MODE (all->reg, mode);
155 PUT_MODE (all->plus, mode);
156 PUT_MODE (all->neg, mode);
157 PUT_MODE (all->mult, mode);
158 PUT_MODE (all->sdiv, mode);
159 PUT_MODE (all->udiv, mode);
160 PUT_MODE (all->sdiv_32, mode);
161 PUT_MODE (all->smod_32, mode);
162 PUT_MODE (all->wide_trunc, mode);
163 PUT_MODE (all->shift, mode);
164 PUT_MODE (all->shift_mult, mode);
165 PUT_MODE (all->shift_add, mode);
166 PUT_MODE (all->shift_sub0, mode);
167 PUT_MODE (all->shift_sub1, mode);
168 PUT_MODE (all->zext, mode);
169 PUT_MODE (all->trunc, mode);
171 set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
172 set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
173 set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
174 set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
175 set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
177 set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
178 <= 2 * add_cost (speed, mode)));
179 set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
180 <= 4 * add_cost (speed, mode)));
182 set_shift_cost (speed, mode, 0, 0);
184 int cost = add_cost (speed, mode);
185 set_shiftadd_cost (speed, mode, 0, cost);
186 set_shiftsub0_cost (speed, mode, 0, cost);
187 set_shiftsub1_cost (speed, mode, 0, cost);
190 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
191 for (m = 1; m < n; m++)
193 XEXP (all->shift, 1) = all->cint[m];
194 XEXP (all->shift_mult, 1) = all->pow2[m];
196 set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
197 set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
198 speed));
199 set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
200 speed));
201 set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
202 speed));
205 if (SCALAR_INT_MODE_P (mode))
207 for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
208 mode_from = (machine_mode)(mode_from + 1))
209 init_expmed_one_conv (all, mode, mode_from, speed);
211 if (GET_MODE_CLASS (mode) == MODE_INT)
213 machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
214 if (wider_mode != VOIDmode)
216 PUT_MODE (all->zext, wider_mode);
217 PUT_MODE (all->wide_mult, wider_mode);
218 PUT_MODE (all->wide_lshr, wider_mode);
219 XEXP (all->wide_lshr, 1) = GEN_INT (mode_bitsize);
221 set_mul_widen_cost (speed, wider_mode,
222 set_src_cost (all->wide_mult, wider_mode, speed));
223 set_mul_highpart_cost (speed, mode,
224 set_src_cost (all->wide_trunc, mode, speed));
229 void
230 init_expmed (void)
232 struct init_expmed_rtl all;
233 machine_mode mode = QImode;
234 int m, speed;
236 memset (&all, 0, sizeof all);
237 for (m = 1; m < MAX_BITS_PER_WORD; m++)
239 all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
240 all.cint[m] = GEN_INT (m);
243 /* Avoid using hard regs in ways which may be unsupported. */
244 all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
245 all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
246 all.neg = gen_rtx_NEG (mode, all.reg);
247 all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
248 all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
249 all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
250 all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
251 all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
252 all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
253 all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
254 all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
255 all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
256 all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
257 all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
258 all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
259 all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
260 all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
261 all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
263 for (speed = 0; speed < 2; speed++)
265 crtl->maybe_hot_insn_p = speed;
266 set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
268 for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
269 mode = (machine_mode)(mode + 1))
270 init_expmed_one_mode (&all, mode, speed);
272 if (MIN_MODE_PARTIAL_INT != VOIDmode)
273 for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
274 mode = (machine_mode)(mode + 1))
275 init_expmed_one_mode (&all, mode, speed);
277 if (MIN_MODE_VECTOR_INT != VOIDmode)
278 for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
279 mode = (machine_mode)(mode + 1))
280 init_expmed_one_mode (&all, mode, speed);
283 if (alg_hash_used_p ())
285 struct alg_hash_entry *p = alg_hash_entry_ptr (0);
286 memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
288 else
289 set_alg_hash_used_p (true);
290 default_rtl_profile ();
292 ggc_free (all.trunc);
293 ggc_free (all.shift_sub1);
294 ggc_free (all.shift_sub0);
295 ggc_free (all.shift_add);
296 ggc_free (all.shift_mult);
297 ggc_free (all.shift);
298 ggc_free (all.wide_trunc);
299 ggc_free (all.wide_lshr);
300 ggc_free (all.wide_mult);
301 ggc_free (all.zext);
302 ggc_free (all.smod_32);
303 ggc_free (all.sdiv_32);
304 ggc_free (all.udiv);
305 ggc_free (all.sdiv);
306 ggc_free (all.mult);
307 ggc_free (all.neg);
308 ggc_free (all.plus);
309 ggc_free (all.reg);
312 /* Return an rtx representing minus the value of X.
313 MODE is the intended mode of the result,
314 useful if X is a CONST_INT. */
317 negate_rtx (machine_mode mode, rtx x)
319 rtx result = simplify_unary_operation (NEG, mode, x, mode);
321 if (result == 0)
322 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
324 return result;
327 /* Whether reverse storage order is supported on the target. */
328 static int reverse_storage_order_supported = -1;
330 /* Check whether reverse storage order is supported on the target. */
332 static void
333 check_reverse_storage_order_support (void)
335 if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
337 reverse_storage_order_supported = 0;
338 sorry ("reverse scalar storage order");
340 else
341 reverse_storage_order_supported = 1;
344 /* Whether reverse FP storage order is supported on the target. */
345 static int reverse_float_storage_order_supported = -1;
347 /* Check whether reverse FP storage order is supported on the target. */
349 static void
350 check_reverse_float_storage_order_support (void)
352 if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
354 reverse_float_storage_order_supported = 0;
355 sorry ("reverse floating-point scalar storage order");
357 else
358 reverse_float_storage_order_supported = 1;
361 /* Return an rtx representing value of X with reverse storage order.
362 MODE is the intended mode of the result,
363 useful if X is a CONST_INT. */
366 flip_storage_order (machine_mode mode, rtx x)
368 machine_mode int_mode;
369 rtx result;
371 if (mode == QImode)
372 return x;
374 if (COMPLEX_MODE_P (mode))
376 rtx real = read_complex_part (x, false);
377 rtx imag = read_complex_part (x, true);
379 real = flip_storage_order (GET_MODE_INNER (mode), real);
380 imag = flip_storage_order (GET_MODE_INNER (mode), imag);
382 return gen_rtx_CONCAT (mode, real, imag);
385 if (__builtin_expect (reverse_storage_order_supported < 0, 0))
386 check_reverse_storage_order_support ();
388 if (SCALAR_INT_MODE_P (mode))
389 int_mode = mode;
390 else
392 if (FLOAT_MODE_P (mode)
393 && __builtin_expect (reverse_float_storage_order_supported < 0, 0))
394 check_reverse_float_storage_order_support ();
396 int_mode = mode_for_size (GET_MODE_PRECISION (mode), MODE_INT, 0);
397 if (int_mode == BLKmode)
399 sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
400 return x;
402 x = gen_lowpart (int_mode, x);
405 result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
406 if (result == 0)
407 result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
409 if (int_mode != mode)
410 result = gen_lowpart (mode, result);
412 return result;
415 /* Adjust bitfield memory MEM so that it points to the first unit of mode
416 MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
417 If MODE is BLKmode, return a reference to every byte in the bitfield.
418 Set *NEW_BITNUM to the bit position of the field within the new memory. */
420 static rtx
421 narrow_bit_field_mem (rtx mem, machine_mode mode,
422 unsigned HOST_WIDE_INT bitsize,
423 unsigned HOST_WIDE_INT bitnum,
424 unsigned HOST_WIDE_INT *new_bitnum)
426 if (mode == BLKmode)
428 *new_bitnum = bitnum % BITS_PER_UNIT;
429 HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
430 HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
431 / BITS_PER_UNIT);
432 return adjust_bitfield_address_size (mem, mode, offset, size);
434 else
436 unsigned int unit = GET_MODE_BITSIZE (mode);
437 *new_bitnum = bitnum % unit;
438 HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
439 return adjust_bitfield_address (mem, mode, offset);
443 /* The caller wants to perform insertion or extraction PATTERN on a
444 bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
445 BITREGION_START and BITREGION_END are as for store_bit_field
446 and FIELDMODE is the natural mode of the field.
448 Search for a mode that is compatible with the memory access
449 restrictions and (where applicable) with a register insertion or
450 extraction. Return the new memory on success, storing the adjusted
451 bit position in *NEW_BITNUM. Return null otherwise. */
453 static rtx
454 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
455 rtx op0, HOST_WIDE_INT bitsize,
456 HOST_WIDE_INT bitnum,
457 unsigned HOST_WIDE_INT bitregion_start,
458 unsigned HOST_WIDE_INT bitregion_end,
459 machine_mode fieldmode,
460 unsigned HOST_WIDE_INT *new_bitnum)
462 bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
463 bitregion_end, MEM_ALIGN (op0),
464 MEM_VOLATILE_P (op0));
465 machine_mode best_mode;
466 if (iter.next_mode (&best_mode))
468 /* We can use a memory in BEST_MODE. See whether this is true for
469 any wider modes. All other things being equal, we prefer to
470 use the widest mode possible because it tends to expose more
471 CSE opportunities. */
472 if (!iter.prefer_smaller_modes ())
474 /* Limit the search to the mode required by the corresponding
475 register insertion or extraction instruction, if any. */
476 machine_mode limit_mode = word_mode;
477 extraction_insn insn;
478 if (get_best_reg_extraction_insn (&insn, pattern,
479 GET_MODE_BITSIZE (best_mode),
480 fieldmode))
481 limit_mode = insn.field_mode;
483 machine_mode wider_mode;
484 while (iter.next_mode (&wider_mode)
485 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
486 best_mode = wider_mode;
488 return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
489 new_bitnum);
491 return NULL_RTX;
494 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
495 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
496 offset is then BITNUM / BITS_PER_UNIT. */
498 static bool
499 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
500 unsigned HOST_WIDE_INT bitsize,
501 machine_mode struct_mode)
503 if (BYTES_BIG_ENDIAN)
504 return (bitnum % BITS_PER_UNIT == 0
505 && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
506 || (bitnum + bitsize) % BITS_PER_WORD == 0));
507 else
508 return bitnum % BITS_PER_WORD == 0;
511 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
512 containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
513 Return false if the access would touch memory outside the range
514 BITREGION_START to BITREGION_END for conformance to the C++ memory
515 model. */
517 static bool
518 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
519 unsigned HOST_WIDE_INT bitnum,
520 machine_mode fieldmode,
521 unsigned HOST_WIDE_INT bitregion_start,
522 unsigned HOST_WIDE_INT bitregion_end)
524 unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
526 /* -fstrict-volatile-bitfields must be enabled and we must have a
527 volatile MEM. */
528 if (!MEM_P (op0)
529 || !MEM_VOLATILE_P (op0)
530 || flag_strict_volatile_bitfields <= 0)
531 return false;
533 /* Non-integral modes likely only happen with packed structures.
534 Punt. */
535 if (!SCALAR_INT_MODE_P (fieldmode))
536 return false;
538 /* The bit size must not be larger than the field mode, and
539 the field mode must not be larger than a word. */
540 if (bitsize > modesize || modesize > BITS_PER_WORD)
541 return false;
543 /* Check for cases of unaligned fields that must be split. */
544 if (bitnum % modesize + bitsize > modesize)
545 return false;
547 /* The memory must be sufficiently aligned for a MODESIZE access.
548 This condition guarantees, that the memory access will not
549 touch anything after the end of the structure. */
550 if (MEM_ALIGN (op0) < modesize)
551 return false;
553 /* Check for cases where the C++ memory model applies. */
554 if (bitregion_end != 0
555 && (bitnum - bitnum % modesize < bitregion_start
556 || bitnum - bitnum % modesize + modesize - 1 > bitregion_end))
557 return false;
559 return true;
562 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
563 bit number BITNUM can be treated as a simple value of mode MODE. */
565 static bool
566 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
567 unsigned HOST_WIDE_INT bitnum, machine_mode mode)
569 return (MEM_P (op0)
570 && bitnum % BITS_PER_UNIT == 0
571 && bitsize == GET_MODE_BITSIZE (mode)
572 && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
573 || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
574 && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
577 /* Try to use instruction INSV to store VALUE into a field of OP0.
578 BITSIZE and BITNUM are as for store_bit_field. */
580 static bool
581 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
582 unsigned HOST_WIDE_INT bitsize,
583 unsigned HOST_WIDE_INT bitnum,
584 rtx value)
586 struct expand_operand ops[4];
587 rtx value1;
588 rtx xop0 = op0;
589 rtx_insn *last = get_last_insn ();
590 bool copy_back = false;
592 machine_mode op_mode = insv->field_mode;
593 unsigned int unit = GET_MODE_BITSIZE (op_mode);
594 if (bitsize == 0 || bitsize > unit)
595 return false;
597 if (MEM_P (xop0))
598 /* Get a reference to the first byte of the field. */
599 xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
600 &bitnum);
601 else
603 /* Convert from counting within OP0 to counting in OP_MODE. */
604 if (BYTES_BIG_ENDIAN)
605 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
607 /* If xop0 is a register, we need it in OP_MODE
608 to make it acceptable to the format of insv. */
609 if (GET_CODE (xop0) == SUBREG)
610 /* We can't just change the mode, because this might clobber op0,
611 and we will need the original value of op0 if insv fails. */
612 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
613 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
614 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
617 /* If the destination is a paradoxical subreg such that we need a
618 truncate to the inner mode, perform the insertion on a temporary and
619 truncate the result to the original destination. Note that we can't
620 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
621 X) 0)) is (reg:N X). */
622 if (GET_CODE (xop0) == SUBREG
623 && REG_P (SUBREG_REG (xop0))
624 && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
625 op_mode))
627 rtx tem = gen_reg_rtx (op_mode);
628 emit_move_insn (tem, xop0);
629 xop0 = tem;
630 copy_back = true;
633 /* There are similar overflow check at the start of store_bit_field_1,
634 but that only check the situation where the field lies completely
635 outside the register, while there do have situation where the field
636 lies partialy in the register, we need to adjust bitsize for this
637 partial overflow situation. Without this fix, pr48335-2.c on big-endian
638 will broken on those arch support bit insert instruction, like arm, aarch64
639 etc. */
640 if (bitsize + bitnum > unit && bitnum < unit)
642 warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
643 "destination object, data truncated into %wu-bit",
644 bitsize, unit - bitnum);
645 bitsize = unit - bitnum;
648 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
649 "backwards" from the size of the unit we are inserting into.
650 Otherwise, we count bits from the most significant on a
651 BYTES/BITS_BIG_ENDIAN machine. */
653 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
654 bitnum = unit - bitsize - bitnum;
656 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
657 value1 = value;
658 if (GET_MODE (value) != op_mode)
660 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
662 rtx tmp;
663 /* Optimization: Don't bother really extending VALUE
664 if it has all the bits we will actually use. However,
665 if we must narrow it, be sure we do it correctly. */
667 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
669 tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
670 if (! tmp)
671 tmp = simplify_gen_subreg (op_mode,
672 force_reg (GET_MODE (value),
673 value1),
674 GET_MODE (value), 0);
676 else
678 tmp = gen_lowpart_if_possible (op_mode, value1);
679 if (! tmp)
680 tmp = gen_lowpart (op_mode, force_reg (GET_MODE (value),
681 value1));
683 value1 = tmp;
685 else if (CONST_INT_P (value))
686 value1 = gen_int_mode (INTVAL (value), op_mode);
687 else
688 /* Parse phase is supposed to make VALUE's data type
689 match that of the component reference, which is a type
690 at least as wide as the field; so VALUE should have
691 a mode that corresponds to that type. */
692 gcc_assert (CONSTANT_P (value));
695 create_fixed_operand (&ops[0], xop0);
696 create_integer_operand (&ops[1], bitsize);
697 create_integer_operand (&ops[2], bitnum);
698 create_input_operand (&ops[3], value1, op_mode);
699 if (maybe_expand_insn (insv->icode, 4, ops))
701 if (copy_back)
702 convert_move (op0, xop0, true);
703 return true;
705 delete_insns_since (last);
706 return false;
709 /* A subroutine of store_bit_field, with the same arguments. Return true
710 if the operation could be implemented.
712 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
713 no other way of implementing the operation. If FALLBACK_P is false,
714 return false instead. */
716 static bool
717 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
718 unsigned HOST_WIDE_INT bitnum,
719 unsigned HOST_WIDE_INT bitregion_start,
720 unsigned HOST_WIDE_INT bitregion_end,
721 machine_mode fieldmode,
722 rtx value, bool reverse, bool fallback_p)
724 rtx op0 = str_rtx;
725 rtx orig_value;
727 while (GET_CODE (op0) == SUBREG)
729 /* The following line once was done only if WORDS_BIG_ENDIAN,
730 but I think that is a mistake. WORDS_BIG_ENDIAN is
731 meaningful at a much higher level; when structures are copied
732 between memory and regs, the higher-numbered regs
733 always get higher addresses. */
734 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
735 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
736 int byte_offset = 0;
738 /* Paradoxical subregs need special handling on big-endian machines. */
739 if (paradoxical_subreg_p (op0))
741 int difference = inner_mode_size - outer_mode_size;
743 if (WORDS_BIG_ENDIAN)
744 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
745 if (BYTES_BIG_ENDIAN)
746 byte_offset += difference % UNITS_PER_WORD;
748 else
749 byte_offset = SUBREG_BYTE (op0);
751 bitnum += byte_offset * BITS_PER_UNIT;
752 op0 = SUBREG_REG (op0);
755 /* No action is needed if the target is a register and if the field
756 lies completely outside that register. This can occur if the source
757 code contains an out-of-bounds access to a small array. */
758 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
759 return true;
761 /* Use vec_set patterns for inserting parts of vectors whenever
762 available. */
763 if (VECTOR_MODE_P (GET_MODE (op0))
764 && !MEM_P (op0)
765 && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
766 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
767 && bitsize == GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
768 && !(bitnum % GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
770 struct expand_operand ops[3];
771 machine_mode outermode = GET_MODE (op0);
772 machine_mode innermode = GET_MODE_INNER (outermode);
773 enum insn_code icode = optab_handler (vec_set_optab, outermode);
774 int pos = bitnum / GET_MODE_BITSIZE (innermode);
776 create_fixed_operand (&ops[0], op0);
777 create_input_operand (&ops[1], value, innermode);
778 create_integer_operand (&ops[2], pos);
779 if (maybe_expand_insn (icode, 3, ops))
780 return true;
783 /* If the target is a register, overwriting the entire object, or storing
784 a full-word or multi-word field can be done with just a SUBREG. */
785 if (!MEM_P (op0)
786 && bitsize == GET_MODE_BITSIZE (fieldmode)
787 && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
788 || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
790 /* Use the subreg machinery either to narrow OP0 to the required
791 words or to cope with mode punning between equal-sized modes.
792 In the latter case, use subreg on the rhs side, not lhs. */
793 rtx sub;
795 if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
797 sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
798 if (sub)
800 if (reverse)
801 sub = flip_storage_order (GET_MODE (op0), sub);
802 emit_move_insn (op0, sub);
803 return true;
806 else
808 sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
809 bitnum / BITS_PER_UNIT);
810 if (sub)
812 if (reverse)
813 value = flip_storage_order (fieldmode, value);
814 emit_move_insn (sub, value);
815 return true;
820 /* If the target is memory, storing any naturally aligned field can be
821 done with a simple store. For targets that support fast unaligned
822 memory, any naturally sized, unit aligned field can be done directly. */
823 if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
825 op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
826 if (reverse)
827 value = flip_storage_order (fieldmode, value);
828 emit_move_insn (op0, value);
829 return true;
832 /* Make sure we are playing with integral modes. Pun with subregs
833 if we aren't. This must come after the entire register case above,
834 since that case is valid for any mode. The following cases are only
835 valid for integral modes. */
837 machine_mode imode = int_mode_for_mode (GET_MODE (op0));
838 if (imode != GET_MODE (op0))
840 if (MEM_P (op0))
841 op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
842 else
844 gcc_assert (imode != BLKmode);
845 op0 = gen_lowpart (imode, op0);
850 /* Storing an lsb-aligned field in a register
851 can be done with a movstrict instruction. */
853 if (!MEM_P (op0)
854 && !reverse
855 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
856 && bitsize == GET_MODE_BITSIZE (fieldmode)
857 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
859 struct expand_operand ops[2];
860 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
861 rtx arg0 = op0;
862 unsigned HOST_WIDE_INT subreg_off;
864 if (GET_CODE (arg0) == SUBREG)
866 /* Else we've got some float mode source being extracted into
867 a different float mode destination -- this combination of
868 subregs results in Severe Tire Damage. */
869 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
870 || GET_MODE_CLASS (fieldmode) == MODE_INT
871 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
872 arg0 = SUBREG_REG (arg0);
875 subreg_off = bitnum / BITS_PER_UNIT;
876 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
878 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
880 create_fixed_operand (&ops[0], arg0);
881 /* Shrink the source operand to FIELDMODE. */
882 create_convert_operand_to (&ops[1], value, fieldmode, false);
883 if (maybe_expand_insn (icode, 2, ops))
884 return true;
888 /* Handle fields bigger than a word. */
890 if (bitsize > BITS_PER_WORD)
892 /* Here we transfer the words of the field
893 in the order least significant first.
894 This is because the most significant word is the one which may
895 be less than full.
896 However, only do that if the value is not BLKmode. */
898 const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
899 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
900 unsigned int i;
901 rtx_insn *last;
903 /* This is the mode we must force value to, so that there will be enough
904 subwords to extract. Note that fieldmode will often (always?) be
905 VOIDmode, because that is what store_field uses to indicate that this
906 is a bit field, but passing VOIDmode to operand_subword_force
907 is not allowed. */
908 fieldmode = GET_MODE (value);
909 if (fieldmode == VOIDmode)
910 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
912 last = get_last_insn ();
913 for (i = 0; i < nwords; i++)
915 /* If I is 0, use the low-order word in both field and target;
916 if I is 1, use the next to lowest word; and so on. */
917 unsigned int wordnum = (backwards
918 ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
919 - i - 1
920 : i);
921 unsigned int bit_offset = (backwards ^ reverse
922 ? MAX ((int) bitsize - ((int) i + 1)
923 * BITS_PER_WORD,
925 : (int) i * BITS_PER_WORD);
926 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
927 unsigned HOST_WIDE_INT new_bitsize =
928 MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
930 /* If the remaining chunk doesn't have full wordsize we have
931 to make sure that for big-endian machines the higher order
932 bits are used. */
933 if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
934 value_word = simplify_expand_binop (word_mode, lshr_optab,
935 value_word,
936 GEN_INT (BITS_PER_WORD
937 - new_bitsize),
938 NULL_RTX, true,
939 OPTAB_LIB_WIDEN);
941 if (!store_bit_field_1 (op0, new_bitsize,
942 bitnum + bit_offset,
943 bitregion_start, bitregion_end,
944 word_mode,
945 value_word, reverse, fallback_p))
947 delete_insns_since (last);
948 return false;
951 return true;
954 /* If VALUE has a floating-point or complex mode, access it as an
955 integer of the corresponding size. This can occur on a machine
956 with 64 bit registers that uses SFmode for float. It can also
957 occur for unaligned float or complex fields. */
958 orig_value = value;
959 if (GET_MODE (value) != VOIDmode
960 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
961 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
963 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
964 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
967 /* If OP0 is a multi-word register, narrow it to the affected word.
968 If the region spans two words, defer to store_split_bit_field.
969 Don't do this if op0 is a single hard register wider than word
970 such as a float or vector register. */
971 if (!MEM_P (op0)
972 && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD
973 && (!REG_P (op0)
974 || !HARD_REGISTER_P (op0)
975 || HARD_REGNO_NREGS (REGNO (op0), GET_MODE (op0)) != 1))
977 if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
979 if (!fallback_p)
980 return false;
982 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
983 bitregion_end, value, reverse);
984 return true;
986 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
987 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
988 gcc_assert (op0);
989 bitnum %= BITS_PER_WORD;
992 /* From here on we can assume that the field to be stored in fits
993 within a word. If the destination is a register, it too fits
994 in a word. */
996 extraction_insn insv;
997 if (!MEM_P (op0)
998 && !reverse
999 && get_best_reg_extraction_insn (&insv, EP_insv,
1000 GET_MODE_BITSIZE (GET_MODE (op0)),
1001 fieldmode)
1002 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
1003 return true;
1005 /* If OP0 is a memory, try copying it to a register and seeing if a
1006 cheap register alternative is available. */
1007 if (MEM_P (op0) && !reverse)
1009 if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
1010 fieldmode)
1011 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
1012 return true;
1014 rtx_insn *last = get_last_insn ();
1016 /* Try loading part of OP0 into a register, inserting the bitfield
1017 into that, and then copying the result back to OP0. */
1018 unsigned HOST_WIDE_INT bitpos;
1019 rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1020 bitregion_start, bitregion_end,
1021 fieldmode, &bitpos);
1022 if (xop0)
1024 rtx tempreg = copy_to_reg (xop0);
1025 if (store_bit_field_1 (tempreg, bitsize, bitpos,
1026 bitregion_start, bitregion_end,
1027 fieldmode, orig_value, reverse, false))
1029 emit_move_insn (xop0, tempreg);
1030 return true;
1032 delete_insns_since (last);
1036 if (!fallback_p)
1037 return false;
1039 store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
1040 bitregion_end, value, reverse);
1041 return true;
1044 /* Generate code to store value from rtx VALUE
1045 into a bit-field within structure STR_RTX
1046 containing BITSIZE bits starting at bit BITNUM.
1048 BITREGION_START is bitpos of the first bitfield in this region.
1049 BITREGION_END is the bitpos of the ending bitfield in this region.
1050 These two fields are 0, if the C++ memory model does not apply,
1051 or we are not interested in keeping track of bitfield regions.
1053 FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1055 If REVERSE is true, the store is to be done in reverse order. */
1057 void
1058 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1059 unsigned HOST_WIDE_INT bitnum,
1060 unsigned HOST_WIDE_INT bitregion_start,
1061 unsigned HOST_WIDE_INT bitregion_end,
1062 machine_mode fieldmode,
1063 rtx value, bool reverse)
1065 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1066 if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode,
1067 bitregion_start, bitregion_end))
1069 /* Storing of a full word can be done with a simple store.
1070 We know here that the field can be accessed with one single
1071 instruction. For targets that support unaligned memory,
1072 an unaligned access may be necessary. */
1073 if (bitsize == GET_MODE_BITSIZE (fieldmode))
1075 str_rtx = adjust_bitfield_address (str_rtx, fieldmode,
1076 bitnum / BITS_PER_UNIT);
1077 if (reverse)
1078 value = flip_storage_order (fieldmode, value);
1079 gcc_assert (bitnum % BITS_PER_UNIT == 0);
1080 emit_move_insn (str_rtx, value);
1082 else
1084 rtx temp;
1086 str_rtx = narrow_bit_field_mem (str_rtx, fieldmode, bitsize, bitnum,
1087 &bitnum);
1088 gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (fieldmode));
1089 temp = copy_to_reg (str_rtx);
1090 if (!store_bit_field_1 (temp, bitsize, bitnum, 0, 0,
1091 fieldmode, value, reverse, true))
1092 gcc_unreachable ();
1094 emit_move_insn (str_rtx, temp);
1097 return;
1100 /* Under the C++0x memory model, we must not touch bits outside the
1101 bit region. Adjust the address to start at the beginning of the
1102 bit region. */
1103 if (MEM_P (str_rtx) && bitregion_start > 0)
1105 machine_mode bestmode;
1106 HOST_WIDE_INT offset, size;
1108 gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
1110 offset = bitregion_start / BITS_PER_UNIT;
1111 bitnum -= bitregion_start;
1112 size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
1113 bitregion_end -= bitregion_start;
1114 bitregion_start = 0;
1115 bestmode = get_best_mode (bitsize, bitnum,
1116 bitregion_start, bitregion_end,
1117 MEM_ALIGN (str_rtx), VOIDmode,
1118 MEM_VOLATILE_P (str_rtx));
1119 str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
1122 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1123 bitregion_start, bitregion_end,
1124 fieldmode, value, reverse, true))
1125 gcc_unreachable ();
1128 /* Use shifts and boolean operations to store VALUE into a bit field of
1129 width BITSIZE in OP0, starting at bit BITNUM.
1131 If REVERSE is true, the store is to be done in reverse order. */
1133 static void
1134 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1135 unsigned HOST_WIDE_INT bitnum,
1136 unsigned HOST_WIDE_INT bitregion_start,
1137 unsigned HOST_WIDE_INT bitregion_end,
1138 rtx value, bool reverse)
1140 /* There is a case not handled here:
1141 a structure with a known alignment of just a halfword
1142 and a field split across two aligned halfwords within the structure.
1143 Or likewise a structure with a known alignment of just a byte
1144 and a field split across two bytes.
1145 Such cases are not supposed to be able to occur. */
1147 if (MEM_P (op0))
1149 machine_mode mode = GET_MODE (op0);
1150 if (GET_MODE_BITSIZE (mode) == 0
1151 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
1152 mode = word_mode;
1153 mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1154 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
1156 if (mode == VOIDmode)
1158 /* The only way this should occur is if the field spans word
1159 boundaries. */
1160 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
1161 bitregion_end, value, reverse);
1162 return;
1165 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1168 store_fixed_bit_field_1 (op0, bitsize, bitnum, value, reverse);
1171 /* Helper function for store_fixed_bit_field, stores
1172 the bit field always using the MODE of OP0. */
1174 static void
1175 store_fixed_bit_field_1 (rtx op0, unsigned HOST_WIDE_INT bitsize,
1176 unsigned HOST_WIDE_INT bitnum,
1177 rtx value, bool reverse)
1179 machine_mode mode;
1180 rtx temp;
1181 int all_zero = 0;
1182 int all_one = 0;
1184 mode = GET_MODE (op0);
1185 gcc_assert (SCALAR_INT_MODE_P (mode));
1187 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1188 for invalid input, such as f5 from gcc.dg/pr48335-2.c. */
1190 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1191 /* BITNUM is the distance between our msb
1192 and that of the containing datum.
1193 Convert it to the distance from the lsb. */
1194 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1196 /* Now BITNUM is always the distance between our lsb
1197 and that of OP0. */
1199 /* Shift VALUE left by BITNUM bits. If VALUE is not constant,
1200 we must first convert its mode to MODE. */
1202 if (CONST_INT_P (value))
1204 unsigned HOST_WIDE_INT v = UINTVAL (value);
1206 if (bitsize < HOST_BITS_PER_WIDE_INT)
1207 v &= (HOST_WIDE_INT_1U << bitsize) - 1;
1209 if (v == 0)
1210 all_zero = 1;
1211 else if ((bitsize < HOST_BITS_PER_WIDE_INT
1212 && v == (HOST_WIDE_INT_1U << bitsize) - 1)
1213 || (bitsize == HOST_BITS_PER_WIDE_INT
1214 && v == HOST_WIDE_INT_M1U))
1215 all_one = 1;
1217 value = lshift_value (mode, v, bitnum);
1219 else
1221 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1222 && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1224 if (GET_MODE (value) != mode)
1225 value = convert_to_mode (mode, value, 1);
1227 if (must_and)
1228 value = expand_binop (mode, and_optab, value,
1229 mask_rtx (mode, 0, bitsize, 0),
1230 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1231 if (bitnum > 0)
1232 value = expand_shift (LSHIFT_EXPR, mode, value,
1233 bitnum, NULL_RTX, 1);
1236 if (reverse)
1237 value = flip_storage_order (mode, value);
1239 /* Now clear the chosen bits in OP0,
1240 except that if VALUE is -1 we need not bother. */
1241 /* We keep the intermediates in registers to allow CSE to combine
1242 consecutive bitfield assignments. */
1244 temp = force_reg (mode, op0);
1246 if (! all_one)
1248 rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
1249 if (reverse)
1250 mask = flip_storage_order (mode, mask);
1251 temp = expand_binop (mode, and_optab, temp, mask,
1252 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1253 temp = force_reg (mode, temp);
1256 /* Now logical-or VALUE into OP0, unless it is zero. */
1258 if (! all_zero)
1260 temp = expand_binop (mode, ior_optab, temp, value,
1261 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1262 temp = force_reg (mode, temp);
1265 if (op0 != temp)
1267 op0 = copy_rtx (op0);
1268 emit_move_insn (op0, temp);
1272 /* Store a bit field that is split across multiple accessible memory objects.
1274 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1275 BITSIZE is the field width; BITPOS the position of its first bit
1276 (within the word).
1277 VALUE is the value to store.
1279 If REVERSE is true, the store is to be done in reverse order.
1281 This does not yet handle fields wider than BITS_PER_WORD. */
1283 static void
1284 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1285 unsigned HOST_WIDE_INT bitpos,
1286 unsigned HOST_WIDE_INT bitregion_start,
1287 unsigned HOST_WIDE_INT bitregion_end,
1288 rtx value, bool reverse)
1290 unsigned int unit, total_bits, bitsdone = 0;
1292 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1293 much at a time. */
1294 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1295 unit = BITS_PER_WORD;
1296 else
1297 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1299 /* If OP0 is a memory with a mode, then UNIT must not be larger than
1300 OP0's mode as well. Otherwise, store_fixed_bit_field will call us
1301 again, and we will mutually recurse forever. */
1302 if (MEM_P (op0) && GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1303 unit = MIN (unit, GET_MODE_BITSIZE (GET_MODE (op0)));
1305 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1306 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1307 that VALUE might be a floating-point constant. */
1308 if (CONSTANT_P (value) && !CONST_INT_P (value))
1310 rtx word = gen_lowpart_common (word_mode, value);
1312 if (word && (value != word))
1313 value = word;
1314 else
1315 value = gen_lowpart_common (word_mode,
1316 force_reg (GET_MODE (value) != VOIDmode
1317 ? GET_MODE (value)
1318 : word_mode, value));
1321 total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1323 while (bitsdone < bitsize)
1325 unsigned HOST_WIDE_INT thissize;
1326 unsigned HOST_WIDE_INT thispos;
1327 unsigned HOST_WIDE_INT offset;
1328 rtx part, word;
1330 offset = (bitpos + bitsdone) / unit;
1331 thispos = (bitpos + bitsdone) % unit;
1333 /* When region of bytes we can touch is restricted, decrease
1334 UNIT close to the end of the region as needed. If op0 is a REG
1335 or SUBREG of REG, don't do this, as there can't be data races
1336 on a register and we can expand shorter code in some cases. */
1337 if (bitregion_end
1338 && unit > BITS_PER_UNIT
1339 && bitpos + bitsdone - thispos + unit > bitregion_end + 1
1340 && !REG_P (op0)
1341 && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1343 unit = unit / 2;
1344 continue;
1347 /* THISSIZE must not overrun a word boundary. Otherwise,
1348 store_fixed_bit_field will call us again, and we will mutually
1349 recurse forever. */
1350 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1351 thissize = MIN (thissize, unit - thispos);
1353 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1355 /* Fetch successively less significant portions. */
1356 if (CONST_INT_P (value))
1357 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1358 >> (bitsize - bitsdone - thissize))
1359 & ((HOST_WIDE_INT_1 << thissize) - 1));
1360 /* Likewise, but the source is little-endian. */
1361 else if (reverse)
1362 part = extract_fixed_bit_field (word_mode, value, thissize,
1363 bitsize - bitsdone - thissize,
1364 NULL_RTX, 1, false);
1365 else
1367 int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1368 /* The args are chosen so that the last part includes the
1369 lsb. Give extract_bit_field the value it needs (with
1370 endianness compensation) to fetch the piece we want. */
1371 part = extract_fixed_bit_field (word_mode, value, thissize,
1372 total_bits - bitsize + bitsdone,
1373 NULL_RTX, 1, false);
1376 else
1378 /* Fetch successively more significant portions. */
1379 if (CONST_INT_P (value))
1380 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1381 >> bitsdone)
1382 & ((HOST_WIDE_INT_1 << thissize) - 1));
1383 /* Likewise, but the source is big-endian. */
1384 else if (reverse)
1385 part = extract_fixed_bit_field (word_mode, value, thissize,
1386 total_bits - bitsdone - thissize,
1387 NULL_RTX, 1, false);
1388 else
1389 part = extract_fixed_bit_field (word_mode, value, thissize,
1390 bitsdone, NULL_RTX, 1, false);
1393 /* If OP0 is a register, then handle OFFSET here. */
1394 if (SUBREG_P (op0) || REG_P (op0))
1396 machine_mode op0_mode = GET_MODE (op0);
1397 if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1398 word = offset ? const0_rtx : op0;
1399 else
1400 word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
1401 GET_MODE (op0));
1402 offset &= BITS_PER_WORD / unit - 1;
1404 else
1405 word = op0;
1407 /* OFFSET is in UNITs, and UNIT is in bits. If WORD is const0_rtx,
1408 it is just an out-of-bounds access. Ignore it. */
1409 if (word != const0_rtx)
1410 store_fixed_bit_field (word, thissize, offset * unit + thispos,
1411 bitregion_start, bitregion_end, part,
1412 reverse);
1413 bitsdone += thissize;
1417 /* A subroutine of extract_bit_field_1 that converts return value X
1418 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1419 to extract_bit_field. */
1421 static rtx
1422 convert_extracted_bit_field (rtx x, machine_mode mode,
1423 machine_mode tmode, bool unsignedp)
1425 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1426 return x;
1428 /* If the x mode is not a scalar integral, first convert to the
1429 integer mode of that size and then access it as a floating-point
1430 value via a SUBREG. */
1431 if (!SCALAR_INT_MODE_P (tmode))
1433 machine_mode smode;
1435 smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1436 x = convert_to_mode (smode, x, unsignedp);
1437 x = force_reg (smode, x);
1438 return gen_lowpart (tmode, x);
1441 return convert_to_mode (tmode, x, unsignedp);
1444 /* Try to use an ext(z)v pattern to extract a field from OP0.
1445 Return the extracted value on success, otherwise return null.
1446 EXT_MODE is the mode of the extraction and the other arguments
1447 are as for extract_bit_field. */
1449 static rtx
1450 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1451 unsigned HOST_WIDE_INT bitsize,
1452 unsigned HOST_WIDE_INT bitnum,
1453 int unsignedp, rtx target,
1454 machine_mode mode, machine_mode tmode)
1456 struct expand_operand ops[4];
1457 rtx spec_target = target;
1458 rtx spec_target_subreg = 0;
1459 machine_mode ext_mode = extv->field_mode;
1460 unsigned unit = GET_MODE_BITSIZE (ext_mode);
1462 if (bitsize == 0 || unit < bitsize)
1463 return NULL_RTX;
1465 if (MEM_P (op0))
1466 /* Get a reference to the first byte of the field. */
1467 op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1468 &bitnum);
1469 else
1471 /* Convert from counting within OP0 to counting in EXT_MODE. */
1472 if (BYTES_BIG_ENDIAN)
1473 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1475 /* If op0 is a register, we need it in EXT_MODE to make it
1476 acceptable to the format of ext(z)v. */
1477 if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1478 return NULL_RTX;
1479 if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1480 op0 = gen_lowpart_SUBREG (ext_mode, op0);
1483 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1484 "backwards" from the size of the unit we are extracting from.
1485 Otherwise, we count bits from the most significant on a
1486 BYTES/BITS_BIG_ENDIAN machine. */
1488 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1489 bitnum = unit - bitsize - bitnum;
1491 if (target == 0)
1492 target = spec_target = gen_reg_rtx (tmode);
1494 if (GET_MODE (target) != ext_mode)
1496 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1497 between the mode of the extraction (word_mode) and the target
1498 mode. Instead, create a temporary and use convert_move to set
1499 the target. */
1500 if (REG_P (target)
1501 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1503 target = gen_lowpart (ext_mode, target);
1504 if (GET_MODE_PRECISION (ext_mode)
1505 > GET_MODE_PRECISION (GET_MODE (spec_target)))
1506 spec_target_subreg = target;
1508 else
1509 target = gen_reg_rtx (ext_mode);
1512 create_output_operand (&ops[0], target, ext_mode);
1513 create_fixed_operand (&ops[1], op0);
1514 create_integer_operand (&ops[2], bitsize);
1515 create_integer_operand (&ops[3], bitnum);
1516 if (maybe_expand_insn (extv->icode, 4, ops))
1518 target = ops[0].value;
1519 if (target == spec_target)
1520 return target;
1521 if (target == spec_target_subreg)
1522 return spec_target;
1523 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1525 return NULL_RTX;
1528 /* A subroutine of extract_bit_field, with the same arguments.
1529 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1530 if we can find no other means of implementing the operation.
1531 if FALLBACK_P is false, return NULL instead. */
1533 static rtx
1534 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1535 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1536 machine_mode mode, machine_mode tmode,
1537 bool reverse, bool fallback_p, rtx *alt_rtl)
1539 rtx op0 = str_rtx;
1540 machine_mode int_mode;
1541 machine_mode mode1;
1543 if (tmode == VOIDmode)
1544 tmode = mode;
1546 while (GET_CODE (op0) == SUBREG)
1548 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1549 op0 = SUBREG_REG (op0);
1552 /* If we have an out-of-bounds access to a register, just return an
1553 uninitialized register of the required mode. This can occur if the
1554 source code contains an out-of-bounds access to a small array. */
1555 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1556 return gen_reg_rtx (tmode);
1558 if (REG_P (op0)
1559 && mode == GET_MODE (op0)
1560 && bitnum == 0
1561 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1563 if (reverse)
1564 op0 = flip_storage_order (mode, op0);
1565 /* We're trying to extract a full register from itself. */
1566 return op0;
1569 /* First try to check for vector from vector extractions. */
1570 if (VECTOR_MODE_P (GET_MODE (op0))
1571 && !MEM_P (op0)
1572 && VECTOR_MODE_P (tmode)
1573 && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (tmode))
1575 machine_mode new_mode = GET_MODE (op0);
1576 if (GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode))
1578 new_mode = mode_for_vector (GET_MODE_INNER (tmode),
1579 GET_MODE_BITSIZE (GET_MODE (op0))
1580 / GET_MODE_UNIT_BITSIZE (tmode));
1581 if (!VECTOR_MODE_P (new_mode)
1582 || GET_MODE_SIZE (new_mode) != GET_MODE_SIZE (GET_MODE (op0))
1583 || GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode)
1584 || !targetm.vector_mode_supported_p (new_mode))
1585 new_mode = VOIDmode;
1587 if (new_mode != VOIDmode
1588 && (convert_optab_handler (vec_extract_optab, new_mode, tmode)
1589 != CODE_FOR_nothing)
1590 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (tmode)
1591 == bitnum / GET_MODE_BITSIZE (tmode)))
1593 struct expand_operand ops[3];
1594 machine_mode outermode = new_mode;
1595 machine_mode innermode = tmode;
1596 enum insn_code icode
1597 = convert_optab_handler (vec_extract_optab, outermode, innermode);
1598 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1600 if (new_mode != GET_MODE (op0))
1601 op0 = gen_lowpart (new_mode, op0);
1602 create_output_operand (&ops[0], target, innermode);
1603 ops[0].target = 1;
1604 create_input_operand (&ops[1], op0, outermode);
1605 create_integer_operand (&ops[2], pos);
1606 if (maybe_expand_insn (icode, 3, ops))
1608 if (alt_rtl && ops[0].target)
1609 *alt_rtl = target;
1610 target = ops[0].value;
1611 if (GET_MODE (target) != mode)
1612 return gen_lowpart (tmode, target);
1613 return target;
1618 /* See if we can get a better vector mode before extracting. */
1619 if (VECTOR_MODE_P (GET_MODE (op0))
1620 && !MEM_P (op0)
1621 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1623 machine_mode new_mode;
1625 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1626 new_mode = MIN_MODE_VECTOR_FLOAT;
1627 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1628 new_mode = MIN_MODE_VECTOR_FRACT;
1629 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1630 new_mode = MIN_MODE_VECTOR_UFRACT;
1631 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1632 new_mode = MIN_MODE_VECTOR_ACCUM;
1633 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1634 new_mode = MIN_MODE_VECTOR_UACCUM;
1635 else
1636 new_mode = MIN_MODE_VECTOR_INT;
1638 FOR_EACH_MODE_FROM (new_mode, new_mode)
1639 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1640 && GET_MODE_UNIT_SIZE (new_mode) == GET_MODE_SIZE (tmode)
1641 && targetm.vector_mode_supported_p (new_mode))
1642 break;
1643 if (new_mode != VOIDmode)
1644 op0 = gen_lowpart (new_mode, op0);
1647 /* Use vec_extract patterns for extracting parts of vectors whenever
1648 available. */
1649 if (VECTOR_MODE_P (GET_MODE (op0))
1650 && !MEM_P (op0)
1651 && (convert_optab_handler (vec_extract_optab, GET_MODE (op0),
1652 GET_MODE_INNER (GET_MODE (op0)))
1653 != CODE_FOR_nothing)
1654 && ((bitnum + bitsize - 1) / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
1655 == bitnum / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
1657 struct expand_operand ops[3];
1658 machine_mode outermode = GET_MODE (op0);
1659 machine_mode innermode = GET_MODE_INNER (outermode);
1660 enum insn_code icode
1661 = convert_optab_handler (vec_extract_optab, outermode, innermode);
1662 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1664 create_output_operand (&ops[0], target, innermode);
1665 ops[0].target = 1;
1666 create_input_operand (&ops[1], op0, outermode);
1667 create_integer_operand (&ops[2], pos);
1668 if (maybe_expand_insn (icode, 3, ops))
1670 if (alt_rtl && ops[0].target)
1671 *alt_rtl = target;
1672 target = ops[0].value;
1673 if (GET_MODE (target) != mode)
1674 return gen_lowpart (tmode, target);
1675 return target;
1679 /* Make sure we are playing with integral modes. Pun with subregs
1680 if we aren't. */
1682 machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1683 if (imode != GET_MODE (op0))
1685 if (MEM_P (op0))
1686 op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
1687 else if (imode != BLKmode)
1689 op0 = gen_lowpart (imode, op0);
1691 /* If we got a SUBREG, force it into a register since we
1692 aren't going to be able to do another SUBREG on it. */
1693 if (GET_CODE (op0) == SUBREG)
1694 op0 = force_reg (imode, op0);
1696 else
1698 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1699 rtx mem = assign_stack_temp (GET_MODE (op0), size);
1700 emit_move_insn (mem, op0);
1701 op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1706 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1707 If that's wrong, the solution is to test for it and set TARGET to 0
1708 if needed. */
1710 /* Get the mode of the field to use for atomic access or subreg
1711 conversion. */
1712 mode1 = mode;
1713 if (SCALAR_INT_MODE_P (tmode))
1715 machine_mode try_mode = mode_for_size (bitsize,
1716 GET_MODE_CLASS (tmode), 0);
1717 if (try_mode != BLKmode)
1718 mode1 = try_mode;
1720 gcc_assert (mode1 != BLKmode);
1722 /* Extraction of a full MODE1 value can be done with a subreg as long
1723 as the least significant bit of the value is the least significant
1724 bit of either OP0 or a word of OP0. */
1725 if (!MEM_P (op0)
1726 && !reverse
1727 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1728 && bitsize == GET_MODE_BITSIZE (mode1)
1729 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1731 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1732 bitnum / BITS_PER_UNIT);
1733 if (sub)
1734 return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1737 /* Extraction of a full MODE1 value can be done with a load as long as
1738 the field is on a byte boundary and is sufficiently aligned. */
1739 if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1741 op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1742 if (reverse)
1743 op0 = flip_storage_order (mode1, op0);
1744 return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1747 /* Handle fields bigger than a word. */
1749 if (bitsize > BITS_PER_WORD)
1751 /* Here we transfer the words of the field
1752 in the order least significant first.
1753 This is because the most significant word is the one which may
1754 be less than full. */
1756 const bool backwards = WORDS_BIG_ENDIAN;
1757 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1758 unsigned int i;
1759 rtx_insn *last;
1761 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1762 target = gen_reg_rtx (mode);
1764 /* In case we're about to clobber a base register or something
1765 (see gcc.c-torture/execute/20040625-1.c). */
1766 if (reg_mentioned_p (target, str_rtx))
1767 target = gen_reg_rtx (mode);
1769 /* Indicate for flow that the entire target reg is being set. */
1770 emit_clobber (target);
1772 last = get_last_insn ();
1773 for (i = 0; i < nwords; i++)
1775 /* If I is 0, use the low-order word in both field and target;
1776 if I is 1, use the next to lowest word; and so on. */
1777 /* Word number in TARGET to use. */
1778 unsigned int wordnum
1779 = (backwards
1780 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1781 : i);
1782 /* Offset from start of field in OP0. */
1783 unsigned int bit_offset = (backwards ^ reverse
1784 ? MAX ((int) bitsize - ((int) i + 1)
1785 * BITS_PER_WORD,
1787 : (int) i * BITS_PER_WORD);
1788 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1789 rtx result_part
1790 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1791 bitsize - i * BITS_PER_WORD),
1792 bitnum + bit_offset, 1, target_part,
1793 mode, word_mode, reverse, fallback_p, NULL);
1795 gcc_assert (target_part);
1796 if (!result_part)
1798 delete_insns_since (last);
1799 return NULL;
1802 if (result_part != target_part)
1803 emit_move_insn (target_part, result_part);
1806 if (unsignedp)
1808 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1809 need to be zero'd out. */
1810 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1812 unsigned int i, total_words;
1814 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1815 for (i = nwords; i < total_words; i++)
1816 emit_move_insn
1817 (operand_subword (target,
1818 backwards ? total_words - i - 1 : i,
1819 1, VOIDmode),
1820 const0_rtx);
1822 return target;
1825 /* Signed bit field: sign-extend with two arithmetic shifts. */
1826 target = expand_shift (LSHIFT_EXPR, mode, target,
1827 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1828 return expand_shift (RSHIFT_EXPR, mode, target,
1829 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1832 /* If OP0 is a multi-word register, narrow it to the affected word.
1833 If the region spans two words, defer to extract_split_bit_field. */
1834 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1836 if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1838 if (!fallback_p)
1839 return NULL_RTX;
1840 target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp,
1841 reverse);
1842 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1844 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1845 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1846 bitnum %= BITS_PER_WORD;
1849 /* From here on we know the desired field is smaller than a word.
1850 If OP0 is a register, it too fits within a word. */
1851 enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1852 extraction_insn extv;
1853 if (!MEM_P (op0)
1854 && !reverse
1855 /* ??? We could limit the structure size to the part of OP0 that
1856 contains the field, with appropriate checks for endianness
1857 and TRULY_NOOP_TRUNCATION. */
1858 && get_best_reg_extraction_insn (&extv, pattern,
1859 GET_MODE_BITSIZE (GET_MODE (op0)),
1860 tmode))
1862 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1863 unsignedp, target, mode,
1864 tmode);
1865 if (result)
1866 return result;
1869 /* If OP0 is a memory, try copying it to a register and seeing if a
1870 cheap register alternative is available. */
1871 if (MEM_P (op0) & !reverse)
1873 if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1874 tmode))
1876 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1877 bitnum, unsignedp,
1878 target, mode,
1879 tmode);
1880 if (result)
1881 return result;
1884 rtx_insn *last = get_last_insn ();
1886 /* Try loading part of OP0 into a register and extracting the
1887 bitfield from that. */
1888 unsigned HOST_WIDE_INT bitpos;
1889 rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1890 0, 0, tmode, &bitpos);
1891 if (xop0)
1893 xop0 = copy_to_reg (xop0);
1894 rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1895 unsignedp, target,
1896 mode, tmode, reverse, false, NULL);
1897 if (result)
1898 return result;
1899 delete_insns_since (last);
1903 if (!fallback_p)
1904 return NULL;
1906 /* Find a correspondingly-sized integer field, so we can apply
1907 shifts and masks to it. */
1908 int_mode = int_mode_for_mode (tmode);
1909 if (int_mode == BLKmode)
1910 int_mode = int_mode_for_mode (mode);
1911 /* Should probably push op0 out to memory and then do a load. */
1912 gcc_assert (int_mode != BLKmode);
1914 target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum, target,
1915 unsignedp, reverse);
1917 /* Complex values must be reversed piecewise, so we need to undo the global
1918 reversal, convert to the complex mode and reverse again. */
1919 if (reverse && COMPLEX_MODE_P (tmode))
1921 target = flip_storage_order (int_mode, target);
1922 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1923 target = flip_storage_order (tmode, target);
1925 else
1926 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1928 return target;
1931 /* Generate code to extract a byte-field from STR_RTX
1932 containing BITSIZE bits, starting at BITNUM,
1933 and put it in TARGET if possible (if TARGET is nonzero).
1934 Regardless of TARGET, we return the rtx for where the value is placed.
1936 STR_RTX is the structure containing the byte (a REG or MEM).
1937 UNSIGNEDP is nonzero if this is an unsigned bit field.
1938 MODE is the natural mode of the field value once extracted.
1939 TMODE is the mode the caller would like the value to have;
1940 but the value may be returned with type MODE instead.
1942 If REVERSE is true, the extraction is to be done in reverse order.
1944 If a TARGET is specified and we can store in it at no extra cost,
1945 we do so, and return TARGET.
1946 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1947 if they are equally easy. */
1950 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1951 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1952 machine_mode mode, machine_mode tmode, bool reverse,
1953 rtx *alt_rtl)
1955 machine_mode mode1;
1957 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1958 if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1959 mode1 = GET_MODE (str_rtx);
1960 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1961 mode1 = GET_MODE (target);
1962 else
1963 mode1 = tmode;
1965 if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, mode1, 0, 0))
1967 /* Extraction of a full MODE1 value can be done with a simple load.
1968 We know here that the field can be accessed with one single
1969 instruction. For targets that support unaligned memory,
1970 an unaligned access may be necessary. */
1971 if (bitsize == GET_MODE_BITSIZE (mode1))
1973 rtx result = adjust_bitfield_address (str_rtx, mode1,
1974 bitnum / BITS_PER_UNIT);
1975 if (reverse)
1976 result = flip_storage_order (mode1, result);
1977 gcc_assert (bitnum % BITS_PER_UNIT == 0);
1978 return convert_extracted_bit_field (result, mode, tmode, unsignedp);
1981 str_rtx = narrow_bit_field_mem (str_rtx, mode1, bitsize, bitnum,
1982 &bitnum);
1983 gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (mode1));
1984 str_rtx = copy_to_reg (str_rtx);
1987 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1988 target, mode, tmode, reverse, true, alt_rtl);
1991 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1992 from bit BITNUM of OP0.
1994 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1995 If REVERSE is true, the extraction is to be done in reverse order.
1997 If TARGET is nonzero, attempts to store the value there
1998 and return TARGET, but this is not guaranteed.
1999 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
2001 static rtx
2002 extract_fixed_bit_field (machine_mode tmode, rtx op0,
2003 unsigned HOST_WIDE_INT bitsize,
2004 unsigned HOST_WIDE_INT bitnum, rtx target,
2005 int unsignedp, bool reverse)
2007 if (MEM_P (op0))
2009 machine_mode mode
2010 = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), word_mode,
2011 MEM_VOLATILE_P (op0));
2013 if (mode == VOIDmode)
2014 /* The only way this should occur is if the field spans word
2015 boundaries. */
2016 return extract_split_bit_field (op0, bitsize, bitnum, unsignedp,
2017 reverse);
2019 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
2022 return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
2023 target, unsignedp, reverse);
2026 /* Helper function for extract_fixed_bit_field, extracts
2027 the bit field always using the MODE of OP0. */
2029 static rtx
2030 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0,
2031 unsigned HOST_WIDE_INT bitsize,
2032 unsigned HOST_WIDE_INT bitnum, rtx target,
2033 int unsignedp, bool reverse)
2035 machine_mode mode = GET_MODE (op0);
2036 gcc_assert (SCALAR_INT_MODE_P (mode));
2038 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
2039 for invalid input, such as extract equivalent of f5 from
2040 gcc.dg/pr48335-2.c. */
2042 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2043 /* BITNUM is the distance between our msb and that of OP0.
2044 Convert it to the distance from the lsb. */
2045 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
2047 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
2048 We have reduced the big-endian case to the little-endian case. */
2049 if (reverse)
2050 op0 = flip_storage_order (mode, op0);
2052 if (unsignedp)
2054 if (bitnum)
2056 /* If the field does not already start at the lsb,
2057 shift it so it does. */
2058 /* Maybe propagate the target for the shift. */
2059 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2060 if (tmode != mode)
2061 subtarget = 0;
2062 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
2064 /* Convert the value to the desired mode. */
2065 if (mode != tmode)
2066 op0 = convert_to_mode (tmode, op0, 1);
2068 /* Unless the msb of the field used to be the msb when we shifted,
2069 mask out the upper bits. */
2071 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
2072 return expand_binop (GET_MODE (op0), and_optab, op0,
2073 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
2074 target, 1, OPTAB_LIB_WIDEN);
2075 return op0;
2078 /* To extract a signed bit-field, first shift its msb to the msb of the word,
2079 then arithmetic-shift its lsb to the lsb of the word. */
2080 op0 = force_reg (mode, op0);
2082 /* Find the narrowest integer mode that contains the field. */
2084 FOR_EACH_MODE_IN_CLASS (mode, MODE_INT)
2085 if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
2087 op0 = convert_to_mode (mode, op0, 0);
2088 break;
2091 if (mode != tmode)
2092 target = 0;
2094 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2096 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2097 /* Maybe propagate the target for the shift. */
2098 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2099 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2102 return expand_shift (RSHIFT_EXPR, mode, op0,
2103 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
2106 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2107 VALUE << BITPOS. */
2109 static rtx
2110 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2111 int bitpos)
2113 return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2116 /* Extract a bit field that is split across two words
2117 and return an RTX for the result.
2119 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2120 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2121 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2123 If REVERSE is true, the extraction is to be done in reverse order. */
2125 static rtx
2126 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
2127 unsigned HOST_WIDE_INT bitpos, int unsignedp,
2128 bool reverse)
2130 unsigned int unit;
2131 unsigned int bitsdone = 0;
2132 rtx result = NULL_RTX;
2133 int first = 1;
2135 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2136 much at a time. */
2137 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2138 unit = BITS_PER_WORD;
2139 else
2140 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2142 while (bitsdone < bitsize)
2144 unsigned HOST_WIDE_INT thissize;
2145 rtx part, word;
2146 unsigned HOST_WIDE_INT thispos;
2147 unsigned HOST_WIDE_INT offset;
2149 offset = (bitpos + bitsdone) / unit;
2150 thispos = (bitpos + bitsdone) % unit;
2152 /* THISSIZE must not overrun a word boundary. Otherwise,
2153 extract_fixed_bit_field will call us again, and we will mutually
2154 recurse forever. */
2155 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2156 thissize = MIN (thissize, unit - thispos);
2158 /* If OP0 is a register, then handle OFFSET here. */
2159 if (SUBREG_P (op0) || REG_P (op0))
2161 word = operand_subword_force (op0, offset, GET_MODE (op0));
2162 offset = 0;
2164 else
2165 word = op0;
2167 /* Extract the parts in bit-counting order,
2168 whose meaning is determined by BYTES_PER_UNIT.
2169 OFFSET is in UNITs, and UNIT is in bits. */
2170 part = extract_fixed_bit_field (word_mode, word, thissize,
2171 offset * unit + thispos, 0, 1, reverse);
2172 bitsdone += thissize;
2174 /* Shift this part into place for the result. */
2175 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2177 if (bitsize != bitsdone)
2178 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2179 bitsize - bitsdone, 0, 1);
2181 else
2183 if (bitsdone != thissize)
2184 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2185 bitsdone - thissize, 0, 1);
2188 if (first)
2189 result = part;
2190 else
2191 /* Combine the parts with bitwise or. This works
2192 because we extracted each part as an unsigned bit field. */
2193 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2194 OPTAB_LIB_WIDEN);
2196 first = 0;
2199 /* Unsigned bit field: we are done. */
2200 if (unsignedp)
2201 return result;
2202 /* Signed bit field: sign-extend with two arithmetic shifts. */
2203 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2204 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2205 return expand_shift (RSHIFT_EXPR, word_mode, result,
2206 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2209 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2210 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2211 MODE, fill the upper bits with zeros. Fail if the layout of either
2212 mode is unknown (as for CC modes) or if the extraction would involve
2213 unprofitable mode punning. Return the value on success, otherwise
2214 return null.
2216 This is different from gen_lowpart* in these respects:
2218 - the returned value must always be considered an rvalue
2220 - when MODE is wider than SRC_MODE, the extraction involves
2221 a zero extension
2223 - when MODE is smaller than SRC_MODE, the extraction involves
2224 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2226 In other words, this routine performs a computation, whereas the
2227 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2228 operations. */
2231 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2233 machine_mode int_mode, src_int_mode;
2235 if (mode == src_mode)
2236 return src;
2238 if (CONSTANT_P (src))
2240 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2241 fails, it will happily create (subreg (symbol_ref)) or similar
2242 invalid SUBREGs. */
2243 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2244 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2245 if (ret)
2246 return ret;
2248 if (GET_MODE (src) == VOIDmode
2249 || !validate_subreg (mode, src_mode, src, byte))
2250 return NULL_RTX;
2252 src = force_reg (GET_MODE (src), src);
2253 return gen_rtx_SUBREG (mode, src, byte);
2256 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2257 return NULL_RTX;
2259 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2260 && MODES_TIEABLE_P (mode, src_mode))
2262 rtx x = gen_lowpart_common (mode, src);
2263 if (x)
2264 return x;
2267 src_int_mode = int_mode_for_mode (src_mode);
2268 int_mode = int_mode_for_mode (mode);
2269 if (src_int_mode == BLKmode || int_mode == BLKmode)
2270 return NULL_RTX;
2272 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2273 return NULL_RTX;
2274 if (!MODES_TIEABLE_P (int_mode, mode))
2275 return NULL_RTX;
2277 src = gen_lowpart (src_int_mode, src);
2278 src = convert_modes (int_mode, src_int_mode, src, true);
2279 src = gen_lowpart (mode, src);
2280 return src;
2283 /* Add INC into TARGET. */
2285 void
2286 expand_inc (rtx target, rtx inc)
2288 rtx value = expand_binop (GET_MODE (target), add_optab,
2289 target, inc,
2290 target, 0, OPTAB_LIB_WIDEN);
2291 if (value != target)
2292 emit_move_insn (target, value);
2295 /* Subtract DEC from TARGET. */
2297 void
2298 expand_dec (rtx target, rtx dec)
2300 rtx value = expand_binop (GET_MODE (target), sub_optab,
2301 target, dec,
2302 target, 0, OPTAB_LIB_WIDEN);
2303 if (value != target)
2304 emit_move_insn (target, value);
2307 /* Output a shift instruction for expression code CODE,
2308 with SHIFTED being the rtx for the value to shift,
2309 and AMOUNT the rtx for the amount to shift by.
2310 Store the result in the rtx TARGET, if that is convenient.
2311 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2312 Return the rtx for where the value is.
2313 If that cannot be done, abort the compilation unless MAY_FAIL is true,
2314 in which case 0 is returned. */
2316 static rtx
2317 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2318 rtx amount, rtx target, int unsignedp, bool may_fail = false)
2320 rtx op1, temp = 0;
2321 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2322 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2323 optab lshift_optab = ashl_optab;
2324 optab rshift_arith_optab = ashr_optab;
2325 optab rshift_uns_optab = lshr_optab;
2326 optab lrotate_optab = rotl_optab;
2327 optab rrotate_optab = rotr_optab;
2328 machine_mode op1_mode;
2329 machine_mode scalar_mode = mode;
2330 int attempt;
2331 bool speed = optimize_insn_for_speed_p ();
2333 if (VECTOR_MODE_P (mode))
2334 scalar_mode = GET_MODE_INNER (mode);
2335 op1 = amount;
2336 op1_mode = GET_MODE (op1);
2338 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2339 shift amount is a vector, use the vector/vector shift patterns. */
2340 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2342 lshift_optab = vashl_optab;
2343 rshift_arith_optab = vashr_optab;
2344 rshift_uns_optab = vlshr_optab;
2345 lrotate_optab = vrotl_optab;
2346 rrotate_optab = vrotr_optab;
2349 /* Previously detected shift-counts computed by NEGATE_EXPR
2350 and shifted in the other direction; but that does not work
2351 on all machines. */
2353 if (SHIFT_COUNT_TRUNCATED)
2355 if (CONST_INT_P (op1)
2356 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2357 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2358 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2359 % GET_MODE_BITSIZE (scalar_mode));
2360 else if (GET_CODE (op1) == SUBREG
2361 && subreg_lowpart_p (op1)
2362 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2363 && SCALAR_INT_MODE_P (GET_MODE (op1)))
2364 op1 = SUBREG_REG (op1);
2367 /* Canonicalize rotates by constant amount. If op1 is bitsize / 2,
2368 prefer left rotation, if op1 is from bitsize / 2 + 1 to
2369 bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2370 amount instead. */
2371 if (rotate
2372 && CONST_INT_P (op1)
2373 && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2374 GET_MODE_BITSIZE (scalar_mode) - 1))
2376 op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2377 left = !left;
2378 code = left ? LROTATE_EXPR : RROTATE_EXPR;
2381 /* Rotation of 16bit values by 8 bits is effectively equivalent to a bswaphi.
2382 Note that this is not the case for bigger values. For instance a rotation
2383 of 0x01020304 by 16 bits gives 0x03040102 which is different from
2384 0x04030201 (bswapsi). */
2385 if (rotate
2386 && CONST_INT_P (op1)
2387 && INTVAL (op1) == BITS_PER_UNIT
2388 && GET_MODE_SIZE (scalar_mode) == 2
2389 && optab_handler (bswap_optab, HImode) != CODE_FOR_nothing)
2390 return expand_unop (HImode, bswap_optab, shifted, NULL_RTX,
2391 unsignedp);
2393 if (op1 == const0_rtx)
2394 return shifted;
2396 /* Check whether its cheaper to implement a left shift by a constant
2397 bit count by a sequence of additions. */
2398 if (code == LSHIFT_EXPR
2399 && CONST_INT_P (op1)
2400 && INTVAL (op1) > 0
2401 && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2402 && INTVAL (op1) < MAX_BITS_PER_WORD
2403 && (shift_cost (speed, mode, INTVAL (op1))
2404 > INTVAL (op1) * add_cost (speed, mode))
2405 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2407 int i;
2408 for (i = 0; i < INTVAL (op1); i++)
2410 temp = force_reg (mode, shifted);
2411 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2412 unsignedp, OPTAB_LIB_WIDEN);
2414 return shifted;
2417 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2419 enum optab_methods methods;
2421 if (attempt == 0)
2422 methods = OPTAB_DIRECT;
2423 else if (attempt == 1)
2424 methods = OPTAB_WIDEN;
2425 else
2426 methods = OPTAB_LIB_WIDEN;
2428 if (rotate)
2430 /* Widening does not work for rotation. */
2431 if (methods == OPTAB_WIDEN)
2432 continue;
2433 else if (methods == OPTAB_LIB_WIDEN)
2435 /* If we have been unable to open-code this by a rotation,
2436 do it as the IOR of two shifts. I.e., to rotate A
2437 by N bits, compute
2438 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2439 where C is the bitsize of A.
2441 It is theoretically possible that the target machine might
2442 not be able to perform either shift and hence we would
2443 be making two libcalls rather than just the one for the
2444 shift (similarly if IOR could not be done). We will allow
2445 this extremely unlikely lossage to avoid complicating the
2446 code below. */
2448 rtx subtarget = target == shifted ? 0 : target;
2449 rtx new_amount, other_amount;
2450 rtx temp1;
2452 new_amount = op1;
2453 if (op1 == const0_rtx)
2454 return shifted;
2455 else if (CONST_INT_P (op1))
2456 other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode)
2457 - INTVAL (op1));
2458 else
2460 other_amount
2461 = simplify_gen_unary (NEG, GET_MODE (op1),
2462 op1, GET_MODE (op1));
2463 HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2464 other_amount
2465 = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2466 gen_int_mode (mask, GET_MODE (op1)));
2469 shifted = force_reg (mode, shifted);
2471 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2472 mode, shifted, new_amount, 0, 1);
2473 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2474 mode, shifted, other_amount,
2475 subtarget, 1);
2476 return expand_binop (mode, ior_optab, temp, temp1, target,
2477 unsignedp, methods);
2480 temp = expand_binop (mode,
2481 left ? lrotate_optab : rrotate_optab,
2482 shifted, op1, target, unsignedp, methods);
2484 else if (unsignedp)
2485 temp = expand_binop (mode,
2486 left ? lshift_optab : rshift_uns_optab,
2487 shifted, op1, target, unsignedp, methods);
2489 /* Do arithmetic shifts.
2490 Also, if we are going to widen the operand, we can just as well
2491 use an arithmetic right-shift instead of a logical one. */
2492 if (temp == 0 && ! rotate
2493 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2495 enum optab_methods methods1 = methods;
2497 /* If trying to widen a log shift to an arithmetic shift,
2498 don't accept an arithmetic shift of the same size. */
2499 if (unsignedp)
2500 methods1 = OPTAB_MUST_WIDEN;
2502 /* Arithmetic shift */
2504 temp = expand_binop (mode,
2505 left ? lshift_optab : rshift_arith_optab,
2506 shifted, op1, target, unsignedp, methods1);
2509 /* We used to try extzv here for logical right shifts, but that was
2510 only useful for one machine, the VAX, and caused poor code
2511 generation there for lshrdi3, so the code was deleted and a
2512 define_expand for lshrsi3 was added to vax.md. */
2515 gcc_assert (temp != NULL_RTX || may_fail);
2516 return temp;
2519 /* Output a shift instruction for expression code CODE,
2520 with SHIFTED being the rtx for the value to shift,
2521 and AMOUNT the amount to shift by.
2522 Store the result in the rtx TARGET, if that is convenient.
2523 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2524 Return the rtx for where the value is. */
2527 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2528 int amount, rtx target, int unsignedp)
2530 return expand_shift_1 (code, mode,
2531 shifted, GEN_INT (amount), target, unsignedp);
2534 /* Likewise, but return 0 if that cannot be done. */
2536 static rtx
2537 maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2538 int amount, rtx target, int unsignedp)
2540 return expand_shift_1 (code, mode,
2541 shifted, GEN_INT (amount), target, unsignedp, true);
2544 /* Output a shift instruction for expression code CODE,
2545 with SHIFTED being the rtx for the value to shift,
2546 and AMOUNT the tree for the amount to shift by.
2547 Store the result in the rtx TARGET, if that is convenient.
2548 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2549 Return the rtx for where the value is. */
2552 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2553 tree amount, rtx target, int unsignedp)
2555 return expand_shift_1 (code, mode,
2556 shifted, expand_normal (amount), target, unsignedp);
2560 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2561 const struct mult_cost *, machine_mode mode);
2562 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2563 const struct algorithm *, enum mult_variant);
2564 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2565 static rtx extract_high_half (machine_mode, rtx);
2566 static rtx expmed_mult_highpart (machine_mode, rtx, rtx, rtx, int, int);
2567 static rtx expmed_mult_highpart_optab (machine_mode, rtx, rtx, rtx,
2568 int, int);
2569 /* Compute and return the best algorithm for multiplying by T.
2570 The algorithm must cost less than cost_limit
2571 If retval.cost >= COST_LIMIT, no algorithm was found and all
2572 other field of the returned struct are undefined.
2573 MODE is the machine mode of the multiplication. */
2575 static void
2576 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2577 const struct mult_cost *cost_limit, machine_mode mode)
2579 int m;
2580 struct algorithm *alg_in, *best_alg;
2581 struct mult_cost best_cost;
2582 struct mult_cost new_limit;
2583 int op_cost, op_latency;
2584 unsigned HOST_WIDE_INT orig_t = t;
2585 unsigned HOST_WIDE_INT q;
2586 int maxm, hash_index;
2587 bool cache_hit = false;
2588 enum alg_code cache_alg = alg_zero;
2589 bool speed = optimize_insn_for_speed_p ();
2590 machine_mode imode;
2591 struct alg_hash_entry *entry_ptr;
2593 /* Indicate that no algorithm is yet found. If no algorithm
2594 is found, this value will be returned and indicate failure. */
2595 alg_out->cost.cost = cost_limit->cost + 1;
2596 alg_out->cost.latency = cost_limit->latency + 1;
2598 if (cost_limit->cost < 0
2599 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2600 return;
2602 /* Be prepared for vector modes. */
2603 imode = GET_MODE_INNER (mode);
2605 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2607 /* Restrict the bits of "t" to the multiplication's mode. */
2608 t &= GET_MODE_MASK (imode);
2610 /* t == 1 can be done in zero cost. */
2611 if (t == 1)
2613 alg_out->ops = 1;
2614 alg_out->cost.cost = 0;
2615 alg_out->cost.latency = 0;
2616 alg_out->op[0] = alg_m;
2617 return;
2620 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2621 fail now. */
2622 if (t == 0)
2624 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2625 return;
2626 else
2628 alg_out->ops = 1;
2629 alg_out->cost.cost = zero_cost (speed);
2630 alg_out->cost.latency = zero_cost (speed);
2631 alg_out->op[0] = alg_zero;
2632 return;
2636 /* We'll be needing a couple extra algorithm structures now. */
2638 alg_in = XALLOCA (struct algorithm);
2639 best_alg = XALLOCA (struct algorithm);
2640 best_cost = *cost_limit;
2642 /* Compute the hash index. */
2643 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2645 /* See if we already know what to do for T. */
2646 entry_ptr = alg_hash_entry_ptr (hash_index);
2647 if (entry_ptr->t == t
2648 && entry_ptr->mode == mode
2649 && entry_ptr->speed == speed
2650 && entry_ptr->alg != alg_unknown)
2652 cache_alg = entry_ptr->alg;
2654 if (cache_alg == alg_impossible)
2656 /* The cache tells us that it's impossible to synthesize
2657 multiplication by T within entry_ptr->cost. */
2658 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2659 /* COST_LIMIT is at least as restrictive as the one
2660 recorded in the hash table, in which case we have no
2661 hope of synthesizing a multiplication. Just
2662 return. */
2663 return;
2665 /* If we get here, COST_LIMIT is less restrictive than the
2666 one recorded in the hash table, so we may be able to
2667 synthesize a multiplication. Proceed as if we didn't
2668 have the cache entry. */
2670 else
2672 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2673 /* The cached algorithm shows that this multiplication
2674 requires more cost than COST_LIMIT. Just return. This
2675 way, we don't clobber this cache entry with
2676 alg_impossible but retain useful information. */
2677 return;
2679 cache_hit = true;
2681 switch (cache_alg)
2683 case alg_shift:
2684 goto do_alg_shift;
2686 case alg_add_t_m2:
2687 case alg_sub_t_m2:
2688 goto do_alg_addsub_t_m2;
2690 case alg_add_factor:
2691 case alg_sub_factor:
2692 goto do_alg_addsub_factor;
2694 case alg_add_t2_m:
2695 goto do_alg_add_t2_m;
2697 case alg_sub_t2_m:
2698 goto do_alg_sub_t2_m;
2700 default:
2701 gcc_unreachable ();
2706 /* If we have a group of zero bits at the low-order part of T, try
2707 multiplying by the remaining bits and then doing a shift. */
2709 if ((t & 1) == 0)
2711 do_alg_shift:
2712 m = ctz_or_zero (t); /* m = number of low zero bits */
2713 if (m < maxm)
2715 q = t >> m;
2716 /* The function expand_shift will choose between a shift and
2717 a sequence of additions, so the observed cost is given as
2718 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2719 op_cost = m * add_cost (speed, mode);
2720 if (shift_cost (speed, mode, m) < op_cost)
2721 op_cost = shift_cost (speed, mode, m);
2722 new_limit.cost = best_cost.cost - op_cost;
2723 new_limit.latency = best_cost.latency - op_cost;
2724 synth_mult (alg_in, q, &new_limit, mode);
2726 alg_in->cost.cost += op_cost;
2727 alg_in->cost.latency += op_cost;
2728 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2730 best_cost = alg_in->cost;
2731 std::swap (alg_in, best_alg);
2732 best_alg->log[best_alg->ops] = m;
2733 best_alg->op[best_alg->ops] = alg_shift;
2736 /* See if treating ORIG_T as a signed number yields a better
2737 sequence. Try this sequence only for a negative ORIG_T
2738 as it would be useless for a non-negative ORIG_T. */
2739 if ((HOST_WIDE_INT) orig_t < 0)
2741 /* Shift ORIG_T as follows because a right shift of a
2742 negative-valued signed type is implementation
2743 defined. */
2744 q = ~(~orig_t >> m);
2745 /* The function expand_shift will choose between a shift
2746 and a sequence of additions, so the observed cost is
2747 given as MIN (m * add_cost(speed, mode),
2748 shift_cost(speed, mode, m)). */
2749 op_cost = m * add_cost (speed, mode);
2750 if (shift_cost (speed, mode, m) < op_cost)
2751 op_cost = shift_cost (speed, mode, m);
2752 new_limit.cost = best_cost.cost - op_cost;
2753 new_limit.latency = best_cost.latency - op_cost;
2754 synth_mult (alg_in, q, &new_limit, mode);
2756 alg_in->cost.cost += op_cost;
2757 alg_in->cost.latency += op_cost;
2758 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2760 best_cost = alg_in->cost;
2761 std::swap (alg_in, best_alg);
2762 best_alg->log[best_alg->ops] = m;
2763 best_alg->op[best_alg->ops] = alg_shift;
2767 if (cache_hit)
2768 goto done;
2771 /* If we have an odd number, add or subtract one. */
2772 if ((t & 1) != 0)
2774 unsigned HOST_WIDE_INT w;
2776 do_alg_addsub_t_m2:
2777 for (w = 1; (w & t) != 0; w <<= 1)
2779 /* If T was -1, then W will be zero after the loop. This is another
2780 case where T ends with ...111. Handling this with (T + 1) and
2781 subtract 1 produces slightly better code and results in algorithm
2782 selection much faster than treating it like the ...0111 case
2783 below. */
2784 if (w == 0
2785 || (w > 2
2786 /* Reject the case where t is 3.
2787 Thus we prefer addition in that case. */
2788 && t != 3))
2790 /* T ends with ...111. Multiply by (T + 1) and subtract T. */
2792 op_cost = add_cost (speed, mode);
2793 new_limit.cost = best_cost.cost - op_cost;
2794 new_limit.latency = best_cost.latency - op_cost;
2795 synth_mult (alg_in, t + 1, &new_limit, mode);
2797 alg_in->cost.cost += op_cost;
2798 alg_in->cost.latency += op_cost;
2799 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2801 best_cost = alg_in->cost;
2802 std::swap (alg_in, best_alg);
2803 best_alg->log[best_alg->ops] = 0;
2804 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2807 else
2809 /* T ends with ...01 or ...011. Multiply by (T - 1) and add T. */
2811 op_cost = add_cost (speed, mode);
2812 new_limit.cost = best_cost.cost - op_cost;
2813 new_limit.latency = best_cost.latency - op_cost;
2814 synth_mult (alg_in, t - 1, &new_limit, mode);
2816 alg_in->cost.cost += op_cost;
2817 alg_in->cost.latency += op_cost;
2818 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2820 best_cost = alg_in->cost;
2821 std::swap (alg_in, best_alg);
2822 best_alg->log[best_alg->ops] = 0;
2823 best_alg->op[best_alg->ops] = alg_add_t_m2;
2827 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2828 quickly with a - a * n for some appropriate constant n. */
2829 m = exact_log2 (-orig_t + 1);
2830 if (m >= 0 && m < maxm)
2832 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2833 /* If the target has a cheap shift-and-subtract insn use
2834 that in preference to a shift insn followed by a sub insn.
2835 Assume that the shift-and-sub is "atomic" with a latency
2836 equal to it's cost, otherwise assume that on superscalar
2837 hardware the shift may be executed concurrently with the
2838 earlier steps in the algorithm. */
2839 if (shiftsub1_cost (speed, mode, m) <= op_cost)
2841 op_cost = shiftsub1_cost (speed, mode, m);
2842 op_latency = op_cost;
2844 else
2845 op_latency = add_cost (speed, mode);
2847 new_limit.cost = best_cost.cost - op_cost;
2848 new_limit.latency = best_cost.latency - op_latency;
2849 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2850 &new_limit, mode);
2852 alg_in->cost.cost += op_cost;
2853 alg_in->cost.latency += op_latency;
2854 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2856 best_cost = alg_in->cost;
2857 std::swap (alg_in, best_alg);
2858 best_alg->log[best_alg->ops] = m;
2859 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2863 if (cache_hit)
2864 goto done;
2867 /* Look for factors of t of the form
2868 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2869 If we find such a factor, we can multiply by t using an algorithm that
2870 multiplies by q, shift the result by m and add/subtract it to itself.
2872 We search for large factors first and loop down, even if large factors
2873 are less probable than small; if we find a large factor we will find a
2874 good sequence quickly, and therefore be able to prune (by decreasing
2875 COST_LIMIT) the search. */
2877 do_alg_addsub_factor:
2878 for (m = floor_log2 (t - 1); m >= 2; m--)
2880 unsigned HOST_WIDE_INT d;
2882 d = (HOST_WIDE_INT_1U << m) + 1;
2883 if (t % d == 0 && t > d && m < maxm
2884 && (!cache_hit || cache_alg == alg_add_factor))
2886 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2887 if (shiftadd_cost (speed, mode, m) <= op_cost)
2888 op_cost = shiftadd_cost (speed, mode, m);
2890 op_latency = op_cost;
2893 new_limit.cost = best_cost.cost - op_cost;
2894 new_limit.latency = best_cost.latency - op_latency;
2895 synth_mult (alg_in, t / d, &new_limit, mode);
2897 alg_in->cost.cost += op_cost;
2898 alg_in->cost.latency += op_latency;
2899 if (alg_in->cost.latency < op_cost)
2900 alg_in->cost.latency = op_cost;
2901 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2903 best_cost = alg_in->cost;
2904 std::swap (alg_in, best_alg);
2905 best_alg->log[best_alg->ops] = m;
2906 best_alg->op[best_alg->ops] = alg_add_factor;
2908 /* Other factors will have been taken care of in the recursion. */
2909 break;
2912 d = (HOST_WIDE_INT_1U << m) - 1;
2913 if (t % d == 0 && t > d && m < maxm
2914 && (!cache_hit || cache_alg == alg_sub_factor))
2916 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2917 if (shiftsub0_cost (speed, mode, m) <= op_cost)
2918 op_cost = shiftsub0_cost (speed, mode, m);
2920 op_latency = op_cost;
2922 new_limit.cost = best_cost.cost - op_cost;
2923 new_limit.latency = best_cost.latency - op_latency;
2924 synth_mult (alg_in, t / d, &new_limit, mode);
2926 alg_in->cost.cost += op_cost;
2927 alg_in->cost.latency += op_latency;
2928 if (alg_in->cost.latency < op_cost)
2929 alg_in->cost.latency = op_cost;
2930 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2932 best_cost = alg_in->cost;
2933 std::swap (alg_in, best_alg);
2934 best_alg->log[best_alg->ops] = m;
2935 best_alg->op[best_alg->ops] = alg_sub_factor;
2937 break;
2940 if (cache_hit)
2941 goto done;
2943 /* Try shift-and-add (load effective address) instructions,
2944 i.e. do a*3, a*5, a*9. */
2945 if ((t & 1) != 0)
2947 do_alg_add_t2_m:
2948 q = t - 1;
2949 m = ctz_hwi (q);
2950 if (q && m < maxm)
2952 op_cost = shiftadd_cost (speed, mode, m);
2953 new_limit.cost = best_cost.cost - op_cost;
2954 new_limit.latency = best_cost.latency - op_cost;
2955 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2957 alg_in->cost.cost += op_cost;
2958 alg_in->cost.latency += op_cost;
2959 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2961 best_cost = alg_in->cost;
2962 std::swap (alg_in, best_alg);
2963 best_alg->log[best_alg->ops] = m;
2964 best_alg->op[best_alg->ops] = alg_add_t2_m;
2967 if (cache_hit)
2968 goto done;
2970 do_alg_sub_t2_m:
2971 q = t + 1;
2972 m = ctz_hwi (q);
2973 if (q && m < maxm)
2975 op_cost = shiftsub0_cost (speed, mode, m);
2976 new_limit.cost = best_cost.cost - op_cost;
2977 new_limit.latency = best_cost.latency - op_cost;
2978 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2980 alg_in->cost.cost += op_cost;
2981 alg_in->cost.latency += op_cost;
2982 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2984 best_cost = alg_in->cost;
2985 std::swap (alg_in, best_alg);
2986 best_alg->log[best_alg->ops] = m;
2987 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2990 if (cache_hit)
2991 goto done;
2994 done:
2995 /* If best_cost has not decreased, we have not found any algorithm. */
2996 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2998 /* We failed to find an algorithm. Record alg_impossible for
2999 this case (that is, <T, MODE, COST_LIMIT>) so that next time
3000 we are asked to find an algorithm for T within the same or
3001 lower COST_LIMIT, we can immediately return to the
3002 caller. */
3003 entry_ptr->t = t;
3004 entry_ptr->mode = mode;
3005 entry_ptr->speed = speed;
3006 entry_ptr->alg = alg_impossible;
3007 entry_ptr->cost = *cost_limit;
3008 return;
3011 /* Cache the result. */
3012 if (!cache_hit)
3014 entry_ptr->t = t;
3015 entry_ptr->mode = mode;
3016 entry_ptr->speed = speed;
3017 entry_ptr->alg = best_alg->op[best_alg->ops];
3018 entry_ptr->cost.cost = best_cost.cost;
3019 entry_ptr->cost.latency = best_cost.latency;
3022 /* If we are getting a too long sequence for `struct algorithm'
3023 to record, make this search fail. */
3024 if (best_alg->ops == MAX_BITS_PER_WORD)
3025 return;
3027 /* Copy the algorithm from temporary space to the space at alg_out.
3028 We avoid using structure assignment because the majority of
3029 best_alg is normally undefined, and this is a critical function. */
3030 alg_out->ops = best_alg->ops + 1;
3031 alg_out->cost = best_cost;
3032 memcpy (alg_out->op, best_alg->op,
3033 alg_out->ops * sizeof *alg_out->op);
3034 memcpy (alg_out->log, best_alg->log,
3035 alg_out->ops * sizeof *alg_out->log);
3038 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
3039 Try three variations:
3041 - a shift/add sequence based on VAL itself
3042 - a shift/add sequence based on -VAL, followed by a negation
3043 - a shift/add sequence based on VAL - 1, followed by an addition.
3045 Return true if the cheapest of these cost less than MULT_COST,
3046 describing the algorithm in *ALG and final fixup in *VARIANT. */
3048 bool
3049 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
3050 struct algorithm *alg, enum mult_variant *variant,
3051 int mult_cost)
3053 struct algorithm alg2;
3054 struct mult_cost limit;
3055 int op_cost;
3056 bool speed = optimize_insn_for_speed_p ();
3058 /* Fail quickly for impossible bounds. */
3059 if (mult_cost < 0)
3060 return false;
3062 /* Ensure that mult_cost provides a reasonable upper bound.
3063 Any constant multiplication can be performed with less
3064 than 2 * bits additions. */
3065 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
3066 if (mult_cost > op_cost)
3067 mult_cost = op_cost;
3069 *variant = basic_variant;
3070 limit.cost = mult_cost;
3071 limit.latency = mult_cost;
3072 synth_mult (alg, val, &limit, mode);
3074 /* This works only if the inverted value actually fits in an
3075 `unsigned int' */
3076 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
3078 op_cost = neg_cost (speed, mode);
3079 if (MULT_COST_LESS (&alg->cost, mult_cost))
3081 limit.cost = alg->cost.cost - op_cost;
3082 limit.latency = alg->cost.latency - op_cost;
3084 else
3086 limit.cost = mult_cost - op_cost;
3087 limit.latency = mult_cost - op_cost;
3090 synth_mult (&alg2, -val, &limit, mode);
3091 alg2.cost.cost += op_cost;
3092 alg2.cost.latency += op_cost;
3093 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3094 *alg = alg2, *variant = negate_variant;
3097 /* This proves very useful for division-by-constant. */
3098 op_cost = add_cost (speed, mode);
3099 if (MULT_COST_LESS (&alg->cost, mult_cost))
3101 limit.cost = alg->cost.cost - op_cost;
3102 limit.latency = alg->cost.latency - op_cost;
3104 else
3106 limit.cost = mult_cost - op_cost;
3107 limit.latency = mult_cost - op_cost;
3110 synth_mult (&alg2, val - 1, &limit, mode);
3111 alg2.cost.cost += op_cost;
3112 alg2.cost.latency += op_cost;
3113 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3114 *alg = alg2, *variant = add_variant;
3116 return MULT_COST_LESS (&alg->cost, mult_cost);
3119 /* A subroutine of expand_mult, used for constant multiplications.
3120 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3121 convenient. Use the shift/add sequence described by ALG and apply
3122 the final fixup specified by VARIANT. */
3124 static rtx
3125 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
3126 rtx target, const struct algorithm *alg,
3127 enum mult_variant variant)
3129 unsigned HOST_WIDE_INT val_so_far;
3130 rtx_insn *insn;
3131 rtx accum, tem;
3132 int opno;
3133 machine_mode nmode;
3135 /* Avoid referencing memory over and over and invalid sharing
3136 on SUBREGs. */
3137 op0 = force_reg (mode, op0);
3139 /* ACCUM starts out either as OP0 or as a zero, depending on
3140 the first operation. */
3142 if (alg->op[0] == alg_zero)
3144 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3145 val_so_far = 0;
3147 else if (alg->op[0] == alg_m)
3149 accum = copy_to_mode_reg (mode, op0);
3150 val_so_far = 1;
3152 else
3153 gcc_unreachable ();
3155 for (opno = 1; opno < alg->ops; opno++)
3157 int log = alg->log[opno];
3158 rtx shift_subtarget = optimize ? 0 : accum;
3159 rtx add_target
3160 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3161 && !optimize)
3162 ? target : 0;
3163 rtx accum_target = optimize ? 0 : accum;
3164 rtx accum_inner;
3166 switch (alg->op[opno])
3168 case alg_shift:
3169 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3170 /* REG_EQUAL note will be attached to the following insn. */
3171 emit_move_insn (accum, tem);
3172 val_so_far <<= log;
3173 break;
3175 case alg_add_t_m2:
3176 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3177 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3178 add_target ? add_target : accum_target);
3179 val_so_far += HOST_WIDE_INT_1U << log;
3180 break;
3182 case alg_sub_t_m2:
3183 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3184 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3185 add_target ? add_target : accum_target);
3186 val_so_far -= HOST_WIDE_INT_1U << log;
3187 break;
3189 case alg_add_t2_m:
3190 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3191 log, shift_subtarget, 0);
3192 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3193 add_target ? add_target : accum_target);
3194 val_so_far = (val_so_far << log) + 1;
3195 break;
3197 case alg_sub_t2_m:
3198 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3199 log, shift_subtarget, 0);
3200 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3201 add_target ? add_target : accum_target);
3202 val_so_far = (val_so_far << log) - 1;
3203 break;
3205 case alg_add_factor:
3206 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3207 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3208 add_target ? add_target : accum_target);
3209 val_so_far += val_so_far << log;
3210 break;
3212 case alg_sub_factor:
3213 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3214 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3215 (add_target
3216 ? add_target : (optimize ? 0 : tem)));
3217 val_so_far = (val_so_far << log) - val_so_far;
3218 break;
3220 default:
3221 gcc_unreachable ();
3224 if (SCALAR_INT_MODE_P (mode))
3226 /* Write a REG_EQUAL note on the last insn so that we can cse
3227 multiplication sequences. Note that if ACCUM is a SUBREG,
3228 we've set the inner register and must properly indicate that. */
3229 tem = op0, nmode = mode;
3230 accum_inner = accum;
3231 if (GET_CODE (accum) == SUBREG)
3233 accum_inner = SUBREG_REG (accum);
3234 nmode = GET_MODE (accum_inner);
3235 tem = gen_lowpart (nmode, op0);
3238 insn = get_last_insn ();
3239 set_dst_reg_note (insn, REG_EQUAL,
3240 gen_rtx_MULT (nmode, tem,
3241 gen_int_mode (val_so_far, nmode)),
3242 accum_inner);
3246 if (variant == negate_variant)
3248 val_so_far = -val_so_far;
3249 accum = expand_unop (mode, neg_optab, accum, target, 0);
3251 else if (variant == add_variant)
3253 val_so_far = val_so_far + 1;
3254 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3257 /* Compare only the bits of val and val_so_far that are significant
3258 in the result mode, to avoid sign-/zero-extension confusion. */
3259 nmode = GET_MODE_INNER (mode);
3260 val &= GET_MODE_MASK (nmode);
3261 val_so_far &= GET_MODE_MASK (nmode);
3262 gcc_assert (val == (HOST_WIDE_INT) val_so_far);
3264 return accum;
3267 /* Perform a multiplication and return an rtx for the result.
3268 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3269 TARGET is a suggestion for where to store the result (an rtx).
3271 We check specially for a constant integer as OP1.
3272 If you want this check for OP0 as well, then before calling
3273 you should swap the two operands if OP0 would be constant. */
3276 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3277 int unsignedp)
3279 enum mult_variant variant;
3280 struct algorithm algorithm;
3281 rtx scalar_op1;
3282 int max_cost;
3283 bool speed = optimize_insn_for_speed_p ();
3284 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3286 if (CONSTANT_P (op0))
3287 std::swap (op0, op1);
3289 /* For vectors, there are several simplifications that can be made if
3290 all elements of the vector constant are identical. */
3291 scalar_op1 = unwrap_const_vec_duplicate (op1);
3293 if (INTEGRAL_MODE_P (mode))
3295 rtx fake_reg;
3296 HOST_WIDE_INT coeff;
3297 bool is_neg;
3298 int mode_bitsize;
3300 if (op1 == CONST0_RTX (mode))
3301 return op1;
3302 if (op1 == CONST1_RTX (mode))
3303 return op0;
3304 if (op1 == CONSTM1_RTX (mode))
3305 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3306 op0, target, 0);
3308 if (do_trapv)
3309 goto skip_synth;
3311 /* If mode is integer vector mode, check if the backend supports
3312 vector lshift (by scalar or vector) at all. If not, we can't use
3313 synthetized multiply. */
3314 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3315 && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3316 && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3317 goto skip_synth;
3319 /* These are the operations that are potentially turned into
3320 a sequence of shifts and additions. */
3321 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3323 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3324 less than or equal in size to `unsigned int' this doesn't matter.
3325 If the mode is larger than `unsigned int', then synth_mult works
3326 only if the constant value exactly fits in an `unsigned int' without
3327 any truncation. This means that multiplying by negative values does
3328 not work; results are off by 2^32 on a 32 bit machine. */
3329 if (CONST_INT_P (scalar_op1))
3331 coeff = INTVAL (scalar_op1);
3332 is_neg = coeff < 0;
3334 #if TARGET_SUPPORTS_WIDE_INT
3335 else if (CONST_WIDE_INT_P (scalar_op1))
3336 #else
3337 else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3338 #endif
3340 int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode));
3341 /* Perfect power of 2 (other than 1, which is handled above). */
3342 if (shift > 0)
3343 return expand_shift (LSHIFT_EXPR, mode, op0,
3344 shift, target, unsignedp);
3345 else
3346 goto skip_synth;
3348 else
3349 goto skip_synth;
3351 /* We used to test optimize here, on the grounds that it's better to
3352 produce a smaller program when -O is not used. But this causes
3353 such a terrible slowdown sometimes that it seems better to always
3354 use synth_mult. */
3356 /* Special case powers of two. */
3357 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3358 && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3359 return expand_shift (LSHIFT_EXPR, mode, op0,
3360 floor_log2 (coeff), target, unsignedp);
3362 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3364 /* Attempt to handle multiplication of DImode values by negative
3365 coefficients, by performing the multiplication by a positive
3366 multiplier and then inverting the result. */
3367 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3369 /* Its safe to use -coeff even for INT_MIN, as the
3370 result is interpreted as an unsigned coefficient.
3371 Exclude cost of op0 from max_cost to match the cost
3372 calculation of the synth_mult. */
3373 coeff = -(unsigned HOST_WIDE_INT) coeff;
3374 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3375 mode, speed)
3376 - neg_cost (speed, mode));
3377 if (max_cost <= 0)
3378 goto skip_synth;
3380 /* Special case powers of two. */
3381 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3383 rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3384 floor_log2 (coeff), target, unsignedp);
3385 return expand_unop (mode, neg_optab, temp, target, 0);
3388 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3389 max_cost))
3391 rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3392 &algorithm, variant);
3393 return expand_unop (mode, neg_optab, temp, target, 0);
3395 goto skip_synth;
3398 /* Exclude cost of op0 from max_cost to match the cost
3399 calculation of the synth_mult. */
3400 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3401 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3402 return expand_mult_const (mode, op0, coeff, target,
3403 &algorithm, variant);
3405 skip_synth:
3407 /* Expand x*2.0 as x+x. */
3408 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3409 && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
3411 op0 = force_reg (GET_MODE (op0), op0);
3412 return expand_binop (mode, add_optab, op0, op0,
3413 target, unsignedp, OPTAB_LIB_WIDEN);
3416 /* This used to use umul_optab if unsigned, but for non-widening multiply
3417 there is no difference between signed and unsigned. */
3418 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3419 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3420 gcc_assert (op0);
3421 return op0;
3424 /* Return a cost estimate for multiplying a register by the given
3425 COEFFicient in the given MODE and SPEED. */
3428 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3430 int max_cost;
3431 struct algorithm algorithm;
3432 enum mult_variant variant;
3434 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3435 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3436 mode, speed);
3437 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3438 return algorithm.cost.cost;
3439 else
3440 return max_cost;
3443 /* Perform a widening multiplication and return an rtx for the result.
3444 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3445 TARGET is a suggestion for where to store the result (an rtx).
3446 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3447 or smul_widen_optab.
3449 We check specially for a constant integer as OP1, comparing the
3450 cost of a widening multiply against the cost of a sequence of shifts
3451 and adds. */
3454 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3455 int unsignedp, optab this_optab)
3457 bool speed = optimize_insn_for_speed_p ();
3458 rtx cop1;
3460 if (CONST_INT_P (op1)
3461 && GET_MODE (op0) != VOIDmode
3462 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3463 this_optab == umul_widen_optab))
3464 && CONST_INT_P (cop1)
3465 && (INTVAL (cop1) >= 0
3466 || HWI_COMPUTABLE_MODE_P (mode)))
3468 HOST_WIDE_INT coeff = INTVAL (cop1);
3469 int max_cost;
3470 enum mult_variant variant;
3471 struct algorithm algorithm;
3473 if (coeff == 0)
3474 return CONST0_RTX (mode);
3476 /* Special case powers of two. */
3477 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3479 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3480 return expand_shift (LSHIFT_EXPR, mode, op0,
3481 floor_log2 (coeff), target, unsignedp);
3484 /* Exclude cost of op0 from max_cost to match the cost
3485 calculation of the synth_mult. */
3486 max_cost = mul_widen_cost (speed, mode);
3487 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3488 max_cost))
3490 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3491 return expand_mult_const (mode, op0, coeff, target,
3492 &algorithm, variant);
3495 return expand_binop (mode, this_optab, op0, op1, target,
3496 unsignedp, OPTAB_LIB_WIDEN);
3499 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3500 replace division by D, and put the least significant N bits of the result
3501 in *MULTIPLIER_PTR and return the most significant bit.
3503 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3504 needed precision is in PRECISION (should be <= N).
3506 PRECISION should be as small as possible so this function can choose
3507 multiplier more freely.
3509 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3510 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3512 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3513 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
3515 unsigned HOST_WIDE_INT
3516 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3517 unsigned HOST_WIDE_INT *multiplier_ptr,
3518 int *post_shift_ptr, int *lgup_ptr)
3520 int lgup, post_shift;
3521 int pow, pow2;
3523 /* lgup = ceil(log2(divisor)); */
3524 lgup = ceil_log2 (d);
3526 gcc_assert (lgup <= n);
3528 pow = n + lgup;
3529 pow2 = n + lgup - precision;
3531 /* mlow = 2^(N + lgup)/d */
3532 wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3533 wide_int mlow = wi::udiv_trunc (val, d);
3535 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3536 val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3537 wide_int mhigh = wi::udiv_trunc (val, d);
3539 /* If precision == N, then mlow, mhigh exceed 2^N
3540 (but they do not exceed 2^(N+1)). */
3542 /* Reduce to lowest terms. */
3543 for (post_shift = lgup; post_shift > 0; post_shift--)
3545 unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3546 HOST_BITS_PER_WIDE_INT);
3547 unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3548 HOST_BITS_PER_WIDE_INT);
3549 if (ml_lo >= mh_lo)
3550 break;
3552 mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3553 mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3556 *post_shift_ptr = post_shift;
3557 *lgup_ptr = lgup;
3558 if (n < HOST_BITS_PER_WIDE_INT)
3560 unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;
3561 *multiplier_ptr = mhigh.to_uhwi () & mask;
3562 return mhigh.to_uhwi () >= mask;
3564 else
3566 *multiplier_ptr = mhigh.to_uhwi ();
3567 return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3571 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3572 congruent to 1 (mod 2**N). */
3574 static unsigned HOST_WIDE_INT
3575 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3577 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3579 /* The algorithm notes that the choice y = x satisfies
3580 x*y == 1 mod 2^3, since x is assumed odd.
3581 Each iteration doubles the number of bits of significance in y. */
3583 unsigned HOST_WIDE_INT mask;
3584 unsigned HOST_WIDE_INT y = x;
3585 int nbit = 3;
3587 mask = (n == HOST_BITS_PER_WIDE_INT
3588 ? HOST_WIDE_INT_M1U
3589 : (HOST_WIDE_INT_1U << n) - 1);
3591 while (nbit < n)
3593 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3594 nbit *= 2;
3596 return y;
3599 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3600 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3601 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3602 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3603 become signed.
3605 The result is put in TARGET if that is convenient.
3607 MODE is the mode of operation. */
3610 expand_mult_highpart_adjust (machine_mode mode, rtx adj_operand, rtx op0,
3611 rtx op1, rtx target, int unsignedp)
3613 rtx tem;
3614 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3616 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3617 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3618 tem = expand_and (mode, tem, op1, NULL_RTX);
3619 adj_operand
3620 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3621 adj_operand);
3623 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3624 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3625 tem = expand_and (mode, tem, op0, NULL_RTX);
3626 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3627 target);
3629 return target;
3632 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3634 static rtx
3635 extract_high_half (machine_mode mode, rtx op)
3637 machine_mode wider_mode;
3639 if (mode == word_mode)
3640 return gen_highpart (mode, op);
3642 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3644 wider_mode = GET_MODE_WIDER_MODE (mode);
3645 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3646 GET_MODE_BITSIZE (mode), 0, 1);
3647 return convert_modes (mode, wider_mode, op, 0);
3650 /* Like expmed_mult_highpart, but only consider using a multiplication
3651 optab. OP1 is an rtx for the constant operand. */
3653 static rtx
3654 expmed_mult_highpart_optab (machine_mode mode, rtx op0, rtx op1,
3655 rtx target, int unsignedp, int max_cost)
3657 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3658 machine_mode wider_mode;
3659 optab moptab;
3660 rtx tem;
3661 int size;
3662 bool speed = optimize_insn_for_speed_p ();
3664 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3666 wider_mode = GET_MODE_WIDER_MODE (mode);
3667 size = GET_MODE_BITSIZE (mode);
3669 /* Firstly, try using a multiplication insn that only generates the needed
3670 high part of the product, and in the sign flavor of unsignedp. */
3671 if (mul_highpart_cost (speed, mode) < max_cost)
3673 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3674 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3675 unsignedp, OPTAB_DIRECT);
3676 if (tem)
3677 return tem;
3680 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3681 Need to adjust the result after the multiplication. */
3682 if (size - 1 < BITS_PER_WORD
3683 && (mul_highpart_cost (speed, mode)
3684 + 2 * shift_cost (speed, mode, size-1)
3685 + 4 * add_cost (speed, mode) < max_cost))
3687 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3688 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3689 unsignedp, OPTAB_DIRECT);
3690 if (tem)
3691 /* We used the wrong signedness. Adjust the result. */
3692 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3693 tem, unsignedp);
3696 /* Try widening multiplication. */
3697 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3698 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3699 && mul_widen_cost (speed, wider_mode) < max_cost)
3701 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3702 unsignedp, OPTAB_WIDEN);
3703 if (tem)
3704 return extract_high_half (mode, tem);
3707 /* Try widening the mode and perform a non-widening multiplication. */
3708 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3709 && size - 1 < BITS_PER_WORD
3710 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3711 < max_cost))
3713 rtx_insn *insns;
3714 rtx wop0, wop1;
3716 /* We need to widen the operands, for example to ensure the
3717 constant multiplier is correctly sign or zero extended.
3718 Use a sequence to clean-up any instructions emitted by
3719 the conversions if things don't work out. */
3720 start_sequence ();
3721 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3722 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3723 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3724 unsignedp, OPTAB_WIDEN);
3725 insns = get_insns ();
3726 end_sequence ();
3728 if (tem)
3730 emit_insn (insns);
3731 return extract_high_half (mode, tem);
3735 /* Try widening multiplication of opposite signedness, and adjust. */
3736 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3737 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3738 && size - 1 < BITS_PER_WORD
3739 && (mul_widen_cost (speed, wider_mode)
3740 + 2 * shift_cost (speed, mode, size-1)
3741 + 4 * add_cost (speed, mode) < max_cost))
3743 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3744 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3745 if (tem != 0)
3747 tem = extract_high_half (mode, tem);
3748 /* We used the wrong signedness. Adjust the result. */
3749 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3750 target, unsignedp);
3754 return 0;
3757 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3758 putting the high half of the result in TARGET if that is convenient,
3759 and return where the result is. If the operation can not be performed,
3760 0 is returned.
3762 MODE is the mode of operation and result.
3764 UNSIGNEDP nonzero means unsigned multiply.
3766 MAX_COST is the total allowed cost for the expanded RTL. */
3768 static rtx
3769 expmed_mult_highpart (machine_mode mode, rtx op0, rtx op1,
3770 rtx target, int unsignedp, int max_cost)
3772 machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3773 unsigned HOST_WIDE_INT cnst1;
3774 int extra_cost;
3775 bool sign_adjust = false;
3776 enum mult_variant variant;
3777 struct algorithm alg;
3778 rtx tem;
3779 bool speed = optimize_insn_for_speed_p ();
3781 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3782 /* We can't support modes wider than HOST_BITS_PER_INT. */
3783 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3785 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3787 /* We can't optimize modes wider than BITS_PER_WORD.
3788 ??? We might be able to perform double-word arithmetic if
3789 mode == word_mode, however all the cost calculations in
3790 synth_mult etc. assume single-word operations. */
3791 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3792 return expmed_mult_highpart_optab (mode, op0, op1, target,
3793 unsignedp, max_cost);
3795 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3797 /* Check whether we try to multiply by a negative constant. */
3798 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3800 sign_adjust = true;
3801 extra_cost += add_cost (speed, mode);
3804 /* See whether shift/add multiplication is cheap enough. */
3805 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3806 max_cost - extra_cost))
3808 /* See whether the specialized multiplication optabs are
3809 cheaper than the shift/add version. */
3810 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3811 alg.cost.cost + extra_cost);
3812 if (tem)
3813 return tem;
3815 tem = convert_to_mode (wider_mode, op0, unsignedp);
3816 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3817 tem = extract_high_half (mode, tem);
3819 /* Adjust result for signedness. */
3820 if (sign_adjust)
3821 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3823 return tem;
3825 return expmed_mult_highpart_optab (mode, op0, op1, target,
3826 unsignedp, max_cost);
3830 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3832 static rtx
3833 expand_smod_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3835 rtx result, temp, shift;
3836 rtx_code_label *label;
3837 int logd;
3838 int prec = GET_MODE_PRECISION (mode);
3840 logd = floor_log2 (d);
3841 result = gen_reg_rtx (mode);
3843 /* Avoid conditional branches when they're expensive. */
3844 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3845 && optimize_insn_for_speed_p ())
3847 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3848 mode, 0, -1);
3849 if (signmask)
3851 HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
3852 signmask = force_reg (mode, signmask);
3853 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3855 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3856 which instruction sequence to use. If logical right shifts
3857 are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3858 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
3860 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3861 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3862 || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
3863 > COSTS_N_INSNS (2)))
3865 temp = expand_binop (mode, xor_optab, op0, signmask,
3866 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3867 temp = expand_binop (mode, sub_optab, temp, signmask,
3868 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3869 temp = expand_binop (mode, and_optab, temp,
3870 gen_int_mode (masklow, mode),
3871 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3872 temp = expand_binop (mode, xor_optab, temp, signmask,
3873 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3874 temp = expand_binop (mode, sub_optab, temp, signmask,
3875 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3877 else
3879 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3880 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3881 signmask = force_reg (mode, signmask);
3883 temp = expand_binop (mode, add_optab, op0, signmask,
3884 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3885 temp = expand_binop (mode, and_optab, temp,
3886 gen_int_mode (masklow, mode),
3887 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3888 temp = expand_binop (mode, sub_optab, temp, signmask,
3889 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3891 return temp;
3895 /* Mask contains the mode's signbit and the significant bits of the
3896 modulus. By including the signbit in the operation, many targets
3897 can avoid an explicit compare operation in the following comparison
3898 against zero. */
3899 wide_int mask = wi::mask (logd, false, prec);
3900 mask = wi::set_bit (mask, prec - 1);
3902 temp = expand_binop (mode, and_optab, op0,
3903 immed_wide_int_const (mask, mode),
3904 result, 1, OPTAB_LIB_WIDEN);
3905 if (temp != result)
3906 emit_move_insn (result, temp);
3908 label = gen_label_rtx ();
3909 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3911 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3912 0, OPTAB_LIB_WIDEN);
3914 mask = wi::mask (logd, true, prec);
3915 temp = expand_binop (mode, ior_optab, temp,
3916 immed_wide_int_const (mask, mode),
3917 result, 1, OPTAB_LIB_WIDEN);
3918 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3919 0, OPTAB_LIB_WIDEN);
3920 if (temp != result)
3921 emit_move_insn (result, temp);
3922 emit_label (label);
3923 return result;
3926 /* Expand signed division of OP0 by a power of two D in mode MODE.
3927 This routine is only called for positive values of D. */
3929 static rtx
3930 expand_sdiv_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3932 rtx temp;
3933 rtx_code_label *label;
3934 int logd;
3936 logd = floor_log2 (d);
3938 if (d == 2
3939 && BRANCH_COST (optimize_insn_for_speed_p (),
3940 false) >= 1)
3942 temp = gen_reg_rtx (mode);
3943 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3944 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3945 0, OPTAB_LIB_WIDEN);
3946 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3949 if (HAVE_conditional_move
3950 && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
3952 rtx temp2;
3954 start_sequence ();
3955 temp2 = copy_to_mode_reg (mode, op0);
3956 temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
3957 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3958 temp = force_reg (mode, temp);
3960 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3961 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3962 mode, temp, temp2, mode, 0);
3963 if (temp2)
3965 rtx_insn *seq = get_insns ();
3966 end_sequence ();
3967 emit_insn (seq);
3968 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3970 end_sequence ();
3973 if (BRANCH_COST (optimize_insn_for_speed_p (),
3974 false) >= 2)
3976 int ushift = GET_MODE_BITSIZE (mode) - logd;
3978 temp = gen_reg_rtx (mode);
3979 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3980 if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
3981 || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3982 > COSTS_N_INSNS (1))
3983 temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
3984 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3985 else
3986 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3987 ushift, NULL_RTX, 1);
3988 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3989 0, OPTAB_LIB_WIDEN);
3990 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3993 label = gen_label_rtx ();
3994 temp = copy_to_mode_reg (mode, op0);
3995 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3996 expand_inc (temp, gen_int_mode (d - 1, mode));
3997 emit_label (label);
3998 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4001 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
4002 if that is convenient, and returning where the result is.
4003 You may request either the quotient or the remainder as the result;
4004 specify REM_FLAG nonzero to get the remainder.
4006 CODE is the expression code for which kind of division this is;
4007 it controls how rounding is done. MODE is the machine mode to use.
4008 UNSIGNEDP nonzero means do unsigned division. */
4010 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
4011 and then correct it by or'ing in missing high bits
4012 if result of ANDI is nonzero.
4013 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
4014 This could optimize to a bfexts instruction.
4015 But C doesn't use these operations, so their optimizations are
4016 left for later. */
4017 /* ??? For modulo, we don't actually need the highpart of the first product,
4018 the low part will do nicely. And for small divisors, the second multiply
4019 can also be a low-part only multiply or even be completely left out.
4020 E.g. to calculate the remainder of a division by 3 with a 32 bit
4021 multiply, multiply with 0x55555556 and extract the upper two bits;
4022 the result is exact for inputs up to 0x1fffffff.
4023 The input range can be reduced by using cross-sum rules.
4024 For odd divisors >= 3, the following table gives right shift counts
4025 so that if a number is shifted by an integer multiple of the given
4026 amount, the remainder stays the same:
4027 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
4028 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
4029 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
4030 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
4031 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
4033 Cross-sum rules for even numbers can be derived by leaving as many bits
4034 to the right alone as the divisor has zeros to the right.
4035 E.g. if x is an unsigned 32 bit number:
4036 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
4040 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
4041 rtx op0, rtx op1, rtx target, int unsignedp)
4043 machine_mode compute_mode;
4044 rtx tquotient;
4045 rtx quotient = 0, remainder = 0;
4046 rtx_insn *last;
4047 int size;
4048 rtx_insn *insn;
4049 optab optab1, optab2;
4050 int op1_is_constant, op1_is_pow2 = 0;
4051 int max_cost, extra_cost;
4052 static HOST_WIDE_INT last_div_const = 0;
4053 bool speed = optimize_insn_for_speed_p ();
4055 op1_is_constant = CONST_INT_P (op1);
4056 if (op1_is_constant)
4058 wide_int ext_op1 = rtx_mode_t (op1, mode);
4059 op1_is_pow2 = (wi::popcount (ext_op1) == 1
4060 || (! unsignedp
4061 && wi::popcount (wi::neg (ext_op1)) == 1));
4065 This is the structure of expand_divmod:
4067 First comes code to fix up the operands so we can perform the operations
4068 correctly and efficiently.
4070 Second comes a switch statement with code specific for each rounding mode.
4071 For some special operands this code emits all RTL for the desired
4072 operation, for other cases, it generates only a quotient and stores it in
4073 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
4074 to indicate that it has not done anything.
4076 Last comes code that finishes the operation. If QUOTIENT is set and
4077 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
4078 QUOTIENT is not set, it is computed using trunc rounding.
4080 We try to generate special code for division and remainder when OP1 is a
4081 constant. If |OP1| = 2**n we can use shifts and some other fast
4082 operations. For other values of OP1, we compute a carefully selected
4083 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4084 by m.
4086 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4087 half of the product. Different strategies for generating the product are
4088 implemented in expmed_mult_highpart.
4090 If what we actually want is the remainder, we generate that by another
4091 by-constant multiplication and a subtraction. */
4093 /* We shouldn't be called with OP1 == const1_rtx, but some of the
4094 code below will malfunction if we are, so check here and handle
4095 the special case if so. */
4096 if (op1 == const1_rtx)
4097 return rem_flag ? const0_rtx : op0;
4099 /* When dividing by -1, we could get an overflow.
4100 negv_optab can handle overflows. */
4101 if (! unsignedp && op1 == constm1_rtx)
4103 if (rem_flag)
4104 return const0_rtx;
4105 return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4106 ? negv_optab : neg_optab, op0, target, 0);
4109 if (target
4110 /* Don't use the function value register as a target
4111 since we have to read it as well as write it,
4112 and function-inlining gets confused by this. */
4113 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4114 /* Don't clobber an operand while doing a multi-step calculation. */
4115 || ((rem_flag || op1_is_constant)
4116 && (reg_mentioned_p (target, op0)
4117 || (MEM_P (op0) && MEM_P (target))))
4118 || reg_mentioned_p (target, op1)
4119 || (MEM_P (op1) && MEM_P (target))))
4120 target = 0;
4122 /* Get the mode in which to perform this computation. Normally it will
4123 be MODE, but sometimes we can't do the desired operation in MODE.
4124 If so, pick a wider mode in which we can do the operation. Convert
4125 to that mode at the start to avoid repeated conversions.
4127 First see what operations we need. These depend on the expression
4128 we are evaluating. (We assume that divxx3 insns exist under the
4129 same conditions that modxx3 insns and that these insns don't normally
4130 fail. If these assumptions are not correct, we may generate less
4131 efficient code in some cases.)
4133 Then see if we find a mode in which we can open-code that operation
4134 (either a division, modulus, or shift). Finally, check for the smallest
4135 mode for which we can do the operation with a library call. */
4137 /* We might want to refine this now that we have division-by-constant
4138 optimization. Since expmed_mult_highpart tries so many variants, it is
4139 not straightforward to generalize this. Maybe we should make an array
4140 of possible modes in init_expmed? Save this for GCC 2.7. */
4142 optab1 = (op1_is_pow2
4143 ? (unsignedp ? lshr_optab : ashr_optab)
4144 : (unsignedp ? udiv_optab : sdiv_optab));
4145 optab2 = (op1_is_pow2 ? optab1
4146 : (unsignedp ? udivmod_optab : sdivmod_optab));
4148 FOR_EACH_MODE_FROM (compute_mode, mode)
4149 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4150 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4151 break;
4153 if (compute_mode == VOIDmode)
4154 FOR_EACH_MODE_FROM (compute_mode, mode)
4155 if (optab_libfunc (optab1, compute_mode)
4156 || optab_libfunc (optab2, compute_mode))
4157 break;
4159 /* If we still couldn't find a mode, use MODE, but expand_binop will
4160 probably die. */
4161 if (compute_mode == VOIDmode)
4162 compute_mode = mode;
4164 if (target && GET_MODE (target) == compute_mode)
4165 tquotient = target;
4166 else
4167 tquotient = gen_reg_rtx (compute_mode);
4169 size = GET_MODE_BITSIZE (compute_mode);
4170 #if 0
4171 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4172 (mode), and thereby get better code when OP1 is a constant. Do that
4173 later. It will require going over all usages of SIZE below. */
4174 size = GET_MODE_BITSIZE (mode);
4175 #endif
4177 /* Only deduct something for a REM if the last divide done was
4178 for a different constant. Then set the constant of the last
4179 divide. */
4180 max_cost = (unsignedp
4181 ? udiv_cost (speed, compute_mode)
4182 : sdiv_cost (speed, compute_mode));
4183 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4184 && INTVAL (op1) == last_div_const))
4185 max_cost -= (mul_cost (speed, compute_mode)
4186 + add_cost (speed, compute_mode));
4188 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4190 /* Now convert to the best mode to use. */
4191 if (compute_mode != mode)
4193 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4194 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4196 /* convert_modes may have placed op1 into a register, so we
4197 must recompute the following. */
4198 op1_is_constant = CONST_INT_P (op1);
4199 if (op1_is_constant)
4201 wide_int ext_op1 = rtx_mode_t (op1, compute_mode);
4202 op1_is_pow2 = (wi::popcount (ext_op1) == 1
4203 || (! unsignedp
4204 && wi::popcount (wi::neg (ext_op1)) == 1));
4206 else
4207 op1_is_pow2 = 0;
4210 /* If one of the operands is a volatile MEM, copy it into a register. */
4212 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4213 op0 = force_reg (compute_mode, op0);
4214 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4215 op1 = force_reg (compute_mode, op1);
4217 /* If we need the remainder or if OP1 is constant, we need to
4218 put OP0 in a register in case it has any queued subexpressions. */
4219 if (rem_flag || op1_is_constant)
4220 op0 = force_reg (compute_mode, op0);
4222 last = get_last_insn ();
4224 /* Promote floor rounding to trunc rounding for unsigned operations. */
4225 if (unsignedp)
4227 if (code == FLOOR_DIV_EXPR)
4228 code = TRUNC_DIV_EXPR;
4229 if (code == FLOOR_MOD_EXPR)
4230 code = TRUNC_MOD_EXPR;
4231 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4232 code = TRUNC_DIV_EXPR;
4235 if (op1 != const0_rtx)
4236 switch (code)
4238 case TRUNC_MOD_EXPR:
4239 case TRUNC_DIV_EXPR:
4240 if (op1_is_constant)
4242 if (unsignedp)
4244 unsigned HOST_WIDE_INT mh, ml;
4245 int pre_shift, post_shift;
4246 int dummy;
4247 wide_int wd = rtx_mode_t (op1, compute_mode);
4248 unsigned HOST_WIDE_INT d = wd.to_uhwi ();
4250 if (wi::popcount (wd) == 1)
4252 pre_shift = floor_log2 (d);
4253 if (rem_flag)
4255 unsigned HOST_WIDE_INT mask
4256 = (HOST_WIDE_INT_1U << pre_shift) - 1;
4257 remainder
4258 = expand_binop (compute_mode, and_optab, op0,
4259 gen_int_mode (mask, compute_mode),
4260 remainder, 1,
4261 OPTAB_LIB_WIDEN);
4262 if (remainder)
4263 return gen_lowpart (mode, remainder);
4265 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4266 pre_shift, tquotient, 1);
4268 else if (size <= HOST_BITS_PER_WIDE_INT)
4270 if (d >= (HOST_WIDE_INT_1U << (size - 1)))
4272 /* Most significant bit of divisor is set; emit an scc
4273 insn. */
4274 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4275 compute_mode, 1, 1);
4277 else
4279 /* Find a suitable multiplier and right shift count
4280 instead of multiplying with D. */
4282 mh = choose_multiplier (d, size, size,
4283 &ml, &post_shift, &dummy);
4285 /* If the suggested multiplier is more than SIZE bits,
4286 we can do better for even divisors, using an
4287 initial right shift. */
4288 if (mh != 0 && (d & 1) == 0)
4290 pre_shift = ctz_or_zero (d);
4291 mh = choose_multiplier (d >> pre_shift, size,
4292 size - pre_shift,
4293 &ml, &post_shift, &dummy);
4294 gcc_assert (!mh);
4296 else
4297 pre_shift = 0;
4299 if (mh != 0)
4301 rtx t1, t2, t3, t4;
4303 if (post_shift - 1 >= BITS_PER_WORD)
4304 goto fail1;
4306 extra_cost
4307 = (shift_cost (speed, compute_mode, post_shift - 1)
4308 + shift_cost (speed, compute_mode, 1)
4309 + 2 * add_cost (speed, compute_mode));
4310 t1 = expmed_mult_highpart
4311 (compute_mode, op0,
4312 gen_int_mode (ml, compute_mode),
4313 NULL_RTX, 1, max_cost - extra_cost);
4314 if (t1 == 0)
4315 goto fail1;
4316 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4317 op0, t1),
4318 NULL_RTX);
4319 t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4320 t2, 1, NULL_RTX, 1);
4321 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4322 t1, t3),
4323 NULL_RTX);
4324 quotient = expand_shift
4325 (RSHIFT_EXPR, compute_mode, t4,
4326 post_shift - 1, tquotient, 1);
4328 else
4330 rtx t1, t2;
4332 if (pre_shift >= BITS_PER_WORD
4333 || post_shift >= BITS_PER_WORD)
4334 goto fail1;
4336 t1 = expand_shift
4337 (RSHIFT_EXPR, compute_mode, op0,
4338 pre_shift, NULL_RTX, 1);
4339 extra_cost
4340 = (shift_cost (speed, compute_mode, pre_shift)
4341 + shift_cost (speed, compute_mode, post_shift));
4342 t2 = expmed_mult_highpart
4343 (compute_mode, t1,
4344 gen_int_mode (ml, compute_mode),
4345 NULL_RTX, 1, max_cost - extra_cost);
4346 if (t2 == 0)
4347 goto fail1;
4348 quotient = expand_shift
4349 (RSHIFT_EXPR, compute_mode, t2,
4350 post_shift, tquotient, 1);
4354 else /* Too wide mode to use tricky code */
4355 break;
4357 insn = get_last_insn ();
4358 if (insn != last)
4359 set_dst_reg_note (insn, REG_EQUAL,
4360 gen_rtx_UDIV (compute_mode, op0, op1),
4361 quotient);
4363 else /* TRUNC_DIV, signed */
4365 unsigned HOST_WIDE_INT ml;
4366 int lgup, post_shift;
4367 rtx mlr;
4368 HOST_WIDE_INT d = INTVAL (op1);
4369 unsigned HOST_WIDE_INT abs_d;
4371 /* Since d might be INT_MIN, we have to cast to
4372 unsigned HOST_WIDE_INT before negating to avoid
4373 undefined signed overflow. */
4374 abs_d = (d >= 0
4375 ? (unsigned HOST_WIDE_INT) d
4376 : - (unsigned HOST_WIDE_INT) d);
4378 /* n rem d = n rem -d */
4379 if (rem_flag && d < 0)
4381 d = abs_d;
4382 op1 = gen_int_mode (abs_d, compute_mode);
4385 if (d == 1)
4386 quotient = op0;
4387 else if (d == -1)
4388 quotient = expand_unop (compute_mode, neg_optab, op0,
4389 tquotient, 0);
4390 else if (size <= HOST_BITS_PER_WIDE_INT
4391 && abs_d == HOST_WIDE_INT_1U << (size - 1))
4393 /* This case is not handled correctly below. */
4394 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4395 compute_mode, 1, 1);
4396 if (quotient == 0)
4397 goto fail1;
4399 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4400 && (size <= HOST_BITS_PER_WIDE_INT || d >= 0)
4401 && (rem_flag
4402 ? smod_pow2_cheap (speed, compute_mode)
4403 : sdiv_pow2_cheap (speed, compute_mode))
4404 /* We assume that cheap metric is true if the
4405 optab has an expander for this mode. */
4406 && ((optab_handler ((rem_flag ? smod_optab
4407 : sdiv_optab),
4408 compute_mode)
4409 != CODE_FOR_nothing)
4410 || (optab_handler (sdivmod_optab,
4411 compute_mode)
4412 != CODE_FOR_nothing)))
4414 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d)
4415 && (size <= HOST_BITS_PER_WIDE_INT
4416 || abs_d != (unsigned HOST_WIDE_INT) d))
4418 if (rem_flag)
4420 remainder = expand_smod_pow2 (compute_mode, op0, d);
4421 if (remainder)
4422 return gen_lowpart (mode, remainder);
4425 if (sdiv_pow2_cheap (speed, compute_mode)
4426 && ((optab_handler (sdiv_optab, compute_mode)
4427 != CODE_FOR_nothing)
4428 || (optab_handler (sdivmod_optab, compute_mode)
4429 != CODE_FOR_nothing)))
4430 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4431 compute_mode, op0,
4432 gen_int_mode (abs_d,
4433 compute_mode),
4434 NULL_RTX, 0);
4435 else
4436 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4438 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4439 negate the quotient. */
4440 if (d < 0)
4442 insn = get_last_insn ();
4443 if (insn != last
4444 && abs_d < (HOST_WIDE_INT_1U
4445 << (HOST_BITS_PER_WIDE_INT - 1)))
4446 set_dst_reg_note (insn, REG_EQUAL,
4447 gen_rtx_DIV (compute_mode, op0,
4448 gen_int_mode
4449 (abs_d,
4450 compute_mode)),
4451 quotient);
4453 quotient = expand_unop (compute_mode, neg_optab,
4454 quotient, quotient, 0);
4457 else if (size <= HOST_BITS_PER_WIDE_INT)
4459 choose_multiplier (abs_d, size, size - 1,
4460 &ml, &post_shift, &lgup);
4461 if (ml < HOST_WIDE_INT_1U << (size - 1))
4463 rtx t1, t2, t3;
4465 if (post_shift >= BITS_PER_WORD
4466 || size - 1 >= BITS_PER_WORD)
4467 goto fail1;
4469 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4470 + shift_cost (speed, compute_mode, size - 1)
4471 + add_cost (speed, compute_mode));
4472 t1 = expmed_mult_highpart
4473 (compute_mode, op0, gen_int_mode (ml, compute_mode),
4474 NULL_RTX, 0, max_cost - extra_cost);
4475 if (t1 == 0)
4476 goto fail1;
4477 t2 = expand_shift
4478 (RSHIFT_EXPR, compute_mode, t1,
4479 post_shift, NULL_RTX, 0);
4480 t3 = expand_shift
4481 (RSHIFT_EXPR, compute_mode, op0,
4482 size - 1, NULL_RTX, 0);
4483 if (d < 0)
4484 quotient
4485 = force_operand (gen_rtx_MINUS (compute_mode,
4486 t3, t2),
4487 tquotient);
4488 else
4489 quotient
4490 = force_operand (gen_rtx_MINUS (compute_mode,
4491 t2, t3),
4492 tquotient);
4494 else
4496 rtx t1, t2, t3, t4;
4498 if (post_shift >= BITS_PER_WORD
4499 || size - 1 >= BITS_PER_WORD)
4500 goto fail1;
4502 ml |= HOST_WIDE_INT_M1U << (size - 1);
4503 mlr = gen_int_mode (ml, compute_mode);
4504 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4505 + shift_cost (speed, compute_mode, size - 1)
4506 + 2 * add_cost (speed, compute_mode));
4507 t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4508 NULL_RTX, 0,
4509 max_cost - extra_cost);
4510 if (t1 == 0)
4511 goto fail1;
4512 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4513 t1, op0),
4514 NULL_RTX);
4515 t3 = expand_shift
4516 (RSHIFT_EXPR, compute_mode, t2,
4517 post_shift, NULL_RTX, 0);
4518 t4 = expand_shift
4519 (RSHIFT_EXPR, compute_mode, op0,
4520 size - 1, NULL_RTX, 0);
4521 if (d < 0)
4522 quotient
4523 = force_operand (gen_rtx_MINUS (compute_mode,
4524 t4, t3),
4525 tquotient);
4526 else
4527 quotient
4528 = force_operand (gen_rtx_MINUS (compute_mode,
4529 t3, t4),
4530 tquotient);
4533 else /* Too wide mode to use tricky code */
4534 break;
4536 insn = get_last_insn ();
4537 if (insn != last)
4538 set_dst_reg_note (insn, REG_EQUAL,
4539 gen_rtx_DIV (compute_mode, op0, op1),
4540 quotient);
4542 break;
4544 fail1:
4545 delete_insns_since (last);
4546 break;
4548 case FLOOR_DIV_EXPR:
4549 case FLOOR_MOD_EXPR:
4550 /* We will come here only for signed operations. */
4551 if (op1_is_constant && size <= HOST_BITS_PER_WIDE_INT)
4553 unsigned HOST_WIDE_INT mh, ml;
4554 int pre_shift, lgup, post_shift;
4555 HOST_WIDE_INT d = INTVAL (op1);
4557 if (d > 0)
4559 /* We could just as easily deal with negative constants here,
4560 but it does not seem worth the trouble for GCC 2.6. */
4561 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4563 pre_shift = floor_log2 (d);
4564 if (rem_flag)
4566 unsigned HOST_WIDE_INT mask
4567 = (HOST_WIDE_INT_1U << pre_shift) - 1;
4568 remainder = expand_binop
4569 (compute_mode, and_optab, op0,
4570 gen_int_mode (mask, compute_mode),
4571 remainder, 0, OPTAB_LIB_WIDEN);
4572 if (remainder)
4573 return gen_lowpart (mode, remainder);
4575 quotient = expand_shift
4576 (RSHIFT_EXPR, compute_mode, op0,
4577 pre_shift, tquotient, 0);
4579 else
4581 rtx t1, t2, t3, t4;
4583 mh = choose_multiplier (d, size, size - 1,
4584 &ml, &post_shift, &lgup);
4585 gcc_assert (!mh);
4587 if (post_shift < BITS_PER_WORD
4588 && size - 1 < BITS_PER_WORD)
4590 t1 = expand_shift
4591 (RSHIFT_EXPR, compute_mode, op0,
4592 size - 1, NULL_RTX, 0);
4593 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4594 NULL_RTX, 0, OPTAB_WIDEN);
4595 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4596 + shift_cost (speed, compute_mode, size - 1)
4597 + 2 * add_cost (speed, compute_mode));
4598 t3 = expmed_mult_highpart
4599 (compute_mode, t2, gen_int_mode (ml, compute_mode),
4600 NULL_RTX, 1, max_cost - extra_cost);
4601 if (t3 != 0)
4603 t4 = expand_shift
4604 (RSHIFT_EXPR, compute_mode, t3,
4605 post_shift, NULL_RTX, 1);
4606 quotient = expand_binop (compute_mode, xor_optab,
4607 t4, t1, tquotient, 0,
4608 OPTAB_WIDEN);
4613 else
4615 rtx nsign, t1, t2, t3, t4;
4616 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4617 op0, constm1_rtx), NULL_RTX);
4618 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4619 0, OPTAB_WIDEN);
4620 nsign = expand_shift (RSHIFT_EXPR, compute_mode, t2,
4621 size - 1, NULL_RTX, 0);
4622 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4623 NULL_RTX);
4624 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4625 NULL_RTX, 0);
4626 if (t4)
4628 rtx t5;
4629 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4630 NULL_RTX, 0);
4631 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4632 t4, t5),
4633 tquotient);
4638 if (quotient != 0)
4639 break;
4640 delete_insns_since (last);
4642 /* Try using an instruction that produces both the quotient and
4643 remainder, using truncation. We can easily compensate the quotient
4644 or remainder to get floor rounding, once we have the remainder.
4645 Notice that we compute also the final remainder value here,
4646 and return the result right away. */
4647 if (target == 0 || GET_MODE (target) != compute_mode)
4648 target = gen_reg_rtx (compute_mode);
4650 if (rem_flag)
4652 remainder
4653 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4654 quotient = gen_reg_rtx (compute_mode);
4656 else
4658 quotient
4659 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4660 remainder = gen_reg_rtx (compute_mode);
4663 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4664 quotient, remainder, 0))
4666 /* This could be computed with a branch-less sequence.
4667 Save that for later. */
4668 rtx tem;
4669 rtx_code_label *label = gen_label_rtx ();
4670 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4671 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4672 NULL_RTX, 0, OPTAB_WIDEN);
4673 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4674 expand_dec (quotient, const1_rtx);
4675 expand_inc (remainder, op1);
4676 emit_label (label);
4677 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4680 /* No luck with division elimination or divmod. Have to do it
4681 by conditionally adjusting op0 *and* the result. */
4683 rtx_code_label *label1, *label2, *label3, *label4, *label5;
4684 rtx adjusted_op0;
4685 rtx tem;
4687 quotient = gen_reg_rtx (compute_mode);
4688 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4689 label1 = gen_label_rtx ();
4690 label2 = gen_label_rtx ();
4691 label3 = gen_label_rtx ();
4692 label4 = gen_label_rtx ();
4693 label5 = gen_label_rtx ();
4694 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4695 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4696 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4697 quotient, 0, OPTAB_LIB_WIDEN);
4698 if (tem != quotient)
4699 emit_move_insn (quotient, tem);
4700 emit_jump_insn (targetm.gen_jump (label5));
4701 emit_barrier ();
4702 emit_label (label1);
4703 expand_inc (adjusted_op0, const1_rtx);
4704 emit_jump_insn (targetm.gen_jump (label4));
4705 emit_barrier ();
4706 emit_label (label2);
4707 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4708 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4709 quotient, 0, OPTAB_LIB_WIDEN);
4710 if (tem != quotient)
4711 emit_move_insn (quotient, tem);
4712 emit_jump_insn (targetm.gen_jump (label5));
4713 emit_barrier ();
4714 emit_label (label3);
4715 expand_dec (adjusted_op0, const1_rtx);
4716 emit_label (label4);
4717 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4718 quotient, 0, OPTAB_LIB_WIDEN);
4719 if (tem != quotient)
4720 emit_move_insn (quotient, tem);
4721 expand_dec (quotient, const1_rtx);
4722 emit_label (label5);
4724 break;
4726 case CEIL_DIV_EXPR:
4727 case CEIL_MOD_EXPR:
4728 if (unsignedp)
4730 if (op1_is_constant
4731 && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4732 && (size <= HOST_BITS_PER_WIDE_INT
4733 || INTVAL (op1) >= 0))
4735 rtx t1, t2, t3;
4736 unsigned HOST_WIDE_INT d = INTVAL (op1);
4737 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4738 floor_log2 (d), tquotient, 1);
4739 t2 = expand_binop (compute_mode, and_optab, op0,
4740 gen_int_mode (d - 1, compute_mode),
4741 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4742 t3 = gen_reg_rtx (compute_mode);
4743 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4744 compute_mode, 1, 1);
4745 if (t3 == 0)
4747 rtx_code_label *lab;
4748 lab = gen_label_rtx ();
4749 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4750 expand_inc (t1, const1_rtx);
4751 emit_label (lab);
4752 quotient = t1;
4754 else
4755 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4756 t1, t3),
4757 tquotient);
4758 break;
4761 /* Try using an instruction that produces both the quotient and
4762 remainder, using truncation. We can easily compensate the
4763 quotient or remainder to get ceiling rounding, once we have the
4764 remainder. Notice that we compute also the final remainder
4765 value here, and return the result right away. */
4766 if (target == 0 || GET_MODE (target) != compute_mode)
4767 target = gen_reg_rtx (compute_mode);
4769 if (rem_flag)
4771 remainder = (REG_P (target)
4772 ? target : gen_reg_rtx (compute_mode));
4773 quotient = gen_reg_rtx (compute_mode);
4775 else
4777 quotient = (REG_P (target)
4778 ? target : gen_reg_rtx (compute_mode));
4779 remainder = gen_reg_rtx (compute_mode);
4782 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4783 remainder, 1))
4785 /* This could be computed with a branch-less sequence.
4786 Save that for later. */
4787 rtx_code_label *label = gen_label_rtx ();
4788 do_cmp_and_jump (remainder, const0_rtx, EQ,
4789 compute_mode, label);
4790 expand_inc (quotient, const1_rtx);
4791 expand_dec (remainder, op1);
4792 emit_label (label);
4793 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4796 /* No luck with division elimination or divmod. Have to do it
4797 by conditionally adjusting op0 *and* the result. */
4799 rtx_code_label *label1, *label2;
4800 rtx adjusted_op0, tem;
4802 quotient = gen_reg_rtx (compute_mode);
4803 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4804 label1 = gen_label_rtx ();
4805 label2 = gen_label_rtx ();
4806 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4807 compute_mode, label1);
4808 emit_move_insn (quotient, const0_rtx);
4809 emit_jump_insn (targetm.gen_jump (label2));
4810 emit_barrier ();
4811 emit_label (label1);
4812 expand_dec (adjusted_op0, const1_rtx);
4813 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4814 quotient, 1, OPTAB_LIB_WIDEN);
4815 if (tem != quotient)
4816 emit_move_insn (quotient, tem);
4817 expand_inc (quotient, const1_rtx);
4818 emit_label (label2);
4821 else /* signed */
4823 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4824 && INTVAL (op1) >= 0)
4826 /* This is extremely similar to the code for the unsigned case
4827 above. For 2.7 we should merge these variants, but for
4828 2.6.1 I don't want to touch the code for unsigned since that
4829 get used in C. The signed case will only be used by other
4830 languages (Ada). */
4832 rtx t1, t2, t3;
4833 unsigned HOST_WIDE_INT d = INTVAL (op1);
4834 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4835 floor_log2 (d), tquotient, 0);
4836 t2 = expand_binop (compute_mode, and_optab, op0,
4837 gen_int_mode (d - 1, compute_mode),
4838 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4839 t3 = gen_reg_rtx (compute_mode);
4840 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4841 compute_mode, 1, 1);
4842 if (t3 == 0)
4844 rtx_code_label *lab;
4845 lab = gen_label_rtx ();
4846 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4847 expand_inc (t1, const1_rtx);
4848 emit_label (lab);
4849 quotient = t1;
4851 else
4852 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4853 t1, t3),
4854 tquotient);
4855 break;
4858 /* Try using an instruction that produces both the quotient and
4859 remainder, using truncation. We can easily compensate the
4860 quotient or remainder to get ceiling rounding, once we have the
4861 remainder. Notice that we compute also the final remainder
4862 value here, and return the result right away. */
4863 if (target == 0 || GET_MODE (target) != compute_mode)
4864 target = gen_reg_rtx (compute_mode);
4865 if (rem_flag)
4867 remainder= (REG_P (target)
4868 ? target : gen_reg_rtx (compute_mode));
4869 quotient = gen_reg_rtx (compute_mode);
4871 else
4873 quotient = (REG_P (target)
4874 ? target : gen_reg_rtx (compute_mode));
4875 remainder = gen_reg_rtx (compute_mode);
4878 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4879 remainder, 0))
4881 /* This could be computed with a branch-less sequence.
4882 Save that for later. */
4883 rtx tem;
4884 rtx_code_label *label = gen_label_rtx ();
4885 do_cmp_and_jump (remainder, const0_rtx, EQ,
4886 compute_mode, label);
4887 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4888 NULL_RTX, 0, OPTAB_WIDEN);
4889 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4890 expand_inc (quotient, const1_rtx);
4891 expand_dec (remainder, op1);
4892 emit_label (label);
4893 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4896 /* No luck with division elimination or divmod. Have to do it
4897 by conditionally adjusting op0 *and* the result. */
4899 rtx_code_label *label1, *label2, *label3, *label4, *label5;
4900 rtx adjusted_op0;
4901 rtx tem;
4903 quotient = gen_reg_rtx (compute_mode);
4904 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4905 label1 = gen_label_rtx ();
4906 label2 = gen_label_rtx ();
4907 label3 = gen_label_rtx ();
4908 label4 = gen_label_rtx ();
4909 label5 = gen_label_rtx ();
4910 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4911 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4912 compute_mode, label1);
4913 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4914 quotient, 0, OPTAB_LIB_WIDEN);
4915 if (tem != quotient)
4916 emit_move_insn (quotient, tem);
4917 emit_jump_insn (targetm.gen_jump (label5));
4918 emit_barrier ();
4919 emit_label (label1);
4920 expand_dec (adjusted_op0, const1_rtx);
4921 emit_jump_insn (targetm.gen_jump (label4));
4922 emit_barrier ();
4923 emit_label (label2);
4924 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4925 compute_mode, label3);
4926 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4927 quotient, 0, OPTAB_LIB_WIDEN);
4928 if (tem != quotient)
4929 emit_move_insn (quotient, tem);
4930 emit_jump_insn (targetm.gen_jump (label5));
4931 emit_barrier ();
4932 emit_label (label3);
4933 expand_inc (adjusted_op0, const1_rtx);
4934 emit_label (label4);
4935 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4936 quotient, 0, OPTAB_LIB_WIDEN);
4937 if (tem != quotient)
4938 emit_move_insn (quotient, tem);
4939 expand_inc (quotient, const1_rtx);
4940 emit_label (label5);
4943 break;
4945 case EXACT_DIV_EXPR:
4946 if (op1_is_constant && size <= HOST_BITS_PER_WIDE_INT)
4948 HOST_WIDE_INT d = INTVAL (op1);
4949 unsigned HOST_WIDE_INT ml;
4950 int pre_shift;
4951 rtx t1;
4953 pre_shift = ctz_or_zero (d);
4954 ml = invert_mod2n (d >> pre_shift, size);
4955 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4956 pre_shift, NULL_RTX, unsignedp);
4957 quotient = expand_mult (compute_mode, t1,
4958 gen_int_mode (ml, compute_mode),
4959 NULL_RTX, 1);
4961 insn = get_last_insn ();
4962 set_dst_reg_note (insn, REG_EQUAL,
4963 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4964 compute_mode, op0, op1),
4965 quotient);
4967 break;
4969 case ROUND_DIV_EXPR:
4970 case ROUND_MOD_EXPR:
4971 if (unsignedp)
4973 rtx tem;
4974 rtx_code_label *label;
4975 label = gen_label_rtx ();
4976 quotient = gen_reg_rtx (compute_mode);
4977 remainder = gen_reg_rtx (compute_mode);
4978 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4980 rtx tem;
4981 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4982 quotient, 1, OPTAB_LIB_WIDEN);
4983 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4984 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4985 remainder, 1, OPTAB_LIB_WIDEN);
4987 tem = plus_constant (compute_mode, op1, -1);
4988 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4989 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4990 expand_inc (quotient, const1_rtx);
4991 expand_dec (remainder, op1);
4992 emit_label (label);
4994 else
4996 rtx abs_rem, abs_op1, tem, mask;
4997 rtx_code_label *label;
4998 label = gen_label_rtx ();
4999 quotient = gen_reg_rtx (compute_mode);
5000 remainder = gen_reg_rtx (compute_mode);
5001 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
5003 rtx tem;
5004 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
5005 quotient, 0, OPTAB_LIB_WIDEN);
5006 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
5007 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
5008 remainder, 0, OPTAB_LIB_WIDEN);
5010 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
5011 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
5012 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
5013 1, NULL_RTX, 1);
5014 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
5015 tem = expand_binop (compute_mode, xor_optab, op0, op1,
5016 NULL_RTX, 0, OPTAB_WIDEN);
5017 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
5018 size - 1, NULL_RTX, 0);
5019 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
5020 NULL_RTX, 0, OPTAB_WIDEN);
5021 tem = expand_binop (compute_mode, sub_optab, tem, mask,
5022 NULL_RTX, 0, OPTAB_WIDEN);
5023 expand_inc (quotient, tem);
5024 tem = expand_binop (compute_mode, xor_optab, mask, op1,
5025 NULL_RTX, 0, OPTAB_WIDEN);
5026 tem = expand_binop (compute_mode, sub_optab, tem, mask,
5027 NULL_RTX, 0, OPTAB_WIDEN);
5028 expand_dec (remainder, tem);
5029 emit_label (label);
5031 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5033 default:
5034 gcc_unreachable ();
5037 if (quotient == 0)
5039 if (target && GET_MODE (target) != compute_mode)
5040 target = 0;
5042 if (rem_flag)
5044 /* Try to produce the remainder without producing the quotient.
5045 If we seem to have a divmod pattern that does not require widening,
5046 don't try widening here. We should really have a WIDEN argument
5047 to expand_twoval_binop, since what we'd really like to do here is
5048 1) try a mod insn in compute_mode
5049 2) try a divmod insn in compute_mode
5050 3) try a div insn in compute_mode and multiply-subtract to get
5051 remainder
5052 4) try the same things with widening allowed. */
5053 remainder
5054 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5055 op0, op1, target,
5056 unsignedp,
5057 ((optab_handler (optab2, compute_mode)
5058 != CODE_FOR_nothing)
5059 ? OPTAB_DIRECT : OPTAB_WIDEN));
5060 if (remainder == 0)
5062 /* No luck there. Can we do remainder and divide at once
5063 without a library call? */
5064 remainder = gen_reg_rtx (compute_mode);
5065 if (! expand_twoval_binop ((unsignedp
5066 ? udivmod_optab
5067 : sdivmod_optab),
5068 op0, op1,
5069 NULL_RTX, remainder, unsignedp))
5070 remainder = 0;
5073 if (remainder)
5074 return gen_lowpart (mode, remainder);
5077 /* Produce the quotient. Try a quotient insn, but not a library call.
5078 If we have a divmod in this mode, use it in preference to widening
5079 the div (for this test we assume it will not fail). Note that optab2
5080 is set to the one of the two optabs that the call below will use. */
5081 quotient
5082 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5083 op0, op1, rem_flag ? NULL_RTX : target,
5084 unsignedp,
5085 ((optab_handler (optab2, compute_mode)
5086 != CODE_FOR_nothing)
5087 ? OPTAB_DIRECT : OPTAB_WIDEN));
5089 if (quotient == 0)
5091 /* No luck there. Try a quotient-and-remainder insn,
5092 keeping the quotient alone. */
5093 quotient = gen_reg_rtx (compute_mode);
5094 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5095 op0, op1,
5096 quotient, NULL_RTX, unsignedp))
5098 quotient = 0;
5099 if (! rem_flag)
5100 /* Still no luck. If we are not computing the remainder,
5101 use a library call for the quotient. */
5102 quotient = sign_expand_binop (compute_mode,
5103 udiv_optab, sdiv_optab,
5104 op0, op1, target,
5105 unsignedp, OPTAB_LIB_WIDEN);
5110 if (rem_flag)
5112 if (target && GET_MODE (target) != compute_mode)
5113 target = 0;
5115 if (quotient == 0)
5117 /* No divide instruction either. Use library for remainder. */
5118 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5119 op0, op1, target,
5120 unsignedp, OPTAB_LIB_WIDEN);
5121 /* No remainder function. Try a quotient-and-remainder
5122 function, keeping the remainder. */
5123 if (!remainder)
5125 remainder = gen_reg_rtx (compute_mode);
5126 if (!expand_twoval_binop_libfunc
5127 (unsignedp ? udivmod_optab : sdivmod_optab,
5128 op0, op1,
5129 NULL_RTX, remainder,
5130 unsignedp ? UMOD : MOD))
5131 remainder = NULL_RTX;
5134 else
5136 /* We divided. Now finish doing X - Y * (X / Y). */
5137 remainder = expand_mult (compute_mode, quotient, op1,
5138 NULL_RTX, unsignedp);
5139 remainder = expand_binop (compute_mode, sub_optab, op0,
5140 remainder, target, unsignedp,
5141 OPTAB_LIB_WIDEN);
5145 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5148 /* Return a tree node with data type TYPE, describing the value of X.
5149 Usually this is an VAR_DECL, if there is no obvious better choice.
5150 X may be an expression, however we only support those expressions
5151 generated by loop.c. */
5153 tree
5154 make_tree (tree type, rtx x)
5156 tree t;
5158 switch (GET_CODE (x))
5160 case CONST_INT:
5161 case CONST_WIDE_INT:
5162 t = wide_int_to_tree (type, rtx_mode_t (x, TYPE_MODE (type)));
5163 return t;
5165 case CONST_DOUBLE:
5166 STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5167 if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5168 t = wide_int_to_tree (type,
5169 wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5170 HOST_BITS_PER_WIDE_INT * 2));
5171 else
5172 t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5174 return t;
5176 case CONST_VECTOR:
5178 int units = CONST_VECTOR_NUNITS (x);
5179 tree itype = TREE_TYPE (type);
5180 tree *elts;
5181 int i;
5183 /* Build a tree with vector elements. */
5184 elts = XALLOCAVEC (tree, units);
5185 for (i = units - 1; i >= 0; --i)
5187 rtx elt = CONST_VECTOR_ELT (x, i);
5188 elts[i] = make_tree (itype, elt);
5191 return build_vector (type, elts);
5194 case PLUS:
5195 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5196 make_tree (type, XEXP (x, 1)));
5198 case MINUS:
5199 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5200 make_tree (type, XEXP (x, 1)));
5202 case NEG:
5203 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5205 case MULT:
5206 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5207 make_tree (type, XEXP (x, 1)));
5209 case ASHIFT:
5210 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5211 make_tree (type, XEXP (x, 1)));
5213 case LSHIFTRT:
5214 t = unsigned_type_for (type);
5215 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5216 make_tree (t, XEXP (x, 0)),
5217 make_tree (type, XEXP (x, 1))));
5219 case ASHIFTRT:
5220 t = signed_type_for (type);
5221 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5222 make_tree (t, XEXP (x, 0)),
5223 make_tree (type, XEXP (x, 1))));
5225 case DIV:
5226 if (TREE_CODE (type) != REAL_TYPE)
5227 t = signed_type_for (type);
5228 else
5229 t = type;
5231 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5232 make_tree (t, XEXP (x, 0)),
5233 make_tree (t, XEXP (x, 1))));
5234 case UDIV:
5235 t = unsigned_type_for (type);
5236 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5237 make_tree (t, XEXP (x, 0)),
5238 make_tree (t, XEXP (x, 1))));
5240 case SIGN_EXTEND:
5241 case ZERO_EXTEND:
5242 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5243 GET_CODE (x) == ZERO_EXTEND);
5244 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5246 case CONST:
5247 return make_tree (type, XEXP (x, 0));
5249 case SYMBOL_REF:
5250 t = SYMBOL_REF_DECL (x);
5251 if (t)
5252 return fold_convert (type, build_fold_addr_expr (t));
5253 /* fall through. */
5255 default:
5256 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5258 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5259 address mode to pointer mode. */
5260 if (POINTER_TYPE_P (type))
5261 x = convert_memory_address_addr_space
5262 (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5264 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5265 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5266 t->decl_with_rtl.rtl = x;
5268 return t;
5272 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5273 and returning TARGET.
5275 If TARGET is 0, a pseudo-register or constant is returned. */
5278 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5280 rtx tem = 0;
5282 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5283 tem = simplify_binary_operation (AND, mode, op0, op1);
5284 if (tem == 0)
5285 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5287 if (target == 0)
5288 target = tem;
5289 else if (tem != target)
5290 emit_move_insn (target, tem);
5291 return target;
5294 /* Helper function for emit_store_flag. */
5296 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5297 machine_mode mode, machine_mode compare_mode,
5298 int unsignedp, rtx x, rtx y, int normalizep,
5299 machine_mode target_mode)
5301 struct expand_operand ops[4];
5302 rtx op0, comparison, subtarget;
5303 rtx_insn *last;
5304 machine_mode result_mode = targetm.cstore_mode (icode);
5306 last = get_last_insn ();
5307 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5308 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5309 if (!x || !y)
5311 delete_insns_since (last);
5312 return NULL_RTX;
5315 if (target_mode == VOIDmode)
5316 target_mode = result_mode;
5317 if (!target)
5318 target = gen_reg_rtx (target_mode);
5320 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5322 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5323 create_fixed_operand (&ops[1], comparison);
5324 create_fixed_operand (&ops[2], x);
5325 create_fixed_operand (&ops[3], y);
5326 if (!maybe_expand_insn (icode, 4, ops))
5328 delete_insns_since (last);
5329 return NULL_RTX;
5331 subtarget = ops[0].value;
5333 /* If we are converting to a wider mode, first convert to
5334 TARGET_MODE, then normalize. This produces better combining
5335 opportunities on machines that have a SIGN_EXTRACT when we are
5336 testing a single bit. This mostly benefits the 68k.
5338 If STORE_FLAG_VALUE does not have the sign bit set when
5339 interpreted in MODE, we can do this conversion as unsigned, which
5340 is usually more efficient. */
5341 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5343 convert_move (target, subtarget,
5344 val_signbit_known_clear_p (result_mode,
5345 STORE_FLAG_VALUE));
5346 op0 = target;
5347 result_mode = target_mode;
5349 else
5350 op0 = subtarget;
5352 /* If we want to keep subexpressions around, don't reuse our last
5353 target. */
5354 if (optimize)
5355 subtarget = 0;
5357 /* Now normalize to the proper value in MODE. Sometimes we don't
5358 have to do anything. */
5359 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5361 /* STORE_FLAG_VALUE might be the most negative number, so write
5362 the comparison this way to avoid a compiler-time warning. */
5363 else if (- normalizep == STORE_FLAG_VALUE)
5364 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5366 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5367 it hard to use a value of just the sign bit due to ANSI integer
5368 constant typing rules. */
5369 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5370 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5371 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5372 normalizep == 1);
5373 else
5375 gcc_assert (STORE_FLAG_VALUE & 1);
5377 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5378 if (normalizep == -1)
5379 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5382 /* If we were converting to a smaller mode, do the conversion now. */
5383 if (target_mode != result_mode)
5385 convert_move (target, op0, 0);
5386 return target;
5388 else
5389 return op0;
5393 /* A subroutine of emit_store_flag only including "tricks" that do not
5394 need a recursive call. These are kept separate to avoid infinite
5395 loops. */
5397 static rtx
5398 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5399 machine_mode mode, int unsignedp, int normalizep,
5400 machine_mode target_mode)
5402 rtx subtarget;
5403 enum insn_code icode;
5404 machine_mode compare_mode;
5405 enum mode_class mclass;
5406 enum rtx_code scode;
5408 if (unsignedp)
5409 code = unsigned_condition (code);
5410 scode = swap_condition (code);
5412 /* If one operand is constant, make it the second one. Only do this
5413 if the other operand is not constant as well. */
5415 if (swap_commutative_operands_p (op0, op1))
5417 std::swap (op0, op1);
5418 code = swap_condition (code);
5421 if (mode == VOIDmode)
5422 mode = GET_MODE (op0);
5424 /* For some comparisons with 1 and -1, we can convert this to
5425 comparisons with zero. This will often produce more opportunities for
5426 store-flag insns. */
5428 switch (code)
5430 case LT:
5431 if (op1 == const1_rtx)
5432 op1 = const0_rtx, code = LE;
5433 break;
5434 case LE:
5435 if (op1 == constm1_rtx)
5436 op1 = const0_rtx, code = LT;
5437 break;
5438 case GE:
5439 if (op1 == const1_rtx)
5440 op1 = const0_rtx, code = GT;
5441 break;
5442 case GT:
5443 if (op1 == constm1_rtx)
5444 op1 = const0_rtx, code = GE;
5445 break;
5446 case GEU:
5447 if (op1 == const1_rtx)
5448 op1 = const0_rtx, code = NE;
5449 break;
5450 case LTU:
5451 if (op1 == const1_rtx)
5452 op1 = const0_rtx, code = EQ;
5453 break;
5454 default:
5455 break;
5458 /* If we are comparing a double-word integer with zero or -1, we can
5459 convert the comparison into one involving a single word. */
5460 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5461 && GET_MODE_CLASS (mode) == MODE_INT
5462 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5464 rtx tem;
5465 if ((code == EQ || code == NE)
5466 && (op1 == const0_rtx || op1 == constm1_rtx))
5468 rtx op00, op01;
5470 /* Do a logical OR or AND of the two words and compare the
5471 result. */
5472 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5473 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5474 tem = expand_binop (word_mode,
5475 op1 == const0_rtx ? ior_optab : and_optab,
5476 op00, op01, NULL_RTX, unsignedp,
5477 OPTAB_DIRECT);
5479 if (tem != 0)
5480 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5481 unsignedp, normalizep);
5483 else if ((code == LT || code == GE) && op1 == const0_rtx)
5485 rtx op0h;
5487 /* If testing the sign bit, can just test on high word. */
5488 op0h = simplify_gen_subreg (word_mode, op0, mode,
5489 subreg_highpart_offset (word_mode,
5490 mode));
5491 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5492 unsignedp, normalizep);
5494 else
5495 tem = NULL_RTX;
5497 if (tem)
5499 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5500 return tem;
5501 if (!target)
5502 target = gen_reg_rtx (target_mode);
5504 convert_move (target, tem,
5505 !val_signbit_known_set_p (word_mode,
5506 (normalizep ? normalizep
5507 : STORE_FLAG_VALUE)));
5508 return target;
5512 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5513 complement of A (for GE) and shifting the sign bit to the low bit. */
5514 if (op1 == const0_rtx && (code == LT || code == GE)
5515 && GET_MODE_CLASS (mode) == MODE_INT
5516 && (normalizep || STORE_FLAG_VALUE == 1
5517 || val_signbit_p (mode, STORE_FLAG_VALUE)))
5519 subtarget = target;
5521 if (!target)
5522 target_mode = mode;
5524 /* If the result is to be wider than OP0, it is best to convert it
5525 first. If it is to be narrower, it is *incorrect* to convert it
5526 first. */
5527 else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5529 op0 = convert_modes (target_mode, mode, op0, 0);
5530 mode = target_mode;
5533 if (target_mode != mode)
5534 subtarget = 0;
5536 if (code == GE)
5537 op0 = expand_unop (mode, one_cmpl_optab, op0,
5538 ((STORE_FLAG_VALUE == 1 || normalizep)
5539 ? 0 : subtarget), 0);
5541 if (STORE_FLAG_VALUE == 1 || normalizep)
5542 /* If we are supposed to produce a 0/1 value, we want to do
5543 a logical shift from the sign bit to the low-order bit; for
5544 a -1/0 value, we do an arithmetic shift. */
5545 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5546 GET_MODE_BITSIZE (mode) - 1,
5547 subtarget, normalizep != -1);
5549 if (mode != target_mode)
5550 op0 = convert_modes (target_mode, mode, op0, 0);
5552 return op0;
5555 mclass = GET_MODE_CLASS (mode);
5556 FOR_EACH_MODE_FROM (compare_mode, mode)
5558 machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5559 icode = optab_handler (cstore_optab, optab_mode);
5560 if (icode != CODE_FOR_nothing)
5562 do_pending_stack_adjust ();
5563 rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5564 unsignedp, op0, op1, normalizep, target_mode);
5565 if (tem)
5566 return tem;
5568 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5570 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5571 unsignedp, op1, op0, normalizep, target_mode);
5572 if (tem)
5573 return tem;
5575 break;
5579 return 0;
5582 /* Subroutine of emit_store_flag that handles cases in which the operands
5583 are scalar integers. SUBTARGET is the target to use for temporary
5584 operations and TRUEVAL is the value to store when the condition is
5585 true. All other arguments are as for emit_store_flag. */
5588 emit_store_flag_int (rtx target, rtx subtarget, enum rtx_code code, rtx op0,
5589 rtx op1, machine_mode mode, int unsignedp,
5590 int normalizep, rtx trueval)
5592 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5593 rtx_insn *last = get_last_insn ();
5594 rtx tem;
5596 /* If this is an equality comparison of integers, we can try to exclusive-or
5597 (or subtract) the two operands and use a recursive call to try the
5598 comparison with zero. Don't do any of these cases if branches are
5599 very cheap. */
5601 if ((code == EQ || code == NE) && op1 != const0_rtx)
5603 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5604 OPTAB_WIDEN);
5606 if (tem == 0)
5607 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5608 OPTAB_WIDEN);
5609 if (tem != 0)
5610 tem = emit_store_flag (target, code, tem, const0_rtx,
5611 mode, unsignedp, normalizep);
5612 if (tem != 0)
5613 return tem;
5615 delete_insns_since (last);
5618 /* For integer comparisons, try the reverse comparison. However, for
5619 small X and if we'd have anyway to extend, implementing "X != 0"
5620 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5621 rtx_code rcode = reverse_condition (code);
5622 if (can_compare_p (rcode, mode, ccp_store_flag)
5623 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5624 && code == NE
5625 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5626 && op1 == const0_rtx))
5628 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5629 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5631 /* Again, for the reverse comparison, use either an addition or a XOR. */
5632 if (want_add
5633 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5634 optimize_insn_for_speed_p ()) == 0)
5636 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5637 STORE_FLAG_VALUE, target_mode);
5638 if (tem != 0)
5639 tem = expand_binop (target_mode, add_optab, tem,
5640 gen_int_mode (normalizep, target_mode),
5641 target, 0, OPTAB_WIDEN);
5643 else if (!want_add
5644 && rtx_cost (trueval, mode, XOR, 1,
5645 optimize_insn_for_speed_p ()) == 0)
5647 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5648 normalizep, target_mode);
5649 if (tem != 0)
5650 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5651 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5654 if (tem != 0)
5655 return tem;
5656 delete_insns_since (last);
5659 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5660 the constant zero. Reject all other comparisons at this point. Only
5661 do LE and GT if branches are expensive since they are expensive on
5662 2-operand machines. */
5664 if (op1 != const0_rtx
5665 || (code != EQ && code != NE
5666 && (BRANCH_COST (optimize_insn_for_speed_p (),
5667 false) <= 1 || (code != LE && code != GT))))
5668 return 0;
5670 /* Try to put the result of the comparison in the sign bit. Assume we can't
5671 do the necessary operation below. */
5673 tem = 0;
5675 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5676 the sign bit set. */
5678 if (code == LE)
5680 /* This is destructive, so SUBTARGET can't be OP0. */
5681 if (rtx_equal_p (subtarget, op0))
5682 subtarget = 0;
5684 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5685 OPTAB_WIDEN);
5686 if (tem)
5687 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5688 OPTAB_WIDEN);
5691 /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5692 number of bits in the mode of OP0, minus one. */
5694 if (code == GT)
5696 if (rtx_equal_p (subtarget, op0))
5697 subtarget = 0;
5699 tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
5700 GET_MODE_BITSIZE (mode) - 1,
5701 subtarget, 0);
5702 if (tem)
5703 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5704 OPTAB_WIDEN);
5707 if (code == EQ || code == NE)
5709 /* For EQ or NE, one way to do the comparison is to apply an operation
5710 that converts the operand into a positive number if it is nonzero
5711 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5712 for NE we negate. This puts the result in the sign bit. Then we
5713 normalize with a shift, if needed.
5715 Two operations that can do the above actions are ABS and FFS, so try
5716 them. If that doesn't work, and MODE is smaller than a full word,
5717 we can use zero-extension to the wider mode (an unsigned conversion)
5718 as the operation. */
5720 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5721 that is compensated by the subsequent overflow when subtracting
5722 one / negating. */
5724 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5725 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5726 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5727 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5728 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5730 tem = convert_modes (word_mode, mode, op0, 1);
5731 mode = word_mode;
5734 if (tem != 0)
5736 if (code == EQ)
5737 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5738 0, OPTAB_WIDEN);
5739 else
5740 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5743 /* If we couldn't do it that way, for NE we can "or" the two's complement
5744 of the value with itself. For EQ, we take the one's complement of
5745 that "or", which is an extra insn, so we only handle EQ if branches
5746 are expensive. */
5748 if (tem == 0
5749 && (code == NE
5750 || BRANCH_COST (optimize_insn_for_speed_p (),
5751 false) > 1))
5753 if (rtx_equal_p (subtarget, op0))
5754 subtarget = 0;
5756 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5757 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5758 OPTAB_WIDEN);
5760 if (tem && code == EQ)
5761 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5765 if (tem && normalizep)
5766 tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem,
5767 GET_MODE_BITSIZE (mode) - 1,
5768 subtarget, normalizep == 1);
5770 if (tem)
5772 if (!target)
5774 else if (GET_MODE (tem) != target_mode)
5776 convert_move (target, tem, 0);
5777 tem = target;
5779 else if (!subtarget)
5781 emit_move_insn (target, tem);
5782 tem = target;
5785 else
5786 delete_insns_since (last);
5788 return tem;
5791 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5792 and storing in TARGET. Normally return TARGET.
5793 Return 0 if that cannot be done.
5795 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5796 it is VOIDmode, they cannot both be CONST_INT.
5798 UNSIGNEDP is for the case where we have to widen the operands
5799 to perform the operation. It says to use zero-extension.
5801 NORMALIZEP is 1 if we should convert the result to be either zero
5802 or one. Normalize is -1 if we should convert the result to be
5803 either zero or -1. If NORMALIZEP is zero, the result will be left
5804 "raw" out of the scc insn. */
5807 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5808 machine_mode mode, int unsignedp, int normalizep)
5810 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5811 enum rtx_code rcode;
5812 rtx subtarget;
5813 rtx tem, trueval;
5814 rtx_insn *last;
5816 /* If we compare constants, we shouldn't use a store-flag operation,
5817 but a constant load. We can get there via the vanilla route that
5818 usually generates a compare-branch sequence, but will in this case
5819 fold the comparison to a constant, and thus elide the branch. */
5820 if (CONSTANT_P (op0) && CONSTANT_P (op1))
5821 return NULL_RTX;
5823 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5824 target_mode);
5825 if (tem)
5826 return tem;
5828 /* If we reached here, we can't do this with a scc insn, however there
5829 are some comparisons that can be done in other ways. Don't do any
5830 of these cases if branches are very cheap. */
5831 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5832 return 0;
5834 /* See what we need to return. We can only return a 1, -1, or the
5835 sign bit. */
5837 if (normalizep == 0)
5839 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5840 normalizep = STORE_FLAG_VALUE;
5842 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5844 else
5845 return 0;
5848 last = get_last_insn ();
5850 /* If optimizing, use different pseudo registers for each insn, instead
5851 of reusing the same pseudo. This leads to better CSE, but slows
5852 down the compiler, since there are more pseudos. */
5853 subtarget = (!optimize
5854 && (target_mode == mode)) ? target : NULL_RTX;
5855 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5857 /* For floating-point comparisons, try the reverse comparison or try
5858 changing the "orderedness" of the comparison. */
5859 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5861 enum rtx_code first_code;
5862 bool and_them;
5864 rcode = reverse_condition_maybe_unordered (code);
5865 if (can_compare_p (rcode, mode, ccp_store_flag)
5866 && (code == ORDERED || code == UNORDERED
5867 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5868 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5870 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5871 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5873 /* For the reverse comparison, use either an addition or a XOR. */
5874 if (want_add
5875 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5876 optimize_insn_for_speed_p ()) == 0)
5878 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5879 STORE_FLAG_VALUE, target_mode);
5880 if (tem)
5881 return expand_binop (target_mode, add_optab, tem,
5882 gen_int_mode (normalizep, target_mode),
5883 target, 0, OPTAB_WIDEN);
5885 else if (!want_add
5886 && rtx_cost (trueval, mode, XOR, 1,
5887 optimize_insn_for_speed_p ()) == 0)
5889 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5890 normalizep, target_mode);
5891 if (tem)
5892 return expand_binop (target_mode, xor_optab, tem, trueval,
5893 target, INTVAL (trueval) >= 0,
5894 OPTAB_WIDEN);
5898 delete_insns_since (last);
5900 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5901 if (code == ORDERED || code == UNORDERED)
5902 return 0;
5904 and_them = split_comparison (code, mode, &first_code, &code);
5906 /* If there are no NaNs, the first comparison should always fall through.
5907 Effectively change the comparison to the other one. */
5908 if (!HONOR_NANS (mode))
5910 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5911 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5912 target_mode);
5915 if (!HAVE_conditional_move)
5916 return 0;
5918 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5919 conditional move. */
5920 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5921 normalizep, target_mode);
5922 if (tem == 0)
5923 return 0;
5925 if (and_them)
5926 tem = emit_conditional_move (target, code, op0, op1, mode,
5927 tem, const0_rtx, GET_MODE (tem), 0);
5928 else
5929 tem = emit_conditional_move (target, code, op0, op1, mode,
5930 trueval, tem, GET_MODE (tem), 0);
5932 if (tem == 0)
5933 delete_insns_since (last);
5934 return tem;
5937 /* The remaining tricks only apply to integer comparisons. */
5939 if (GET_MODE_CLASS (mode) == MODE_INT)
5940 return emit_store_flag_int (target, subtarget, code, op0, op1, mode,
5941 unsignedp, normalizep, trueval);
5943 return 0;
5946 /* Like emit_store_flag, but always succeeds. */
5949 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5950 machine_mode mode, int unsignedp, int normalizep)
5952 rtx tem;
5953 rtx_code_label *label;
5954 rtx trueval, falseval;
5956 /* First see if emit_store_flag can do the job. */
5957 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5958 if (tem != 0)
5959 return tem;
5961 if (!target)
5962 target = gen_reg_rtx (word_mode);
5964 /* If this failed, we have to do this with set/compare/jump/set code.
5965 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
5966 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5967 if (code == NE
5968 && GET_MODE_CLASS (mode) == MODE_INT
5969 && REG_P (target)
5970 && op0 == target
5971 && op1 == const0_rtx)
5973 label = gen_label_rtx ();
5974 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
5975 NULL_RTX, NULL, label,
5976 profile_probability::uninitialized ());
5977 emit_move_insn (target, trueval);
5978 emit_label (label);
5979 return target;
5982 if (!REG_P (target)
5983 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5984 target = gen_reg_rtx (GET_MODE (target));
5986 /* Jump in the right direction if the target cannot implement CODE
5987 but can jump on its reverse condition. */
5988 falseval = const0_rtx;
5989 if (! can_compare_p (code, mode, ccp_jump)
5990 && (! FLOAT_MODE_P (mode)
5991 || code == ORDERED || code == UNORDERED
5992 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5993 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5995 enum rtx_code rcode;
5996 if (FLOAT_MODE_P (mode))
5997 rcode = reverse_condition_maybe_unordered (code);
5998 else
5999 rcode = reverse_condition (code);
6001 /* Canonicalize to UNORDERED for the libcall. */
6002 if (can_compare_p (rcode, mode, ccp_jump)
6003 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
6005 falseval = trueval;
6006 trueval = const0_rtx;
6007 code = rcode;
6011 emit_move_insn (target, trueval);
6012 label = gen_label_rtx ();
6013 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
6014 label, profile_probability::uninitialized ());
6016 emit_move_insn (target, falseval);
6017 emit_label (label);
6019 return target;
6022 /* Perform possibly multi-word comparison and conditional jump to LABEL
6023 if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE. This is
6024 now a thin wrapper around do_compare_rtx_and_jump. */
6026 static void
6027 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
6028 rtx_code_label *label)
6030 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
6031 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
6032 NULL, label, profile_probability::uninitialized ());