Configury changes for obstack optimization
[official-gcc.git] / gcc / expmed.c
blob97a15130e4fefd2c11154abac51cce4dd1d5e109
1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987-2015 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "predict.h"
30 #include "tm_p.h"
31 #include "expmed.h"
32 #include "optabs.h"
33 #include "emit-rtl.h"
34 #include "diagnostic-core.h"
35 #include "fold-const.h"
36 #include "stor-layout.h"
37 #include "dojump.h"
38 #include "explow.h"
39 #include "expr.h"
40 #include "langhooks.h"
42 struct target_expmed default_target_expmed;
43 #if SWITCHABLE_TARGET
44 struct target_expmed *this_target_expmed = &default_target_expmed;
45 #endif
47 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
48 unsigned HOST_WIDE_INT,
49 unsigned HOST_WIDE_INT,
50 unsigned HOST_WIDE_INT,
51 rtx, bool);
52 static void store_fixed_bit_field_1 (rtx, unsigned HOST_WIDE_INT,
53 unsigned HOST_WIDE_INT,
54 rtx, bool);
55 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
56 unsigned HOST_WIDE_INT,
57 unsigned HOST_WIDE_INT,
58 unsigned HOST_WIDE_INT,
59 rtx, bool);
60 static rtx extract_fixed_bit_field (machine_mode, rtx,
61 unsigned HOST_WIDE_INT,
62 unsigned HOST_WIDE_INT, rtx, int, bool);
63 static rtx extract_fixed_bit_field_1 (machine_mode, rtx,
64 unsigned HOST_WIDE_INT,
65 unsigned HOST_WIDE_INT, rtx, int, bool);
66 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
67 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
68 unsigned HOST_WIDE_INT, int, bool);
69 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
70 static rtx expand_smod_pow2 (machine_mode, rtx, HOST_WIDE_INT);
71 static rtx expand_sdiv_pow2 (machine_mode, rtx, HOST_WIDE_INT);
73 /* Return a constant integer mask value of mode MODE with BITSIZE ones
74 followed by BITPOS zeros, or the complement of that if COMPLEMENT.
75 The mask is truncated if necessary to the width of mode MODE. The
76 mask is zero-extended if BITSIZE+BITPOS is too small for MODE. */
78 static inline rtx
79 mask_rtx (machine_mode mode, int bitpos, int bitsize, bool complement)
81 return immed_wide_int_const
82 (wi::shifted_mask (bitpos, bitsize, complement,
83 GET_MODE_PRECISION (mode)), mode);
86 /* Test whether a value is zero of a power of two. */
87 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
88 (((x) & ((x) - (unsigned HOST_WIDE_INT) 1)) == 0)
90 struct init_expmed_rtl
92 rtx reg;
93 rtx plus;
94 rtx neg;
95 rtx mult;
96 rtx sdiv;
97 rtx udiv;
98 rtx sdiv_32;
99 rtx smod_32;
100 rtx wide_mult;
101 rtx wide_lshr;
102 rtx wide_trunc;
103 rtx shift;
104 rtx shift_mult;
105 rtx shift_add;
106 rtx shift_sub0;
107 rtx shift_sub1;
108 rtx zext;
109 rtx trunc;
111 rtx pow2[MAX_BITS_PER_WORD];
112 rtx cint[MAX_BITS_PER_WORD];
115 static void
116 init_expmed_one_conv (struct init_expmed_rtl *all, machine_mode to_mode,
117 machine_mode from_mode, bool speed)
119 int to_size, from_size;
120 rtx which;
122 to_size = GET_MODE_PRECISION (to_mode);
123 from_size = GET_MODE_PRECISION (from_mode);
125 /* Most partial integers have a precision less than the "full"
126 integer it requires for storage. In case one doesn't, for
127 comparison purposes here, reduce the bit size by one in that
128 case. */
129 if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
130 && exact_log2 (to_size) != -1)
131 to_size --;
132 if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
133 && exact_log2 (from_size) != -1)
134 from_size --;
136 /* Assume cost of zero-extend and sign-extend is the same. */
137 which = (to_size < from_size ? all->trunc : all->zext);
139 PUT_MODE (all->reg, from_mode);
140 set_convert_cost (to_mode, from_mode, speed,
141 set_src_cost (which, to_mode, speed));
144 static void
145 init_expmed_one_mode (struct init_expmed_rtl *all,
146 machine_mode mode, int speed)
148 int m, n, mode_bitsize;
149 machine_mode mode_from;
151 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
153 PUT_MODE (all->reg, mode);
154 PUT_MODE (all->plus, mode);
155 PUT_MODE (all->neg, mode);
156 PUT_MODE (all->mult, mode);
157 PUT_MODE (all->sdiv, mode);
158 PUT_MODE (all->udiv, mode);
159 PUT_MODE (all->sdiv_32, mode);
160 PUT_MODE (all->smod_32, mode);
161 PUT_MODE (all->wide_trunc, mode);
162 PUT_MODE (all->shift, mode);
163 PUT_MODE (all->shift_mult, mode);
164 PUT_MODE (all->shift_add, mode);
165 PUT_MODE (all->shift_sub0, mode);
166 PUT_MODE (all->shift_sub1, mode);
167 PUT_MODE (all->zext, mode);
168 PUT_MODE (all->trunc, mode);
170 set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
171 set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
172 set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
173 set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
174 set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
176 set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
177 <= 2 * add_cost (speed, mode)));
178 set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
179 <= 4 * add_cost (speed, mode)));
181 set_shift_cost (speed, mode, 0, 0);
183 int cost = add_cost (speed, mode);
184 set_shiftadd_cost (speed, mode, 0, cost);
185 set_shiftsub0_cost (speed, mode, 0, cost);
186 set_shiftsub1_cost (speed, mode, 0, cost);
189 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
190 for (m = 1; m < n; m++)
192 XEXP (all->shift, 1) = all->cint[m];
193 XEXP (all->shift_mult, 1) = all->pow2[m];
195 set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
196 set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
197 speed));
198 set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
199 speed));
200 set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
201 speed));
204 if (SCALAR_INT_MODE_P (mode))
206 for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
207 mode_from = (machine_mode)(mode_from + 1))
208 init_expmed_one_conv (all, mode, mode_from, speed);
210 if (GET_MODE_CLASS (mode) == MODE_INT)
212 machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
213 if (wider_mode != VOIDmode)
215 PUT_MODE (all->zext, wider_mode);
216 PUT_MODE (all->wide_mult, wider_mode);
217 PUT_MODE (all->wide_lshr, wider_mode);
218 XEXP (all->wide_lshr, 1) = GEN_INT (mode_bitsize);
220 set_mul_widen_cost (speed, wider_mode,
221 set_src_cost (all->wide_mult, wider_mode, speed));
222 set_mul_highpart_cost (speed, mode,
223 set_src_cost (all->wide_trunc, mode, speed));
228 void
229 init_expmed (void)
231 struct init_expmed_rtl all;
232 machine_mode mode = QImode;
233 int m, speed;
235 memset (&all, 0, sizeof all);
236 for (m = 1; m < MAX_BITS_PER_WORD; m++)
238 all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
239 all.cint[m] = GEN_INT (m);
242 /* Avoid using hard regs in ways which may be unsupported. */
243 all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
244 all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
245 all.neg = gen_rtx_NEG (mode, all.reg);
246 all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
247 all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
248 all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
249 all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
250 all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
251 all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
252 all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
253 all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
254 all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
255 all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
256 all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
257 all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
258 all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
259 all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
260 all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
262 for (speed = 0; speed < 2; speed++)
264 crtl->maybe_hot_insn_p = speed;
265 set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
267 for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
268 mode = (machine_mode)(mode + 1))
269 init_expmed_one_mode (&all, mode, speed);
271 if (MIN_MODE_PARTIAL_INT != VOIDmode)
272 for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
273 mode = (machine_mode)(mode + 1))
274 init_expmed_one_mode (&all, mode, speed);
276 if (MIN_MODE_VECTOR_INT != VOIDmode)
277 for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
278 mode = (machine_mode)(mode + 1))
279 init_expmed_one_mode (&all, mode, speed);
282 if (alg_hash_used_p ())
284 struct alg_hash_entry *p = alg_hash_entry_ptr (0);
285 memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
287 else
288 set_alg_hash_used_p (true);
289 default_rtl_profile ();
291 ggc_free (all.trunc);
292 ggc_free (all.shift_sub1);
293 ggc_free (all.shift_sub0);
294 ggc_free (all.shift_add);
295 ggc_free (all.shift_mult);
296 ggc_free (all.shift);
297 ggc_free (all.wide_trunc);
298 ggc_free (all.wide_lshr);
299 ggc_free (all.wide_mult);
300 ggc_free (all.zext);
301 ggc_free (all.smod_32);
302 ggc_free (all.sdiv_32);
303 ggc_free (all.udiv);
304 ggc_free (all.sdiv);
305 ggc_free (all.mult);
306 ggc_free (all.neg);
307 ggc_free (all.plus);
308 ggc_free (all.reg);
311 /* Return an rtx representing minus the value of X.
312 MODE is the intended mode of the result,
313 useful if X is a CONST_INT. */
316 negate_rtx (machine_mode mode, rtx x)
318 rtx result = simplify_unary_operation (NEG, mode, x, mode);
320 if (result == 0)
321 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
323 return result;
326 /* Whether reverse storage order is supported on the target. */
327 static int reverse_storage_order_supported = -1;
329 /* Check whether reverse storage order is supported on the target. */
331 static void
332 check_reverse_storage_order_support (void)
334 if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
336 reverse_storage_order_supported = 0;
337 sorry ("reverse scalar storage order");
339 else
340 reverse_storage_order_supported = 1;
343 /* Whether reverse FP storage order is supported on the target. */
344 static int reverse_float_storage_order_supported = -1;
346 /* Check whether reverse FP storage order is supported on the target. */
348 static void
349 check_reverse_float_storage_order_support (void)
351 if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
353 reverse_float_storage_order_supported = 0;
354 sorry ("reverse floating-point scalar storage order");
356 else
357 reverse_float_storage_order_supported = 1;
360 /* Return an rtx representing value of X with reverse storage order.
361 MODE is the intended mode of the result,
362 useful if X is a CONST_INT. */
365 flip_storage_order (enum machine_mode mode, rtx x)
367 enum machine_mode int_mode;
368 rtx result;
370 if (mode == QImode)
371 return x;
373 if (COMPLEX_MODE_P (mode))
375 rtx real = read_complex_part (x, false);
376 rtx imag = read_complex_part (x, true);
378 real = flip_storage_order (GET_MODE_INNER (mode), real);
379 imag = flip_storage_order (GET_MODE_INNER (mode), imag);
381 return gen_rtx_CONCAT (mode, real, imag);
384 if (__builtin_expect (reverse_storage_order_supported < 0, 0))
385 check_reverse_storage_order_support ();
387 if (SCALAR_INT_MODE_P (mode))
388 int_mode = mode;
389 else
391 if (FLOAT_MODE_P (mode)
392 && __builtin_expect (reverse_float_storage_order_supported < 0, 0))
393 check_reverse_float_storage_order_support ();
395 int_mode = mode_for_size (GET_MODE_PRECISION (mode), MODE_INT, 0);
396 if (int_mode == BLKmode)
398 sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
399 return x;
401 x = gen_lowpart (int_mode, x);
404 result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
405 if (result == 0)
406 result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
408 if (int_mode != mode)
409 result = gen_lowpart (mode, result);
411 return result;
414 /* Adjust bitfield memory MEM so that it points to the first unit of mode
415 MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
416 If MODE is BLKmode, return a reference to every byte in the bitfield.
417 Set *NEW_BITNUM to the bit position of the field within the new memory. */
419 static rtx
420 narrow_bit_field_mem (rtx mem, machine_mode mode,
421 unsigned HOST_WIDE_INT bitsize,
422 unsigned HOST_WIDE_INT bitnum,
423 unsigned HOST_WIDE_INT *new_bitnum)
425 if (mode == BLKmode)
427 *new_bitnum = bitnum % BITS_PER_UNIT;
428 HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
429 HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
430 / BITS_PER_UNIT);
431 return adjust_bitfield_address_size (mem, mode, offset, size);
433 else
435 unsigned int unit = GET_MODE_BITSIZE (mode);
436 *new_bitnum = bitnum % unit;
437 HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
438 return adjust_bitfield_address (mem, mode, offset);
442 /* The caller wants to perform insertion or extraction PATTERN on a
443 bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
444 BITREGION_START and BITREGION_END are as for store_bit_field
445 and FIELDMODE is the natural mode of the field.
447 Search for a mode that is compatible with the memory access
448 restrictions and (where applicable) with a register insertion or
449 extraction. Return the new memory on success, storing the adjusted
450 bit position in *NEW_BITNUM. Return null otherwise. */
452 static rtx
453 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
454 rtx op0, HOST_WIDE_INT bitsize,
455 HOST_WIDE_INT bitnum,
456 unsigned HOST_WIDE_INT bitregion_start,
457 unsigned HOST_WIDE_INT bitregion_end,
458 machine_mode fieldmode,
459 unsigned HOST_WIDE_INT *new_bitnum)
461 bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
462 bitregion_end, MEM_ALIGN (op0),
463 MEM_VOLATILE_P (op0));
464 machine_mode best_mode;
465 if (iter.next_mode (&best_mode))
467 /* We can use a memory in BEST_MODE. See whether this is true for
468 any wider modes. All other things being equal, we prefer to
469 use the widest mode possible because it tends to expose more
470 CSE opportunities. */
471 if (!iter.prefer_smaller_modes ())
473 /* Limit the search to the mode required by the corresponding
474 register insertion or extraction instruction, if any. */
475 machine_mode limit_mode = word_mode;
476 extraction_insn insn;
477 if (get_best_reg_extraction_insn (&insn, pattern,
478 GET_MODE_BITSIZE (best_mode),
479 fieldmode))
480 limit_mode = insn.field_mode;
482 machine_mode wider_mode;
483 while (iter.next_mode (&wider_mode)
484 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
485 best_mode = wider_mode;
487 return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
488 new_bitnum);
490 return NULL_RTX;
493 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
494 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
495 offset is then BITNUM / BITS_PER_UNIT. */
497 static bool
498 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
499 unsigned HOST_WIDE_INT bitsize,
500 machine_mode struct_mode)
502 if (BYTES_BIG_ENDIAN)
503 return (bitnum % BITS_PER_UNIT == 0
504 && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
505 || (bitnum + bitsize) % BITS_PER_WORD == 0));
506 else
507 return bitnum % BITS_PER_WORD == 0;
510 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
511 containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
512 Return false if the access would touch memory outside the range
513 BITREGION_START to BITREGION_END for conformance to the C++ memory
514 model. */
516 static bool
517 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
518 unsigned HOST_WIDE_INT bitnum,
519 machine_mode fieldmode,
520 unsigned HOST_WIDE_INT bitregion_start,
521 unsigned HOST_WIDE_INT bitregion_end)
523 unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
525 /* -fstrict-volatile-bitfields must be enabled and we must have a
526 volatile MEM. */
527 if (!MEM_P (op0)
528 || !MEM_VOLATILE_P (op0)
529 || flag_strict_volatile_bitfields <= 0)
530 return false;
532 /* Non-integral modes likely only happen with packed structures.
533 Punt. */
534 if (!SCALAR_INT_MODE_P (fieldmode))
535 return false;
537 /* The bit size must not be larger than the field mode, and
538 the field mode must not be larger than a word. */
539 if (bitsize > modesize || modesize > BITS_PER_WORD)
540 return false;
542 /* Check for cases of unaligned fields that must be split. */
543 if (bitnum % modesize + bitsize > modesize)
544 return false;
546 /* The memory must be sufficiently aligned for a MODESIZE access.
547 This condition guarantees, that the memory access will not
548 touch anything after the end of the structure. */
549 if (MEM_ALIGN (op0) < modesize)
550 return false;
552 /* Check for cases where the C++ memory model applies. */
553 if (bitregion_end != 0
554 && (bitnum - bitnum % modesize < bitregion_start
555 || bitnum - bitnum % modesize + modesize - 1 > bitregion_end))
556 return false;
558 return true;
561 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
562 bit number BITNUM can be treated as a simple value of mode MODE. */
564 static bool
565 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
566 unsigned HOST_WIDE_INT bitnum, machine_mode mode)
568 return (MEM_P (op0)
569 && bitnum % BITS_PER_UNIT == 0
570 && bitsize == GET_MODE_BITSIZE (mode)
571 && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
572 || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
573 && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
576 /* Try to use instruction INSV to store VALUE into a field of OP0.
577 BITSIZE and BITNUM are as for store_bit_field. */
579 static bool
580 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
581 unsigned HOST_WIDE_INT bitsize,
582 unsigned HOST_WIDE_INT bitnum,
583 rtx value)
585 struct expand_operand ops[4];
586 rtx value1;
587 rtx xop0 = op0;
588 rtx_insn *last = get_last_insn ();
589 bool copy_back = false;
591 machine_mode op_mode = insv->field_mode;
592 unsigned int unit = GET_MODE_BITSIZE (op_mode);
593 if (bitsize == 0 || bitsize > unit)
594 return false;
596 if (MEM_P (xop0))
597 /* Get a reference to the first byte of the field. */
598 xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
599 &bitnum);
600 else
602 /* Convert from counting within OP0 to counting in OP_MODE. */
603 if (BYTES_BIG_ENDIAN)
604 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
606 /* If xop0 is a register, we need it in OP_MODE
607 to make it acceptable to the format of insv. */
608 if (GET_CODE (xop0) == SUBREG)
609 /* We can't just change the mode, because this might clobber op0,
610 and we will need the original value of op0 if insv fails. */
611 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
612 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
613 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
616 /* If the destination is a paradoxical subreg such that we need a
617 truncate to the inner mode, perform the insertion on a temporary and
618 truncate the result to the original destination. Note that we can't
619 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
620 X) 0)) is (reg:N X). */
621 if (GET_CODE (xop0) == SUBREG
622 && REG_P (SUBREG_REG (xop0))
623 && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
624 op_mode))
626 rtx tem = gen_reg_rtx (op_mode);
627 emit_move_insn (tem, xop0);
628 xop0 = tem;
629 copy_back = true;
632 /* There are similar overflow check at the start of store_bit_field_1,
633 but that only check the situation where the field lies completely
634 outside the register, while there do have situation where the field
635 lies partialy in the register, we need to adjust bitsize for this
636 partial overflow situation. Without this fix, pr48335-2.c on big-endian
637 will broken on those arch support bit insert instruction, like arm, aarch64
638 etc. */
639 if (bitsize + bitnum > unit && bitnum < unit)
641 warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
642 "destination object, data truncated into %wu-bit",
643 bitsize, unit - bitnum);
644 bitsize = unit - bitnum;
647 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
648 "backwards" from the size of the unit we are inserting into.
649 Otherwise, we count bits from the most significant on a
650 BYTES/BITS_BIG_ENDIAN machine. */
652 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
653 bitnum = unit - bitsize - bitnum;
655 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
656 value1 = value;
657 if (GET_MODE (value) != op_mode)
659 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
661 /* Optimization: Don't bother really extending VALUE
662 if it has all the bits we will actually use. However,
663 if we must narrow it, be sure we do it correctly. */
665 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
667 rtx tmp;
669 tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
670 if (! tmp)
671 tmp = simplify_gen_subreg (op_mode,
672 force_reg (GET_MODE (value),
673 value1),
674 GET_MODE (value), 0);
675 value1 = tmp;
677 else
678 value1 = gen_lowpart (op_mode, value1);
680 else if (CONST_INT_P (value))
681 value1 = gen_int_mode (INTVAL (value), op_mode);
682 else
683 /* Parse phase is supposed to make VALUE's data type
684 match that of the component reference, which is a type
685 at least as wide as the field; so VALUE should have
686 a mode that corresponds to that type. */
687 gcc_assert (CONSTANT_P (value));
690 create_fixed_operand (&ops[0], xop0);
691 create_integer_operand (&ops[1], bitsize);
692 create_integer_operand (&ops[2], bitnum);
693 create_input_operand (&ops[3], value1, op_mode);
694 if (maybe_expand_insn (insv->icode, 4, ops))
696 if (copy_back)
697 convert_move (op0, xop0, true);
698 return true;
700 delete_insns_since (last);
701 return false;
704 /* A subroutine of store_bit_field, with the same arguments. Return true
705 if the operation could be implemented.
707 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
708 no other way of implementing the operation. If FALLBACK_P is false,
709 return false instead. */
711 static bool
712 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
713 unsigned HOST_WIDE_INT bitnum,
714 unsigned HOST_WIDE_INT bitregion_start,
715 unsigned HOST_WIDE_INT bitregion_end,
716 machine_mode fieldmode,
717 rtx value, bool reverse, bool fallback_p)
719 rtx op0 = str_rtx;
720 rtx orig_value;
722 while (GET_CODE (op0) == SUBREG)
724 /* The following line once was done only if WORDS_BIG_ENDIAN,
725 but I think that is a mistake. WORDS_BIG_ENDIAN is
726 meaningful at a much higher level; when structures are copied
727 between memory and regs, the higher-numbered regs
728 always get higher addresses. */
729 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
730 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
731 int byte_offset = 0;
733 /* Paradoxical subregs need special handling on big-endian machines. */
734 if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
736 int difference = inner_mode_size - outer_mode_size;
738 if (WORDS_BIG_ENDIAN)
739 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
740 if (BYTES_BIG_ENDIAN)
741 byte_offset += difference % UNITS_PER_WORD;
743 else
744 byte_offset = SUBREG_BYTE (op0);
746 bitnum += byte_offset * BITS_PER_UNIT;
747 op0 = SUBREG_REG (op0);
750 /* No action is needed if the target is a register and if the field
751 lies completely outside that register. This can occur if the source
752 code contains an out-of-bounds access to a small array. */
753 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
754 return true;
756 /* Use vec_set patterns for inserting parts of vectors whenever
757 available. */
758 if (VECTOR_MODE_P (GET_MODE (op0))
759 && !MEM_P (op0)
760 && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
761 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
762 && bitsize == GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
763 && !(bitnum % GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
765 struct expand_operand ops[3];
766 machine_mode outermode = GET_MODE (op0);
767 machine_mode innermode = GET_MODE_INNER (outermode);
768 enum insn_code icode = optab_handler (vec_set_optab, outermode);
769 int pos = bitnum / GET_MODE_BITSIZE (innermode);
771 create_fixed_operand (&ops[0], op0);
772 create_input_operand (&ops[1], value, innermode);
773 create_integer_operand (&ops[2], pos);
774 if (maybe_expand_insn (icode, 3, ops))
775 return true;
778 /* If the target is a register, overwriting the entire object, or storing
779 a full-word or multi-word field can be done with just a SUBREG. */
780 if (!MEM_P (op0)
781 && bitsize == GET_MODE_BITSIZE (fieldmode)
782 && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
783 || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
785 /* Use the subreg machinery either to narrow OP0 to the required
786 words or to cope with mode punning between equal-sized modes.
787 In the latter case, use subreg on the rhs side, not lhs. */
788 rtx sub;
790 if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
792 sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
793 if (sub)
795 if (reverse)
796 sub = flip_storage_order (GET_MODE (op0), sub);
797 emit_move_insn (op0, sub);
798 return true;
801 else
803 sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
804 bitnum / BITS_PER_UNIT);
805 if (sub)
807 if (reverse)
808 value = flip_storage_order (fieldmode, value);
809 emit_move_insn (sub, value);
810 return true;
815 /* If the target is memory, storing any naturally aligned field can be
816 done with a simple store. For targets that support fast unaligned
817 memory, any naturally sized, unit aligned field can be done directly. */
818 if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
820 op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
821 if (reverse)
822 value = flip_storage_order (fieldmode, value);
823 emit_move_insn (op0, value);
824 return true;
827 /* Make sure we are playing with integral modes. Pun with subregs
828 if we aren't. This must come after the entire register case above,
829 since that case is valid for any mode. The following cases are only
830 valid for integral modes. */
832 machine_mode imode = int_mode_for_mode (GET_MODE (op0));
833 if (imode != GET_MODE (op0))
835 if (MEM_P (op0))
836 op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
837 else
839 gcc_assert (imode != BLKmode);
840 op0 = gen_lowpart (imode, op0);
845 /* Storing an lsb-aligned field in a register
846 can be done with a movstrict instruction. */
848 if (!MEM_P (op0)
849 && !reverse
850 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
851 && bitsize == GET_MODE_BITSIZE (fieldmode)
852 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
854 struct expand_operand ops[2];
855 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
856 rtx arg0 = op0;
857 unsigned HOST_WIDE_INT subreg_off;
859 if (GET_CODE (arg0) == SUBREG)
861 /* Else we've got some float mode source being extracted into
862 a different float mode destination -- this combination of
863 subregs results in Severe Tire Damage. */
864 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
865 || GET_MODE_CLASS (fieldmode) == MODE_INT
866 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
867 arg0 = SUBREG_REG (arg0);
870 subreg_off = bitnum / BITS_PER_UNIT;
871 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
873 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
875 create_fixed_operand (&ops[0], arg0);
876 /* Shrink the source operand to FIELDMODE. */
877 create_convert_operand_to (&ops[1], value, fieldmode, false);
878 if (maybe_expand_insn (icode, 2, ops))
879 return true;
883 /* Handle fields bigger than a word. */
885 if (bitsize > BITS_PER_WORD)
887 /* Here we transfer the words of the field
888 in the order least significant first.
889 This is because the most significant word is the one which may
890 be less than full.
891 However, only do that if the value is not BLKmode. */
893 const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
894 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
895 unsigned int i;
896 rtx_insn *last;
898 /* This is the mode we must force value to, so that there will be enough
899 subwords to extract. Note that fieldmode will often (always?) be
900 VOIDmode, because that is what store_field uses to indicate that this
901 is a bit field, but passing VOIDmode to operand_subword_force
902 is not allowed. */
903 fieldmode = GET_MODE (value);
904 if (fieldmode == VOIDmode)
905 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
907 last = get_last_insn ();
908 for (i = 0; i < nwords; i++)
910 /* If I is 0, use the low-order word in both field and target;
911 if I is 1, use the next to lowest word; and so on. */
912 unsigned int wordnum = (backwards
913 ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
914 - i - 1
915 : i);
916 unsigned int bit_offset = (backwards ^ reverse
917 ? MAX ((int) bitsize - ((int) i + 1)
918 * BITS_PER_WORD,
920 : (int) i * BITS_PER_WORD);
921 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
922 unsigned HOST_WIDE_INT new_bitsize =
923 MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
925 /* If the remaining chunk doesn't have full wordsize we have
926 to make sure that for big-endian machines the higher order
927 bits are used. */
928 if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
929 value_word = simplify_expand_binop (word_mode, lshr_optab,
930 value_word,
931 GEN_INT (BITS_PER_WORD
932 - new_bitsize),
933 NULL_RTX, true,
934 OPTAB_LIB_WIDEN);
936 if (!store_bit_field_1 (op0, new_bitsize,
937 bitnum + bit_offset,
938 bitregion_start, bitregion_end,
939 word_mode,
940 value_word, reverse, fallback_p))
942 delete_insns_since (last);
943 return false;
946 return true;
949 /* If VALUE has a floating-point or complex mode, access it as an
950 integer of the corresponding size. This can occur on a machine
951 with 64 bit registers that uses SFmode for float. It can also
952 occur for unaligned float or complex fields. */
953 orig_value = value;
954 if (GET_MODE (value) != VOIDmode
955 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
956 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
958 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
959 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
962 /* If OP0 is a multi-word register, narrow it to the affected word.
963 If the region spans two words, defer to store_split_bit_field. */
964 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
966 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
967 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
968 gcc_assert (op0);
969 bitnum %= BITS_PER_WORD;
970 if (bitnum + bitsize > BITS_PER_WORD)
972 if (!fallback_p)
973 return false;
975 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
976 bitregion_end, value, reverse);
977 return true;
981 /* From here on we can assume that the field to be stored in fits
982 within a word. If the destination is a register, it too fits
983 in a word. */
985 extraction_insn insv;
986 if (!MEM_P (op0)
987 && !reverse
988 && get_best_reg_extraction_insn (&insv, EP_insv,
989 GET_MODE_BITSIZE (GET_MODE (op0)),
990 fieldmode)
991 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
992 return true;
994 /* If OP0 is a memory, try copying it to a register and seeing if a
995 cheap register alternative is available. */
996 if (MEM_P (op0) && !reverse)
998 if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
999 fieldmode)
1000 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
1001 return true;
1003 rtx_insn *last = get_last_insn ();
1005 /* Try loading part of OP0 into a register, inserting the bitfield
1006 into that, and then copying the result back to OP0. */
1007 unsigned HOST_WIDE_INT bitpos;
1008 rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1009 bitregion_start, bitregion_end,
1010 fieldmode, &bitpos);
1011 if (xop0)
1013 rtx tempreg = copy_to_reg (xop0);
1014 if (store_bit_field_1 (tempreg, bitsize, bitpos,
1015 bitregion_start, bitregion_end,
1016 fieldmode, orig_value, reverse, false))
1018 emit_move_insn (xop0, tempreg);
1019 return true;
1021 delete_insns_since (last);
1025 if (!fallback_p)
1026 return false;
1028 store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
1029 bitregion_end, value, reverse);
1030 return true;
1033 /* Generate code to store value from rtx VALUE
1034 into a bit-field within structure STR_RTX
1035 containing BITSIZE bits starting at bit BITNUM.
1037 BITREGION_START is bitpos of the first bitfield in this region.
1038 BITREGION_END is the bitpos of the ending bitfield in this region.
1039 These two fields are 0, if the C++ memory model does not apply,
1040 or we are not interested in keeping track of bitfield regions.
1042 FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1044 If REVERSE is true, the store is to be done in reverse order. */
1046 void
1047 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1048 unsigned HOST_WIDE_INT bitnum,
1049 unsigned HOST_WIDE_INT bitregion_start,
1050 unsigned HOST_WIDE_INT bitregion_end,
1051 machine_mode fieldmode,
1052 rtx value, bool reverse)
1054 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1055 if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode,
1056 bitregion_start, bitregion_end))
1058 /* Storing of a full word can be done with a simple store.
1059 We know here that the field can be accessed with one single
1060 instruction. For targets that support unaligned memory,
1061 an unaligned access may be necessary. */
1062 if (bitsize == GET_MODE_BITSIZE (fieldmode))
1064 str_rtx = adjust_bitfield_address (str_rtx, fieldmode,
1065 bitnum / BITS_PER_UNIT);
1066 if (reverse)
1067 value = flip_storage_order (fieldmode, value);
1068 gcc_assert (bitnum % BITS_PER_UNIT == 0);
1069 emit_move_insn (str_rtx, value);
1071 else
1073 rtx temp;
1075 str_rtx = narrow_bit_field_mem (str_rtx, fieldmode, bitsize, bitnum,
1076 &bitnum);
1077 gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (fieldmode));
1078 temp = copy_to_reg (str_rtx);
1079 if (!store_bit_field_1 (temp, bitsize, bitnum, 0, 0,
1080 fieldmode, value, reverse, true))
1081 gcc_unreachable ();
1083 emit_move_insn (str_rtx, temp);
1086 return;
1089 /* Under the C++0x memory model, we must not touch bits outside the
1090 bit region. Adjust the address to start at the beginning of the
1091 bit region. */
1092 if (MEM_P (str_rtx) && bitregion_start > 0)
1094 machine_mode bestmode;
1095 HOST_WIDE_INT offset, size;
1097 gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
1099 offset = bitregion_start / BITS_PER_UNIT;
1100 bitnum -= bitregion_start;
1101 size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
1102 bitregion_end -= bitregion_start;
1103 bitregion_start = 0;
1104 bestmode = get_best_mode (bitsize, bitnum,
1105 bitregion_start, bitregion_end,
1106 MEM_ALIGN (str_rtx), VOIDmode,
1107 MEM_VOLATILE_P (str_rtx));
1108 str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
1111 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1112 bitregion_start, bitregion_end,
1113 fieldmode, value, reverse, true))
1114 gcc_unreachable ();
1117 /* Use shifts and boolean operations to store VALUE into a bit field of
1118 width BITSIZE in OP0, starting at bit BITNUM.
1120 If REVERSE is true, the store is to be done in reverse order. */
1122 static void
1123 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1124 unsigned HOST_WIDE_INT bitnum,
1125 unsigned HOST_WIDE_INT bitregion_start,
1126 unsigned HOST_WIDE_INT bitregion_end,
1127 rtx value, bool reverse)
1129 /* There is a case not handled here:
1130 a structure with a known alignment of just a halfword
1131 and a field split across two aligned halfwords within the structure.
1132 Or likewise a structure with a known alignment of just a byte
1133 and a field split across two bytes.
1134 Such cases are not supposed to be able to occur. */
1136 if (MEM_P (op0))
1138 machine_mode mode = GET_MODE (op0);
1139 if (GET_MODE_BITSIZE (mode) == 0
1140 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
1141 mode = word_mode;
1142 mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1143 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
1145 if (mode == VOIDmode)
1147 /* The only way this should occur is if the field spans word
1148 boundaries. */
1149 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
1150 bitregion_end, value, reverse);
1151 return;
1154 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1157 store_fixed_bit_field_1 (op0, bitsize, bitnum, value, reverse);
1160 /* Helper function for store_fixed_bit_field, stores
1161 the bit field always using the MODE of OP0. */
1163 static void
1164 store_fixed_bit_field_1 (rtx op0, unsigned HOST_WIDE_INT bitsize,
1165 unsigned HOST_WIDE_INT bitnum,
1166 rtx value, bool reverse)
1168 machine_mode mode;
1169 rtx temp;
1170 int all_zero = 0;
1171 int all_one = 0;
1173 mode = GET_MODE (op0);
1174 gcc_assert (SCALAR_INT_MODE_P (mode));
1176 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1177 for invalid input, such as f5 from gcc.dg/pr48335-2.c. */
1179 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1180 /* BITNUM is the distance between our msb
1181 and that of the containing datum.
1182 Convert it to the distance from the lsb. */
1183 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1185 /* Now BITNUM is always the distance between our lsb
1186 and that of OP0. */
1188 /* Shift VALUE left by BITNUM bits. If VALUE is not constant,
1189 we must first convert its mode to MODE. */
1191 if (CONST_INT_P (value))
1193 unsigned HOST_WIDE_INT v = UINTVAL (value);
1195 if (bitsize < HOST_BITS_PER_WIDE_INT)
1196 v &= ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1;
1198 if (v == 0)
1199 all_zero = 1;
1200 else if ((bitsize < HOST_BITS_PER_WIDE_INT
1201 && v == ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1)
1202 || (bitsize == HOST_BITS_PER_WIDE_INT
1203 && v == (unsigned HOST_WIDE_INT) -1))
1204 all_one = 1;
1206 value = lshift_value (mode, v, bitnum);
1208 else
1210 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1211 && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1213 if (GET_MODE (value) != mode)
1214 value = convert_to_mode (mode, value, 1);
1216 if (must_and)
1217 value = expand_binop (mode, and_optab, value,
1218 mask_rtx (mode, 0, bitsize, 0),
1219 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1220 if (bitnum > 0)
1221 value = expand_shift (LSHIFT_EXPR, mode, value,
1222 bitnum, NULL_RTX, 1);
1225 if (reverse)
1226 value = flip_storage_order (mode, value);
1228 /* Now clear the chosen bits in OP0,
1229 except that if VALUE is -1 we need not bother. */
1230 /* We keep the intermediates in registers to allow CSE to combine
1231 consecutive bitfield assignments. */
1233 temp = force_reg (mode, op0);
1235 if (! all_one)
1237 rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
1238 if (reverse)
1239 mask = flip_storage_order (mode, mask);
1240 temp = expand_binop (mode, and_optab, temp, mask,
1241 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1242 temp = force_reg (mode, temp);
1245 /* Now logical-or VALUE into OP0, unless it is zero. */
1247 if (! all_zero)
1249 temp = expand_binop (mode, ior_optab, temp, value,
1250 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1251 temp = force_reg (mode, temp);
1254 if (op0 != temp)
1256 op0 = copy_rtx (op0);
1257 emit_move_insn (op0, temp);
1261 /* Store a bit field that is split across multiple accessible memory objects.
1263 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1264 BITSIZE is the field width; BITPOS the position of its first bit
1265 (within the word).
1266 VALUE is the value to store.
1268 If REVERSE is true, the store is to be done in reverse order.
1270 This does not yet handle fields wider than BITS_PER_WORD. */
1272 static void
1273 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1274 unsigned HOST_WIDE_INT bitpos,
1275 unsigned HOST_WIDE_INT bitregion_start,
1276 unsigned HOST_WIDE_INT bitregion_end,
1277 rtx value, bool reverse)
1279 unsigned int unit, total_bits, bitsdone = 0;
1281 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1282 much at a time. */
1283 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1284 unit = BITS_PER_WORD;
1285 else
1286 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1288 /* If OP0 is a memory with a mode, then UNIT must not be larger than
1289 OP0's mode as well. Otherwise, store_fixed_bit_field will call us
1290 again, and we will mutually recurse forever. */
1291 if (MEM_P (op0) && GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1292 unit = MIN (unit, GET_MODE_BITSIZE (GET_MODE (op0)));
1294 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1295 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1296 that VALUE might be a floating-point constant. */
1297 if (CONSTANT_P (value) && !CONST_INT_P (value))
1299 rtx word = gen_lowpart_common (word_mode, value);
1301 if (word && (value != word))
1302 value = word;
1303 else
1304 value = gen_lowpart_common (word_mode,
1305 force_reg (GET_MODE (value) != VOIDmode
1306 ? GET_MODE (value)
1307 : word_mode, value));
1310 total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1312 while (bitsdone < bitsize)
1314 unsigned HOST_WIDE_INT thissize;
1315 unsigned HOST_WIDE_INT thispos;
1316 unsigned HOST_WIDE_INT offset;
1317 rtx part, word;
1319 offset = (bitpos + bitsdone) / unit;
1320 thispos = (bitpos + bitsdone) % unit;
1322 /* When region of bytes we can touch is restricted, decrease
1323 UNIT close to the end of the region as needed. If op0 is a REG
1324 or SUBREG of REG, don't do this, as there can't be data races
1325 on a register and we can expand shorter code in some cases. */
1326 if (bitregion_end
1327 && unit > BITS_PER_UNIT
1328 && bitpos + bitsdone - thispos + unit > bitregion_end + 1
1329 && !REG_P (op0)
1330 && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1332 unit = unit / 2;
1333 continue;
1336 /* THISSIZE must not overrun a word boundary. Otherwise,
1337 store_fixed_bit_field will call us again, and we will mutually
1338 recurse forever. */
1339 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1340 thissize = MIN (thissize, unit - thispos);
1342 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1344 /* Fetch successively less significant portions. */
1345 if (CONST_INT_P (value))
1346 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1347 >> (bitsize - bitsdone - thissize))
1348 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1349 /* Likewise, but the source is little-endian. */
1350 else if (reverse)
1351 part = extract_fixed_bit_field (word_mode, value, thissize,
1352 bitsize - bitsdone - thissize,
1353 NULL_RTX, 1, false);
1354 else
1356 int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1357 /* The args are chosen so that the last part includes the
1358 lsb. Give extract_bit_field the value it needs (with
1359 endianness compensation) to fetch the piece we want. */
1360 part = extract_fixed_bit_field (word_mode, value, thissize,
1361 total_bits - bitsize + bitsdone,
1362 NULL_RTX, 1, false);
1365 else
1367 /* Fetch successively more significant portions. */
1368 if (CONST_INT_P (value))
1369 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1370 >> bitsdone)
1371 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1372 /* Likewise, but the source is big-endian. */
1373 else if (reverse)
1374 part = extract_fixed_bit_field (word_mode, value, thissize,
1375 total_bits - bitsdone - thissize,
1376 NULL_RTX, 1, false);
1377 else
1378 part = extract_fixed_bit_field (word_mode, value, thissize,
1379 bitsdone, NULL_RTX, 1, false);
1382 /* If OP0 is a register, then handle OFFSET here.
1384 When handling multiword bitfields, extract_bit_field may pass
1385 down a word_mode SUBREG of a larger REG for a bitfield that actually
1386 crosses a word boundary. Thus, for a SUBREG, we must find
1387 the current word starting from the base register. */
1388 if (GET_CODE (op0) == SUBREG)
1390 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD)
1391 + (offset * unit / BITS_PER_WORD);
1392 machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
1393 if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1394 word = word_offset ? const0_rtx : op0;
1395 else
1396 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1397 GET_MODE (SUBREG_REG (op0)));
1398 offset &= BITS_PER_WORD / unit - 1;
1400 else if (REG_P (op0))
1402 machine_mode op0_mode = GET_MODE (op0);
1403 if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1404 word = offset ? const0_rtx : op0;
1405 else
1406 word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
1407 GET_MODE (op0));
1408 offset &= BITS_PER_WORD / unit - 1;
1410 else
1411 word = op0;
1413 /* OFFSET is in UNITs, and UNIT is in bits. If WORD is const0_rtx,
1414 it is just an out-of-bounds access. Ignore it. */
1415 if (word != const0_rtx)
1416 store_fixed_bit_field (word, thissize, offset * unit + thispos,
1417 bitregion_start, bitregion_end, part,
1418 reverse);
1419 bitsdone += thissize;
1423 /* A subroutine of extract_bit_field_1 that converts return value X
1424 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1425 to extract_bit_field. */
1427 static rtx
1428 convert_extracted_bit_field (rtx x, machine_mode mode,
1429 machine_mode tmode, bool unsignedp)
1431 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1432 return x;
1434 /* If the x mode is not a scalar integral, first convert to the
1435 integer mode of that size and then access it as a floating-point
1436 value via a SUBREG. */
1437 if (!SCALAR_INT_MODE_P (tmode))
1439 machine_mode smode;
1441 smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1442 x = convert_to_mode (smode, x, unsignedp);
1443 x = force_reg (smode, x);
1444 return gen_lowpart (tmode, x);
1447 return convert_to_mode (tmode, x, unsignedp);
1450 /* Try to use an ext(z)v pattern to extract a field from OP0.
1451 Return the extracted value on success, otherwise return null.
1452 EXT_MODE is the mode of the extraction and the other arguments
1453 are as for extract_bit_field. */
1455 static rtx
1456 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1457 unsigned HOST_WIDE_INT bitsize,
1458 unsigned HOST_WIDE_INT bitnum,
1459 int unsignedp, rtx target,
1460 machine_mode mode, machine_mode tmode)
1462 struct expand_operand ops[4];
1463 rtx spec_target = target;
1464 rtx spec_target_subreg = 0;
1465 machine_mode ext_mode = extv->field_mode;
1466 unsigned unit = GET_MODE_BITSIZE (ext_mode);
1468 if (bitsize == 0 || unit < bitsize)
1469 return NULL_RTX;
1471 if (MEM_P (op0))
1472 /* Get a reference to the first byte of the field. */
1473 op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1474 &bitnum);
1475 else
1477 /* Convert from counting within OP0 to counting in EXT_MODE. */
1478 if (BYTES_BIG_ENDIAN)
1479 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1481 /* If op0 is a register, we need it in EXT_MODE to make it
1482 acceptable to the format of ext(z)v. */
1483 if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1484 return NULL_RTX;
1485 if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1486 op0 = gen_lowpart_SUBREG (ext_mode, op0);
1489 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1490 "backwards" from the size of the unit we are extracting from.
1491 Otherwise, we count bits from the most significant on a
1492 BYTES/BITS_BIG_ENDIAN machine. */
1494 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1495 bitnum = unit - bitsize - bitnum;
1497 if (target == 0)
1498 target = spec_target = gen_reg_rtx (tmode);
1500 if (GET_MODE (target) != ext_mode)
1502 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1503 between the mode of the extraction (word_mode) and the target
1504 mode. Instead, create a temporary and use convert_move to set
1505 the target. */
1506 if (REG_P (target)
1507 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1509 target = gen_lowpart (ext_mode, target);
1510 if (GET_MODE_PRECISION (ext_mode)
1511 > GET_MODE_PRECISION (GET_MODE (spec_target)))
1512 spec_target_subreg = target;
1514 else
1515 target = gen_reg_rtx (ext_mode);
1518 create_output_operand (&ops[0], target, ext_mode);
1519 create_fixed_operand (&ops[1], op0);
1520 create_integer_operand (&ops[2], bitsize);
1521 create_integer_operand (&ops[3], bitnum);
1522 if (maybe_expand_insn (extv->icode, 4, ops))
1524 target = ops[0].value;
1525 if (target == spec_target)
1526 return target;
1527 if (target == spec_target_subreg)
1528 return spec_target;
1529 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1531 return NULL_RTX;
1534 /* A subroutine of extract_bit_field, with the same arguments.
1535 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1536 if we can find no other means of implementing the operation.
1537 if FALLBACK_P is false, return NULL instead. */
1539 static rtx
1540 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1541 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1542 machine_mode mode, machine_mode tmode,
1543 bool reverse, bool fallback_p)
1545 rtx op0 = str_rtx;
1546 machine_mode int_mode;
1547 machine_mode mode1;
1549 if (tmode == VOIDmode)
1550 tmode = mode;
1552 while (GET_CODE (op0) == SUBREG)
1554 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1555 op0 = SUBREG_REG (op0);
1558 /* If we have an out-of-bounds access to a register, just return an
1559 uninitialized register of the required mode. This can occur if the
1560 source code contains an out-of-bounds access to a small array. */
1561 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1562 return gen_reg_rtx (tmode);
1564 if (REG_P (op0)
1565 && mode == GET_MODE (op0)
1566 && bitnum == 0
1567 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1569 if (reverse)
1570 op0 = flip_storage_order (mode, op0);
1571 /* We're trying to extract a full register from itself. */
1572 return op0;
1575 /* See if we can get a better vector mode before extracting. */
1576 if (VECTOR_MODE_P (GET_MODE (op0))
1577 && !MEM_P (op0)
1578 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1580 machine_mode new_mode;
1582 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1583 new_mode = MIN_MODE_VECTOR_FLOAT;
1584 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1585 new_mode = MIN_MODE_VECTOR_FRACT;
1586 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1587 new_mode = MIN_MODE_VECTOR_UFRACT;
1588 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1589 new_mode = MIN_MODE_VECTOR_ACCUM;
1590 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1591 new_mode = MIN_MODE_VECTOR_UACCUM;
1592 else
1593 new_mode = MIN_MODE_VECTOR_INT;
1595 for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1596 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1597 && targetm.vector_mode_supported_p (new_mode))
1598 break;
1599 if (new_mode != VOIDmode)
1600 op0 = gen_lowpart (new_mode, op0);
1603 /* Use vec_extract patterns for extracting parts of vectors whenever
1604 available. */
1605 if (VECTOR_MODE_P (GET_MODE (op0))
1606 && !MEM_P (op0)
1607 && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1608 && ((bitnum + bitsize - 1) / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
1609 == bitnum / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
1611 struct expand_operand ops[3];
1612 machine_mode outermode = GET_MODE (op0);
1613 machine_mode innermode = GET_MODE_INNER (outermode);
1614 enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1615 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1617 create_output_operand (&ops[0], target, innermode);
1618 create_input_operand (&ops[1], op0, outermode);
1619 create_integer_operand (&ops[2], pos);
1620 if (maybe_expand_insn (icode, 3, ops))
1622 target = ops[0].value;
1623 if (GET_MODE (target) != mode)
1624 return gen_lowpart (tmode, target);
1625 return target;
1629 /* Make sure we are playing with integral modes. Pun with subregs
1630 if we aren't. */
1632 machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1633 if (imode != GET_MODE (op0))
1635 if (MEM_P (op0))
1636 op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
1637 else if (imode != BLKmode)
1639 op0 = gen_lowpart (imode, op0);
1641 /* If we got a SUBREG, force it into a register since we
1642 aren't going to be able to do another SUBREG on it. */
1643 if (GET_CODE (op0) == SUBREG)
1644 op0 = force_reg (imode, op0);
1646 else if (REG_P (op0))
1648 rtx reg, subreg;
1649 imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1650 MODE_INT);
1651 reg = gen_reg_rtx (imode);
1652 subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1653 emit_move_insn (subreg, op0);
1654 op0 = reg;
1655 bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1657 else
1659 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1660 rtx mem = assign_stack_temp (GET_MODE (op0), size);
1661 emit_move_insn (mem, op0);
1662 op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1667 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1668 If that's wrong, the solution is to test for it and set TARGET to 0
1669 if needed. */
1671 /* Get the mode of the field to use for atomic access or subreg
1672 conversion. */
1673 mode1 = mode;
1674 if (SCALAR_INT_MODE_P (tmode))
1676 machine_mode try_mode = mode_for_size (bitsize,
1677 GET_MODE_CLASS (tmode), 0);
1678 if (try_mode != BLKmode)
1679 mode1 = try_mode;
1681 gcc_assert (mode1 != BLKmode);
1683 /* Extraction of a full MODE1 value can be done with a subreg as long
1684 as the least significant bit of the value is the least significant
1685 bit of either OP0 or a word of OP0. */
1686 if (!MEM_P (op0)
1687 && !reverse
1688 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1689 && bitsize == GET_MODE_BITSIZE (mode1)
1690 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1692 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1693 bitnum / BITS_PER_UNIT);
1694 if (sub)
1695 return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1698 /* Extraction of a full MODE1 value can be done with a load as long as
1699 the field is on a byte boundary and is sufficiently aligned. */
1700 if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1702 op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1703 if (reverse)
1704 op0 = flip_storage_order (mode1, op0);
1705 return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1708 /* Handle fields bigger than a word. */
1710 if (bitsize > BITS_PER_WORD)
1712 /* Here we transfer the words of the field
1713 in the order least significant first.
1714 This is because the most significant word is the one which may
1715 be less than full. */
1717 const bool backwards = WORDS_BIG_ENDIAN;
1718 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1719 unsigned int i;
1720 rtx_insn *last;
1722 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1723 target = gen_reg_rtx (mode);
1725 /* In case we're about to clobber a base register or something
1726 (see gcc.c-torture/execute/20040625-1.c). */
1727 if (reg_mentioned_p (target, str_rtx))
1728 target = gen_reg_rtx (mode);
1730 /* Indicate for flow that the entire target reg is being set. */
1731 emit_clobber (target);
1733 last = get_last_insn ();
1734 for (i = 0; i < nwords; i++)
1736 /* If I is 0, use the low-order word in both field and target;
1737 if I is 1, use the next to lowest word; and so on. */
1738 /* Word number in TARGET to use. */
1739 unsigned int wordnum
1740 = (backwards
1741 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1742 : i);
1743 /* Offset from start of field in OP0. */
1744 unsigned int bit_offset = (backwards ^ reverse
1745 ? MAX ((int) bitsize - ((int) i + 1)
1746 * BITS_PER_WORD,
1748 : (int) i * BITS_PER_WORD);
1749 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1750 rtx result_part
1751 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1752 bitsize - i * BITS_PER_WORD),
1753 bitnum + bit_offset, 1, target_part,
1754 mode, word_mode, reverse, fallback_p);
1756 gcc_assert (target_part);
1757 if (!result_part)
1759 delete_insns_since (last);
1760 return NULL;
1763 if (result_part != target_part)
1764 emit_move_insn (target_part, result_part);
1767 if (unsignedp)
1769 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1770 need to be zero'd out. */
1771 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1773 unsigned int i, total_words;
1775 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1776 for (i = nwords; i < total_words; i++)
1777 emit_move_insn
1778 (operand_subword (target,
1779 backwards ? total_words - i - 1 : i,
1780 1, VOIDmode),
1781 const0_rtx);
1783 return target;
1786 /* Signed bit field: sign-extend with two arithmetic shifts. */
1787 target = expand_shift (LSHIFT_EXPR, mode, target,
1788 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1789 return expand_shift (RSHIFT_EXPR, mode, target,
1790 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1793 /* If OP0 is a multi-word register, narrow it to the affected word.
1794 If the region spans two words, defer to extract_split_bit_field. */
1795 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1797 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1798 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1799 bitnum %= BITS_PER_WORD;
1800 if (bitnum + bitsize > BITS_PER_WORD)
1802 if (!fallback_p)
1803 return NULL_RTX;
1804 target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp,
1805 reverse);
1806 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1810 /* From here on we know the desired field is smaller than a word.
1811 If OP0 is a register, it too fits within a word. */
1812 enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1813 extraction_insn extv;
1814 if (!MEM_P (op0)
1815 && !reverse
1816 /* ??? We could limit the structure size to the part of OP0 that
1817 contains the field, with appropriate checks for endianness
1818 and TRULY_NOOP_TRUNCATION. */
1819 && get_best_reg_extraction_insn (&extv, pattern,
1820 GET_MODE_BITSIZE (GET_MODE (op0)),
1821 tmode))
1823 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1824 unsignedp, target, mode,
1825 tmode);
1826 if (result)
1827 return result;
1830 /* If OP0 is a memory, try copying it to a register and seeing if a
1831 cheap register alternative is available. */
1832 if (MEM_P (op0) & !reverse)
1834 if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1835 tmode))
1837 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1838 bitnum, unsignedp,
1839 target, mode,
1840 tmode);
1841 if (result)
1842 return result;
1845 rtx_insn *last = get_last_insn ();
1847 /* Try loading part of OP0 into a register and extracting the
1848 bitfield from that. */
1849 unsigned HOST_WIDE_INT bitpos;
1850 rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1851 0, 0, tmode, &bitpos);
1852 if (xop0)
1854 xop0 = copy_to_reg (xop0);
1855 rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1856 unsignedp, target,
1857 mode, tmode, reverse, false);
1858 if (result)
1859 return result;
1860 delete_insns_since (last);
1864 if (!fallback_p)
1865 return NULL;
1867 /* Find a correspondingly-sized integer field, so we can apply
1868 shifts and masks to it. */
1869 int_mode = int_mode_for_mode (tmode);
1870 if (int_mode == BLKmode)
1871 int_mode = int_mode_for_mode (mode);
1872 /* Should probably push op0 out to memory and then do a load. */
1873 gcc_assert (int_mode != BLKmode);
1875 target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum, target,
1876 unsignedp, reverse);
1878 /* Complex values must be reversed piecewise, so we need to undo the global
1879 reversal, convert to the complex mode and reverse again. */
1880 if (reverse && COMPLEX_MODE_P (tmode))
1882 target = flip_storage_order (int_mode, target);
1883 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1884 target = flip_storage_order (tmode, target);
1886 else
1887 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1889 return target;
1892 /* Generate code to extract a byte-field from STR_RTX
1893 containing BITSIZE bits, starting at BITNUM,
1894 and put it in TARGET if possible (if TARGET is nonzero).
1895 Regardless of TARGET, we return the rtx for where the value is placed.
1897 STR_RTX is the structure containing the byte (a REG or MEM).
1898 UNSIGNEDP is nonzero if this is an unsigned bit field.
1899 MODE is the natural mode of the field value once extracted.
1900 TMODE is the mode the caller would like the value to have;
1901 but the value may be returned with type MODE instead.
1903 If REVERSE is true, the extraction is to be done in reverse order.
1905 If a TARGET is specified and we can store in it at no extra cost,
1906 we do so, and return TARGET.
1907 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1908 if they are equally easy. */
1911 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1912 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1913 machine_mode mode, machine_mode tmode, bool reverse)
1915 machine_mode mode1;
1917 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1918 if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1919 mode1 = GET_MODE (str_rtx);
1920 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1921 mode1 = GET_MODE (target);
1922 else
1923 mode1 = tmode;
1925 if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, mode1, 0, 0))
1927 /* Extraction of a full MODE1 value can be done with a simple load.
1928 We know here that the field can be accessed with one single
1929 instruction. For targets that support unaligned memory,
1930 an unaligned access may be necessary. */
1931 if (bitsize == GET_MODE_BITSIZE (mode1))
1933 rtx result = adjust_bitfield_address (str_rtx, mode1,
1934 bitnum / BITS_PER_UNIT);
1935 if (reverse)
1936 result = flip_storage_order (mode1, result);
1937 gcc_assert (bitnum % BITS_PER_UNIT == 0);
1938 return convert_extracted_bit_field (result, mode, tmode, unsignedp);
1941 str_rtx = narrow_bit_field_mem (str_rtx, mode1, bitsize, bitnum,
1942 &bitnum);
1943 gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (mode1));
1944 str_rtx = copy_to_reg (str_rtx);
1947 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1948 target, mode, tmode, reverse, true);
1951 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1952 from bit BITNUM of OP0.
1954 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1955 If REVERSE is true, the extraction is to be done in reverse order.
1957 If TARGET is nonzero, attempts to store the value there
1958 and return TARGET, but this is not guaranteed.
1959 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
1961 static rtx
1962 extract_fixed_bit_field (machine_mode tmode, rtx op0,
1963 unsigned HOST_WIDE_INT bitsize,
1964 unsigned HOST_WIDE_INT bitnum, rtx target,
1965 int unsignedp, bool reverse)
1967 if (MEM_P (op0))
1969 machine_mode mode
1970 = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), word_mode,
1971 MEM_VOLATILE_P (op0));
1973 if (mode == VOIDmode)
1974 /* The only way this should occur is if the field spans word
1975 boundaries. */
1976 return extract_split_bit_field (op0, bitsize, bitnum, unsignedp,
1977 reverse);
1979 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1982 return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
1983 target, unsignedp, reverse);
1986 /* Helper function for extract_fixed_bit_field, extracts
1987 the bit field always using the MODE of OP0. */
1989 static rtx
1990 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0,
1991 unsigned HOST_WIDE_INT bitsize,
1992 unsigned HOST_WIDE_INT bitnum, rtx target,
1993 int unsignedp, bool reverse)
1995 machine_mode mode = GET_MODE (op0);
1996 gcc_assert (SCALAR_INT_MODE_P (mode));
1998 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1999 for invalid input, such as extract equivalent of f5 from
2000 gcc.dg/pr48335-2.c. */
2002 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2003 /* BITNUM is the distance between our msb and that of OP0.
2004 Convert it to the distance from the lsb. */
2005 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
2007 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
2008 We have reduced the big-endian case to the little-endian case. */
2009 if (reverse)
2010 op0 = flip_storage_order (mode, op0);
2012 if (unsignedp)
2014 if (bitnum)
2016 /* If the field does not already start at the lsb,
2017 shift it so it does. */
2018 /* Maybe propagate the target for the shift. */
2019 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2020 if (tmode != mode)
2021 subtarget = 0;
2022 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
2024 /* Convert the value to the desired mode. */
2025 if (mode != tmode)
2026 op0 = convert_to_mode (tmode, op0, 1);
2028 /* Unless the msb of the field used to be the msb when we shifted,
2029 mask out the upper bits. */
2031 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
2032 return expand_binop (GET_MODE (op0), and_optab, op0,
2033 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
2034 target, 1, OPTAB_LIB_WIDEN);
2035 return op0;
2038 /* To extract a signed bit-field, first shift its msb to the msb of the word,
2039 then arithmetic-shift its lsb to the lsb of the word. */
2040 op0 = force_reg (mode, op0);
2042 /* Find the narrowest integer mode that contains the field. */
2044 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
2045 mode = GET_MODE_WIDER_MODE (mode))
2046 if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
2048 op0 = convert_to_mode (mode, op0, 0);
2049 break;
2052 if (mode != tmode)
2053 target = 0;
2055 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2057 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2058 /* Maybe propagate the target for the shift. */
2059 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2060 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2063 return expand_shift (RSHIFT_EXPR, mode, op0,
2064 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
2067 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2068 VALUE << BITPOS. */
2070 static rtx
2071 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2072 int bitpos)
2074 return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2077 /* Extract a bit field that is split across two words
2078 and return an RTX for the result.
2080 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2081 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2082 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2084 If REVERSE is true, the extraction is to be done in reverse order. */
2086 static rtx
2087 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
2088 unsigned HOST_WIDE_INT bitpos, int unsignedp,
2089 bool reverse)
2091 unsigned int unit;
2092 unsigned int bitsdone = 0;
2093 rtx result = NULL_RTX;
2094 int first = 1;
2096 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2097 much at a time. */
2098 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2099 unit = BITS_PER_WORD;
2100 else
2101 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2103 while (bitsdone < bitsize)
2105 unsigned HOST_WIDE_INT thissize;
2106 rtx part, word;
2107 unsigned HOST_WIDE_INT thispos;
2108 unsigned HOST_WIDE_INT offset;
2110 offset = (bitpos + bitsdone) / unit;
2111 thispos = (bitpos + bitsdone) % unit;
2113 /* THISSIZE must not overrun a word boundary. Otherwise,
2114 extract_fixed_bit_field will call us again, and we will mutually
2115 recurse forever. */
2116 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2117 thissize = MIN (thissize, unit - thispos);
2119 /* If OP0 is a register, then handle OFFSET here.
2121 When handling multiword bitfields, extract_bit_field may pass
2122 down a word_mode SUBREG of a larger REG for a bitfield that actually
2123 crosses a word boundary. Thus, for a SUBREG, we must find
2124 the current word starting from the base register. */
2125 if (GET_CODE (op0) == SUBREG)
2127 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
2128 word = operand_subword_force (SUBREG_REG (op0), word_offset,
2129 GET_MODE (SUBREG_REG (op0)));
2130 offset = 0;
2132 else if (REG_P (op0))
2134 word = operand_subword_force (op0, offset, GET_MODE (op0));
2135 offset = 0;
2137 else
2138 word = op0;
2140 /* Extract the parts in bit-counting order,
2141 whose meaning is determined by BYTES_PER_UNIT.
2142 OFFSET is in UNITs, and UNIT is in bits. */
2143 part = extract_fixed_bit_field (word_mode, word, thissize,
2144 offset * unit + thispos, 0, 1, reverse);
2145 bitsdone += thissize;
2147 /* Shift this part into place for the result. */
2148 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2150 if (bitsize != bitsdone)
2151 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2152 bitsize - bitsdone, 0, 1);
2154 else
2156 if (bitsdone != thissize)
2157 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2158 bitsdone - thissize, 0, 1);
2161 if (first)
2162 result = part;
2163 else
2164 /* Combine the parts with bitwise or. This works
2165 because we extracted each part as an unsigned bit field. */
2166 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2167 OPTAB_LIB_WIDEN);
2169 first = 0;
2172 /* Unsigned bit field: we are done. */
2173 if (unsignedp)
2174 return result;
2175 /* Signed bit field: sign-extend with two arithmetic shifts. */
2176 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2177 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2178 return expand_shift (RSHIFT_EXPR, word_mode, result,
2179 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2182 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2183 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2184 MODE, fill the upper bits with zeros. Fail if the layout of either
2185 mode is unknown (as for CC modes) or if the extraction would involve
2186 unprofitable mode punning. Return the value on success, otherwise
2187 return null.
2189 This is different from gen_lowpart* in these respects:
2191 - the returned value must always be considered an rvalue
2193 - when MODE is wider than SRC_MODE, the extraction involves
2194 a zero extension
2196 - when MODE is smaller than SRC_MODE, the extraction involves
2197 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2199 In other words, this routine performs a computation, whereas the
2200 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2201 operations. */
2204 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2206 machine_mode int_mode, src_int_mode;
2208 if (mode == src_mode)
2209 return src;
2211 if (CONSTANT_P (src))
2213 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2214 fails, it will happily create (subreg (symbol_ref)) or similar
2215 invalid SUBREGs. */
2216 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2217 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2218 if (ret)
2219 return ret;
2221 if (GET_MODE (src) == VOIDmode
2222 || !validate_subreg (mode, src_mode, src, byte))
2223 return NULL_RTX;
2225 src = force_reg (GET_MODE (src), src);
2226 return gen_rtx_SUBREG (mode, src, byte);
2229 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2230 return NULL_RTX;
2232 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2233 && MODES_TIEABLE_P (mode, src_mode))
2235 rtx x = gen_lowpart_common (mode, src);
2236 if (x)
2237 return x;
2240 src_int_mode = int_mode_for_mode (src_mode);
2241 int_mode = int_mode_for_mode (mode);
2242 if (src_int_mode == BLKmode || int_mode == BLKmode)
2243 return NULL_RTX;
2245 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2246 return NULL_RTX;
2247 if (!MODES_TIEABLE_P (int_mode, mode))
2248 return NULL_RTX;
2250 src = gen_lowpart (src_int_mode, src);
2251 src = convert_modes (int_mode, src_int_mode, src, true);
2252 src = gen_lowpart (mode, src);
2253 return src;
2256 /* Add INC into TARGET. */
2258 void
2259 expand_inc (rtx target, rtx inc)
2261 rtx value = expand_binop (GET_MODE (target), add_optab,
2262 target, inc,
2263 target, 0, OPTAB_LIB_WIDEN);
2264 if (value != target)
2265 emit_move_insn (target, value);
2268 /* Subtract DEC from TARGET. */
2270 void
2271 expand_dec (rtx target, rtx dec)
2273 rtx value = expand_binop (GET_MODE (target), sub_optab,
2274 target, dec,
2275 target, 0, OPTAB_LIB_WIDEN);
2276 if (value != target)
2277 emit_move_insn (target, value);
2280 /* Output a shift instruction for expression code CODE,
2281 with SHIFTED being the rtx for the value to shift,
2282 and AMOUNT the rtx for the amount to shift by.
2283 Store the result in the rtx TARGET, if that is convenient.
2284 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2285 Return the rtx for where the value is. */
2287 static rtx
2288 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2289 rtx amount, rtx target, int unsignedp)
2291 rtx op1, temp = 0;
2292 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2293 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2294 optab lshift_optab = ashl_optab;
2295 optab rshift_arith_optab = ashr_optab;
2296 optab rshift_uns_optab = lshr_optab;
2297 optab lrotate_optab = rotl_optab;
2298 optab rrotate_optab = rotr_optab;
2299 machine_mode op1_mode;
2300 machine_mode scalar_mode = mode;
2301 int attempt;
2302 bool speed = optimize_insn_for_speed_p ();
2304 if (VECTOR_MODE_P (mode))
2305 scalar_mode = GET_MODE_INNER (mode);
2306 op1 = amount;
2307 op1_mode = GET_MODE (op1);
2309 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2310 shift amount is a vector, use the vector/vector shift patterns. */
2311 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2313 lshift_optab = vashl_optab;
2314 rshift_arith_optab = vashr_optab;
2315 rshift_uns_optab = vlshr_optab;
2316 lrotate_optab = vrotl_optab;
2317 rrotate_optab = vrotr_optab;
2320 /* Previously detected shift-counts computed by NEGATE_EXPR
2321 and shifted in the other direction; but that does not work
2322 on all machines. */
2324 if (SHIFT_COUNT_TRUNCATED)
2326 if (CONST_INT_P (op1)
2327 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2328 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2329 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2330 % GET_MODE_BITSIZE (scalar_mode));
2331 else if (GET_CODE (op1) == SUBREG
2332 && subreg_lowpart_p (op1)
2333 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2334 && SCALAR_INT_MODE_P (GET_MODE (op1)))
2335 op1 = SUBREG_REG (op1);
2338 /* Canonicalize rotates by constant amount. If op1 is bitsize / 2,
2339 prefer left rotation, if op1 is from bitsize / 2 + 1 to
2340 bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2341 amount instead. */
2342 if (rotate
2343 && CONST_INT_P (op1)
2344 && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2345 GET_MODE_BITSIZE (scalar_mode) - 1))
2347 op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2348 left = !left;
2349 code = left ? LROTATE_EXPR : RROTATE_EXPR;
2352 /* Rotation of 16bit values by 8 bits is effectively equivalent to a bswaphi.
2353 Note that this is not the case for bigger values. For instance a rotation
2354 of 0x01020304 by 16 bits gives 0x03040102 which is different from
2355 0x04030201 (bswapsi). */
2356 if (rotate
2357 && CONST_INT_P (op1)
2358 && INTVAL (op1) == BITS_PER_UNIT
2359 && GET_MODE_SIZE (scalar_mode) == 2
2360 && optab_handler (bswap_optab, HImode) != CODE_FOR_nothing)
2361 return expand_unop (HImode, bswap_optab, shifted, NULL_RTX,
2362 unsignedp);
2364 if (op1 == const0_rtx)
2365 return shifted;
2367 /* Check whether its cheaper to implement a left shift by a constant
2368 bit count by a sequence of additions. */
2369 if (code == LSHIFT_EXPR
2370 && CONST_INT_P (op1)
2371 && INTVAL (op1) > 0
2372 && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2373 && INTVAL (op1) < MAX_BITS_PER_WORD
2374 && (shift_cost (speed, mode, INTVAL (op1))
2375 > INTVAL (op1) * add_cost (speed, mode))
2376 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2378 int i;
2379 for (i = 0; i < INTVAL (op1); i++)
2381 temp = force_reg (mode, shifted);
2382 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2383 unsignedp, OPTAB_LIB_WIDEN);
2385 return shifted;
2388 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2390 enum optab_methods methods;
2392 if (attempt == 0)
2393 methods = OPTAB_DIRECT;
2394 else if (attempt == 1)
2395 methods = OPTAB_WIDEN;
2396 else
2397 methods = OPTAB_LIB_WIDEN;
2399 if (rotate)
2401 /* Widening does not work for rotation. */
2402 if (methods == OPTAB_WIDEN)
2403 continue;
2404 else if (methods == OPTAB_LIB_WIDEN)
2406 /* If we have been unable to open-code this by a rotation,
2407 do it as the IOR of two shifts. I.e., to rotate A
2408 by N bits, compute
2409 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2410 where C is the bitsize of A.
2412 It is theoretically possible that the target machine might
2413 not be able to perform either shift and hence we would
2414 be making two libcalls rather than just the one for the
2415 shift (similarly if IOR could not be done). We will allow
2416 this extremely unlikely lossage to avoid complicating the
2417 code below. */
2419 rtx subtarget = target == shifted ? 0 : target;
2420 rtx new_amount, other_amount;
2421 rtx temp1;
2423 new_amount = op1;
2424 if (op1 == const0_rtx)
2425 return shifted;
2426 else if (CONST_INT_P (op1))
2427 other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode)
2428 - INTVAL (op1));
2429 else
2431 other_amount
2432 = simplify_gen_unary (NEG, GET_MODE (op1),
2433 op1, GET_MODE (op1));
2434 HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2435 other_amount
2436 = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2437 gen_int_mode (mask, GET_MODE (op1)));
2440 shifted = force_reg (mode, shifted);
2442 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2443 mode, shifted, new_amount, 0, 1);
2444 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2445 mode, shifted, other_amount,
2446 subtarget, 1);
2447 return expand_binop (mode, ior_optab, temp, temp1, target,
2448 unsignedp, methods);
2451 temp = expand_binop (mode,
2452 left ? lrotate_optab : rrotate_optab,
2453 shifted, op1, target, unsignedp, methods);
2455 else if (unsignedp)
2456 temp = expand_binop (mode,
2457 left ? lshift_optab : rshift_uns_optab,
2458 shifted, op1, target, unsignedp, methods);
2460 /* Do arithmetic shifts.
2461 Also, if we are going to widen the operand, we can just as well
2462 use an arithmetic right-shift instead of a logical one. */
2463 if (temp == 0 && ! rotate
2464 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2466 enum optab_methods methods1 = methods;
2468 /* If trying to widen a log shift to an arithmetic shift,
2469 don't accept an arithmetic shift of the same size. */
2470 if (unsignedp)
2471 methods1 = OPTAB_MUST_WIDEN;
2473 /* Arithmetic shift */
2475 temp = expand_binop (mode,
2476 left ? lshift_optab : rshift_arith_optab,
2477 shifted, op1, target, unsignedp, methods1);
2480 /* We used to try extzv here for logical right shifts, but that was
2481 only useful for one machine, the VAX, and caused poor code
2482 generation there for lshrdi3, so the code was deleted and a
2483 define_expand for lshrsi3 was added to vax.md. */
2486 gcc_assert (temp);
2487 return temp;
2490 /* Output a shift instruction for expression code CODE,
2491 with SHIFTED being the rtx for the value to shift,
2492 and AMOUNT the amount to shift by.
2493 Store the result in the rtx TARGET, if that is convenient.
2494 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2495 Return the rtx for where the value is. */
2498 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2499 int amount, rtx target, int unsignedp)
2501 return expand_shift_1 (code, mode,
2502 shifted, GEN_INT (amount), target, unsignedp);
2505 /* Output a shift instruction for expression code CODE,
2506 with SHIFTED being the rtx for the value to shift,
2507 and AMOUNT the tree for the amount to shift by.
2508 Store the result in the rtx TARGET, if that is convenient.
2509 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2510 Return the rtx for where the value is. */
2513 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2514 tree amount, rtx target, int unsignedp)
2516 return expand_shift_1 (code, mode,
2517 shifted, expand_normal (amount), target, unsignedp);
2521 /* Indicates the type of fixup needed after a constant multiplication.
2522 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2523 the result should be negated, and ADD_VARIANT means that the
2524 multiplicand should be added to the result. */
2525 enum mult_variant {basic_variant, negate_variant, add_variant};
2527 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2528 const struct mult_cost *, machine_mode mode);
2529 static bool choose_mult_variant (machine_mode, HOST_WIDE_INT,
2530 struct algorithm *, enum mult_variant *, int);
2531 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2532 const struct algorithm *, enum mult_variant);
2533 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2534 static rtx extract_high_half (machine_mode, rtx);
2535 static rtx expmed_mult_highpart (machine_mode, rtx, rtx, rtx, int, int);
2536 static rtx expmed_mult_highpart_optab (machine_mode, rtx, rtx, rtx,
2537 int, int);
2538 /* Compute and return the best algorithm for multiplying by T.
2539 The algorithm must cost less than cost_limit
2540 If retval.cost >= COST_LIMIT, no algorithm was found and all
2541 other field of the returned struct are undefined.
2542 MODE is the machine mode of the multiplication. */
2544 static void
2545 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2546 const struct mult_cost *cost_limit, machine_mode mode)
2548 int m;
2549 struct algorithm *alg_in, *best_alg;
2550 struct mult_cost best_cost;
2551 struct mult_cost new_limit;
2552 int op_cost, op_latency;
2553 unsigned HOST_WIDE_INT orig_t = t;
2554 unsigned HOST_WIDE_INT q;
2555 int maxm, hash_index;
2556 bool cache_hit = false;
2557 enum alg_code cache_alg = alg_zero;
2558 bool speed = optimize_insn_for_speed_p ();
2559 machine_mode imode;
2560 struct alg_hash_entry *entry_ptr;
2562 /* Indicate that no algorithm is yet found. If no algorithm
2563 is found, this value will be returned and indicate failure. */
2564 alg_out->cost.cost = cost_limit->cost + 1;
2565 alg_out->cost.latency = cost_limit->latency + 1;
2567 if (cost_limit->cost < 0
2568 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2569 return;
2571 /* Be prepared for vector modes. */
2572 imode = GET_MODE_INNER (mode);
2574 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2576 /* Restrict the bits of "t" to the multiplication's mode. */
2577 t &= GET_MODE_MASK (imode);
2579 /* t == 1 can be done in zero cost. */
2580 if (t == 1)
2582 alg_out->ops = 1;
2583 alg_out->cost.cost = 0;
2584 alg_out->cost.latency = 0;
2585 alg_out->op[0] = alg_m;
2586 return;
2589 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2590 fail now. */
2591 if (t == 0)
2593 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2594 return;
2595 else
2597 alg_out->ops = 1;
2598 alg_out->cost.cost = zero_cost (speed);
2599 alg_out->cost.latency = zero_cost (speed);
2600 alg_out->op[0] = alg_zero;
2601 return;
2605 /* We'll be needing a couple extra algorithm structures now. */
2607 alg_in = XALLOCA (struct algorithm);
2608 best_alg = XALLOCA (struct algorithm);
2609 best_cost = *cost_limit;
2611 /* Compute the hash index. */
2612 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2614 /* See if we already know what to do for T. */
2615 entry_ptr = alg_hash_entry_ptr (hash_index);
2616 if (entry_ptr->t == t
2617 && entry_ptr->mode == mode
2618 && entry_ptr->mode == mode
2619 && entry_ptr->speed == speed
2620 && entry_ptr->alg != alg_unknown)
2622 cache_alg = entry_ptr->alg;
2624 if (cache_alg == alg_impossible)
2626 /* The cache tells us that it's impossible to synthesize
2627 multiplication by T within entry_ptr->cost. */
2628 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2629 /* COST_LIMIT is at least as restrictive as the one
2630 recorded in the hash table, in which case we have no
2631 hope of synthesizing a multiplication. Just
2632 return. */
2633 return;
2635 /* If we get here, COST_LIMIT is less restrictive than the
2636 one recorded in the hash table, so we may be able to
2637 synthesize a multiplication. Proceed as if we didn't
2638 have the cache entry. */
2640 else
2642 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2643 /* The cached algorithm shows that this multiplication
2644 requires more cost than COST_LIMIT. Just return. This
2645 way, we don't clobber this cache entry with
2646 alg_impossible but retain useful information. */
2647 return;
2649 cache_hit = true;
2651 switch (cache_alg)
2653 case alg_shift:
2654 goto do_alg_shift;
2656 case alg_add_t_m2:
2657 case alg_sub_t_m2:
2658 goto do_alg_addsub_t_m2;
2660 case alg_add_factor:
2661 case alg_sub_factor:
2662 goto do_alg_addsub_factor;
2664 case alg_add_t2_m:
2665 goto do_alg_add_t2_m;
2667 case alg_sub_t2_m:
2668 goto do_alg_sub_t2_m;
2670 default:
2671 gcc_unreachable ();
2676 /* If we have a group of zero bits at the low-order part of T, try
2677 multiplying by the remaining bits and then doing a shift. */
2679 if ((t & 1) == 0)
2681 do_alg_shift:
2682 m = floor_log2 (t & -t); /* m = number of low zero bits */
2683 if (m < maxm)
2685 q = t >> m;
2686 /* The function expand_shift will choose between a shift and
2687 a sequence of additions, so the observed cost is given as
2688 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2689 op_cost = m * add_cost (speed, mode);
2690 if (shift_cost (speed, mode, m) < op_cost)
2691 op_cost = shift_cost (speed, mode, m);
2692 new_limit.cost = best_cost.cost - op_cost;
2693 new_limit.latency = best_cost.latency - op_cost;
2694 synth_mult (alg_in, q, &new_limit, mode);
2696 alg_in->cost.cost += op_cost;
2697 alg_in->cost.latency += op_cost;
2698 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2700 best_cost = alg_in->cost;
2701 std::swap (alg_in, best_alg);
2702 best_alg->log[best_alg->ops] = m;
2703 best_alg->op[best_alg->ops] = alg_shift;
2706 /* See if treating ORIG_T as a signed number yields a better
2707 sequence. Try this sequence only for a negative ORIG_T
2708 as it would be useless for a non-negative ORIG_T. */
2709 if ((HOST_WIDE_INT) orig_t < 0)
2711 /* Shift ORIG_T as follows because a right shift of a
2712 negative-valued signed type is implementation
2713 defined. */
2714 q = ~(~orig_t >> m);
2715 /* The function expand_shift will choose between a shift
2716 and a sequence of additions, so the observed cost is
2717 given as MIN (m * add_cost(speed, mode),
2718 shift_cost(speed, mode, m)). */
2719 op_cost = m * add_cost (speed, mode);
2720 if (shift_cost (speed, mode, m) < op_cost)
2721 op_cost = shift_cost (speed, mode, m);
2722 new_limit.cost = best_cost.cost - op_cost;
2723 new_limit.latency = best_cost.latency - op_cost;
2724 synth_mult (alg_in, q, &new_limit, mode);
2726 alg_in->cost.cost += op_cost;
2727 alg_in->cost.latency += op_cost;
2728 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2730 best_cost = alg_in->cost;
2731 std::swap (alg_in, best_alg);
2732 best_alg->log[best_alg->ops] = m;
2733 best_alg->op[best_alg->ops] = alg_shift;
2737 if (cache_hit)
2738 goto done;
2741 /* If we have an odd number, add or subtract one. */
2742 if ((t & 1) != 0)
2744 unsigned HOST_WIDE_INT w;
2746 do_alg_addsub_t_m2:
2747 for (w = 1; (w & t) != 0; w <<= 1)
2749 /* If T was -1, then W will be zero after the loop. This is another
2750 case where T ends with ...111. Handling this with (T + 1) and
2751 subtract 1 produces slightly better code and results in algorithm
2752 selection much faster than treating it like the ...0111 case
2753 below. */
2754 if (w == 0
2755 || (w > 2
2756 /* Reject the case where t is 3.
2757 Thus we prefer addition in that case. */
2758 && t != 3))
2760 /* T ends with ...111. Multiply by (T + 1) and subtract T. */
2762 op_cost = add_cost (speed, mode);
2763 new_limit.cost = best_cost.cost - op_cost;
2764 new_limit.latency = best_cost.latency - op_cost;
2765 synth_mult (alg_in, t + 1, &new_limit, mode);
2767 alg_in->cost.cost += op_cost;
2768 alg_in->cost.latency += op_cost;
2769 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2771 best_cost = alg_in->cost;
2772 std::swap (alg_in, best_alg);
2773 best_alg->log[best_alg->ops] = 0;
2774 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2777 else
2779 /* T ends with ...01 or ...011. Multiply by (T - 1) and add T. */
2781 op_cost = add_cost (speed, mode);
2782 new_limit.cost = best_cost.cost - op_cost;
2783 new_limit.latency = best_cost.latency - op_cost;
2784 synth_mult (alg_in, t - 1, &new_limit, mode);
2786 alg_in->cost.cost += op_cost;
2787 alg_in->cost.latency += op_cost;
2788 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2790 best_cost = alg_in->cost;
2791 std::swap (alg_in, best_alg);
2792 best_alg->log[best_alg->ops] = 0;
2793 best_alg->op[best_alg->ops] = alg_add_t_m2;
2797 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2798 quickly with a - a * n for some appropriate constant n. */
2799 m = exact_log2 (-orig_t + 1);
2800 if (m >= 0 && m < maxm)
2802 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2803 /* If the target has a cheap shift-and-subtract insn use
2804 that in preference to a shift insn followed by a sub insn.
2805 Assume that the shift-and-sub is "atomic" with a latency
2806 equal to it's cost, otherwise assume that on superscalar
2807 hardware the shift may be executed concurrently with the
2808 earlier steps in the algorithm. */
2809 if (shiftsub1_cost (speed, mode, m) <= op_cost)
2811 op_cost = shiftsub1_cost (speed, mode, m);
2812 op_latency = op_cost;
2814 else
2815 op_latency = add_cost (speed, mode);
2817 new_limit.cost = best_cost.cost - op_cost;
2818 new_limit.latency = best_cost.latency - op_latency;
2819 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2820 &new_limit, mode);
2822 alg_in->cost.cost += op_cost;
2823 alg_in->cost.latency += op_latency;
2824 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2826 best_cost = alg_in->cost;
2827 std::swap (alg_in, best_alg);
2828 best_alg->log[best_alg->ops] = m;
2829 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2833 if (cache_hit)
2834 goto done;
2837 /* Look for factors of t of the form
2838 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2839 If we find such a factor, we can multiply by t using an algorithm that
2840 multiplies by q, shift the result by m and add/subtract it to itself.
2842 We search for large factors first and loop down, even if large factors
2843 are less probable than small; if we find a large factor we will find a
2844 good sequence quickly, and therefore be able to prune (by decreasing
2845 COST_LIMIT) the search. */
2847 do_alg_addsub_factor:
2848 for (m = floor_log2 (t - 1); m >= 2; m--)
2850 unsigned HOST_WIDE_INT d;
2852 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2853 if (t % d == 0 && t > d && m < maxm
2854 && (!cache_hit || cache_alg == alg_add_factor))
2856 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2857 if (shiftadd_cost (speed, mode, m) <= op_cost)
2858 op_cost = shiftadd_cost (speed, mode, m);
2860 op_latency = op_cost;
2863 new_limit.cost = best_cost.cost - op_cost;
2864 new_limit.latency = best_cost.latency - op_latency;
2865 synth_mult (alg_in, t / d, &new_limit, mode);
2867 alg_in->cost.cost += op_cost;
2868 alg_in->cost.latency += op_latency;
2869 if (alg_in->cost.latency < op_cost)
2870 alg_in->cost.latency = op_cost;
2871 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2873 best_cost = alg_in->cost;
2874 std::swap (alg_in, best_alg);
2875 best_alg->log[best_alg->ops] = m;
2876 best_alg->op[best_alg->ops] = alg_add_factor;
2878 /* Other factors will have been taken care of in the recursion. */
2879 break;
2882 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2883 if (t % d == 0 && t > d && m < maxm
2884 && (!cache_hit || cache_alg == alg_sub_factor))
2886 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2887 if (shiftsub0_cost (speed, mode, m) <= op_cost)
2888 op_cost = shiftsub0_cost (speed, mode, m);
2890 op_latency = op_cost;
2892 new_limit.cost = best_cost.cost - op_cost;
2893 new_limit.latency = best_cost.latency - op_latency;
2894 synth_mult (alg_in, t / d, &new_limit, mode);
2896 alg_in->cost.cost += op_cost;
2897 alg_in->cost.latency += op_latency;
2898 if (alg_in->cost.latency < op_cost)
2899 alg_in->cost.latency = op_cost;
2900 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2902 best_cost = alg_in->cost;
2903 std::swap (alg_in, best_alg);
2904 best_alg->log[best_alg->ops] = m;
2905 best_alg->op[best_alg->ops] = alg_sub_factor;
2907 break;
2910 if (cache_hit)
2911 goto done;
2913 /* Try shift-and-add (load effective address) instructions,
2914 i.e. do a*3, a*5, a*9. */
2915 if ((t & 1) != 0)
2917 do_alg_add_t2_m:
2918 q = t - 1;
2919 q = q & -q;
2920 m = exact_log2 (q);
2921 if (m >= 0 && m < maxm)
2923 op_cost = shiftadd_cost (speed, mode, m);
2924 new_limit.cost = best_cost.cost - op_cost;
2925 new_limit.latency = best_cost.latency - op_cost;
2926 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2928 alg_in->cost.cost += op_cost;
2929 alg_in->cost.latency += op_cost;
2930 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2932 best_cost = alg_in->cost;
2933 std::swap (alg_in, best_alg);
2934 best_alg->log[best_alg->ops] = m;
2935 best_alg->op[best_alg->ops] = alg_add_t2_m;
2938 if (cache_hit)
2939 goto done;
2941 do_alg_sub_t2_m:
2942 q = t + 1;
2943 q = q & -q;
2944 m = exact_log2 (q);
2945 if (m >= 0 && m < maxm)
2947 op_cost = shiftsub0_cost (speed, mode, m);
2948 new_limit.cost = best_cost.cost - op_cost;
2949 new_limit.latency = best_cost.latency - op_cost;
2950 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2952 alg_in->cost.cost += op_cost;
2953 alg_in->cost.latency += op_cost;
2954 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2956 best_cost = alg_in->cost;
2957 std::swap (alg_in, best_alg);
2958 best_alg->log[best_alg->ops] = m;
2959 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2962 if (cache_hit)
2963 goto done;
2966 done:
2967 /* If best_cost has not decreased, we have not found any algorithm. */
2968 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2970 /* We failed to find an algorithm. Record alg_impossible for
2971 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2972 we are asked to find an algorithm for T within the same or
2973 lower COST_LIMIT, we can immediately return to the
2974 caller. */
2975 entry_ptr->t = t;
2976 entry_ptr->mode = mode;
2977 entry_ptr->speed = speed;
2978 entry_ptr->alg = alg_impossible;
2979 entry_ptr->cost = *cost_limit;
2980 return;
2983 /* Cache the result. */
2984 if (!cache_hit)
2986 entry_ptr->t = t;
2987 entry_ptr->mode = mode;
2988 entry_ptr->speed = speed;
2989 entry_ptr->alg = best_alg->op[best_alg->ops];
2990 entry_ptr->cost.cost = best_cost.cost;
2991 entry_ptr->cost.latency = best_cost.latency;
2994 /* If we are getting a too long sequence for `struct algorithm'
2995 to record, make this search fail. */
2996 if (best_alg->ops == MAX_BITS_PER_WORD)
2997 return;
2999 /* Copy the algorithm from temporary space to the space at alg_out.
3000 We avoid using structure assignment because the majority of
3001 best_alg is normally undefined, and this is a critical function. */
3002 alg_out->ops = best_alg->ops + 1;
3003 alg_out->cost = best_cost;
3004 memcpy (alg_out->op, best_alg->op,
3005 alg_out->ops * sizeof *alg_out->op);
3006 memcpy (alg_out->log, best_alg->log,
3007 alg_out->ops * sizeof *alg_out->log);
3010 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
3011 Try three variations:
3013 - a shift/add sequence based on VAL itself
3014 - a shift/add sequence based on -VAL, followed by a negation
3015 - a shift/add sequence based on VAL - 1, followed by an addition.
3017 Return true if the cheapest of these cost less than MULT_COST,
3018 describing the algorithm in *ALG and final fixup in *VARIANT. */
3020 static bool
3021 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
3022 struct algorithm *alg, enum mult_variant *variant,
3023 int mult_cost)
3025 struct algorithm alg2;
3026 struct mult_cost limit;
3027 int op_cost;
3028 bool speed = optimize_insn_for_speed_p ();
3030 /* Fail quickly for impossible bounds. */
3031 if (mult_cost < 0)
3032 return false;
3034 /* Ensure that mult_cost provides a reasonable upper bound.
3035 Any constant multiplication can be performed with less
3036 than 2 * bits additions. */
3037 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
3038 if (mult_cost > op_cost)
3039 mult_cost = op_cost;
3041 *variant = basic_variant;
3042 limit.cost = mult_cost;
3043 limit.latency = mult_cost;
3044 synth_mult (alg, val, &limit, mode);
3046 /* This works only if the inverted value actually fits in an
3047 `unsigned int' */
3048 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
3050 op_cost = neg_cost (speed, mode);
3051 if (MULT_COST_LESS (&alg->cost, mult_cost))
3053 limit.cost = alg->cost.cost - op_cost;
3054 limit.latency = alg->cost.latency - op_cost;
3056 else
3058 limit.cost = mult_cost - op_cost;
3059 limit.latency = mult_cost - op_cost;
3062 synth_mult (&alg2, -val, &limit, mode);
3063 alg2.cost.cost += op_cost;
3064 alg2.cost.latency += op_cost;
3065 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3066 *alg = alg2, *variant = negate_variant;
3069 /* This proves very useful for division-by-constant. */
3070 op_cost = add_cost (speed, mode);
3071 if (MULT_COST_LESS (&alg->cost, mult_cost))
3073 limit.cost = alg->cost.cost - op_cost;
3074 limit.latency = alg->cost.latency - op_cost;
3076 else
3078 limit.cost = mult_cost - op_cost;
3079 limit.latency = mult_cost - op_cost;
3082 synth_mult (&alg2, val - 1, &limit, mode);
3083 alg2.cost.cost += op_cost;
3084 alg2.cost.latency += op_cost;
3085 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3086 *alg = alg2, *variant = add_variant;
3088 return MULT_COST_LESS (&alg->cost, mult_cost);
3091 /* A subroutine of expand_mult, used for constant multiplications.
3092 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3093 convenient. Use the shift/add sequence described by ALG and apply
3094 the final fixup specified by VARIANT. */
3096 static rtx
3097 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
3098 rtx target, const struct algorithm *alg,
3099 enum mult_variant variant)
3101 HOST_WIDE_INT val_so_far;
3102 rtx_insn *insn;
3103 rtx accum, tem;
3104 int opno;
3105 machine_mode nmode;
3107 /* Avoid referencing memory over and over and invalid sharing
3108 on SUBREGs. */
3109 op0 = force_reg (mode, op0);
3111 /* ACCUM starts out either as OP0 or as a zero, depending on
3112 the first operation. */
3114 if (alg->op[0] == alg_zero)
3116 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3117 val_so_far = 0;
3119 else if (alg->op[0] == alg_m)
3121 accum = copy_to_mode_reg (mode, op0);
3122 val_so_far = 1;
3124 else
3125 gcc_unreachable ();
3127 for (opno = 1; opno < alg->ops; opno++)
3129 int log = alg->log[opno];
3130 rtx shift_subtarget = optimize ? 0 : accum;
3131 rtx add_target
3132 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3133 && !optimize)
3134 ? target : 0;
3135 rtx accum_target = optimize ? 0 : accum;
3136 rtx accum_inner;
3138 switch (alg->op[opno])
3140 case alg_shift:
3141 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3142 /* REG_EQUAL note will be attached to the following insn. */
3143 emit_move_insn (accum, tem);
3144 val_so_far <<= log;
3145 break;
3147 case alg_add_t_m2:
3148 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3149 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3150 add_target ? add_target : accum_target);
3151 val_so_far += (HOST_WIDE_INT) 1 << log;
3152 break;
3154 case alg_sub_t_m2:
3155 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3156 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3157 add_target ? add_target : accum_target);
3158 val_so_far -= (HOST_WIDE_INT) 1 << log;
3159 break;
3161 case alg_add_t2_m:
3162 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3163 log, shift_subtarget, 0);
3164 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3165 add_target ? add_target : accum_target);
3166 val_so_far = (val_so_far << log) + 1;
3167 break;
3169 case alg_sub_t2_m:
3170 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3171 log, shift_subtarget, 0);
3172 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3173 add_target ? add_target : accum_target);
3174 val_so_far = (val_so_far << log) - 1;
3175 break;
3177 case alg_add_factor:
3178 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3179 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3180 add_target ? add_target : accum_target);
3181 val_so_far += val_so_far << log;
3182 break;
3184 case alg_sub_factor:
3185 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3186 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3187 (add_target
3188 ? add_target : (optimize ? 0 : tem)));
3189 val_so_far = (val_so_far << log) - val_so_far;
3190 break;
3192 default:
3193 gcc_unreachable ();
3196 if (SCALAR_INT_MODE_P (mode))
3198 /* Write a REG_EQUAL note on the last insn so that we can cse
3199 multiplication sequences. Note that if ACCUM is a SUBREG,
3200 we've set the inner register and must properly indicate that. */
3201 tem = op0, nmode = mode;
3202 accum_inner = accum;
3203 if (GET_CODE (accum) == SUBREG)
3205 accum_inner = SUBREG_REG (accum);
3206 nmode = GET_MODE (accum_inner);
3207 tem = gen_lowpart (nmode, op0);
3210 insn = get_last_insn ();
3211 set_dst_reg_note (insn, REG_EQUAL,
3212 gen_rtx_MULT (nmode, tem,
3213 gen_int_mode (val_so_far, nmode)),
3214 accum_inner);
3218 if (variant == negate_variant)
3220 val_so_far = -val_so_far;
3221 accum = expand_unop (mode, neg_optab, accum, target, 0);
3223 else if (variant == add_variant)
3225 val_so_far = val_so_far + 1;
3226 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3229 /* Compare only the bits of val and val_so_far that are significant
3230 in the result mode, to avoid sign-/zero-extension confusion. */
3231 nmode = GET_MODE_INNER (mode);
3232 val &= GET_MODE_MASK (nmode);
3233 val_so_far &= GET_MODE_MASK (nmode);
3234 gcc_assert (val == val_so_far);
3236 return accum;
3239 /* Perform a multiplication and return an rtx for the result.
3240 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3241 TARGET is a suggestion for where to store the result (an rtx).
3243 We check specially for a constant integer as OP1.
3244 If you want this check for OP0 as well, then before calling
3245 you should swap the two operands if OP0 would be constant. */
3248 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3249 int unsignedp)
3251 enum mult_variant variant;
3252 struct algorithm algorithm;
3253 rtx scalar_op1;
3254 int max_cost;
3255 bool speed = optimize_insn_for_speed_p ();
3256 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3258 if (CONSTANT_P (op0))
3259 std::swap (op0, op1);
3261 /* For vectors, there are several simplifications that can be made if
3262 all elements of the vector constant are identical. */
3263 scalar_op1 = unwrap_const_vec_duplicate (op1);
3265 if (INTEGRAL_MODE_P (mode))
3267 rtx fake_reg;
3268 HOST_WIDE_INT coeff;
3269 bool is_neg;
3270 int mode_bitsize;
3272 if (op1 == CONST0_RTX (mode))
3273 return op1;
3274 if (op1 == CONST1_RTX (mode))
3275 return op0;
3276 if (op1 == CONSTM1_RTX (mode))
3277 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3278 op0, target, 0);
3280 if (do_trapv)
3281 goto skip_synth;
3283 /* If mode is integer vector mode, check if the backend supports
3284 vector lshift (by scalar or vector) at all. If not, we can't use
3285 synthetized multiply. */
3286 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3287 && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3288 && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3289 goto skip_synth;
3291 /* These are the operations that are potentially turned into
3292 a sequence of shifts and additions. */
3293 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3295 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3296 less than or equal in size to `unsigned int' this doesn't matter.
3297 If the mode is larger than `unsigned int', then synth_mult works
3298 only if the constant value exactly fits in an `unsigned int' without
3299 any truncation. This means that multiplying by negative values does
3300 not work; results are off by 2^32 on a 32 bit machine. */
3301 if (CONST_INT_P (scalar_op1))
3303 coeff = INTVAL (scalar_op1);
3304 is_neg = coeff < 0;
3306 #if TARGET_SUPPORTS_WIDE_INT
3307 else if (CONST_WIDE_INT_P (scalar_op1))
3308 #else
3309 else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3310 #endif
3312 int shift = wi::exact_log2 (std::make_pair (scalar_op1, mode));
3313 /* Perfect power of 2 (other than 1, which is handled above). */
3314 if (shift > 0)
3315 return expand_shift (LSHIFT_EXPR, mode, op0,
3316 shift, target, unsignedp);
3317 else
3318 goto skip_synth;
3320 else
3321 goto skip_synth;
3323 /* We used to test optimize here, on the grounds that it's better to
3324 produce a smaller program when -O is not used. But this causes
3325 such a terrible slowdown sometimes that it seems better to always
3326 use synth_mult. */
3328 /* Special case powers of two. */
3329 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3330 && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3331 return expand_shift (LSHIFT_EXPR, mode, op0,
3332 floor_log2 (coeff), target, unsignedp);
3334 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3336 /* Attempt to handle multiplication of DImode values by negative
3337 coefficients, by performing the multiplication by a positive
3338 multiplier and then inverting the result. */
3339 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3341 /* Its safe to use -coeff even for INT_MIN, as the
3342 result is interpreted as an unsigned coefficient.
3343 Exclude cost of op0 from max_cost to match the cost
3344 calculation of the synth_mult. */
3345 coeff = -(unsigned HOST_WIDE_INT) coeff;
3346 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3347 mode, speed)
3348 - neg_cost (speed, mode));
3349 if (max_cost <= 0)
3350 goto skip_synth;
3352 /* Special case powers of two. */
3353 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3355 rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3356 floor_log2 (coeff), target, unsignedp);
3357 return expand_unop (mode, neg_optab, temp, target, 0);
3360 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3361 max_cost))
3363 rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3364 &algorithm, variant);
3365 return expand_unop (mode, neg_optab, temp, target, 0);
3367 goto skip_synth;
3370 /* Exclude cost of op0 from max_cost to match the cost
3371 calculation of the synth_mult. */
3372 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3373 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3374 return expand_mult_const (mode, op0, coeff, target,
3375 &algorithm, variant);
3377 skip_synth:
3379 /* Expand x*2.0 as x+x. */
3380 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3381 && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
3383 op0 = force_reg (GET_MODE (op0), op0);
3384 return expand_binop (mode, add_optab, op0, op0,
3385 target, unsignedp, OPTAB_LIB_WIDEN);
3388 /* This used to use umul_optab if unsigned, but for non-widening multiply
3389 there is no difference between signed and unsigned. */
3390 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3391 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3392 gcc_assert (op0);
3393 return op0;
3396 /* Return a cost estimate for multiplying a register by the given
3397 COEFFicient in the given MODE and SPEED. */
3400 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3402 int max_cost;
3403 struct algorithm algorithm;
3404 enum mult_variant variant;
3406 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3407 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3408 mode, speed);
3409 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3410 return algorithm.cost.cost;
3411 else
3412 return max_cost;
3415 /* Perform a widening multiplication and return an rtx for the result.
3416 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3417 TARGET is a suggestion for where to store the result (an rtx).
3418 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3419 or smul_widen_optab.
3421 We check specially for a constant integer as OP1, comparing the
3422 cost of a widening multiply against the cost of a sequence of shifts
3423 and adds. */
3426 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3427 int unsignedp, optab this_optab)
3429 bool speed = optimize_insn_for_speed_p ();
3430 rtx cop1;
3432 if (CONST_INT_P (op1)
3433 && GET_MODE (op0) != VOIDmode
3434 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3435 this_optab == umul_widen_optab))
3436 && CONST_INT_P (cop1)
3437 && (INTVAL (cop1) >= 0
3438 || HWI_COMPUTABLE_MODE_P (mode)))
3440 HOST_WIDE_INT coeff = INTVAL (cop1);
3441 int max_cost;
3442 enum mult_variant variant;
3443 struct algorithm algorithm;
3445 if (coeff == 0)
3446 return CONST0_RTX (mode);
3448 /* Special case powers of two. */
3449 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3451 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3452 return expand_shift (LSHIFT_EXPR, mode, op0,
3453 floor_log2 (coeff), target, unsignedp);
3456 /* Exclude cost of op0 from max_cost to match the cost
3457 calculation of the synth_mult. */
3458 max_cost = mul_widen_cost (speed, mode);
3459 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3460 max_cost))
3462 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3463 return expand_mult_const (mode, op0, coeff, target,
3464 &algorithm, variant);
3467 return expand_binop (mode, this_optab, op0, op1, target,
3468 unsignedp, OPTAB_LIB_WIDEN);
3471 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3472 replace division by D, and put the least significant N bits of the result
3473 in *MULTIPLIER_PTR and return the most significant bit.
3475 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3476 needed precision is in PRECISION (should be <= N).
3478 PRECISION should be as small as possible so this function can choose
3479 multiplier more freely.
3481 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3482 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3484 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3485 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
3487 unsigned HOST_WIDE_INT
3488 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3489 unsigned HOST_WIDE_INT *multiplier_ptr,
3490 int *post_shift_ptr, int *lgup_ptr)
3492 int lgup, post_shift;
3493 int pow, pow2;
3495 /* lgup = ceil(log2(divisor)); */
3496 lgup = ceil_log2 (d);
3498 gcc_assert (lgup <= n);
3500 pow = n + lgup;
3501 pow2 = n + lgup - precision;
3503 /* mlow = 2^(N + lgup)/d */
3504 wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3505 wide_int mlow = wi::udiv_trunc (val, d);
3507 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3508 val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3509 wide_int mhigh = wi::udiv_trunc (val, d);
3511 /* If precision == N, then mlow, mhigh exceed 2^N
3512 (but they do not exceed 2^(N+1)). */
3514 /* Reduce to lowest terms. */
3515 for (post_shift = lgup; post_shift > 0; post_shift--)
3517 unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3518 HOST_BITS_PER_WIDE_INT);
3519 unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3520 HOST_BITS_PER_WIDE_INT);
3521 if (ml_lo >= mh_lo)
3522 break;
3524 mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3525 mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3528 *post_shift_ptr = post_shift;
3529 *lgup_ptr = lgup;
3530 if (n < HOST_BITS_PER_WIDE_INT)
3532 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3533 *multiplier_ptr = mhigh.to_uhwi () & mask;
3534 return mhigh.to_uhwi () >= mask;
3536 else
3538 *multiplier_ptr = mhigh.to_uhwi ();
3539 return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3543 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3544 congruent to 1 (mod 2**N). */
3546 static unsigned HOST_WIDE_INT
3547 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3549 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3551 /* The algorithm notes that the choice y = x satisfies
3552 x*y == 1 mod 2^3, since x is assumed odd.
3553 Each iteration doubles the number of bits of significance in y. */
3555 unsigned HOST_WIDE_INT mask;
3556 unsigned HOST_WIDE_INT y = x;
3557 int nbit = 3;
3559 mask = (n == HOST_BITS_PER_WIDE_INT
3560 ? ~(unsigned HOST_WIDE_INT) 0
3561 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3563 while (nbit < n)
3565 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3566 nbit *= 2;
3568 return y;
3571 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3572 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3573 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3574 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3575 become signed.
3577 The result is put in TARGET if that is convenient.
3579 MODE is the mode of operation. */
3582 expand_mult_highpart_adjust (machine_mode mode, rtx adj_operand, rtx op0,
3583 rtx op1, rtx target, int unsignedp)
3585 rtx tem;
3586 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3588 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3589 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3590 tem = expand_and (mode, tem, op1, NULL_RTX);
3591 adj_operand
3592 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3593 adj_operand);
3595 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3596 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3597 tem = expand_and (mode, tem, op0, NULL_RTX);
3598 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3599 target);
3601 return target;
3604 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3606 static rtx
3607 extract_high_half (machine_mode mode, rtx op)
3609 machine_mode wider_mode;
3611 if (mode == word_mode)
3612 return gen_highpart (mode, op);
3614 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3616 wider_mode = GET_MODE_WIDER_MODE (mode);
3617 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3618 GET_MODE_BITSIZE (mode), 0, 1);
3619 return convert_modes (mode, wider_mode, op, 0);
3622 /* Like expmed_mult_highpart, but only consider using a multiplication
3623 optab. OP1 is an rtx for the constant operand. */
3625 static rtx
3626 expmed_mult_highpart_optab (machine_mode mode, rtx op0, rtx op1,
3627 rtx target, int unsignedp, int max_cost)
3629 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3630 machine_mode wider_mode;
3631 optab moptab;
3632 rtx tem;
3633 int size;
3634 bool speed = optimize_insn_for_speed_p ();
3636 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3638 wider_mode = GET_MODE_WIDER_MODE (mode);
3639 size = GET_MODE_BITSIZE (mode);
3641 /* Firstly, try using a multiplication insn that only generates the needed
3642 high part of the product, and in the sign flavor of unsignedp. */
3643 if (mul_highpart_cost (speed, mode) < max_cost)
3645 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3646 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3647 unsignedp, OPTAB_DIRECT);
3648 if (tem)
3649 return tem;
3652 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3653 Need to adjust the result after the multiplication. */
3654 if (size - 1 < BITS_PER_WORD
3655 && (mul_highpart_cost (speed, mode)
3656 + 2 * shift_cost (speed, mode, size-1)
3657 + 4 * add_cost (speed, mode) < max_cost))
3659 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3660 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3661 unsignedp, OPTAB_DIRECT);
3662 if (tem)
3663 /* We used the wrong signedness. Adjust the result. */
3664 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3665 tem, unsignedp);
3668 /* Try widening multiplication. */
3669 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3670 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3671 && mul_widen_cost (speed, wider_mode) < max_cost)
3673 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3674 unsignedp, OPTAB_WIDEN);
3675 if (tem)
3676 return extract_high_half (mode, tem);
3679 /* Try widening the mode and perform a non-widening multiplication. */
3680 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3681 && size - 1 < BITS_PER_WORD
3682 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3683 < max_cost))
3685 rtx_insn *insns;
3686 rtx wop0, wop1;
3688 /* We need to widen the operands, for example to ensure the
3689 constant multiplier is correctly sign or zero extended.
3690 Use a sequence to clean-up any instructions emitted by
3691 the conversions if things don't work out. */
3692 start_sequence ();
3693 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3694 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3695 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3696 unsignedp, OPTAB_WIDEN);
3697 insns = get_insns ();
3698 end_sequence ();
3700 if (tem)
3702 emit_insn (insns);
3703 return extract_high_half (mode, tem);
3707 /* Try widening multiplication of opposite signedness, and adjust. */
3708 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3709 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3710 && size - 1 < BITS_PER_WORD
3711 && (mul_widen_cost (speed, wider_mode)
3712 + 2 * shift_cost (speed, mode, size-1)
3713 + 4 * add_cost (speed, mode) < max_cost))
3715 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3716 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3717 if (tem != 0)
3719 tem = extract_high_half (mode, tem);
3720 /* We used the wrong signedness. Adjust the result. */
3721 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3722 target, unsignedp);
3726 return 0;
3729 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3730 putting the high half of the result in TARGET if that is convenient,
3731 and return where the result is. If the operation can not be performed,
3732 0 is returned.
3734 MODE is the mode of operation and result.
3736 UNSIGNEDP nonzero means unsigned multiply.
3738 MAX_COST is the total allowed cost for the expanded RTL. */
3740 static rtx
3741 expmed_mult_highpart (machine_mode mode, rtx op0, rtx op1,
3742 rtx target, int unsignedp, int max_cost)
3744 machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3745 unsigned HOST_WIDE_INT cnst1;
3746 int extra_cost;
3747 bool sign_adjust = false;
3748 enum mult_variant variant;
3749 struct algorithm alg;
3750 rtx tem;
3751 bool speed = optimize_insn_for_speed_p ();
3753 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3754 /* We can't support modes wider than HOST_BITS_PER_INT. */
3755 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3757 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3759 /* We can't optimize modes wider than BITS_PER_WORD.
3760 ??? We might be able to perform double-word arithmetic if
3761 mode == word_mode, however all the cost calculations in
3762 synth_mult etc. assume single-word operations. */
3763 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3764 return expmed_mult_highpart_optab (mode, op0, op1, target,
3765 unsignedp, max_cost);
3767 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3769 /* Check whether we try to multiply by a negative constant. */
3770 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3772 sign_adjust = true;
3773 extra_cost += add_cost (speed, mode);
3776 /* See whether shift/add multiplication is cheap enough. */
3777 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3778 max_cost - extra_cost))
3780 /* See whether the specialized multiplication optabs are
3781 cheaper than the shift/add version. */
3782 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3783 alg.cost.cost + extra_cost);
3784 if (tem)
3785 return tem;
3787 tem = convert_to_mode (wider_mode, op0, unsignedp);
3788 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3789 tem = extract_high_half (mode, tem);
3791 /* Adjust result for signedness. */
3792 if (sign_adjust)
3793 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3795 return tem;
3797 return expmed_mult_highpart_optab (mode, op0, op1, target,
3798 unsignedp, max_cost);
3802 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3804 static rtx
3805 expand_smod_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3807 rtx result, temp, shift;
3808 rtx_code_label *label;
3809 int logd;
3810 int prec = GET_MODE_PRECISION (mode);
3812 logd = floor_log2 (d);
3813 result = gen_reg_rtx (mode);
3815 /* Avoid conditional branches when they're expensive. */
3816 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3817 && optimize_insn_for_speed_p ())
3819 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3820 mode, 0, -1);
3821 if (signmask)
3823 HOST_WIDE_INT masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3824 signmask = force_reg (mode, signmask);
3825 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3827 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3828 which instruction sequence to use. If logical right shifts
3829 are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3830 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
3832 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3833 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3834 || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
3835 > COSTS_N_INSNS (2)))
3837 temp = expand_binop (mode, xor_optab, op0, signmask,
3838 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3839 temp = expand_binop (mode, sub_optab, temp, signmask,
3840 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3841 temp = expand_binop (mode, and_optab, temp,
3842 gen_int_mode (masklow, mode),
3843 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3844 temp = expand_binop (mode, xor_optab, temp, signmask,
3845 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3846 temp = expand_binop (mode, sub_optab, temp, signmask,
3847 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3849 else
3851 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3852 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3853 signmask = force_reg (mode, signmask);
3855 temp = expand_binop (mode, add_optab, op0, signmask,
3856 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3857 temp = expand_binop (mode, and_optab, temp,
3858 gen_int_mode (masklow, mode),
3859 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3860 temp = expand_binop (mode, sub_optab, temp, signmask,
3861 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3863 return temp;
3867 /* Mask contains the mode's signbit and the significant bits of the
3868 modulus. By including the signbit in the operation, many targets
3869 can avoid an explicit compare operation in the following comparison
3870 against zero. */
3871 wide_int mask = wi::mask (logd, false, prec);
3872 mask = wi::set_bit (mask, prec - 1);
3874 temp = expand_binop (mode, and_optab, op0,
3875 immed_wide_int_const (mask, mode),
3876 result, 1, OPTAB_LIB_WIDEN);
3877 if (temp != result)
3878 emit_move_insn (result, temp);
3880 label = gen_label_rtx ();
3881 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3883 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3884 0, OPTAB_LIB_WIDEN);
3886 mask = wi::mask (logd, true, prec);
3887 temp = expand_binop (mode, ior_optab, temp,
3888 immed_wide_int_const (mask, mode),
3889 result, 1, OPTAB_LIB_WIDEN);
3890 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3891 0, OPTAB_LIB_WIDEN);
3892 if (temp != result)
3893 emit_move_insn (result, temp);
3894 emit_label (label);
3895 return result;
3898 /* Expand signed division of OP0 by a power of two D in mode MODE.
3899 This routine is only called for positive values of D. */
3901 static rtx
3902 expand_sdiv_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3904 rtx temp;
3905 rtx_code_label *label;
3906 int logd;
3908 logd = floor_log2 (d);
3910 if (d == 2
3911 && BRANCH_COST (optimize_insn_for_speed_p (),
3912 false) >= 1)
3914 temp = gen_reg_rtx (mode);
3915 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3916 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3917 0, OPTAB_LIB_WIDEN);
3918 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3921 if (HAVE_conditional_move
3922 && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
3924 rtx temp2;
3926 start_sequence ();
3927 temp2 = copy_to_mode_reg (mode, op0);
3928 temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
3929 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3930 temp = force_reg (mode, temp);
3932 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3933 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3934 mode, temp, temp2, mode, 0);
3935 if (temp2)
3937 rtx_insn *seq = get_insns ();
3938 end_sequence ();
3939 emit_insn (seq);
3940 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3942 end_sequence ();
3945 if (BRANCH_COST (optimize_insn_for_speed_p (),
3946 false) >= 2)
3948 int ushift = GET_MODE_BITSIZE (mode) - logd;
3950 temp = gen_reg_rtx (mode);
3951 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3952 if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
3953 || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3954 > COSTS_N_INSNS (1))
3955 temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
3956 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3957 else
3958 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3959 ushift, NULL_RTX, 1);
3960 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3961 0, OPTAB_LIB_WIDEN);
3962 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3965 label = gen_label_rtx ();
3966 temp = copy_to_mode_reg (mode, op0);
3967 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3968 expand_inc (temp, gen_int_mode (d - 1, mode));
3969 emit_label (label);
3970 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3973 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3974 if that is convenient, and returning where the result is.
3975 You may request either the quotient or the remainder as the result;
3976 specify REM_FLAG nonzero to get the remainder.
3978 CODE is the expression code for which kind of division this is;
3979 it controls how rounding is done. MODE is the machine mode to use.
3980 UNSIGNEDP nonzero means do unsigned division. */
3982 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3983 and then correct it by or'ing in missing high bits
3984 if result of ANDI is nonzero.
3985 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3986 This could optimize to a bfexts instruction.
3987 But C doesn't use these operations, so their optimizations are
3988 left for later. */
3989 /* ??? For modulo, we don't actually need the highpart of the first product,
3990 the low part will do nicely. And for small divisors, the second multiply
3991 can also be a low-part only multiply or even be completely left out.
3992 E.g. to calculate the remainder of a division by 3 with a 32 bit
3993 multiply, multiply with 0x55555556 and extract the upper two bits;
3994 the result is exact for inputs up to 0x1fffffff.
3995 The input range can be reduced by using cross-sum rules.
3996 For odd divisors >= 3, the following table gives right shift counts
3997 so that if a number is shifted by an integer multiple of the given
3998 amount, the remainder stays the same:
3999 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
4000 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
4001 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
4002 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
4003 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
4005 Cross-sum rules for even numbers can be derived by leaving as many bits
4006 to the right alone as the divisor has zeros to the right.
4007 E.g. if x is an unsigned 32 bit number:
4008 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
4012 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
4013 rtx op0, rtx op1, rtx target, int unsignedp)
4015 machine_mode compute_mode;
4016 rtx tquotient;
4017 rtx quotient = 0, remainder = 0;
4018 rtx_insn *last;
4019 int size;
4020 rtx_insn *insn;
4021 optab optab1, optab2;
4022 int op1_is_constant, op1_is_pow2 = 0;
4023 int max_cost, extra_cost;
4024 static HOST_WIDE_INT last_div_const = 0;
4025 bool speed = optimize_insn_for_speed_p ();
4027 op1_is_constant = CONST_INT_P (op1);
4028 if (op1_is_constant)
4030 unsigned HOST_WIDE_INT ext_op1 = UINTVAL (op1);
4031 if (unsignedp)
4032 ext_op1 &= GET_MODE_MASK (mode);
4033 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
4034 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
4038 This is the structure of expand_divmod:
4040 First comes code to fix up the operands so we can perform the operations
4041 correctly and efficiently.
4043 Second comes a switch statement with code specific for each rounding mode.
4044 For some special operands this code emits all RTL for the desired
4045 operation, for other cases, it generates only a quotient and stores it in
4046 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
4047 to indicate that it has not done anything.
4049 Last comes code that finishes the operation. If QUOTIENT is set and
4050 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
4051 QUOTIENT is not set, it is computed using trunc rounding.
4053 We try to generate special code for division and remainder when OP1 is a
4054 constant. If |OP1| = 2**n we can use shifts and some other fast
4055 operations. For other values of OP1, we compute a carefully selected
4056 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4057 by m.
4059 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4060 half of the product. Different strategies for generating the product are
4061 implemented in expmed_mult_highpart.
4063 If what we actually want is the remainder, we generate that by another
4064 by-constant multiplication and a subtraction. */
4066 /* We shouldn't be called with OP1 == const1_rtx, but some of the
4067 code below will malfunction if we are, so check here and handle
4068 the special case if so. */
4069 if (op1 == const1_rtx)
4070 return rem_flag ? const0_rtx : op0;
4072 /* When dividing by -1, we could get an overflow.
4073 negv_optab can handle overflows. */
4074 if (! unsignedp && op1 == constm1_rtx)
4076 if (rem_flag)
4077 return const0_rtx;
4078 return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4079 ? negv_optab : neg_optab, op0, target, 0);
4082 if (target
4083 /* Don't use the function value register as a target
4084 since we have to read it as well as write it,
4085 and function-inlining gets confused by this. */
4086 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4087 /* Don't clobber an operand while doing a multi-step calculation. */
4088 || ((rem_flag || op1_is_constant)
4089 && (reg_mentioned_p (target, op0)
4090 || (MEM_P (op0) && MEM_P (target))))
4091 || reg_mentioned_p (target, op1)
4092 || (MEM_P (op1) && MEM_P (target))))
4093 target = 0;
4095 /* Get the mode in which to perform this computation. Normally it will
4096 be MODE, but sometimes we can't do the desired operation in MODE.
4097 If so, pick a wider mode in which we can do the operation. Convert
4098 to that mode at the start to avoid repeated conversions.
4100 First see what operations we need. These depend on the expression
4101 we are evaluating. (We assume that divxx3 insns exist under the
4102 same conditions that modxx3 insns and that these insns don't normally
4103 fail. If these assumptions are not correct, we may generate less
4104 efficient code in some cases.)
4106 Then see if we find a mode in which we can open-code that operation
4107 (either a division, modulus, or shift). Finally, check for the smallest
4108 mode for which we can do the operation with a library call. */
4110 /* We might want to refine this now that we have division-by-constant
4111 optimization. Since expmed_mult_highpart tries so many variants, it is
4112 not straightforward to generalize this. Maybe we should make an array
4113 of possible modes in init_expmed? Save this for GCC 2.7. */
4115 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
4116 ? (unsignedp ? lshr_optab : ashr_optab)
4117 : (unsignedp ? udiv_optab : sdiv_optab));
4118 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
4119 ? optab1
4120 : (unsignedp ? udivmod_optab : sdivmod_optab));
4122 for (compute_mode = mode; compute_mode != VOIDmode;
4123 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4124 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4125 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4126 break;
4128 if (compute_mode == VOIDmode)
4129 for (compute_mode = mode; compute_mode != VOIDmode;
4130 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4131 if (optab_libfunc (optab1, compute_mode)
4132 || optab_libfunc (optab2, compute_mode))
4133 break;
4135 /* If we still couldn't find a mode, use MODE, but expand_binop will
4136 probably die. */
4137 if (compute_mode == VOIDmode)
4138 compute_mode = mode;
4140 if (target && GET_MODE (target) == compute_mode)
4141 tquotient = target;
4142 else
4143 tquotient = gen_reg_rtx (compute_mode);
4145 size = GET_MODE_BITSIZE (compute_mode);
4146 #if 0
4147 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4148 (mode), and thereby get better code when OP1 is a constant. Do that
4149 later. It will require going over all usages of SIZE below. */
4150 size = GET_MODE_BITSIZE (mode);
4151 #endif
4153 /* Only deduct something for a REM if the last divide done was
4154 for a different constant. Then set the constant of the last
4155 divide. */
4156 max_cost = (unsignedp
4157 ? udiv_cost (speed, compute_mode)
4158 : sdiv_cost (speed, compute_mode));
4159 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4160 && INTVAL (op1) == last_div_const))
4161 max_cost -= (mul_cost (speed, compute_mode)
4162 + add_cost (speed, compute_mode));
4164 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4166 /* Now convert to the best mode to use. */
4167 if (compute_mode != mode)
4169 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4170 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4172 /* convert_modes may have placed op1 into a register, so we
4173 must recompute the following. */
4174 op1_is_constant = CONST_INT_P (op1);
4175 op1_is_pow2 = (op1_is_constant
4176 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4177 || (! unsignedp
4178 && EXACT_POWER_OF_2_OR_ZERO_P (-UINTVAL (op1))))));
4181 /* If one of the operands is a volatile MEM, copy it into a register. */
4183 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4184 op0 = force_reg (compute_mode, op0);
4185 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4186 op1 = force_reg (compute_mode, op1);
4188 /* If we need the remainder or if OP1 is constant, we need to
4189 put OP0 in a register in case it has any queued subexpressions. */
4190 if (rem_flag || op1_is_constant)
4191 op0 = force_reg (compute_mode, op0);
4193 last = get_last_insn ();
4195 /* Promote floor rounding to trunc rounding for unsigned operations. */
4196 if (unsignedp)
4198 if (code == FLOOR_DIV_EXPR)
4199 code = TRUNC_DIV_EXPR;
4200 if (code == FLOOR_MOD_EXPR)
4201 code = TRUNC_MOD_EXPR;
4202 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4203 code = TRUNC_DIV_EXPR;
4206 if (op1 != const0_rtx)
4207 switch (code)
4209 case TRUNC_MOD_EXPR:
4210 case TRUNC_DIV_EXPR:
4211 if (op1_is_constant)
4213 if (unsignedp)
4215 unsigned HOST_WIDE_INT mh, ml;
4216 int pre_shift, post_shift;
4217 int dummy;
4218 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4219 & GET_MODE_MASK (compute_mode));
4221 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4223 pre_shift = floor_log2 (d);
4224 if (rem_flag)
4226 unsigned HOST_WIDE_INT mask
4227 = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4228 remainder
4229 = expand_binop (compute_mode, and_optab, op0,
4230 gen_int_mode (mask, compute_mode),
4231 remainder, 1,
4232 OPTAB_LIB_WIDEN);
4233 if (remainder)
4234 return gen_lowpart (mode, remainder);
4236 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4237 pre_shift, tquotient, 1);
4239 else if (size <= HOST_BITS_PER_WIDE_INT)
4241 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4243 /* Most significant bit of divisor is set; emit an scc
4244 insn. */
4245 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4246 compute_mode, 1, 1);
4248 else
4250 /* Find a suitable multiplier and right shift count
4251 instead of multiplying with D. */
4253 mh = choose_multiplier (d, size, size,
4254 &ml, &post_shift, &dummy);
4256 /* If the suggested multiplier is more than SIZE bits,
4257 we can do better for even divisors, using an
4258 initial right shift. */
4259 if (mh != 0 && (d & 1) == 0)
4261 pre_shift = floor_log2 (d & -d);
4262 mh = choose_multiplier (d >> pre_shift, size,
4263 size - pre_shift,
4264 &ml, &post_shift, &dummy);
4265 gcc_assert (!mh);
4267 else
4268 pre_shift = 0;
4270 if (mh != 0)
4272 rtx t1, t2, t3, t4;
4274 if (post_shift - 1 >= BITS_PER_WORD)
4275 goto fail1;
4277 extra_cost
4278 = (shift_cost (speed, compute_mode, post_shift - 1)
4279 + shift_cost (speed, compute_mode, 1)
4280 + 2 * add_cost (speed, compute_mode));
4281 t1 = expmed_mult_highpart
4282 (compute_mode, op0,
4283 gen_int_mode (ml, compute_mode),
4284 NULL_RTX, 1, max_cost - extra_cost);
4285 if (t1 == 0)
4286 goto fail1;
4287 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4288 op0, t1),
4289 NULL_RTX);
4290 t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4291 t2, 1, NULL_RTX, 1);
4292 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4293 t1, t3),
4294 NULL_RTX);
4295 quotient = expand_shift
4296 (RSHIFT_EXPR, compute_mode, t4,
4297 post_shift - 1, tquotient, 1);
4299 else
4301 rtx t1, t2;
4303 if (pre_shift >= BITS_PER_WORD
4304 || post_shift >= BITS_PER_WORD)
4305 goto fail1;
4307 t1 = expand_shift
4308 (RSHIFT_EXPR, compute_mode, op0,
4309 pre_shift, NULL_RTX, 1);
4310 extra_cost
4311 = (shift_cost (speed, compute_mode, pre_shift)
4312 + shift_cost (speed, compute_mode, post_shift));
4313 t2 = expmed_mult_highpart
4314 (compute_mode, t1,
4315 gen_int_mode (ml, compute_mode),
4316 NULL_RTX, 1, max_cost - extra_cost);
4317 if (t2 == 0)
4318 goto fail1;
4319 quotient = expand_shift
4320 (RSHIFT_EXPR, compute_mode, t2,
4321 post_shift, tquotient, 1);
4325 else /* Too wide mode to use tricky code */
4326 break;
4328 insn = get_last_insn ();
4329 if (insn != last)
4330 set_dst_reg_note (insn, REG_EQUAL,
4331 gen_rtx_UDIV (compute_mode, op0, op1),
4332 quotient);
4334 else /* TRUNC_DIV, signed */
4336 unsigned HOST_WIDE_INT ml;
4337 int lgup, post_shift;
4338 rtx mlr;
4339 HOST_WIDE_INT d = INTVAL (op1);
4340 unsigned HOST_WIDE_INT abs_d;
4342 /* Since d might be INT_MIN, we have to cast to
4343 unsigned HOST_WIDE_INT before negating to avoid
4344 undefined signed overflow. */
4345 abs_d = (d >= 0
4346 ? (unsigned HOST_WIDE_INT) d
4347 : - (unsigned HOST_WIDE_INT) d);
4349 /* n rem d = n rem -d */
4350 if (rem_flag && d < 0)
4352 d = abs_d;
4353 op1 = gen_int_mode (abs_d, compute_mode);
4356 if (d == 1)
4357 quotient = op0;
4358 else if (d == -1)
4359 quotient = expand_unop (compute_mode, neg_optab, op0,
4360 tquotient, 0);
4361 else if (HOST_BITS_PER_WIDE_INT >= size
4362 && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4364 /* This case is not handled correctly below. */
4365 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4366 compute_mode, 1, 1);
4367 if (quotient == 0)
4368 goto fail1;
4370 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4371 && (rem_flag
4372 ? smod_pow2_cheap (speed, compute_mode)
4373 : sdiv_pow2_cheap (speed, compute_mode))
4374 /* We assume that cheap metric is true if the
4375 optab has an expander for this mode. */
4376 && ((optab_handler ((rem_flag ? smod_optab
4377 : sdiv_optab),
4378 compute_mode)
4379 != CODE_FOR_nothing)
4380 || (optab_handler (sdivmod_optab,
4381 compute_mode)
4382 != CODE_FOR_nothing)))
4384 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4386 if (rem_flag)
4388 remainder = expand_smod_pow2 (compute_mode, op0, d);
4389 if (remainder)
4390 return gen_lowpart (mode, remainder);
4393 if (sdiv_pow2_cheap (speed, compute_mode)
4394 && ((optab_handler (sdiv_optab, compute_mode)
4395 != CODE_FOR_nothing)
4396 || (optab_handler (sdivmod_optab, compute_mode)
4397 != CODE_FOR_nothing)))
4398 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4399 compute_mode, op0,
4400 gen_int_mode (abs_d,
4401 compute_mode),
4402 NULL_RTX, 0);
4403 else
4404 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4406 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4407 negate the quotient. */
4408 if (d < 0)
4410 insn = get_last_insn ();
4411 if (insn != last
4412 && abs_d < ((unsigned HOST_WIDE_INT) 1
4413 << (HOST_BITS_PER_WIDE_INT - 1)))
4414 set_dst_reg_note (insn, REG_EQUAL,
4415 gen_rtx_DIV (compute_mode, op0,
4416 gen_int_mode
4417 (abs_d,
4418 compute_mode)),
4419 quotient);
4421 quotient = expand_unop (compute_mode, neg_optab,
4422 quotient, quotient, 0);
4425 else if (size <= HOST_BITS_PER_WIDE_INT)
4427 choose_multiplier (abs_d, size, size - 1,
4428 &ml, &post_shift, &lgup);
4429 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4431 rtx t1, t2, t3;
4433 if (post_shift >= BITS_PER_WORD
4434 || size - 1 >= BITS_PER_WORD)
4435 goto fail1;
4437 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4438 + shift_cost (speed, compute_mode, size - 1)
4439 + add_cost (speed, compute_mode));
4440 t1 = expmed_mult_highpart
4441 (compute_mode, op0, gen_int_mode (ml, compute_mode),
4442 NULL_RTX, 0, max_cost - extra_cost);
4443 if (t1 == 0)
4444 goto fail1;
4445 t2 = expand_shift
4446 (RSHIFT_EXPR, compute_mode, t1,
4447 post_shift, NULL_RTX, 0);
4448 t3 = expand_shift
4449 (RSHIFT_EXPR, compute_mode, op0,
4450 size - 1, NULL_RTX, 0);
4451 if (d < 0)
4452 quotient
4453 = force_operand (gen_rtx_MINUS (compute_mode,
4454 t3, t2),
4455 tquotient);
4456 else
4457 quotient
4458 = force_operand (gen_rtx_MINUS (compute_mode,
4459 t2, t3),
4460 tquotient);
4462 else
4464 rtx t1, t2, t3, t4;
4466 if (post_shift >= BITS_PER_WORD
4467 || size - 1 >= BITS_PER_WORD)
4468 goto fail1;
4470 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4471 mlr = gen_int_mode (ml, compute_mode);
4472 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4473 + shift_cost (speed, compute_mode, size - 1)
4474 + 2 * add_cost (speed, compute_mode));
4475 t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4476 NULL_RTX, 0,
4477 max_cost - extra_cost);
4478 if (t1 == 0)
4479 goto fail1;
4480 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4481 t1, op0),
4482 NULL_RTX);
4483 t3 = expand_shift
4484 (RSHIFT_EXPR, compute_mode, t2,
4485 post_shift, NULL_RTX, 0);
4486 t4 = expand_shift
4487 (RSHIFT_EXPR, compute_mode, op0,
4488 size - 1, NULL_RTX, 0);
4489 if (d < 0)
4490 quotient
4491 = force_operand (gen_rtx_MINUS (compute_mode,
4492 t4, t3),
4493 tquotient);
4494 else
4495 quotient
4496 = force_operand (gen_rtx_MINUS (compute_mode,
4497 t3, t4),
4498 tquotient);
4501 else /* Too wide mode to use tricky code */
4502 break;
4504 insn = get_last_insn ();
4505 if (insn != last)
4506 set_dst_reg_note (insn, REG_EQUAL,
4507 gen_rtx_DIV (compute_mode, op0, op1),
4508 quotient);
4510 break;
4512 fail1:
4513 delete_insns_since (last);
4514 break;
4516 case FLOOR_DIV_EXPR:
4517 case FLOOR_MOD_EXPR:
4518 /* We will come here only for signed operations. */
4519 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4521 unsigned HOST_WIDE_INT mh, ml;
4522 int pre_shift, lgup, post_shift;
4523 HOST_WIDE_INT d = INTVAL (op1);
4525 if (d > 0)
4527 /* We could just as easily deal with negative constants here,
4528 but it does not seem worth the trouble for GCC 2.6. */
4529 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4531 pre_shift = floor_log2 (d);
4532 if (rem_flag)
4534 unsigned HOST_WIDE_INT mask
4535 = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4536 remainder = expand_binop
4537 (compute_mode, and_optab, op0,
4538 gen_int_mode (mask, compute_mode),
4539 remainder, 0, OPTAB_LIB_WIDEN);
4540 if (remainder)
4541 return gen_lowpart (mode, remainder);
4543 quotient = expand_shift
4544 (RSHIFT_EXPR, compute_mode, op0,
4545 pre_shift, tquotient, 0);
4547 else
4549 rtx t1, t2, t3, t4;
4551 mh = choose_multiplier (d, size, size - 1,
4552 &ml, &post_shift, &lgup);
4553 gcc_assert (!mh);
4555 if (post_shift < BITS_PER_WORD
4556 && size - 1 < BITS_PER_WORD)
4558 t1 = expand_shift
4559 (RSHIFT_EXPR, compute_mode, op0,
4560 size - 1, NULL_RTX, 0);
4561 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4562 NULL_RTX, 0, OPTAB_WIDEN);
4563 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4564 + shift_cost (speed, compute_mode, size - 1)
4565 + 2 * add_cost (speed, compute_mode));
4566 t3 = expmed_mult_highpart
4567 (compute_mode, t2, gen_int_mode (ml, compute_mode),
4568 NULL_RTX, 1, max_cost - extra_cost);
4569 if (t3 != 0)
4571 t4 = expand_shift
4572 (RSHIFT_EXPR, compute_mode, t3,
4573 post_shift, NULL_RTX, 1);
4574 quotient = expand_binop (compute_mode, xor_optab,
4575 t4, t1, tquotient, 0,
4576 OPTAB_WIDEN);
4581 else
4583 rtx nsign, t1, t2, t3, t4;
4584 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4585 op0, constm1_rtx), NULL_RTX);
4586 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4587 0, OPTAB_WIDEN);
4588 nsign = expand_shift
4589 (RSHIFT_EXPR, compute_mode, t2,
4590 size - 1, NULL_RTX, 0);
4591 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4592 NULL_RTX);
4593 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4594 NULL_RTX, 0);
4595 if (t4)
4597 rtx t5;
4598 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4599 NULL_RTX, 0);
4600 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4601 t4, t5),
4602 tquotient);
4607 if (quotient != 0)
4608 break;
4609 delete_insns_since (last);
4611 /* Try using an instruction that produces both the quotient and
4612 remainder, using truncation. We can easily compensate the quotient
4613 or remainder to get floor rounding, once we have the remainder.
4614 Notice that we compute also the final remainder value here,
4615 and return the result right away. */
4616 if (target == 0 || GET_MODE (target) != compute_mode)
4617 target = gen_reg_rtx (compute_mode);
4619 if (rem_flag)
4621 remainder
4622 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4623 quotient = gen_reg_rtx (compute_mode);
4625 else
4627 quotient
4628 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4629 remainder = gen_reg_rtx (compute_mode);
4632 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4633 quotient, remainder, 0))
4635 /* This could be computed with a branch-less sequence.
4636 Save that for later. */
4637 rtx tem;
4638 rtx_code_label *label = gen_label_rtx ();
4639 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4640 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4641 NULL_RTX, 0, OPTAB_WIDEN);
4642 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4643 expand_dec (quotient, const1_rtx);
4644 expand_inc (remainder, op1);
4645 emit_label (label);
4646 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4649 /* No luck with division elimination or divmod. Have to do it
4650 by conditionally adjusting op0 *and* the result. */
4652 rtx_code_label *label1, *label2, *label3, *label4, *label5;
4653 rtx adjusted_op0;
4654 rtx tem;
4656 quotient = gen_reg_rtx (compute_mode);
4657 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4658 label1 = gen_label_rtx ();
4659 label2 = gen_label_rtx ();
4660 label3 = gen_label_rtx ();
4661 label4 = gen_label_rtx ();
4662 label5 = gen_label_rtx ();
4663 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4664 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4665 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4666 quotient, 0, OPTAB_LIB_WIDEN);
4667 if (tem != quotient)
4668 emit_move_insn (quotient, tem);
4669 emit_jump_insn (targetm.gen_jump (label5));
4670 emit_barrier ();
4671 emit_label (label1);
4672 expand_inc (adjusted_op0, const1_rtx);
4673 emit_jump_insn (targetm.gen_jump (label4));
4674 emit_barrier ();
4675 emit_label (label2);
4676 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4677 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4678 quotient, 0, OPTAB_LIB_WIDEN);
4679 if (tem != quotient)
4680 emit_move_insn (quotient, tem);
4681 emit_jump_insn (targetm.gen_jump (label5));
4682 emit_barrier ();
4683 emit_label (label3);
4684 expand_dec (adjusted_op0, const1_rtx);
4685 emit_label (label4);
4686 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4687 quotient, 0, OPTAB_LIB_WIDEN);
4688 if (tem != quotient)
4689 emit_move_insn (quotient, tem);
4690 expand_dec (quotient, const1_rtx);
4691 emit_label (label5);
4693 break;
4695 case CEIL_DIV_EXPR:
4696 case CEIL_MOD_EXPR:
4697 if (unsignedp)
4699 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4701 rtx t1, t2, t3;
4702 unsigned HOST_WIDE_INT d = INTVAL (op1);
4703 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4704 floor_log2 (d), tquotient, 1);
4705 t2 = expand_binop (compute_mode, and_optab, op0,
4706 gen_int_mode (d - 1, compute_mode),
4707 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4708 t3 = gen_reg_rtx (compute_mode);
4709 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4710 compute_mode, 1, 1);
4711 if (t3 == 0)
4713 rtx_code_label *lab;
4714 lab = gen_label_rtx ();
4715 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4716 expand_inc (t1, const1_rtx);
4717 emit_label (lab);
4718 quotient = t1;
4720 else
4721 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4722 t1, t3),
4723 tquotient);
4724 break;
4727 /* Try using an instruction that produces both the quotient and
4728 remainder, using truncation. We can easily compensate the
4729 quotient or remainder to get ceiling rounding, once we have the
4730 remainder. Notice that we compute also the final remainder
4731 value here, and return the result right away. */
4732 if (target == 0 || GET_MODE (target) != compute_mode)
4733 target = gen_reg_rtx (compute_mode);
4735 if (rem_flag)
4737 remainder = (REG_P (target)
4738 ? target : gen_reg_rtx (compute_mode));
4739 quotient = gen_reg_rtx (compute_mode);
4741 else
4743 quotient = (REG_P (target)
4744 ? target : gen_reg_rtx (compute_mode));
4745 remainder = gen_reg_rtx (compute_mode);
4748 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4749 remainder, 1))
4751 /* This could be computed with a branch-less sequence.
4752 Save that for later. */
4753 rtx_code_label *label = gen_label_rtx ();
4754 do_cmp_and_jump (remainder, const0_rtx, EQ,
4755 compute_mode, label);
4756 expand_inc (quotient, const1_rtx);
4757 expand_dec (remainder, op1);
4758 emit_label (label);
4759 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4762 /* No luck with division elimination or divmod. Have to do it
4763 by conditionally adjusting op0 *and* the result. */
4765 rtx_code_label *label1, *label2;
4766 rtx adjusted_op0, tem;
4768 quotient = gen_reg_rtx (compute_mode);
4769 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4770 label1 = gen_label_rtx ();
4771 label2 = gen_label_rtx ();
4772 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4773 compute_mode, label1);
4774 emit_move_insn (quotient, const0_rtx);
4775 emit_jump_insn (targetm.gen_jump (label2));
4776 emit_barrier ();
4777 emit_label (label1);
4778 expand_dec (adjusted_op0, const1_rtx);
4779 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4780 quotient, 1, OPTAB_LIB_WIDEN);
4781 if (tem != quotient)
4782 emit_move_insn (quotient, tem);
4783 expand_inc (quotient, const1_rtx);
4784 emit_label (label2);
4787 else /* signed */
4789 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4790 && INTVAL (op1) >= 0)
4792 /* This is extremely similar to the code for the unsigned case
4793 above. For 2.7 we should merge these variants, but for
4794 2.6.1 I don't want to touch the code for unsigned since that
4795 get used in C. The signed case will only be used by other
4796 languages (Ada). */
4798 rtx t1, t2, t3;
4799 unsigned HOST_WIDE_INT d = INTVAL (op1);
4800 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4801 floor_log2 (d), tquotient, 0);
4802 t2 = expand_binop (compute_mode, and_optab, op0,
4803 gen_int_mode (d - 1, compute_mode),
4804 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4805 t3 = gen_reg_rtx (compute_mode);
4806 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4807 compute_mode, 1, 1);
4808 if (t3 == 0)
4810 rtx_code_label *lab;
4811 lab = gen_label_rtx ();
4812 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4813 expand_inc (t1, const1_rtx);
4814 emit_label (lab);
4815 quotient = t1;
4817 else
4818 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4819 t1, t3),
4820 tquotient);
4821 break;
4824 /* Try using an instruction that produces both the quotient and
4825 remainder, using truncation. We can easily compensate the
4826 quotient or remainder to get ceiling rounding, once we have the
4827 remainder. Notice that we compute also the final remainder
4828 value here, and return the result right away. */
4829 if (target == 0 || GET_MODE (target) != compute_mode)
4830 target = gen_reg_rtx (compute_mode);
4831 if (rem_flag)
4833 remainder= (REG_P (target)
4834 ? target : gen_reg_rtx (compute_mode));
4835 quotient = gen_reg_rtx (compute_mode);
4837 else
4839 quotient = (REG_P (target)
4840 ? target : gen_reg_rtx (compute_mode));
4841 remainder = gen_reg_rtx (compute_mode);
4844 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4845 remainder, 0))
4847 /* This could be computed with a branch-less sequence.
4848 Save that for later. */
4849 rtx tem;
4850 rtx_code_label *label = gen_label_rtx ();
4851 do_cmp_and_jump (remainder, const0_rtx, EQ,
4852 compute_mode, label);
4853 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4854 NULL_RTX, 0, OPTAB_WIDEN);
4855 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4856 expand_inc (quotient, const1_rtx);
4857 expand_dec (remainder, op1);
4858 emit_label (label);
4859 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4862 /* No luck with division elimination or divmod. Have to do it
4863 by conditionally adjusting op0 *and* the result. */
4865 rtx_code_label *label1, *label2, *label3, *label4, *label5;
4866 rtx adjusted_op0;
4867 rtx tem;
4869 quotient = gen_reg_rtx (compute_mode);
4870 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4871 label1 = gen_label_rtx ();
4872 label2 = gen_label_rtx ();
4873 label3 = gen_label_rtx ();
4874 label4 = gen_label_rtx ();
4875 label5 = gen_label_rtx ();
4876 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4877 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4878 compute_mode, label1);
4879 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4880 quotient, 0, OPTAB_LIB_WIDEN);
4881 if (tem != quotient)
4882 emit_move_insn (quotient, tem);
4883 emit_jump_insn (targetm.gen_jump (label5));
4884 emit_barrier ();
4885 emit_label (label1);
4886 expand_dec (adjusted_op0, const1_rtx);
4887 emit_jump_insn (targetm.gen_jump (label4));
4888 emit_barrier ();
4889 emit_label (label2);
4890 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4891 compute_mode, label3);
4892 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4893 quotient, 0, OPTAB_LIB_WIDEN);
4894 if (tem != quotient)
4895 emit_move_insn (quotient, tem);
4896 emit_jump_insn (targetm.gen_jump (label5));
4897 emit_barrier ();
4898 emit_label (label3);
4899 expand_inc (adjusted_op0, const1_rtx);
4900 emit_label (label4);
4901 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4902 quotient, 0, OPTAB_LIB_WIDEN);
4903 if (tem != quotient)
4904 emit_move_insn (quotient, tem);
4905 expand_inc (quotient, const1_rtx);
4906 emit_label (label5);
4909 break;
4911 case EXACT_DIV_EXPR:
4912 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4914 HOST_WIDE_INT d = INTVAL (op1);
4915 unsigned HOST_WIDE_INT ml;
4916 int pre_shift;
4917 rtx t1;
4919 pre_shift = floor_log2 (d & -d);
4920 ml = invert_mod2n (d >> pre_shift, size);
4921 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4922 pre_shift, NULL_RTX, unsignedp);
4923 quotient = expand_mult (compute_mode, t1,
4924 gen_int_mode (ml, compute_mode),
4925 NULL_RTX, 1);
4927 insn = get_last_insn ();
4928 set_dst_reg_note (insn, REG_EQUAL,
4929 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4930 compute_mode, op0, op1),
4931 quotient);
4933 break;
4935 case ROUND_DIV_EXPR:
4936 case ROUND_MOD_EXPR:
4937 if (unsignedp)
4939 rtx tem;
4940 rtx_code_label *label;
4941 label = gen_label_rtx ();
4942 quotient = gen_reg_rtx (compute_mode);
4943 remainder = gen_reg_rtx (compute_mode);
4944 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4946 rtx tem;
4947 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4948 quotient, 1, OPTAB_LIB_WIDEN);
4949 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4950 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4951 remainder, 1, OPTAB_LIB_WIDEN);
4953 tem = plus_constant (compute_mode, op1, -1);
4954 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4955 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4956 expand_inc (quotient, const1_rtx);
4957 expand_dec (remainder, op1);
4958 emit_label (label);
4960 else
4962 rtx abs_rem, abs_op1, tem, mask;
4963 rtx_code_label *label;
4964 label = gen_label_rtx ();
4965 quotient = gen_reg_rtx (compute_mode);
4966 remainder = gen_reg_rtx (compute_mode);
4967 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4969 rtx tem;
4970 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4971 quotient, 0, OPTAB_LIB_WIDEN);
4972 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4973 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4974 remainder, 0, OPTAB_LIB_WIDEN);
4976 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4977 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4978 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4979 1, NULL_RTX, 1);
4980 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4981 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4982 NULL_RTX, 0, OPTAB_WIDEN);
4983 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4984 size - 1, NULL_RTX, 0);
4985 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4986 NULL_RTX, 0, OPTAB_WIDEN);
4987 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4988 NULL_RTX, 0, OPTAB_WIDEN);
4989 expand_inc (quotient, tem);
4990 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4991 NULL_RTX, 0, OPTAB_WIDEN);
4992 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4993 NULL_RTX, 0, OPTAB_WIDEN);
4994 expand_dec (remainder, tem);
4995 emit_label (label);
4997 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4999 default:
5000 gcc_unreachable ();
5003 if (quotient == 0)
5005 if (target && GET_MODE (target) != compute_mode)
5006 target = 0;
5008 if (rem_flag)
5010 /* Try to produce the remainder without producing the quotient.
5011 If we seem to have a divmod pattern that does not require widening,
5012 don't try widening here. We should really have a WIDEN argument
5013 to expand_twoval_binop, since what we'd really like to do here is
5014 1) try a mod insn in compute_mode
5015 2) try a divmod insn in compute_mode
5016 3) try a div insn in compute_mode and multiply-subtract to get
5017 remainder
5018 4) try the same things with widening allowed. */
5019 remainder
5020 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5021 op0, op1, target,
5022 unsignedp,
5023 ((optab_handler (optab2, compute_mode)
5024 != CODE_FOR_nothing)
5025 ? OPTAB_DIRECT : OPTAB_WIDEN));
5026 if (remainder == 0)
5028 /* No luck there. Can we do remainder and divide at once
5029 without a library call? */
5030 remainder = gen_reg_rtx (compute_mode);
5031 if (! expand_twoval_binop ((unsignedp
5032 ? udivmod_optab
5033 : sdivmod_optab),
5034 op0, op1,
5035 NULL_RTX, remainder, unsignedp))
5036 remainder = 0;
5039 if (remainder)
5040 return gen_lowpart (mode, remainder);
5043 /* Produce the quotient. Try a quotient insn, but not a library call.
5044 If we have a divmod in this mode, use it in preference to widening
5045 the div (for this test we assume it will not fail). Note that optab2
5046 is set to the one of the two optabs that the call below will use. */
5047 quotient
5048 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5049 op0, op1, rem_flag ? NULL_RTX : target,
5050 unsignedp,
5051 ((optab_handler (optab2, compute_mode)
5052 != CODE_FOR_nothing)
5053 ? OPTAB_DIRECT : OPTAB_WIDEN));
5055 if (quotient == 0)
5057 /* No luck there. Try a quotient-and-remainder insn,
5058 keeping the quotient alone. */
5059 quotient = gen_reg_rtx (compute_mode);
5060 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5061 op0, op1,
5062 quotient, NULL_RTX, unsignedp))
5064 quotient = 0;
5065 if (! rem_flag)
5066 /* Still no luck. If we are not computing the remainder,
5067 use a library call for the quotient. */
5068 quotient = sign_expand_binop (compute_mode,
5069 udiv_optab, sdiv_optab,
5070 op0, op1, target,
5071 unsignedp, OPTAB_LIB_WIDEN);
5076 if (rem_flag)
5078 if (target && GET_MODE (target) != compute_mode)
5079 target = 0;
5081 if (quotient == 0)
5083 /* No divide instruction either. Use library for remainder. */
5084 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5085 op0, op1, target,
5086 unsignedp, OPTAB_LIB_WIDEN);
5087 /* No remainder function. Try a quotient-and-remainder
5088 function, keeping the remainder. */
5089 if (!remainder)
5091 remainder = gen_reg_rtx (compute_mode);
5092 if (!expand_twoval_binop_libfunc
5093 (unsignedp ? udivmod_optab : sdivmod_optab,
5094 op0, op1,
5095 NULL_RTX, remainder,
5096 unsignedp ? UMOD : MOD))
5097 remainder = NULL_RTX;
5100 else
5102 /* We divided. Now finish doing X - Y * (X / Y). */
5103 remainder = expand_mult (compute_mode, quotient, op1,
5104 NULL_RTX, unsignedp);
5105 remainder = expand_binop (compute_mode, sub_optab, op0,
5106 remainder, target, unsignedp,
5107 OPTAB_LIB_WIDEN);
5111 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5114 /* Return a tree node with data type TYPE, describing the value of X.
5115 Usually this is an VAR_DECL, if there is no obvious better choice.
5116 X may be an expression, however we only support those expressions
5117 generated by loop.c. */
5119 tree
5120 make_tree (tree type, rtx x)
5122 tree t;
5124 switch (GET_CODE (x))
5126 case CONST_INT:
5127 case CONST_WIDE_INT:
5128 t = wide_int_to_tree (type, std::make_pair (x, TYPE_MODE (type)));
5129 return t;
5131 case CONST_DOUBLE:
5132 STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5133 if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5134 t = wide_int_to_tree (type,
5135 wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5136 HOST_BITS_PER_WIDE_INT * 2));
5137 else
5138 t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5140 return t;
5142 case CONST_VECTOR:
5144 int units = CONST_VECTOR_NUNITS (x);
5145 tree itype = TREE_TYPE (type);
5146 tree *elts;
5147 int i;
5149 /* Build a tree with vector elements. */
5150 elts = XALLOCAVEC (tree, units);
5151 for (i = units - 1; i >= 0; --i)
5153 rtx elt = CONST_VECTOR_ELT (x, i);
5154 elts[i] = make_tree (itype, elt);
5157 return build_vector (type, elts);
5160 case PLUS:
5161 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5162 make_tree (type, XEXP (x, 1)));
5164 case MINUS:
5165 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5166 make_tree (type, XEXP (x, 1)));
5168 case NEG:
5169 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5171 case MULT:
5172 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5173 make_tree (type, XEXP (x, 1)));
5175 case ASHIFT:
5176 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5177 make_tree (type, XEXP (x, 1)));
5179 case LSHIFTRT:
5180 t = unsigned_type_for (type);
5181 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5182 make_tree (t, XEXP (x, 0)),
5183 make_tree (type, XEXP (x, 1))));
5185 case ASHIFTRT:
5186 t = signed_type_for (type);
5187 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5188 make_tree (t, XEXP (x, 0)),
5189 make_tree (type, XEXP (x, 1))));
5191 case DIV:
5192 if (TREE_CODE (type) != REAL_TYPE)
5193 t = signed_type_for (type);
5194 else
5195 t = type;
5197 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5198 make_tree (t, XEXP (x, 0)),
5199 make_tree (t, XEXP (x, 1))));
5200 case UDIV:
5201 t = unsigned_type_for (type);
5202 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5203 make_tree (t, XEXP (x, 0)),
5204 make_tree (t, XEXP (x, 1))));
5206 case SIGN_EXTEND:
5207 case ZERO_EXTEND:
5208 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5209 GET_CODE (x) == ZERO_EXTEND);
5210 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5212 case CONST:
5213 return make_tree (type, XEXP (x, 0));
5215 case SYMBOL_REF:
5216 t = SYMBOL_REF_DECL (x);
5217 if (t)
5218 return fold_convert (type, build_fold_addr_expr (t));
5219 /* else fall through. */
5221 default:
5222 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5224 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5225 address mode to pointer mode. */
5226 if (POINTER_TYPE_P (type))
5227 x = convert_memory_address_addr_space
5228 (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5230 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5231 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5232 t->decl_with_rtl.rtl = x;
5234 return t;
5238 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5239 and returning TARGET.
5241 If TARGET is 0, a pseudo-register or constant is returned. */
5244 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5246 rtx tem = 0;
5248 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5249 tem = simplify_binary_operation (AND, mode, op0, op1);
5250 if (tem == 0)
5251 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5253 if (target == 0)
5254 target = tem;
5255 else if (tem != target)
5256 emit_move_insn (target, tem);
5257 return target;
5260 /* Helper function for emit_store_flag. */
5262 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5263 machine_mode mode, machine_mode compare_mode,
5264 int unsignedp, rtx x, rtx y, int normalizep,
5265 machine_mode target_mode)
5267 struct expand_operand ops[4];
5268 rtx op0, comparison, subtarget;
5269 rtx_insn *last;
5270 machine_mode result_mode = targetm.cstore_mode (icode);
5272 last = get_last_insn ();
5273 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5274 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5275 if (!x || !y)
5277 delete_insns_since (last);
5278 return NULL_RTX;
5281 if (target_mode == VOIDmode)
5282 target_mode = result_mode;
5283 if (!target)
5284 target = gen_reg_rtx (target_mode);
5286 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5288 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5289 create_fixed_operand (&ops[1], comparison);
5290 create_fixed_operand (&ops[2], x);
5291 create_fixed_operand (&ops[3], y);
5292 if (!maybe_expand_insn (icode, 4, ops))
5294 delete_insns_since (last);
5295 return NULL_RTX;
5297 subtarget = ops[0].value;
5299 /* If we are converting to a wider mode, first convert to
5300 TARGET_MODE, then normalize. This produces better combining
5301 opportunities on machines that have a SIGN_EXTRACT when we are
5302 testing a single bit. This mostly benefits the 68k.
5304 If STORE_FLAG_VALUE does not have the sign bit set when
5305 interpreted in MODE, we can do this conversion as unsigned, which
5306 is usually more efficient. */
5307 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5309 convert_move (target, subtarget,
5310 val_signbit_known_clear_p (result_mode,
5311 STORE_FLAG_VALUE));
5312 op0 = target;
5313 result_mode = target_mode;
5315 else
5316 op0 = subtarget;
5318 /* If we want to keep subexpressions around, don't reuse our last
5319 target. */
5320 if (optimize)
5321 subtarget = 0;
5323 /* Now normalize to the proper value in MODE. Sometimes we don't
5324 have to do anything. */
5325 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5327 /* STORE_FLAG_VALUE might be the most negative number, so write
5328 the comparison this way to avoid a compiler-time warning. */
5329 else if (- normalizep == STORE_FLAG_VALUE)
5330 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5332 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5333 it hard to use a value of just the sign bit due to ANSI integer
5334 constant typing rules. */
5335 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5336 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5337 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5338 normalizep == 1);
5339 else
5341 gcc_assert (STORE_FLAG_VALUE & 1);
5343 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5344 if (normalizep == -1)
5345 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5348 /* If we were converting to a smaller mode, do the conversion now. */
5349 if (target_mode != result_mode)
5351 convert_move (target, op0, 0);
5352 return target;
5354 else
5355 return op0;
5359 /* A subroutine of emit_store_flag only including "tricks" that do not
5360 need a recursive call. These are kept separate to avoid infinite
5361 loops. */
5363 static rtx
5364 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5365 machine_mode mode, int unsignedp, int normalizep,
5366 machine_mode target_mode)
5368 rtx subtarget;
5369 enum insn_code icode;
5370 machine_mode compare_mode;
5371 enum mode_class mclass;
5372 enum rtx_code scode;
5374 if (unsignedp)
5375 code = unsigned_condition (code);
5376 scode = swap_condition (code);
5378 /* If one operand is constant, make it the second one. Only do this
5379 if the other operand is not constant as well. */
5381 if (swap_commutative_operands_p (op0, op1))
5383 std::swap (op0, op1);
5384 code = swap_condition (code);
5387 if (mode == VOIDmode)
5388 mode = GET_MODE (op0);
5390 /* For some comparisons with 1 and -1, we can convert this to
5391 comparisons with zero. This will often produce more opportunities for
5392 store-flag insns. */
5394 switch (code)
5396 case LT:
5397 if (op1 == const1_rtx)
5398 op1 = const0_rtx, code = LE;
5399 break;
5400 case LE:
5401 if (op1 == constm1_rtx)
5402 op1 = const0_rtx, code = LT;
5403 break;
5404 case GE:
5405 if (op1 == const1_rtx)
5406 op1 = const0_rtx, code = GT;
5407 break;
5408 case GT:
5409 if (op1 == constm1_rtx)
5410 op1 = const0_rtx, code = GE;
5411 break;
5412 case GEU:
5413 if (op1 == const1_rtx)
5414 op1 = const0_rtx, code = NE;
5415 break;
5416 case LTU:
5417 if (op1 == const1_rtx)
5418 op1 = const0_rtx, code = EQ;
5419 break;
5420 default:
5421 break;
5424 /* If we are comparing a double-word integer with zero or -1, we can
5425 convert the comparison into one involving a single word. */
5426 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5427 && GET_MODE_CLASS (mode) == MODE_INT
5428 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5430 rtx tem;
5431 if ((code == EQ || code == NE)
5432 && (op1 == const0_rtx || op1 == constm1_rtx))
5434 rtx op00, op01;
5436 /* Do a logical OR or AND of the two words and compare the
5437 result. */
5438 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5439 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5440 tem = expand_binop (word_mode,
5441 op1 == const0_rtx ? ior_optab : and_optab,
5442 op00, op01, NULL_RTX, unsignedp,
5443 OPTAB_DIRECT);
5445 if (tem != 0)
5446 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5447 unsignedp, normalizep);
5449 else if ((code == LT || code == GE) && op1 == const0_rtx)
5451 rtx op0h;
5453 /* If testing the sign bit, can just test on high word. */
5454 op0h = simplify_gen_subreg (word_mode, op0, mode,
5455 subreg_highpart_offset (word_mode,
5456 mode));
5457 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5458 unsignedp, normalizep);
5460 else
5461 tem = NULL_RTX;
5463 if (tem)
5465 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5466 return tem;
5467 if (!target)
5468 target = gen_reg_rtx (target_mode);
5470 convert_move (target, tem,
5471 !val_signbit_known_set_p (word_mode,
5472 (normalizep ? normalizep
5473 : STORE_FLAG_VALUE)));
5474 return target;
5478 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5479 complement of A (for GE) and shifting the sign bit to the low bit. */
5480 if (op1 == const0_rtx && (code == LT || code == GE)
5481 && GET_MODE_CLASS (mode) == MODE_INT
5482 && (normalizep || STORE_FLAG_VALUE == 1
5483 || val_signbit_p (mode, STORE_FLAG_VALUE)))
5485 subtarget = target;
5487 if (!target)
5488 target_mode = mode;
5490 /* If the result is to be wider than OP0, it is best to convert it
5491 first. If it is to be narrower, it is *incorrect* to convert it
5492 first. */
5493 else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5495 op0 = convert_modes (target_mode, mode, op0, 0);
5496 mode = target_mode;
5499 if (target_mode != mode)
5500 subtarget = 0;
5502 if (code == GE)
5503 op0 = expand_unop (mode, one_cmpl_optab, op0,
5504 ((STORE_FLAG_VALUE == 1 || normalizep)
5505 ? 0 : subtarget), 0);
5507 if (STORE_FLAG_VALUE == 1 || normalizep)
5508 /* If we are supposed to produce a 0/1 value, we want to do
5509 a logical shift from the sign bit to the low-order bit; for
5510 a -1/0 value, we do an arithmetic shift. */
5511 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5512 GET_MODE_BITSIZE (mode) - 1,
5513 subtarget, normalizep != -1);
5515 if (mode != target_mode)
5516 op0 = convert_modes (target_mode, mode, op0, 0);
5518 return op0;
5521 mclass = GET_MODE_CLASS (mode);
5522 for (compare_mode = mode; compare_mode != VOIDmode;
5523 compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5525 machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5526 icode = optab_handler (cstore_optab, optab_mode);
5527 if (icode != CODE_FOR_nothing)
5529 do_pending_stack_adjust ();
5530 rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5531 unsignedp, op0, op1, normalizep, target_mode);
5532 if (tem)
5533 return tem;
5535 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5537 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5538 unsignedp, op1, op0, normalizep, target_mode);
5539 if (tem)
5540 return tem;
5542 break;
5546 return 0;
5549 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5550 and storing in TARGET. Normally return TARGET.
5551 Return 0 if that cannot be done.
5553 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5554 it is VOIDmode, they cannot both be CONST_INT.
5556 UNSIGNEDP is for the case where we have to widen the operands
5557 to perform the operation. It says to use zero-extension.
5559 NORMALIZEP is 1 if we should convert the result to be either zero
5560 or one. Normalize is -1 if we should convert the result to be
5561 either zero or -1. If NORMALIZEP is zero, the result will be left
5562 "raw" out of the scc insn. */
5565 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5566 machine_mode mode, int unsignedp, int normalizep)
5568 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5569 enum rtx_code rcode;
5570 rtx subtarget;
5571 rtx tem, trueval;
5572 rtx_insn *last;
5574 /* If we compare constants, we shouldn't use a store-flag operation,
5575 but a constant load. We can get there via the vanilla route that
5576 usually generates a compare-branch sequence, but will in this case
5577 fold the comparison to a constant, and thus elide the branch. */
5578 if (CONSTANT_P (op0) && CONSTANT_P (op1))
5579 return NULL_RTX;
5581 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5582 target_mode);
5583 if (tem)
5584 return tem;
5586 /* If we reached here, we can't do this with a scc insn, however there
5587 are some comparisons that can be done in other ways. Don't do any
5588 of these cases if branches are very cheap. */
5589 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5590 return 0;
5592 /* See what we need to return. We can only return a 1, -1, or the
5593 sign bit. */
5595 if (normalizep == 0)
5597 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5598 normalizep = STORE_FLAG_VALUE;
5600 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5602 else
5603 return 0;
5606 last = get_last_insn ();
5608 /* If optimizing, use different pseudo registers for each insn, instead
5609 of reusing the same pseudo. This leads to better CSE, but slows
5610 down the compiler, since there are more pseudos */
5611 subtarget = (!optimize
5612 && (target_mode == mode)) ? target : NULL_RTX;
5613 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5615 /* For floating-point comparisons, try the reverse comparison or try
5616 changing the "orderedness" of the comparison. */
5617 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5619 enum rtx_code first_code;
5620 bool and_them;
5622 rcode = reverse_condition_maybe_unordered (code);
5623 if (can_compare_p (rcode, mode, ccp_store_flag)
5624 && (code == ORDERED || code == UNORDERED
5625 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5626 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5628 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5629 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5631 /* For the reverse comparison, use either an addition or a XOR. */
5632 if (want_add
5633 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5634 optimize_insn_for_speed_p ()) == 0)
5636 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5637 STORE_FLAG_VALUE, target_mode);
5638 if (tem)
5639 return expand_binop (target_mode, add_optab, tem,
5640 gen_int_mode (normalizep, target_mode),
5641 target, 0, OPTAB_WIDEN);
5643 else if (!want_add
5644 && rtx_cost (trueval, mode, XOR, 1,
5645 optimize_insn_for_speed_p ()) == 0)
5647 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5648 normalizep, target_mode);
5649 if (tem)
5650 return expand_binop (target_mode, xor_optab, tem, trueval,
5651 target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5655 delete_insns_since (last);
5657 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5658 if (code == ORDERED || code == UNORDERED)
5659 return 0;
5661 and_them = split_comparison (code, mode, &first_code, &code);
5663 /* If there are no NaNs, the first comparison should always fall through.
5664 Effectively change the comparison to the other one. */
5665 if (!HONOR_NANS (mode))
5667 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5668 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5669 target_mode);
5672 if (!HAVE_conditional_move)
5673 return 0;
5675 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5676 conditional move. */
5677 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5678 normalizep, target_mode);
5679 if (tem == 0)
5680 return 0;
5682 if (and_them)
5683 tem = emit_conditional_move (target, code, op0, op1, mode,
5684 tem, const0_rtx, GET_MODE (tem), 0);
5685 else
5686 tem = emit_conditional_move (target, code, op0, op1, mode,
5687 trueval, tem, GET_MODE (tem), 0);
5689 if (tem == 0)
5690 delete_insns_since (last);
5691 return tem;
5694 /* The remaining tricks only apply to integer comparisons. */
5696 if (GET_MODE_CLASS (mode) != MODE_INT)
5697 return 0;
5699 /* If this is an equality comparison of integers, we can try to exclusive-or
5700 (or subtract) the two operands and use a recursive call to try the
5701 comparison with zero. Don't do any of these cases if branches are
5702 very cheap. */
5704 if ((code == EQ || code == NE) && op1 != const0_rtx)
5706 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5707 OPTAB_WIDEN);
5709 if (tem == 0)
5710 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5711 OPTAB_WIDEN);
5712 if (tem != 0)
5713 tem = emit_store_flag (target, code, tem, const0_rtx,
5714 mode, unsignedp, normalizep);
5715 if (tem != 0)
5716 return tem;
5718 delete_insns_since (last);
5721 /* For integer comparisons, try the reverse comparison. However, for
5722 small X and if we'd have anyway to extend, implementing "X != 0"
5723 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5724 rcode = reverse_condition (code);
5725 if (can_compare_p (rcode, mode, ccp_store_flag)
5726 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5727 && code == NE
5728 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5729 && op1 == const0_rtx))
5731 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5732 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5734 /* Again, for the reverse comparison, use either an addition or a XOR. */
5735 if (want_add
5736 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5737 optimize_insn_for_speed_p ()) == 0)
5739 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5740 STORE_FLAG_VALUE, target_mode);
5741 if (tem != 0)
5742 tem = expand_binop (target_mode, add_optab, tem,
5743 gen_int_mode (normalizep, target_mode),
5744 target, 0, OPTAB_WIDEN);
5746 else if (!want_add
5747 && rtx_cost (trueval, mode, XOR, 1,
5748 optimize_insn_for_speed_p ()) == 0)
5750 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5751 normalizep, target_mode);
5752 if (tem != 0)
5753 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5754 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5757 if (tem != 0)
5758 return tem;
5759 delete_insns_since (last);
5762 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5763 the constant zero. Reject all other comparisons at this point. Only
5764 do LE and GT if branches are expensive since they are expensive on
5765 2-operand machines. */
5767 if (op1 != const0_rtx
5768 || (code != EQ && code != NE
5769 && (BRANCH_COST (optimize_insn_for_speed_p (),
5770 false) <= 1 || (code != LE && code != GT))))
5771 return 0;
5773 /* Try to put the result of the comparison in the sign bit. Assume we can't
5774 do the necessary operation below. */
5776 tem = 0;
5778 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5779 the sign bit set. */
5781 if (code == LE)
5783 /* This is destructive, so SUBTARGET can't be OP0. */
5784 if (rtx_equal_p (subtarget, op0))
5785 subtarget = 0;
5787 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5788 OPTAB_WIDEN);
5789 if (tem)
5790 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5791 OPTAB_WIDEN);
5794 /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5795 number of bits in the mode of OP0, minus one. */
5797 if (code == GT)
5799 if (rtx_equal_p (subtarget, op0))
5800 subtarget = 0;
5802 tem = expand_shift (RSHIFT_EXPR, mode, op0,
5803 GET_MODE_BITSIZE (mode) - 1,
5804 subtarget, 0);
5805 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5806 OPTAB_WIDEN);
5809 if (code == EQ || code == NE)
5811 /* For EQ or NE, one way to do the comparison is to apply an operation
5812 that converts the operand into a positive number if it is nonzero
5813 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5814 for NE we negate. This puts the result in the sign bit. Then we
5815 normalize with a shift, if needed.
5817 Two operations that can do the above actions are ABS and FFS, so try
5818 them. If that doesn't work, and MODE is smaller than a full word,
5819 we can use zero-extension to the wider mode (an unsigned conversion)
5820 as the operation. */
5822 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5823 that is compensated by the subsequent overflow when subtracting
5824 one / negating. */
5826 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5827 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5828 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5829 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5830 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5832 tem = convert_modes (word_mode, mode, op0, 1);
5833 mode = word_mode;
5836 if (tem != 0)
5838 if (code == EQ)
5839 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5840 0, OPTAB_WIDEN);
5841 else
5842 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5845 /* If we couldn't do it that way, for NE we can "or" the two's complement
5846 of the value with itself. For EQ, we take the one's complement of
5847 that "or", which is an extra insn, so we only handle EQ if branches
5848 are expensive. */
5850 if (tem == 0
5851 && (code == NE
5852 || BRANCH_COST (optimize_insn_for_speed_p (),
5853 false) > 1))
5855 if (rtx_equal_p (subtarget, op0))
5856 subtarget = 0;
5858 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5859 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5860 OPTAB_WIDEN);
5862 if (tem && code == EQ)
5863 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5867 if (tem && normalizep)
5868 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5869 GET_MODE_BITSIZE (mode) - 1,
5870 subtarget, normalizep == 1);
5872 if (tem)
5874 if (!target)
5876 else if (GET_MODE (tem) != target_mode)
5878 convert_move (target, tem, 0);
5879 tem = target;
5881 else if (!subtarget)
5883 emit_move_insn (target, tem);
5884 tem = target;
5887 else
5888 delete_insns_since (last);
5890 return tem;
5893 /* Like emit_store_flag, but always succeeds. */
5896 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5897 machine_mode mode, int unsignedp, int normalizep)
5899 rtx tem;
5900 rtx_code_label *label;
5901 rtx trueval, falseval;
5903 /* First see if emit_store_flag can do the job. */
5904 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5905 if (tem != 0)
5906 return tem;
5908 if (!target)
5909 target = gen_reg_rtx (word_mode);
5911 /* If this failed, we have to do this with set/compare/jump/set code.
5912 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
5913 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5914 if (code == NE
5915 && GET_MODE_CLASS (mode) == MODE_INT
5916 && REG_P (target)
5917 && op0 == target
5918 && op1 == const0_rtx)
5920 label = gen_label_rtx ();
5921 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
5922 NULL_RTX, NULL, label, -1);
5923 emit_move_insn (target, trueval);
5924 emit_label (label);
5925 return target;
5928 if (!REG_P (target)
5929 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5930 target = gen_reg_rtx (GET_MODE (target));
5932 /* Jump in the right direction if the target cannot implement CODE
5933 but can jump on its reverse condition. */
5934 falseval = const0_rtx;
5935 if (! can_compare_p (code, mode, ccp_jump)
5936 && (! FLOAT_MODE_P (mode)
5937 || code == ORDERED || code == UNORDERED
5938 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5939 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5941 enum rtx_code rcode;
5942 if (FLOAT_MODE_P (mode))
5943 rcode = reverse_condition_maybe_unordered (code);
5944 else
5945 rcode = reverse_condition (code);
5947 /* Canonicalize to UNORDERED for the libcall. */
5948 if (can_compare_p (rcode, mode, ccp_jump)
5949 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5951 falseval = trueval;
5952 trueval = const0_rtx;
5953 code = rcode;
5957 emit_move_insn (target, trueval);
5958 label = gen_label_rtx ();
5959 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
5960 label, -1);
5962 emit_move_insn (target, falseval);
5963 emit_label (label);
5965 return target;
5968 /* Perform possibly multi-word comparison and conditional jump to LABEL
5969 if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE. This is
5970 now a thin wrapper around do_compare_rtx_and_jump. */
5972 static void
5973 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
5974 rtx_code_label *label)
5976 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5977 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
5978 NULL, label, -1);