gcc/expmed.c
1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987-2016 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "predict.h"
30 #include "tm_p.h"
31 #include "expmed.h"
32 #include "optabs.h"
33 #include "emit-rtl.h"
34 #include "diagnostic-core.h"
35 #include "fold-const.h"
36 #include "stor-layout.h"
37 #include "dojump.h"
38 #include "explow.h"
39 #include "expr.h"
40 #include "langhooks.h"
42 struct target_expmed default_target_expmed;
43 #if SWITCHABLE_TARGET
44 struct target_expmed *this_target_expmed = &default_target_expmed;
45 #endif
47 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
48 unsigned HOST_WIDE_INT,
49 unsigned HOST_WIDE_INT,
50 unsigned HOST_WIDE_INT,
51 rtx, bool);
52 static void store_fixed_bit_field_1 (rtx, unsigned HOST_WIDE_INT,
53 unsigned HOST_WIDE_INT,
54 rtx, bool);
55 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
56 unsigned HOST_WIDE_INT,
57 unsigned HOST_WIDE_INT,
58 unsigned HOST_WIDE_INT,
59 rtx, bool);
60 static rtx extract_fixed_bit_field (machine_mode, rtx,
61 unsigned HOST_WIDE_INT,
62 unsigned HOST_WIDE_INT, rtx, int, bool);
63 static rtx extract_fixed_bit_field_1 (machine_mode, rtx,
64 unsigned HOST_WIDE_INT,
65 unsigned HOST_WIDE_INT, rtx, int, bool);
66 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
67 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
68 unsigned HOST_WIDE_INT, int, bool);
69 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
70 static rtx expand_smod_pow2 (machine_mode, rtx, HOST_WIDE_INT);
71 static rtx expand_sdiv_pow2 (machine_mode, rtx, HOST_WIDE_INT);
73 /* Return a constant integer mask value of mode MODE with BITSIZE ones
74 followed by BITPOS zeros, or the complement of that if COMPLEMENT.
75 The mask is truncated if necessary to the width of mode MODE. The
76 mask is zero-extended if BITSIZE+BITPOS is too small for MODE. */
78 static inline rtx
79 mask_rtx (machine_mode mode, int bitpos, int bitsize, bool complement)
81 return immed_wide_int_const
82 (wi::shifted_mask (bitpos, bitsize, complement,
83 GET_MODE_PRECISION (mode)), mode);
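/* Illustrative example (not in the original source): mask_rtx (SImode, 4, 8, false)
   has ones in bits 4..11, i.e. the value 0x00000ff0; with COMPLEMENT true the
   result is 0xfffff00f.  */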
 86 /* Test whether a value is zero or a power of two.  */
87 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
88 (((x) & ((x) - (unsigned HOST_WIDE_INT) 1)) == 0)
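/* Worked example (added for illustration): 8 is 0b1000 and 8 & 7 == 0, so the
   macro accepts it; 6 is 0b110 and 6 & 5 == 0b100 != 0, so 6 is rejected;
   0 is accepted because 0 & anything is 0.  */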
90 struct init_expmed_rtl
92 rtx reg;
93 rtx plus;
94 rtx neg;
95 rtx mult;
96 rtx sdiv;
97 rtx udiv;
98 rtx sdiv_32;
99 rtx smod_32;
100 rtx wide_mult;
101 rtx wide_lshr;
102 rtx wide_trunc;
103 rtx shift;
104 rtx shift_mult;
105 rtx shift_add;
106 rtx shift_sub0;
107 rtx shift_sub1;
108 rtx zext;
109 rtx trunc;
111 rtx pow2[MAX_BITS_PER_WORD];
112 rtx cint[MAX_BITS_PER_WORD];
115 static void
116 init_expmed_one_conv (struct init_expmed_rtl *all, machine_mode to_mode,
117 machine_mode from_mode, bool speed)
119 int to_size, from_size;
120 rtx which;
122 to_size = GET_MODE_PRECISION (to_mode);
123 from_size = GET_MODE_PRECISION (from_mode);
 125 /* Most partial-integer modes have a precision less than that of the
 126 "full" integer mode required for their storage.  If one doesn't,
 127 reduce its bit size by one for the comparison here, so that it
 128 still ranks as narrower than the full mode.  */
129 if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
130 && exact_log2 (to_size) != -1)
131 to_size --;
132 if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
133 && exact_log2 (from_size) != -1)
134 from_size --;
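/* Illustration of the adjustment above (not in the original source): if TO_MODE
   is a partial-integer mode whose precision happens to be exactly 32, TO_SIZE is
   reduced to 31, so it still ranks as narrower than a full 32-bit mode and a
   conversion from such a full mode is costed with the truncation rtx rather than
   the zero-extension rtx below.  */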
136 /* Assume cost of zero-extend and sign-extend is the same. */
137 which = (to_size < from_size ? all->trunc : all->zext);
139 PUT_MODE (all->reg, from_mode);
140 set_convert_cost (to_mode, from_mode, speed,
141 set_src_cost (which, to_mode, speed));
144 static void
145 init_expmed_one_mode (struct init_expmed_rtl *all,
146 machine_mode mode, int speed)
148 int m, n, mode_bitsize;
149 machine_mode mode_from;
151 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
153 PUT_MODE (all->reg, mode);
154 PUT_MODE (all->plus, mode);
155 PUT_MODE (all->neg, mode);
156 PUT_MODE (all->mult, mode);
157 PUT_MODE (all->sdiv, mode);
158 PUT_MODE (all->udiv, mode);
159 PUT_MODE (all->sdiv_32, mode);
160 PUT_MODE (all->smod_32, mode);
161 PUT_MODE (all->wide_trunc, mode);
162 PUT_MODE (all->shift, mode);
163 PUT_MODE (all->shift_mult, mode);
164 PUT_MODE (all->shift_add, mode);
165 PUT_MODE (all->shift_sub0, mode);
166 PUT_MODE (all->shift_sub1, mode);
167 PUT_MODE (all->zext, mode);
168 PUT_MODE (all->trunc, mode);
170 set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
171 set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
172 set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
173 set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
174 set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
176 set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
177 <= 2 * add_cost (speed, mode)));
178 set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
179 <= 4 * add_cost (speed, mode)));
181 set_shift_cost (speed, mode, 0, 0);
183 int cost = add_cost (speed, mode);
184 set_shiftadd_cost (speed, mode, 0, cost);
185 set_shiftsub0_cost (speed, mode, 0, cost);
186 set_shiftsub1_cost (speed, mode, 0, cost);
189 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
190 for (m = 1; m < n; m++)
192 XEXP (all->shift, 1) = all->cint[m];
193 XEXP (all->shift_mult, 1) = all->pow2[m];
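/* For example (illustration only), when m == 3 the costs of
   (ashift:MODE reg (const_int 3)) and (mult:MODE reg (const_int 8)) are
   recorded here, so a shift by 3 and a multiply by 8 get consistent cost
   entries.  */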
195 set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
196 set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
197 speed));
198 set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
199 speed));
200 set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
201 speed));
204 if (SCALAR_INT_MODE_P (mode))
206 for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
207 mode_from = (machine_mode)(mode_from + 1))
208 init_expmed_one_conv (all, mode, mode_from, speed);
210 if (GET_MODE_CLASS (mode) == MODE_INT)
212 machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
213 if (wider_mode != VOIDmode)
215 PUT_MODE (all->zext, wider_mode);
216 PUT_MODE (all->wide_mult, wider_mode);
217 PUT_MODE (all->wide_lshr, wider_mode);
218 XEXP (all->wide_lshr, 1) = GEN_INT (mode_bitsize);
220 set_mul_widen_cost (speed, wider_mode,
221 set_src_cost (all->wide_mult, wider_mode, speed));
222 set_mul_highpart_cost (speed, mode,
223 set_src_cost (all->wide_trunc, mode, speed));
228 void
229 init_expmed (void)
231 struct init_expmed_rtl all;
232 machine_mode mode = QImode;
233 int m, speed;
235 memset (&all, 0, sizeof all);
236 for (m = 1; m < MAX_BITS_PER_WORD; m++)
238 all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
239 all.cint[m] = GEN_INT (m);
242 /* Avoid using hard regs in ways which may be unsupported. */
243 all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
244 all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
245 all.neg = gen_rtx_NEG (mode, all.reg);
246 all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
247 all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
248 all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
249 all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
250 all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
251 all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
252 all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
253 all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
254 all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
255 all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
256 all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
257 all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
258 all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
259 all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
260 all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
262 for (speed = 0; speed < 2; speed++)
264 crtl->maybe_hot_insn_p = speed;
265 set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
267 for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
268 mode = (machine_mode)(mode + 1))
269 init_expmed_one_mode (&all, mode, speed);
271 if (MIN_MODE_PARTIAL_INT != VOIDmode)
272 for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
273 mode = (machine_mode)(mode + 1))
274 init_expmed_one_mode (&all, mode, speed);
276 if (MIN_MODE_VECTOR_INT != VOIDmode)
277 for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
278 mode = (machine_mode)(mode + 1))
279 init_expmed_one_mode (&all, mode, speed);
282 if (alg_hash_used_p ())
284 struct alg_hash_entry *p = alg_hash_entry_ptr (0);
285 memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
287 else
288 set_alg_hash_used_p (true);
289 default_rtl_profile ();
291 ggc_free (all.trunc);
292 ggc_free (all.shift_sub1);
293 ggc_free (all.shift_sub0);
294 ggc_free (all.shift_add);
295 ggc_free (all.shift_mult);
296 ggc_free (all.shift);
297 ggc_free (all.wide_trunc);
298 ggc_free (all.wide_lshr);
299 ggc_free (all.wide_mult);
300 ggc_free (all.zext);
301 ggc_free (all.smod_32);
302 ggc_free (all.sdiv_32);
303 ggc_free (all.udiv);
304 ggc_free (all.sdiv);
305 ggc_free (all.mult);
306 ggc_free (all.neg);
307 ggc_free (all.plus);
308 ggc_free (all.reg);
311 /* Return an rtx representing minus the value of X.
312 MODE is the intended mode of the result,
313 useful if X is a CONST_INT. */
 315 rtx
 316 negate_rtx (machine_mode mode, rtx x)
318 rtx result = simplify_unary_operation (NEG, mode, x, mode);
320 if (result == 0)
321 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
323 return result;
326 /* Whether reverse storage order is supported on the target. */
327 static int reverse_storage_order_supported = -1;
329 /* Check whether reverse storage order is supported on the target. */
331 static void
332 check_reverse_storage_order_support (void)
334 if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
336 reverse_storage_order_supported = 0;
337 sorry ("reverse scalar storage order");
339 else
340 reverse_storage_order_supported = 1;
343 /* Whether reverse FP storage order is supported on the target. */
344 static int reverse_float_storage_order_supported = -1;
346 /* Check whether reverse FP storage order is supported on the target. */
348 static void
349 check_reverse_float_storage_order_support (void)
351 if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
353 reverse_float_storage_order_supported = 0;
354 sorry ("reverse floating-point scalar storage order");
356 else
357 reverse_float_storage_order_supported = 1;
360 /* Return an rtx representing value of X with reverse storage order.
361 MODE is the intended mode of the result,
362 useful if X is a CONST_INT. */
 364 rtx
 365 flip_storage_order (enum machine_mode mode, rtx x)
367 enum machine_mode int_mode;
368 rtx result;
370 if (mode == QImode)
371 return x;
373 if (COMPLEX_MODE_P (mode))
375 rtx real = read_complex_part (x, false);
376 rtx imag = read_complex_part (x, true);
378 real = flip_storage_order (GET_MODE_INNER (mode), real);
379 imag = flip_storage_order (GET_MODE_INNER (mode), imag);
381 return gen_rtx_CONCAT (mode, real, imag);
384 if (__builtin_expect (reverse_storage_order_supported < 0, 0))
385 check_reverse_storage_order_support ();
387 if (SCALAR_INT_MODE_P (mode))
388 int_mode = mode;
389 else
391 if (FLOAT_MODE_P (mode)
392 && __builtin_expect (reverse_float_storage_order_supported < 0, 0))
393 check_reverse_float_storage_order_support ();
395 int_mode = mode_for_size (GET_MODE_PRECISION (mode), MODE_INT, 0);
396 if (int_mode == BLKmode)
398 sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
399 return x;
401 x = gen_lowpart (int_mode, x);
404 result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
405 if (result == 0)
406 result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
408 if (int_mode != mode)
409 result = gen_lowpart (mode, result);
411 return result;
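/* Illustrative example (added, not in the original source): for an SImode value,
   flip_storage_order reduces to a byte swap, so 0x11223344 becomes 0x44332211;
   QImode values are returned unchanged since a single byte has no storage
   order.  */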
414 /* Adjust bitfield memory MEM so that it points to the first unit of mode
415 MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
416 If MODE is BLKmode, return a reference to every byte in the bitfield.
417 Set *NEW_BITNUM to the bit position of the field within the new memory. */
419 static rtx
420 narrow_bit_field_mem (rtx mem, machine_mode mode,
421 unsigned HOST_WIDE_INT bitsize,
422 unsigned HOST_WIDE_INT bitnum,
423 unsigned HOST_WIDE_INT *new_bitnum)
425 if (mode == BLKmode)
427 *new_bitnum = bitnum % BITS_PER_UNIT;
428 HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
429 HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
430 / BITS_PER_UNIT);
431 return adjust_bitfield_address_size (mem, mode, offset, size);
433 else
435 unsigned int unit = GET_MODE_BITSIZE (mode);
436 *new_bitnum = bitnum % unit;
437 HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
438 return adjust_bitfield_address (mem, mode, offset);
442 /* The caller wants to perform insertion or extraction PATTERN on a
443 bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
444 BITREGION_START and BITREGION_END are as for store_bit_field
445 and FIELDMODE is the natural mode of the field.
447 Search for a mode that is compatible with the memory access
448 restrictions and (where applicable) with a register insertion or
449 extraction. Return the new memory on success, storing the adjusted
450 bit position in *NEW_BITNUM. Return null otherwise. */
452 static rtx
453 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
454 rtx op0, HOST_WIDE_INT bitsize,
455 HOST_WIDE_INT bitnum,
456 unsigned HOST_WIDE_INT bitregion_start,
457 unsigned HOST_WIDE_INT bitregion_end,
458 machine_mode fieldmode,
459 unsigned HOST_WIDE_INT *new_bitnum)
461 bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
462 bitregion_end, MEM_ALIGN (op0),
463 MEM_VOLATILE_P (op0));
464 machine_mode best_mode;
465 if (iter.next_mode (&best_mode))
467 /* We can use a memory in BEST_MODE. See whether this is true for
468 any wider modes. All other things being equal, we prefer to
469 use the widest mode possible because it tends to expose more
470 CSE opportunities. */
471 if (!iter.prefer_smaller_modes ())
473 /* Limit the search to the mode required by the corresponding
474 register insertion or extraction instruction, if any. */
475 machine_mode limit_mode = word_mode;
476 extraction_insn insn;
477 if (get_best_reg_extraction_insn (&insn, pattern,
478 GET_MODE_BITSIZE (best_mode),
479 fieldmode))
480 limit_mode = insn.field_mode;
482 machine_mode wider_mode;
483 while (iter.next_mode (&wider_mode)
484 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
485 best_mode = wider_mode;
487 return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
488 new_bitnum);
490 return NULL_RTX;
493 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
494 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
495 offset is then BITNUM / BITS_PER_UNIT. */
497 static bool
498 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
499 unsigned HOST_WIDE_INT bitsize,
500 machine_mode struct_mode)
502 if (BYTES_BIG_ENDIAN)
503 return (bitnum % BITS_PER_UNIT == 0
504 && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
505 || (bitnum + bitsize) % BITS_PER_WORD == 0));
506 else
507 return bitnum % BITS_PER_WORD == 0;
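/* Example (added for illustration, assuming 64-bit words and 8-bit units):
   within a DImode structure, a 16-bit field at bit 48 is a lowpart on a
   big-endian target (it ends at bit 64, the end of the mode, and 48 / 8 == 6
   is the corresponding subreg byte offset), while on a little-endian target a
   field qualifies when it starts on a word boundary, e.g. at bit 0.  */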
510 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
511 containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
512 Return false if the access would touch memory outside the range
513 BITREGION_START to BITREGION_END for conformance to the C++ memory
514 model. */
516 static bool
517 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
518 unsigned HOST_WIDE_INT bitnum,
519 machine_mode fieldmode,
520 unsigned HOST_WIDE_INT bitregion_start,
521 unsigned HOST_WIDE_INT bitregion_end)
523 unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
525 /* -fstrict-volatile-bitfields must be enabled and we must have a
526 volatile MEM. */
527 if (!MEM_P (op0)
528 || !MEM_VOLATILE_P (op0)
529 || flag_strict_volatile_bitfields <= 0)
530 return false;
532 /* Non-integral modes likely only happen with packed structures.
533 Punt. */
534 if (!SCALAR_INT_MODE_P (fieldmode))
535 return false;
537 /* The bit size must not be larger than the field mode, and
538 the field mode must not be larger than a word. */
539 if (bitsize > modesize || modesize > BITS_PER_WORD)
540 return false;
542 /* Check for cases of unaligned fields that must be split. */
543 if (bitnum % modesize + bitsize > modesize)
544 return false;
 546 /* The memory must be sufficiently aligned for a MODESIZE access.
 547 This condition guarantees that the memory access will not
 548 touch anything after the end of the structure.  */
549 if (MEM_ALIGN (op0) < modesize)
550 return false;
552 /* Check for cases where the C++ memory model applies. */
553 if (bitregion_end != 0
554 && (bitnum - bitnum % modesize < bitregion_start
555 || bitnum - bitnum % modesize + modesize - 1 > bitregion_end))
556 return false;
558 return true;
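/* Example for the C++ memory model check in strict_volatile_bitfield_p above
   (illustration only, MODESIZE == 32): a field at BITNUM == 40 is accessed with
   a single 32-bit access covering bits 32..63.  With BITREGION_START == 32 and
   BITREGION_END == 63 this is allowed; if BITREGION_END were 47, the access
   would touch bits outside the region and the function returns false.  */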
561 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
562 bit number BITNUM can be treated as a simple value of mode MODE. */
564 static bool
565 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
566 unsigned HOST_WIDE_INT bitnum, machine_mode mode)
568 return (MEM_P (op0)
569 && bitnum % BITS_PER_UNIT == 0
570 && bitsize == GET_MODE_BITSIZE (mode)
571 && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
572 || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
573 && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
576 /* Try to use instruction INSV to store VALUE into a field of OP0.
577 BITSIZE and BITNUM are as for store_bit_field. */
579 static bool
580 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
581 unsigned HOST_WIDE_INT bitsize,
582 unsigned HOST_WIDE_INT bitnum,
583 rtx value)
585 struct expand_operand ops[4];
586 rtx value1;
587 rtx xop0 = op0;
588 rtx_insn *last = get_last_insn ();
589 bool copy_back = false;
591 machine_mode op_mode = insv->field_mode;
592 unsigned int unit = GET_MODE_BITSIZE (op_mode);
593 if (bitsize == 0 || bitsize > unit)
594 return false;
596 if (MEM_P (xop0))
597 /* Get a reference to the first byte of the field. */
598 xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
599 &bitnum);
600 else
602 /* Convert from counting within OP0 to counting in OP_MODE. */
603 if (BYTES_BIG_ENDIAN)
604 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
606 /* If xop0 is a register, we need it in OP_MODE
607 to make it acceptable to the format of insv. */
608 if (GET_CODE (xop0) == SUBREG)
609 /* We can't just change the mode, because this might clobber op0,
610 and we will need the original value of op0 if insv fails. */
611 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
612 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
613 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
616 /* If the destination is a paradoxical subreg such that we need a
617 truncate to the inner mode, perform the insertion on a temporary and
618 truncate the result to the original destination. Note that we can't
619 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
620 X) 0)) is (reg:N X). */
621 if (GET_CODE (xop0) == SUBREG
622 && REG_P (SUBREG_REG (xop0))
623 && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
624 op_mode))
626 rtx tem = gen_reg_rtx (op_mode);
627 emit_move_insn (tem, xop0);
628 xop0 = tem;
629 copy_back = true;
 632 /* There is a similar overflow check at the start of store_bit_field_1,
 633 but it only handles the case where the field lies completely outside
 634 the register.  There are also cases where the field lies only partially
 635 in the register, and we need to adjust BITSIZE for that partial
 636 overflow.  Without this adjustment, pr48335-2.c is broken on big-endian
 637 targets that have a bit-insert instruction, such as arm and aarch64.  */
639 if (bitsize + bitnum > unit && bitnum < unit)
641 warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
642 "destination object, data truncated into %wu-bit",
643 bitsize, unit - bitnum);
644 bitsize = unit - bitnum;
647 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
648 "backwards" from the size of the unit we are inserting into.
649 Otherwise, we count bits from the most significant on a
650 BYTES/BITS_BIG_ENDIAN machine. */
652 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
653 bitnum = unit - bitsize - bitnum;
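/* Illustration of the renumbering above (not in the original source): with
   UNIT == 32, BITSIZE == 8 and BITNUM == 4, the operand passed to the insv
   pattern becomes 32 - 8 - 4 == 20, i.e. the same field counted from the
   opposite end of the unit.  */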
655 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
656 value1 = value;
657 if (GET_MODE (value) != op_mode)
659 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
661 rtx tmp;
662 /* Optimization: Don't bother really extending VALUE
663 if it has all the bits we will actually use. However,
664 if we must narrow it, be sure we do it correctly. */
666 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
668 tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
669 if (! tmp)
670 tmp = simplify_gen_subreg (op_mode,
671 force_reg (GET_MODE (value),
672 value1),
673 GET_MODE (value), 0);
675 else
677 tmp = gen_lowpart_if_possible (op_mode, value1);
678 if (! tmp)
679 tmp = gen_lowpart (op_mode, force_reg (GET_MODE (value),
680 value1));
682 value1 = tmp;
684 else if (CONST_INT_P (value))
685 value1 = gen_int_mode (INTVAL (value), op_mode);
686 else
687 /* Parse phase is supposed to make VALUE's data type
688 match that of the component reference, which is a type
689 at least as wide as the field; so VALUE should have
690 a mode that corresponds to that type. */
691 gcc_assert (CONSTANT_P (value));
694 create_fixed_operand (&ops[0], xop0);
695 create_integer_operand (&ops[1], bitsize);
696 create_integer_operand (&ops[2], bitnum);
697 create_input_operand (&ops[3], value1, op_mode);
698 if (maybe_expand_insn (insv->icode, 4, ops))
700 if (copy_back)
701 convert_move (op0, xop0, true);
702 return true;
704 delete_insns_since (last);
705 return false;
708 /* A subroutine of store_bit_field, with the same arguments. Return true
709 if the operation could be implemented.
711 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
712 no other way of implementing the operation. If FALLBACK_P is false,
713 return false instead. */
715 static bool
716 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
717 unsigned HOST_WIDE_INT bitnum,
718 unsigned HOST_WIDE_INT bitregion_start,
719 unsigned HOST_WIDE_INT bitregion_end,
720 machine_mode fieldmode,
721 rtx value, bool reverse, bool fallback_p)
723 rtx op0 = str_rtx;
724 rtx orig_value;
726 while (GET_CODE (op0) == SUBREG)
728 /* The following line once was done only if WORDS_BIG_ENDIAN,
729 but I think that is a mistake. WORDS_BIG_ENDIAN is
730 meaningful at a much higher level; when structures are copied
731 between memory and regs, the higher-numbered regs
732 always get higher addresses. */
733 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
734 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
735 int byte_offset = 0;
737 /* Paradoxical subregs need special handling on big-endian machines. */
738 if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
740 int difference = inner_mode_size - outer_mode_size;
742 if (WORDS_BIG_ENDIAN)
743 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
744 if (BYTES_BIG_ENDIAN)
745 byte_offset += difference % UNITS_PER_WORD;
747 else
748 byte_offset = SUBREG_BYTE (op0);
750 bitnum += byte_offset * BITS_PER_UNIT;
751 op0 = SUBREG_REG (op0);
754 /* No action is needed if the target is a register and if the field
755 lies completely outside that register. This can occur if the source
756 code contains an out-of-bounds access to a small array. */
757 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
758 return true;
760 /* Use vec_set patterns for inserting parts of vectors whenever
761 available. */
762 if (VECTOR_MODE_P (GET_MODE (op0))
763 && !MEM_P (op0)
764 && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
765 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
766 && bitsize == GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
767 && !(bitnum % GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
769 struct expand_operand ops[3];
770 machine_mode outermode = GET_MODE (op0);
771 machine_mode innermode = GET_MODE_INNER (outermode);
772 enum insn_code icode = optab_handler (vec_set_optab, outermode);
773 int pos = bitnum / GET_MODE_BITSIZE (innermode);
775 create_fixed_operand (&ops[0], op0);
776 create_input_operand (&ops[1], value, innermode);
777 create_integer_operand (&ops[2], pos);
778 if (maybe_expand_insn (icode, 3, ops))
779 return true;
782 /* If the target is a register, overwriting the entire object, or storing
783 a full-word or multi-word field can be done with just a SUBREG. */
784 if (!MEM_P (op0)
785 && bitsize == GET_MODE_BITSIZE (fieldmode)
786 && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
787 || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
789 /* Use the subreg machinery either to narrow OP0 to the required
790 words or to cope with mode punning between equal-sized modes.
791 In the latter case, use subreg on the rhs side, not lhs. */
792 rtx sub;
794 if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
796 sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
797 if (sub)
799 if (reverse)
800 sub = flip_storage_order (GET_MODE (op0), sub);
801 emit_move_insn (op0, sub);
802 return true;
805 else
807 sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
808 bitnum / BITS_PER_UNIT);
809 if (sub)
811 if (reverse)
812 value = flip_storage_order (fieldmode, value);
813 emit_move_insn (sub, value);
814 return true;
819 /* If the target is memory, storing any naturally aligned field can be
820 done with a simple store. For targets that support fast unaligned
821 memory, any naturally sized, unit aligned field can be done directly. */
822 if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
824 op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
825 if (reverse)
826 value = flip_storage_order (fieldmode, value);
827 emit_move_insn (op0, value);
828 return true;
831 /* Make sure we are playing with integral modes. Pun with subregs
832 if we aren't. This must come after the entire register case above,
833 since that case is valid for any mode. The following cases are only
834 valid for integral modes. */
836 machine_mode imode = int_mode_for_mode (GET_MODE (op0));
837 if (imode != GET_MODE (op0))
839 if (MEM_P (op0))
840 op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
841 else
843 gcc_assert (imode != BLKmode);
844 op0 = gen_lowpart (imode, op0);
849 /* Storing an lsb-aligned field in a register
850 can be done with a movstrict instruction. */
852 if (!MEM_P (op0)
853 && !reverse
854 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
855 && bitsize == GET_MODE_BITSIZE (fieldmode)
856 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
858 struct expand_operand ops[2];
859 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
860 rtx arg0 = op0;
861 unsigned HOST_WIDE_INT subreg_off;
863 if (GET_CODE (arg0) == SUBREG)
865 /* Else we've got some float mode source being extracted into
866 a different float mode destination -- this combination of
867 subregs results in Severe Tire Damage. */
868 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
869 || GET_MODE_CLASS (fieldmode) == MODE_INT
870 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
871 arg0 = SUBREG_REG (arg0);
874 subreg_off = bitnum / BITS_PER_UNIT;
875 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
877 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
879 create_fixed_operand (&ops[0], arg0);
880 /* Shrink the source operand to FIELDMODE. */
881 create_convert_operand_to (&ops[1], value, fieldmode, false);
882 if (maybe_expand_insn (icode, 2, ops))
883 return true;
887 /* Handle fields bigger than a word. */
889 if (bitsize > BITS_PER_WORD)
891 /* Here we transfer the words of the field
892 in the order least significant first.
893 This is because the most significant word is the one which may
894 be less than full.
895 However, only do that if the value is not BLKmode. */
897 const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
898 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
899 unsigned int i;
900 rtx_insn *last;
902 /* This is the mode we must force value to, so that there will be enough
903 subwords to extract. Note that fieldmode will often (always?) be
904 VOIDmode, because that is what store_field uses to indicate that this
905 is a bit field, but passing VOIDmode to operand_subword_force
906 is not allowed. */
907 fieldmode = GET_MODE (value);
908 if (fieldmode == VOIDmode)
909 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
911 last = get_last_insn ();
912 for (i = 0; i < nwords; i++)
914 /* If I is 0, use the low-order word in both field and target;
915 if I is 1, use the next to lowest word; and so on. */
916 unsigned int wordnum = (backwards
917 ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
918 - i - 1
919 : i);
920 unsigned int bit_offset = (backwards ^ reverse
921 ? MAX ((int) bitsize - ((int) i + 1)
 922 * BITS_PER_WORD,
 923 0)
 924 : (int) i * BITS_PER_WORD);
925 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
926 unsigned HOST_WIDE_INT new_bitsize =
927 MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
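/* Example (illustration only, assuming BITS_PER_WORD == 32): for a 48-bit
   field NWORDS is 2; the i == 0 iteration stores a full 32-bit word and the
   i == 1 iteration stores the remaining NEW_BITSIZE == 16 bits.  */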
929 /* If the remaining chunk doesn't have full wordsize we have
930 to make sure that for big-endian machines the higher order
931 bits are used. */
932 if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
933 value_word = simplify_expand_binop (word_mode, lshr_optab,
934 value_word,
935 GEN_INT (BITS_PER_WORD
936 - new_bitsize),
937 NULL_RTX, true,
938 OPTAB_LIB_WIDEN);
940 if (!store_bit_field_1 (op0, new_bitsize,
941 bitnum + bit_offset,
942 bitregion_start, bitregion_end,
943 word_mode,
944 value_word, reverse, fallback_p))
946 delete_insns_since (last);
947 return false;
950 return true;
953 /* If VALUE has a floating-point or complex mode, access it as an
954 integer of the corresponding size. This can occur on a machine
955 with 64 bit registers that uses SFmode for float. It can also
956 occur for unaligned float or complex fields. */
957 orig_value = value;
958 if (GET_MODE (value) != VOIDmode
959 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
960 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
962 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
963 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
966 /* If OP0 is a multi-word register, narrow it to the affected word.
967 If the region spans two words, defer to store_split_bit_field. */
968 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
970 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
971 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
972 gcc_assert (op0);
973 bitnum %= BITS_PER_WORD;
974 if (bitnum + bitsize > BITS_PER_WORD)
976 if (!fallback_p)
977 return false;
979 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
980 bitregion_end, value, reverse);
981 return true;
985 /* From here on we can assume that the field to be stored in fits
986 within a word. If the destination is a register, it too fits
987 in a word. */
989 extraction_insn insv;
990 if (!MEM_P (op0)
991 && !reverse
992 && get_best_reg_extraction_insn (&insv, EP_insv,
993 GET_MODE_BITSIZE (GET_MODE (op0)),
994 fieldmode)
995 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
996 return true;
998 /* If OP0 is a memory, try copying it to a register and seeing if a
999 cheap register alternative is available. */
1000 if (MEM_P (op0) && !reverse)
1002 if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
1003 fieldmode)
1004 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
1005 return true;
1007 rtx_insn *last = get_last_insn ();
1009 /* Try loading part of OP0 into a register, inserting the bitfield
1010 into that, and then copying the result back to OP0. */
1011 unsigned HOST_WIDE_INT bitpos;
1012 rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1013 bitregion_start, bitregion_end,
1014 fieldmode, &bitpos);
1015 if (xop0)
1017 rtx tempreg = copy_to_reg (xop0);
1018 if (store_bit_field_1 (tempreg, bitsize, bitpos,
1019 bitregion_start, bitregion_end,
1020 fieldmode, orig_value, reverse, false))
1022 emit_move_insn (xop0, tempreg);
1023 return true;
1025 delete_insns_since (last);
1029 if (!fallback_p)
1030 return false;
1032 store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
1033 bitregion_end, value, reverse);
1034 return true;
1037 /* Generate code to store value from rtx VALUE
1038 into a bit-field within structure STR_RTX
1039 containing BITSIZE bits starting at bit BITNUM.
1041 BITREGION_START is bitpos of the first bitfield in this region.
1042 BITREGION_END is the bitpos of the ending bitfield in this region.
1043 These two fields are 0, if the C++ memory model does not apply,
1044 or we are not interested in keeping track of bitfield regions.
1046 FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1048 If REVERSE is true, the store is to be done in reverse order. */
1050 void
1051 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1052 unsigned HOST_WIDE_INT bitnum,
1053 unsigned HOST_WIDE_INT bitregion_start,
1054 unsigned HOST_WIDE_INT bitregion_end,
1055 machine_mode fieldmode,
1056 rtx value, bool reverse)
1058 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1059 if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode,
1060 bitregion_start, bitregion_end))
1062 /* Storing of a full word can be done with a simple store.
1063 We know here that the field can be accessed with one single
1064 instruction. For targets that support unaligned memory,
1065 an unaligned access may be necessary. */
1066 if (bitsize == GET_MODE_BITSIZE (fieldmode))
1068 str_rtx = adjust_bitfield_address (str_rtx, fieldmode,
1069 bitnum / BITS_PER_UNIT);
1070 if (reverse)
1071 value = flip_storage_order (fieldmode, value);
1072 gcc_assert (bitnum % BITS_PER_UNIT == 0);
1073 emit_move_insn (str_rtx, value);
1075 else
1077 rtx temp;
1079 str_rtx = narrow_bit_field_mem (str_rtx, fieldmode, bitsize, bitnum,
1080 &bitnum);
1081 gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (fieldmode));
1082 temp = copy_to_reg (str_rtx);
1083 if (!store_bit_field_1 (temp, bitsize, bitnum, 0, 0,
1084 fieldmode, value, reverse, true))
1085 gcc_unreachable ();
1087 emit_move_insn (str_rtx, temp);
1090 return;
1093 /* Under the C++0x memory model, we must not touch bits outside the
1094 bit region. Adjust the address to start at the beginning of the
1095 bit region. */
1096 if (MEM_P (str_rtx) && bitregion_start > 0)
1098 machine_mode bestmode;
1099 HOST_WIDE_INT offset, size;
1101 gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
1103 offset = bitregion_start / BITS_PER_UNIT;
1104 bitnum -= bitregion_start;
1105 size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
1106 bitregion_end -= bitregion_start;
1107 bitregion_start = 0;
1108 bestmode = get_best_mode (bitsize, bitnum,
1109 bitregion_start, bitregion_end,
1110 MEM_ALIGN (str_rtx), VOIDmode,
1111 MEM_VOLATILE_P (str_rtx));
1112 str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
1115 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1116 bitregion_start, bitregion_end,
1117 fieldmode, value, reverse, true))
1118 gcc_unreachable ();
1121 /* Use shifts and boolean operations to store VALUE into a bit field of
1122 width BITSIZE in OP0, starting at bit BITNUM.
1124 If REVERSE is true, the store is to be done in reverse order. */
1126 static void
1127 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1128 unsigned HOST_WIDE_INT bitnum,
1129 unsigned HOST_WIDE_INT bitregion_start,
1130 unsigned HOST_WIDE_INT bitregion_end,
1131 rtx value, bool reverse)
1133 /* There is a case not handled here:
1134 a structure with a known alignment of just a halfword
1135 and a field split across two aligned halfwords within the structure.
1136 Or likewise a structure with a known alignment of just a byte
1137 and a field split across two bytes.
1138 Such cases are not supposed to be able to occur. */
1140 if (MEM_P (op0))
1142 machine_mode mode = GET_MODE (op0);
1143 if (GET_MODE_BITSIZE (mode) == 0
1144 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
1145 mode = word_mode;
1146 mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1147 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
1149 if (mode == VOIDmode)
1151 /* The only way this should occur is if the field spans word
1152 boundaries. */
1153 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
1154 bitregion_end, value, reverse);
1155 return;
1158 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1161 store_fixed_bit_field_1 (op0, bitsize, bitnum, value, reverse);
1164 /* Helper function for store_fixed_bit_field, stores
1165 the bit field always using the MODE of OP0. */
1167 static void
1168 store_fixed_bit_field_1 (rtx op0, unsigned HOST_WIDE_INT bitsize,
1169 unsigned HOST_WIDE_INT bitnum,
1170 rtx value, bool reverse)
1172 machine_mode mode;
1173 rtx temp;
1174 int all_zero = 0;
1175 int all_one = 0;
1177 mode = GET_MODE (op0);
1178 gcc_assert (SCALAR_INT_MODE_P (mode));
1180 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1181 for invalid input, such as f5 from gcc.dg/pr48335-2.c. */
1183 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1184 /* BITNUM is the distance between our msb
1185 and that of the containing datum.
1186 Convert it to the distance from the lsb. */
1187 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1189 /* Now BITNUM is always the distance between our lsb
1190 and that of OP0. */
1192 /* Shift VALUE left by BITNUM bits. If VALUE is not constant,
1193 we must first convert its mode to MODE. */
1195 if (CONST_INT_P (value))
1197 unsigned HOST_WIDE_INT v = UINTVAL (value);
1199 if (bitsize < HOST_BITS_PER_WIDE_INT)
1200 v &= ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1;
1202 if (v == 0)
1203 all_zero = 1;
1204 else if ((bitsize < HOST_BITS_PER_WIDE_INT
1205 && v == ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1)
1206 || (bitsize == HOST_BITS_PER_WIDE_INT
1207 && v == (unsigned HOST_WIDE_INT) -1))
1208 all_one = 1;
1210 value = lshift_value (mode, v, bitnum);
1212 else
1214 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1215 && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1217 if (GET_MODE (value) != mode)
1218 value = convert_to_mode (mode, value, 1);
1220 if (must_and)
1221 value = expand_binop (mode, and_optab, value,
1222 mask_rtx (mode, 0, bitsize, 0),
1223 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1224 if (bitnum > 0)
1225 value = expand_shift (LSHIFT_EXPR, mode, value,
1226 bitnum, NULL_RTX, 1);
1229 if (reverse)
1230 value = flip_storage_order (mode, value);
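/* Net effect, as a worked example (illustration only): storing an 8-bit VALUE
   at BITNUM == 4 (counted from the least significant bit) of an SImode OP0
   computes temp = (op0 & ~0x00000ff0) | ((value & 0xff) << 4) and writes TEMP
   back to OP0 below.  */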
1232 /* Now clear the chosen bits in OP0,
1233 except that if VALUE is -1 we need not bother. */
1234 /* We keep the intermediates in registers to allow CSE to combine
1235 consecutive bitfield assignments. */
1237 temp = force_reg (mode, op0);
1239 if (! all_one)
1241 rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
1242 if (reverse)
1243 mask = flip_storage_order (mode, mask);
1244 temp = expand_binop (mode, and_optab, temp, mask,
1245 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1246 temp = force_reg (mode, temp);
1249 /* Now logical-or VALUE into OP0, unless it is zero. */
1251 if (! all_zero)
1253 temp = expand_binop (mode, ior_optab, temp, value,
1254 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1255 temp = force_reg (mode, temp);
1258 if (op0 != temp)
1260 op0 = copy_rtx (op0);
1261 emit_move_insn (op0, temp);
1265 /* Store a bit field that is split across multiple accessible memory objects.
1267 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1268 BITSIZE is the field width; BITPOS the position of its first bit
1269 (within the word).
1270 VALUE is the value to store.
1272 If REVERSE is true, the store is to be done in reverse order.
1274 This does not yet handle fields wider than BITS_PER_WORD. */
1276 static void
1277 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1278 unsigned HOST_WIDE_INT bitpos,
1279 unsigned HOST_WIDE_INT bitregion_start,
1280 unsigned HOST_WIDE_INT bitregion_end,
1281 rtx value, bool reverse)
1283 unsigned int unit, total_bits, bitsdone = 0;
 1285 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle
 1286 that much at a time.  */
1287 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1288 unit = BITS_PER_WORD;
1289 else
1290 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1292 /* If OP0 is a memory with a mode, then UNIT must not be larger than
1293 OP0's mode as well. Otherwise, store_fixed_bit_field will call us
1294 again, and we will mutually recurse forever. */
1295 if (MEM_P (op0) && GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1296 unit = MIN (unit, GET_MODE_BITSIZE (GET_MODE (op0)));
1298 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1299 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1300 that VALUE might be a floating-point constant. */
1301 if (CONSTANT_P (value) && !CONST_INT_P (value))
1303 rtx word = gen_lowpart_common (word_mode, value);
1305 if (word && (value != word))
1306 value = word;
1307 else
1308 value = gen_lowpart_common (word_mode,
1309 force_reg (GET_MODE (value) != VOIDmode
1310 ? GET_MODE (value)
1311 : word_mode, value));
1314 total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1316 while (bitsdone < bitsize)
1318 unsigned HOST_WIDE_INT thissize;
1319 unsigned HOST_WIDE_INT thispos;
1320 unsigned HOST_WIDE_INT offset;
1321 rtx part, word;
1323 offset = (bitpos + bitsdone) / unit;
1324 thispos = (bitpos + bitsdone) % unit;
 1326 /* When the region of bytes we can touch is restricted, decrease
 1327 UNIT close to the end of the region as needed.  If OP0 is a REG
 1328 or a SUBREG of a REG, don't do this, as there can't be data races
 1329 on a register and we can expand shorter code in some cases.  */
1330 if (bitregion_end
1331 && unit > BITS_PER_UNIT
1332 && bitpos + bitsdone - thispos + unit > bitregion_end + 1
1333 && !REG_P (op0)
1334 && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1336 unit = unit / 2;
1337 continue;
1340 /* THISSIZE must not overrun a word boundary. Otherwise,
1341 store_fixed_bit_field will call us again, and we will mutually
1342 recurse forever. */
1343 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1344 thissize = MIN (thissize, unit - thispos);
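/* Example of the splitting loop (illustration only, UNIT == 32): a 20-bit
   field at BITPOS == 28 is handled in two pieces, 4 bits at OFFSET 0,
   THISPOS 28, and then 16 bits at OFFSET 1, THISPOS 0.  */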
1346 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1348 /* Fetch successively less significant portions. */
1349 if (CONST_INT_P (value))
1350 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1351 >> (bitsize - bitsdone - thissize))
1352 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1353 /* Likewise, but the source is little-endian. */
1354 else if (reverse)
1355 part = extract_fixed_bit_field (word_mode, value, thissize,
1356 bitsize - bitsdone - thissize,
1357 NULL_RTX, 1, false);
1358 else
1360 int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1361 /* The args are chosen so that the last part includes the
1362 lsb. Give extract_bit_field the value it needs (with
1363 endianness compensation) to fetch the piece we want. */
1364 part = extract_fixed_bit_field (word_mode, value, thissize,
1365 total_bits - bitsize + bitsdone,
1366 NULL_RTX, 1, false);
1369 else
1371 /* Fetch successively more significant portions. */
1372 if (CONST_INT_P (value))
1373 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1374 >> bitsdone)
1375 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1376 /* Likewise, but the source is big-endian. */
1377 else if (reverse)
1378 part = extract_fixed_bit_field (word_mode, value, thissize,
1379 total_bits - bitsdone - thissize,
1380 NULL_RTX, 1, false);
1381 else
1382 part = extract_fixed_bit_field (word_mode, value, thissize,
1383 bitsdone, NULL_RTX, 1, false);
1386 /* If OP0 is a register, then handle OFFSET here.
1388 When handling multiword bitfields, extract_bit_field may pass
1389 down a word_mode SUBREG of a larger REG for a bitfield that actually
1390 crosses a word boundary. Thus, for a SUBREG, we must find
1391 the current word starting from the base register. */
1392 if (GET_CODE (op0) == SUBREG)
1394 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD)
1395 + (offset * unit / BITS_PER_WORD);
1396 machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
1397 if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1398 word = word_offset ? const0_rtx : op0;
1399 else
1400 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1401 GET_MODE (SUBREG_REG (op0)));
1402 offset &= BITS_PER_WORD / unit - 1;
1404 else if (REG_P (op0))
1406 machine_mode op0_mode = GET_MODE (op0);
1407 if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1408 word = offset ? const0_rtx : op0;
1409 else
1410 word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
1411 GET_MODE (op0));
1412 offset &= BITS_PER_WORD / unit - 1;
1414 else
1415 word = op0;
1417 /* OFFSET is in UNITs, and UNIT is in bits. If WORD is const0_rtx,
1418 it is just an out-of-bounds access. Ignore it. */
1419 if (word != const0_rtx)
1420 store_fixed_bit_field (word, thissize, offset * unit + thispos,
1421 bitregion_start, bitregion_end, part,
1422 reverse);
1423 bitsdone += thissize;
1427 /* A subroutine of extract_bit_field_1 that converts return value X
1428 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1429 to extract_bit_field. */
1431 static rtx
1432 convert_extracted_bit_field (rtx x, machine_mode mode,
1433 machine_mode tmode, bool unsignedp)
1435 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1436 return x;
1438 /* If the x mode is not a scalar integral, first convert to the
1439 integer mode of that size and then access it as a floating-point
1440 value via a SUBREG. */
1441 if (!SCALAR_INT_MODE_P (tmode))
1443 machine_mode smode;
1445 smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1446 x = convert_to_mode (smode, x, unsignedp);
1447 x = force_reg (smode, x);
1448 return gen_lowpart (tmode, x);
1451 return convert_to_mode (tmode, x, unsignedp);
1454 /* Try to use an ext(z)v pattern to extract a field from OP0.
1455 Return the extracted value on success, otherwise return null.
1456 EXT_MODE is the mode of the extraction and the other arguments
1457 are as for extract_bit_field. */
1459 static rtx
1460 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1461 unsigned HOST_WIDE_INT bitsize,
1462 unsigned HOST_WIDE_INT bitnum,
1463 int unsignedp, rtx target,
1464 machine_mode mode, machine_mode tmode)
1466 struct expand_operand ops[4];
1467 rtx spec_target = target;
1468 rtx spec_target_subreg = 0;
1469 machine_mode ext_mode = extv->field_mode;
1470 unsigned unit = GET_MODE_BITSIZE (ext_mode);
1472 if (bitsize == 0 || unit < bitsize)
1473 return NULL_RTX;
1475 if (MEM_P (op0))
1476 /* Get a reference to the first byte of the field. */
1477 op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1478 &bitnum);
1479 else
1481 /* Convert from counting within OP0 to counting in EXT_MODE. */
1482 if (BYTES_BIG_ENDIAN)
1483 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1485 /* If op0 is a register, we need it in EXT_MODE to make it
1486 acceptable to the format of ext(z)v. */
1487 if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1488 return NULL_RTX;
1489 if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1490 op0 = gen_lowpart_SUBREG (ext_mode, op0);
1493 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1494 "backwards" from the size of the unit we are extracting from.
1495 Otherwise, we count bits from the most significant on a
1496 BYTES/BITS_BIG_ENDIAN machine. */
1498 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1499 bitnum = unit - bitsize - bitnum;
1501 if (target == 0)
1502 target = spec_target = gen_reg_rtx (tmode);
1504 if (GET_MODE (target) != ext_mode)
1506 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1507 between the mode of the extraction (word_mode) and the target
1508 mode. Instead, create a temporary and use convert_move to set
1509 the target. */
1510 if (REG_P (target)
1511 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1513 target = gen_lowpart (ext_mode, target);
1514 if (GET_MODE_PRECISION (ext_mode)
1515 > GET_MODE_PRECISION (GET_MODE (spec_target)))
1516 spec_target_subreg = target;
1518 else
1519 target = gen_reg_rtx (ext_mode);
1522 create_output_operand (&ops[0], target, ext_mode);
1523 create_fixed_operand (&ops[1], op0);
1524 create_integer_operand (&ops[2], bitsize);
1525 create_integer_operand (&ops[3], bitnum);
1526 if (maybe_expand_insn (extv->icode, 4, ops))
1528 target = ops[0].value;
1529 if (target == spec_target)
1530 return target;
1531 if (target == spec_target_subreg)
1532 return spec_target;
1533 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1535 return NULL_RTX;
1538 /* A subroutine of extract_bit_field, with the same arguments.
1539 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1540 if we can find no other means of implementing the operation.
 1541 If FALLBACK_P is false, return NULL instead.  */
1543 static rtx
1544 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1545 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1546 machine_mode mode, machine_mode tmode,
1547 bool reverse, bool fallback_p)
1549 rtx op0 = str_rtx;
1550 machine_mode int_mode;
1551 machine_mode mode1;
1553 if (tmode == VOIDmode)
1554 tmode = mode;
1556 while (GET_CODE (op0) == SUBREG)
1558 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1559 op0 = SUBREG_REG (op0);
1562 /* If we have an out-of-bounds access to a register, just return an
1563 uninitialized register of the required mode. This can occur if the
1564 source code contains an out-of-bounds access to a small array. */
1565 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1566 return gen_reg_rtx (tmode);
1568 if (REG_P (op0)
1569 && mode == GET_MODE (op0)
1570 && bitnum == 0
1571 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1573 if (reverse)
1574 op0 = flip_storage_order (mode, op0);
1575 /* We're trying to extract a full register from itself. */
1576 return op0;
1579 /* See if we can get a better vector mode before extracting. */
1580 if (VECTOR_MODE_P (GET_MODE (op0))
1581 && !MEM_P (op0)
1582 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1584 machine_mode new_mode;
1586 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1587 new_mode = MIN_MODE_VECTOR_FLOAT;
1588 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1589 new_mode = MIN_MODE_VECTOR_FRACT;
1590 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1591 new_mode = MIN_MODE_VECTOR_UFRACT;
1592 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1593 new_mode = MIN_MODE_VECTOR_ACCUM;
1594 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1595 new_mode = MIN_MODE_VECTOR_UACCUM;
1596 else
1597 new_mode = MIN_MODE_VECTOR_INT;
1599 for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1600 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1601 && targetm.vector_mode_supported_p (new_mode))
1602 break;
1603 if (new_mode != VOIDmode)
1604 op0 = gen_lowpart (new_mode, op0);
1607 /* Use vec_extract patterns for extracting parts of vectors whenever
1608 available. */
1609 if (VECTOR_MODE_P (GET_MODE (op0))
1610 && !MEM_P (op0)
1611 && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1612 && ((bitnum + bitsize - 1) / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
1613 == bitnum / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
1615 struct expand_operand ops[3];
1616 machine_mode outermode = GET_MODE (op0);
1617 machine_mode innermode = GET_MODE_INNER (outermode);
1618 enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1619 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1621 create_output_operand (&ops[0], target, innermode);
1622 create_input_operand (&ops[1], op0, outermode);
1623 create_integer_operand (&ops[2], pos);
1624 if (maybe_expand_insn (icode, 3, ops))
1626 target = ops[0].value;
1627 if (GET_MODE (target) != mode)
1628 return gen_lowpart (tmode, target);
1629 return target;
1633 /* Make sure we are playing with integral modes. Pun with subregs
1634 if we aren't. */
1636 machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1637 if (imode != GET_MODE (op0))
1639 if (MEM_P (op0))
1640 op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
1641 else if (imode != BLKmode)
1643 op0 = gen_lowpart (imode, op0);
1645 /* If we got a SUBREG, force it into a register since we
1646 aren't going to be able to do another SUBREG on it. */
1647 if (GET_CODE (op0) == SUBREG)
1648 op0 = force_reg (imode, op0);
1650 else if (REG_P (op0))
1652 rtx reg, subreg;
1653 imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1654 MODE_INT);
1655 reg = gen_reg_rtx (imode);
1656 subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1657 emit_move_insn (subreg, op0);
1658 op0 = reg;
1659 bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1661 else
1663 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1664 rtx mem = assign_stack_temp (GET_MODE (op0), size);
1665 emit_move_insn (mem, op0);
1666 op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1671 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1672 If that's wrong, the solution is to test for it and set TARGET to 0
1673 if needed. */
1675 /* Get the mode of the field to use for atomic access or subreg
1676 conversion. */
1677 mode1 = mode;
1678 if (SCALAR_INT_MODE_P (tmode))
1680 machine_mode try_mode = mode_for_size (bitsize,
1681 GET_MODE_CLASS (tmode), 0);
1682 if (try_mode != BLKmode)
1683 mode1 = try_mode;
1685 gcc_assert (mode1 != BLKmode);
1687 /* Extraction of a full MODE1 value can be done with a subreg as long
1688 as the least significant bit of the value is the least significant
1689 bit of either OP0 or a word of OP0. */
1690 if (!MEM_P (op0)
1691 && !reverse
1692 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1693 && bitsize == GET_MODE_BITSIZE (mode1)
1694 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1696 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1697 bitnum / BITS_PER_UNIT);
1698 if (sub)
1699 return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1702 /* Extraction of a full MODE1 value can be done with a load as long as
1703 the field is on a byte boundary and is sufficiently aligned. */
1704 if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1706 op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1707 if (reverse)
1708 op0 = flip_storage_order (mode1, op0);
1709 return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1712 /* Handle fields bigger than a word. */
1714 if (bitsize > BITS_PER_WORD)
1716 /* Here we transfer the words of the field
1717 in the order least significant first.
1718 This is because the most significant word is the one which may
1719 be less than full. */
1721 const bool backwards = WORDS_BIG_ENDIAN;
1722 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1723 unsigned int i;
1724 rtx_insn *last;
1726 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1727 target = gen_reg_rtx (mode);
1729 /* In case we're about to clobber a base register or something
1730 (see gcc.c-torture/execute/20040625-1.c). */
1731 if (reg_mentioned_p (target, str_rtx))
1732 target = gen_reg_rtx (mode);
1734 /* Indicate for flow that the entire target reg is being set. */
1735 emit_clobber (target);
1737 last = get_last_insn ();
1738 for (i = 0; i < nwords; i++)
1740 /* If I is 0, use the low-order word in both field and target;
1741 if I is 1, use the next to lowest word; and so on. */
1742 /* Word number in TARGET to use. */
1743 unsigned int wordnum
1744 = (backwards
1745 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1746 : i);
1747 /* Offset from start of field in OP0. */
1748 unsigned int bit_offset = (backwards ^ reverse
1749 ? MAX ((int) bitsize - ((int) i + 1)
1750 * BITS_PER_WORD,
1752 : (int) i * BITS_PER_WORD);
1753 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1754 rtx result_part
1755 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1756 bitsize - i * BITS_PER_WORD),
1757 bitnum + bit_offset, 1, target_part,
1758 mode, word_mode, reverse, fallback_p);
1760 gcc_assert (target_part);
1761 if (!result_part)
1763 delete_insns_since (last);
1764 return NULL;
1767 if (result_part != target_part)
1768 emit_move_insn (target_part, result_part);
1771 if (unsignedp)
1773 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1774 need to be zero'd out. */
1775 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1777 unsigned int i, total_words;
1779 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1780 for (i = nwords; i < total_words; i++)
1781 emit_move_insn
1782 (operand_subword (target,
1783 backwards ? total_words - i - 1 : i,
1784 1, VOIDmode),
1785 const0_rtx);
1787 return target;
1790 /* Signed bit field: sign-extend with two arithmetic shifts. */
1791 target = expand_shift (LSHIFT_EXPR, mode, target,
1792 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1793 return expand_shift (RSHIFT_EXPR, mode, target,
1794 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
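/* Worked example (illustrative only) of the two-shift sign extension
   emitted above, assuming a 32-bit mode, BITSIZE == 3 and GCC's usual
   two's-complement arithmetic right shift:

     uint32_t raw = 0x5;                          // field bits 0b101 at the lsb
     int32_t  val = (int32_t) (raw << (32 - 3));  // 0xA0000000: field msb at bit 31
     val >>= 32 - 3;                              // arithmetic shift: val == -3

   The two expand_shift calls perform exactly this at the RTL level, using
   GET_MODE_BITSIZE (mode) - bitsize as the shift count.  */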
1797 /* If OP0 is a multi-word register, narrow it to the affected word.
1798 If the region spans two words, defer to extract_split_bit_field. */
1799 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1801 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1802 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1803 bitnum %= BITS_PER_WORD;
1804 if (bitnum + bitsize > BITS_PER_WORD)
1806 if (!fallback_p)
1807 return NULL_RTX;
1808 target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp,
1809 reverse);
1810 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1814 /* From here on we know the desired field is smaller than a word.
1815 If OP0 is a register, it too fits within a word. */
1816 enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1817 extraction_insn extv;
1818 if (!MEM_P (op0)
1819 && !reverse
1820 /* ??? We could limit the structure size to the part of OP0 that
1821 contains the field, with appropriate checks for endianness
1822 and TRULY_NOOP_TRUNCATION. */
1823 && get_best_reg_extraction_insn (&extv, pattern,
1824 GET_MODE_BITSIZE (GET_MODE (op0)),
1825 tmode))
1827 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1828 unsignedp, target, mode,
1829 tmode);
1830 if (result)
1831 return result;
1834 /* If OP0 is a memory, try copying it to a register and seeing if a
1835 cheap register alternative is available. */
1836 if (MEM_P (op0) && !reverse)
1838 if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1839 tmode))
1841 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1842 bitnum, unsignedp,
1843 target, mode,
1844 tmode);
1845 if (result)
1846 return result;
1849 rtx_insn *last = get_last_insn ();
1851 /* Try loading part of OP0 into a register and extracting the
1852 bitfield from that. */
1853 unsigned HOST_WIDE_INT bitpos;
1854 rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1855 0, 0, tmode, &bitpos);
1856 if (xop0)
1858 xop0 = copy_to_reg (xop0);
1859 rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1860 unsignedp, target,
1861 mode, tmode, reverse, false);
1862 if (result)
1863 return result;
1864 delete_insns_since (last);
1868 if (!fallback_p)
1869 return NULL;
1871 /* Find a correspondingly-sized integer field, so we can apply
1872 shifts and masks to it. */
1873 int_mode = int_mode_for_mode (tmode);
1874 if (int_mode == BLKmode)
1875 int_mode = int_mode_for_mode (mode);
1876 /* Should probably push op0 out to memory and then do a load. */
1877 gcc_assert (int_mode != BLKmode);
1879 target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum, target,
1880 unsignedp, reverse);
1882 /* Complex values must be reversed piecewise, so we need to undo the global
1883 reversal, convert to the complex mode and reverse again. */
1884 if (reverse && COMPLEX_MODE_P (tmode))
1886 target = flip_storage_order (int_mode, target);
1887 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1888 target = flip_storage_order (tmode, target);
1890 else
1891 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1893 return target;
1896 /* Generate code to extract a bit-field from STR_RTX
1897 containing BITSIZE bits, starting at BITNUM,
1898 and put it in TARGET if possible (if TARGET is nonzero).
1899 Regardless of TARGET, we return the rtx for where the value is placed.
1901 STR_RTX is the structure containing the byte (a REG or MEM).
1902 UNSIGNEDP is nonzero if this is an unsigned bit field.
1903 MODE is the natural mode of the field value once extracted.
1904 TMODE is the mode the caller would like the value to have;
1905 but the value may be returned with type MODE instead.
1907 If REVERSE is true, the extraction is to be done in reverse order.
1909 If a TARGET is specified and we can store in it at no extra cost,
1910 we do so, and return TARGET.
1911 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1912 if they are equally easy. */
1915 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1916 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1917 machine_mode mode, machine_mode tmode, bool reverse)
1919 machine_mode mode1;
1921 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1922 if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1923 mode1 = GET_MODE (str_rtx);
1924 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1925 mode1 = GET_MODE (target);
1926 else
1927 mode1 = tmode;
1929 if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, mode1, 0, 0))
1931 /* Extraction of a full MODE1 value can be done with a simple load.
1932 We know here that the field can be accessed with one single
1933 instruction. For targets that support unaligned memory,
1934 an unaligned access may be necessary. */
1935 if (bitsize == GET_MODE_BITSIZE (mode1))
1937 rtx result = adjust_bitfield_address (str_rtx, mode1,
1938 bitnum / BITS_PER_UNIT);
1939 if (reverse)
1940 result = flip_storage_order (mode1, result);
1941 gcc_assert (bitnum % BITS_PER_UNIT == 0);
1942 return convert_extracted_bit_field (result, mode, tmode, unsignedp);
1945 str_rtx = narrow_bit_field_mem (str_rtx, mode1, bitsize, bitnum,
1946 &bitnum);
1947 gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (mode1));
1948 str_rtx = copy_to_reg (str_rtx);
1951 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1952 target, mode, tmode, reverse, true);
1955 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1956 from bit BITNUM of OP0.
1958 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1959 If REVERSE is true, the extraction is to be done in reverse order.
1961 If TARGET is nonzero, attempts to store the value there
1962 and return TARGET, but this is not guaranteed.
1963 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
1965 static rtx
1966 extract_fixed_bit_field (machine_mode tmode, rtx op0,
1967 unsigned HOST_WIDE_INT bitsize,
1968 unsigned HOST_WIDE_INT bitnum, rtx target,
1969 int unsignedp, bool reverse)
1971 if (MEM_P (op0))
1973 machine_mode mode
1974 = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), word_mode,
1975 MEM_VOLATILE_P (op0));
1977 if (mode == VOIDmode)
1978 /* The only way this should occur is if the field spans word
1979 boundaries. */
1980 return extract_split_bit_field (op0, bitsize, bitnum, unsignedp,
1981 reverse);
1983 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1986 return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
1987 target, unsignedp, reverse);
1990 /* Helper function for extract_fixed_bit_field, extracts
1991 the bit field always using the MODE of OP0. */
1993 static rtx
1994 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0,
1995 unsigned HOST_WIDE_INT bitsize,
1996 unsigned HOST_WIDE_INT bitnum, rtx target,
1997 int unsignedp, bool reverse)
1999 machine_mode mode = GET_MODE (op0);
2000 gcc_assert (SCALAR_INT_MODE_P (mode));
2002 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
2003 for invalid input, such as extract equivalent of f5 from
2004 gcc.dg/pr48335-2.c. */
2006 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2007 /* BITNUM is the distance between our msb and that of OP0.
2008 Convert it to the distance from the lsb. */
2009 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
2011 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
2012 We have reduced the big-endian case to the little-endian case. */
2013 if (reverse)
2014 op0 = flip_storage_order (mode, op0);
2016 if (unsignedp)
2018 if (bitnum)
2020 /* If the field does not already start at the lsb,
2021 shift it so it does. */
2022 /* Maybe propagate the target for the shift. */
2023 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2024 if (tmode != mode)
2025 subtarget = 0;
2026 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
2028 /* Convert the value to the desired mode. */
2029 if (mode != tmode)
2030 op0 = convert_to_mode (tmode, op0, 1);
2032 /* Unless the msb of the field used to be the msb when we shifted,
2033 mask out the upper bits. */
2035 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
2036 return expand_binop (GET_MODE (op0), and_optab, op0,
2037 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
2038 target, 1, OPTAB_LIB_WIDEN);
2039 return op0;
2042 /* To extract a signed bit-field, first shift its msb to the msb of the word,
2043 then arithmetic-shift its lsb to the lsb of the word. */
2044 op0 = force_reg (mode, op0);
2046 /* Find the narrowest integer mode that contains the field. */
2048 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
2049 mode = GET_MODE_WIDER_MODE (mode))
2050 if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
2052 op0 = convert_to_mode (mode, op0, 0);
2053 break;
2056 if (mode != tmode)
2057 target = 0;
2059 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2061 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2062 /* Maybe propagate the target for the shift. */
2063 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2064 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2067 return expand_shift (RSHIFT_EXPR, mode, op0,
2068 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
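/* Worked example (illustrative only) of the signed path above, assuming an
   8-bit mode with BITNUM == 2 and BITSIZE == 3, so the field occupies bits
   4..2 of OP0:

     op0               = 0b01110100   // field bits are 101
     op0 << 3          = 0b10100000   // 8 - (3 + 2): field msb -> bit 7
     (arithmetic) >> 5 = 0b11111101   // == -3, sign-extended field value

   The unsigned path earlier instead shifts right by BITNUM and, unless the
   field's msb was already the mode's msb, masks with
   mask_rtx (..., 0, bitsize, 0).  */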
2071 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2072 VALUE << BITPOS. */
2074 static rtx
2075 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2076 int bitpos)
2078 return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2081 /* Extract a bit field that is split across two words
2082 and return an RTX for the result.
2084 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2085 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2086 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2088 If REVERSE is true, the extraction is to be done in reverse order. */
2090 static rtx
2091 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
2092 unsigned HOST_WIDE_INT bitpos, int unsignedp,
2093 bool reverse)
2095 unsigned int unit;
2096 unsigned int bitsdone = 0;
2097 rtx result = NULL_RTX;
2098 int first = 1;
2100 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
2101 much at a time. */
2102 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2103 unit = BITS_PER_WORD;
2104 else
2105 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2107 while (bitsdone < bitsize)
2109 unsigned HOST_WIDE_INT thissize;
2110 rtx part, word;
2111 unsigned HOST_WIDE_INT thispos;
2112 unsigned HOST_WIDE_INT offset;
2114 offset = (bitpos + bitsdone) / unit;
2115 thispos = (bitpos + bitsdone) % unit;
2117 /* THISSIZE must not overrun a word boundary. Otherwise,
2118 extract_fixed_bit_field will call us again, and we will mutually
2119 recurse forever. */
2120 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2121 thissize = MIN (thissize, unit - thispos);
2123 /* If OP0 is a register, then handle OFFSET here.
2125 When handling multiword bitfields, extract_bit_field may pass
2126 down a word_mode SUBREG of a larger REG for a bitfield that actually
2127 crosses a word boundary. Thus, for a SUBREG, we must find
2128 the current word starting from the base register. */
2129 if (GET_CODE (op0) == SUBREG)
2131 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
2132 word = operand_subword_force (SUBREG_REG (op0), word_offset,
2133 GET_MODE (SUBREG_REG (op0)));
2134 offset = 0;
2136 else if (REG_P (op0))
2138 word = operand_subword_force (op0, offset, GET_MODE (op0));
2139 offset = 0;
2141 else
2142 word = op0;
2144 /* Extract the parts in bit-counting order,
2145 whose meaning is determined by BYTES_PER_UNIT.
2146 OFFSET is in UNITs, and UNIT is in bits. */
2147 part = extract_fixed_bit_field (word_mode, word, thissize,
2148 offset * unit + thispos, 0, 1, reverse);
2149 bitsdone += thissize;
2151 /* Shift this part into place for the result. */
2152 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2154 if (bitsize != bitsdone)
2155 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2156 bitsize - bitsdone, 0, 1);
2158 else
2160 if (bitsdone != thissize)
2161 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2162 bitsdone - thissize, 0, 1);
2165 if (first)
2166 result = part;
2167 else
2168 /* Combine the parts with bitwise or. This works
2169 because we extracted each part as an unsigned bit field. */
2170 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2171 OPTAB_LIB_WIDEN);
2173 first = 0;
2176 /* Unsigned bit field: we are done. */
2177 if (unsignedp)
2178 return result;
2179 /* Signed bit field: sign-extend with two arithmetic shifts. */
2180 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2181 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2182 return expand_shift (RSHIFT_EXPR, word_mode, result,
2183 BITS_PER_WORD - bitsize, NULL_RTX, 0);
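/* Worked example (illustrative only) of how the loop above reassembles a
   field that straddles a word boundary.  Assume 32-bit words,
   !BYTES_BIG_ENDIAN, !REVERSE, BITPOS == 28 and BITSIZE == 10:

     pass 1: thispos == 28, thissize == 4   // bits 31..28 of word 0
             no shift (bitsdone == thissize); result = low 4 field bits
     pass 2: offset == 1, thispos == 0, thissize == 6   // bits 5..0 of word 1
             shifted left by 4 (bitsdone - thissize) and IORed into the result

   In C terms, for unsigned 32-bit words w0 and w1:

     uint32_t field = (w0 >> 28) | ((w1 & 0x3f) << 4);   // 10-bit result  */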
2186 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2187 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2188 MODE, fill the upper bits with zeros. Fail if the layout of either
2189 mode is unknown (as for CC modes) or if the extraction would involve
2190 unprofitable mode punning. Return the value on success, otherwise
2191 return null.
2193 This is different from gen_lowpart* in these respects:
2195 - the returned value must always be considered an rvalue
2197 - when MODE is wider than SRC_MODE, the extraction involves
2198 a zero extension
2200 - when MODE is smaller than SRC_MODE, the extraction involves
2201 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2203 In other words, this routine performs a computation, whereas the
2204 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2205 operations. */
2208 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2210 machine_mode int_mode, src_int_mode;
2212 if (mode == src_mode)
2213 return src;
2215 if (CONSTANT_P (src))
2217 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2218 fails, it will happily create (subreg (symbol_ref)) or similar
2219 invalid SUBREGs. */
2220 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2221 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2222 if (ret)
2223 return ret;
2225 if (GET_MODE (src) == VOIDmode
2226 || !validate_subreg (mode, src_mode, src, byte))
2227 return NULL_RTX;
2229 src = force_reg (GET_MODE (src), src);
2230 return gen_rtx_SUBREG (mode, src, byte);
2233 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2234 return NULL_RTX;
2236 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2237 && MODES_TIEABLE_P (mode, src_mode))
2239 rtx x = gen_lowpart_common (mode, src);
2240 if (x)
2241 return x;
2244 src_int_mode = int_mode_for_mode (src_mode);
2245 int_mode = int_mode_for_mode (mode);
2246 if (src_int_mode == BLKmode || int_mode == BLKmode)
2247 return NULL_RTX;
2249 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2250 return NULL_RTX;
2251 if (!MODES_TIEABLE_P (int_mode, mode))
2252 return NULL_RTX;
2254 src = gen_lowpart (src_int_mode, src);
2255 src = convert_modes (int_mode, src_int_mode, src, true);
2256 src = gen_lowpart (mode, src);
2257 return src;
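/* Usage sketch (illustrative only; the variable names are hypothetical):
   reinterpreting the bits of a single-precision value as a 32-bit integer
   rvalue:

     rtx f    = ...;                                   // an SFmode value
     rtx bits = extract_low_bits (SImode, SFmode, f);  // NULL_RTX on failure

   Unlike the gen_lowpart* routines, a null return must be expected here
   (e.g. for CC modes, or when the mode punning would be unprofitable), and
   the caller has to fall back to another strategy.  */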
2260 /* Add INC into TARGET. */
2262 void
2263 expand_inc (rtx target, rtx inc)
2265 rtx value = expand_binop (GET_MODE (target), add_optab,
2266 target, inc,
2267 target, 0, OPTAB_LIB_WIDEN);
2268 if (value != target)
2269 emit_move_insn (target, value);
2272 /* Subtract DEC from TARGET. */
2274 void
2275 expand_dec (rtx target, rtx dec)
2277 rtx value = expand_binop (GET_MODE (target), sub_optab,
2278 target, dec,
2279 target, 0, OPTAB_LIB_WIDEN);
2280 if (value != target)
2281 emit_move_insn (target, value);
2284 /* Output a shift instruction for expression code CODE,
2285 with SHIFTED being the rtx for the value to shift,
2286 and AMOUNT the rtx for the amount to shift by.
2287 Store the result in the rtx TARGET, if that is convenient.
2288 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2289 Return the rtx for where the value is. */
2291 static rtx
2292 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2293 rtx amount, rtx target, int unsignedp)
2295 rtx op1, temp = 0;
2296 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2297 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2298 optab lshift_optab = ashl_optab;
2299 optab rshift_arith_optab = ashr_optab;
2300 optab rshift_uns_optab = lshr_optab;
2301 optab lrotate_optab = rotl_optab;
2302 optab rrotate_optab = rotr_optab;
2303 machine_mode op1_mode;
2304 machine_mode scalar_mode = mode;
2305 int attempt;
2306 bool speed = optimize_insn_for_speed_p ();
2308 if (VECTOR_MODE_P (mode))
2309 scalar_mode = GET_MODE_INNER (mode);
2310 op1 = amount;
2311 op1_mode = GET_MODE (op1);
2313 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2314 shift amount is a vector, use the vector/vector shift patterns. */
2315 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2317 lshift_optab = vashl_optab;
2318 rshift_arith_optab = vashr_optab;
2319 rshift_uns_optab = vlshr_optab;
2320 lrotate_optab = vrotl_optab;
2321 rrotate_optab = vrotr_optab;
2324 /* We used to detect shift counts computed by NEGATE_EXPR
2325 and shift in the other direction, but that does not work
2326 on all machines. */
2328 if (SHIFT_COUNT_TRUNCATED)
2330 if (CONST_INT_P (op1)
2331 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2332 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2333 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2334 % GET_MODE_BITSIZE (scalar_mode));
2335 else if (GET_CODE (op1) == SUBREG
2336 && subreg_lowpart_p (op1)
2337 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2338 && SCALAR_INT_MODE_P (GET_MODE (op1)))
2339 op1 = SUBREG_REG (op1);
2342 /* Canonicalize rotates by constant amount. If op1 is bitsize / 2,
2343 prefer left rotation, if op1 is from bitsize / 2 + 1 to
2344 bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2345 amount instead. */
2346 if (rotate
2347 && CONST_INT_P (op1)
2348 && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2349 GET_MODE_BITSIZE (scalar_mode) - 1))
2351 op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2352 left = !left;
2353 code = left ? LROTATE_EXPR : RROTATE_EXPR;
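/* For instance (illustrative only), a 32-bit rotate left by 27 is
   canonicalized here into a rotate right by 32 - 27 == 5: both compute
   (x << 27) | (x >> 5), and keeping the smaller amount gives later passes
   and the target patterns a single canonical form.  */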
2356 /* Rotation of 16-bit values by 8 bits is effectively equivalent to a bswaphi.
2357 Note that this is not the case for bigger values. For instance a rotation
2358 of 0x01020304 by 16 bits gives 0x03040102 which is different from
2359 0x04030201 (bswapsi). */
2360 if (rotate
2361 && CONST_INT_P (op1)
2362 && INTVAL (op1) == BITS_PER_UNIT
2363 && GET_MODE_SIZE (scalar_mode) == 2
2364 && optab_handler (bswap_optab, HImode) != CODE_FOR_nothing)
2365 return expand_unop (HImode, bswap_optab, shifted, NULL_RTX,
2366 unsignedp);
2368 if (op1 == const0_rtx)
2369 return shifted;
2371 /* Check whether it's cheaper to implement a left shift by a constant
2372 bit count by a sequence of additions. */
2373 if (code == LSHIFT_EXPR
2374 && CONST_INT_P (op1)
2375 && INTVAL (op1) > 0
2376 && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2377 && INTVAL (op1) < MAX_BITS_PER_WORD
2378 && (shift_cost (speed, mode, INTVAL (op1))
2379 > INTVAL (op1) * add_cost (speed, mode))
2380 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2382 int i;
2383 for (i = 0; i < INTVAL (op1); i++)
2385 temp = force_reg (mode, shifted);
2386 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2387 unsignedp, OPTAB_LIB_WIDEN);
2389 return shifted;
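/* Illustrative sketch of what the loop above emits for "x << 2" on a
   target where additions are cheaper than shifts:

     unsigned t = x + x;   // x << 1
     t = t + t;            // x << 2

   This path is only taken when shift_cost exceeds INTVAL (op1) * add_cost,
   so targets with fast shifters never use it.  */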
2392 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2394 enum optab_methods methods;
2396 if (attempt == 0)
2397 methods = OPTAB_DIRECT;
2398 else if (attempt == 1)
2399 methods = OPTAB_WIDEN;
2400 else
2401 methods = OPTAB_LIB_WIDEN;
2403 if (rotate)
2405 /* Widening does not work for rotation. */
2406 if (methods == OPTAB_WIDEN)
2407 continue;
2408 else if (methods == OPTAB_LIB_WIDEN)
2410 /* If we have been unable to open-code this by a rotation,
2411 do it as the IOR of two shifts. I.e., to rotate A
2412 by N bits, compute
2413 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2414 where C is the bitsize of A.
2416 It is theoretically possible that the target machine might
2417 not be able to perform either shift and hence we would
2418 be making two libcalls rather than just the one for the
2419 shift (similarly if IOR could not be done). We will allow
2420 this extremely unlikely lossage to avoid complicating the
2421 code below. */
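/* A C-level analogue of that shift/IOR sequence (illustrative only),
   assuming a 32-bit mode so that C - 1 == 31.  The (-n) & 31 form keeps
   the second shift count in range even when n == 0:

     static unsigned rotl32 (unsigned a, unsigned n)
     {
       return (a << n) | (a >> ((-n) & 31));   // n already reduced mod 32
     }

   The code below builds exactly this shape: new_amount is N, other_amount
   is (-N) & (precision - 1), and the two partial shifts are combined with
   ior_optab.  */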
2423 rtx subtarget = target == shifted ? 0 : target;
2424 rtx new_amount, other_amount;
2425 rtx temp1;
2427 new_amount = op1;
2428 if (op1 == const0_rtx)
2429 return shifted;
2430 else if (CONST_INT_P (op1))
2431 other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode)
2432 - INTVAL (op1));
2433 else
2435 other_amount
2436 = simplify_gen_unary (NEG, GET_MODE (op1),
2437 op1, GET_MODE (op1));
2438 HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2439 other_amount
2440 = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2441 gen_int_mode (mask, GET_MODE (op1)));
2444 shifted = force_reg (mode, shifted);
2446 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2447 mode, shifted, new_amount, 0, 1);
2448 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2449 mode, shifted, other_amount,
2450 subtarget, 1);
2451 return expand_binop (mode, ior_optab, temp, temp1, target,
2452 unsignedp, methods);
2455 temp = expand_binop (mode,
2456 left ? lrotate_optab : rrotate_optab,
2457 shifted, op1, target, unsignedp, methods);
2459 else if (unsignedp)
2460 temp = expand_binop (mode,
2461 left ? lshift_optab : rshift_uns_optab,
2462 shifted, op1, target, unsignedp, methods);
2464 /* Do arithmetic shifts.
2465 Also, if we are going to widen the operand, we can just as well
2466 use an arithmetic right-shift instead of a logical one. */
2467 if (temp == 0 && ! rotate
2468 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2470 enum optab_methods methods1 = methods;
2472 /* If trying to widen a log shift to an arithmetic shift,
2473 don't accept an arithmetic shift of the same size. */
2474 if (unsignedp)
2475 methods1 = OPTAB_MUST_WIDEN;
2477 /* Arithmetic shift */
2479 temp = expand_binop (mode,
2480 left ? lshift_optab : rshift_arith_optab,
2481 shifted, op1, target, unsignedp, methods1);
2484 /* We used to try extzv here for logical right shifts, but that was
2485 only useful for one machine, the VAX, and caused poor code
2486 generation there for lshrdi3, so the code was deleted and a
2487 define_expand for lshrsi3 was added to vax.md. */
2490 gcc_assert (temp);
2491 return temp;
2494 /* Output a shift instruction for expression code CODE,
2495 with SHIFTED being the rtx for the value to shift,
2496 and AMOUNT the amount to shift by.
2497 Store the result in the rtx TARGET, if that is convenient.
2498 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2499 Return the rtx for where the value is. */
2502 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2503 int amount, rtx target, int unsignedp)
2505 return expand_shift_1 (code, mode,
2506 shifted, GEN_INT (amount), target, unsignedp);
2509 /* Output a shift instruction for expression code CODE,
2510 with SHIFTED being the rtx for the value to shift,
2511 and AMOUNT the tree for the amount to shift by.
2512 Store the result in the rtx TARGET, if that is convenient.
2513 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2514 Return the rtx for where the value is. */
2517 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2518 tree amount, rtx target, int unsignedp)
2520 return expand_shift_1 (code, mode,
2521 shifted, expand_normal (amount), target, unsignedp);
2525 /* Indicates the type of fixup needed after a constant multiplication.
2526 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2527 the result should be negated, and ADD_VARIANT means that the
2528 multiplicand should be added to the result. */
2529 enum mult_variant {basic_variant, negate_variant, add_variant};
2531 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2532 const struct mult_cost *, machine_mode mode);
2533 static bool choose_mult_variant (machine_mode, HOST_WIDE_INT,
2534 struct algorithm *, enum mult_variant *, int);
2535 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2536 const struct algorithm *, enum mult_variant);
2537 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2538 static rtx extract_high_half (machine_mode, rtx);
2539 static rtx expmed_mult_highpart (machine_mode, rtx, rtx, rtx, int, int);
2540 static rtx expmed_mult_highpart_optab (machine_mode, rtx, rtx, rtx,
2541 int, int);
2542 /* Compute and return the best algorithm for multiplying by T.
2543 The algorithm must cost less than cost_limit.
2544 If retval.cost >= COST_LIMIT, no algorithm was found and all
2545 other fields of the returned struct are undefined.
2546 MODE is the machine mode of the multiplication. */
2548 static void
2549 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2550 const struct mult_cost *cost_limit, machine_mode mode)
2552 int m;
2553 struct algorithm *alg_in, *best_alg;
2554 struct mult_cost best_cost;
2555 struct mult_cost new_limit;
2556 int op_cost, op_latency;
2557 unsigned HOST_WIDE_INT orig_t = t;
2558 unsigned HOST_WIDE_INT q;
2559 int maxm, hash_index;
2560 bool cache_hit = false;
2561 enum alg_code cache_alg = alg_zero;
2562 bool speed = optimize_insn_for_speed_p ();
2563 machine_mode imode;
2564 struct alg_hash_entry *entry_ptr;
2566 /* Indicate that no algorithm is yet found. If no algorithm
2567 is found, this value will be returned and indicate failure. */
2568 alg_out->cost.cost = cost_limit->cost + 1;
2569 alg_out->cost.latency = cost_limit->latency + 1;
2571 if (cost_limit->cost < 0
2572 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2573 return;
2575 /* Be prepared for vector modes. */
2576 imode = GET_MODE_INNER (mode);
2578 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2580 /* Restrict the bits of "t" to the multiplication's mode. */
2581 t &= GET_MODE_MASK (imode);
2583 /* t == 1 can be done in zero cost. */
2584 if (t == 1)
2586 alg_out->ops = 1;
2587 alg_out->cost.cost = 0;
2588 alg_out->cost.latency = 0;
2589 alg_out->op[0] = alg_m;
2590 return;
2593 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2594 fail now. */
2595 if (t == 0)
2597 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2598 return;
2599 else
2601 alg_out->ops = 1;
2602 alg_out->cost.cost = zero_cost (speed);
2603 alg_out->cost.latency = zero_cost (speed);
2604 alg_out->op[0] = alg_zero;
2605 return;
2609 /* We'll be needing a couple extra algorithm structures now. */
2611 alg_in = XALLOCA (struct algorithm);
2612 best_alg = XALLOCA (struct algorithm);
2613 best_cost = *cost_limit;
2615 /* Compute the hash index. */
2616 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2618 /* See if we already know what to do for T. */
2619 entry_ptr = alg_hash_entry_ptr (hash_index);
2620 if (entry_ptr->t == t
2621 && entry_ptr->mode == mode
2623 && entry_ptr->speed == speed
2624 && entry_ptr->alg != alg_unknown)
2626 cache_alg = entry_ptr->alg;
2628 if (cache_alg == alg_impossible)
2630 /* The cache tells us that it's impossible to synthesize
2631 multiplication by T within entry_ptr->cost. */
2632 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2633 /* COST_LIMIT is at least as restrictive as the one
2634 recorded in the hash table, in which case we have no
2635 hope of synthesizing a multiplication. Just
2636 return. */
2637 return;
2639 /* If we get here, COST_LIMIT is less restrictive than the
2640 one recorded in the hash table, so we may be able to
2641 synthesize a multiplication. Proceed as if we didn't
2642 have the cache entry. */
2644 else
2646 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2647 /* The cached algorithm shows that this multiplication
2648 requires more cost than COST_LIMIT. Just return. This
2649 way, we don't clobber this cache entry with
2650 alg_impossible but retain useful information. */
2651 return;
2653 cache_hit = true;
2655 switch (cache_alg)
2657 case alg_shift:
2658 goto do_alg_shift;
2660 case alg_add_t_m2:
2661 case alg_sub_t_m2:
2662 goto do_alg_addsub_t_m2;
2664 case alg_add_factor:
2665 case alg_sub_factor:
2666 goto do_alg_addsub_factor;
2668 case alg_add_t2_m:
2669 goto do_alg_add_t2_m;
2671 case alg_sub_t2_m:
2672 goto do_alg_sub_t2_m;
2674 default:
2675 gcc_unreachable ();
2680 /* If we have a group of zero bits at the low-order part of T, try
2681 multiplying by the remaining bits and then doing a shift. */
2683 if ((t & 1) == 0)
2685 do_alg_shift:
2686 m = floor_log2 (t & -t); /* m = number of low zero bits */
2687 if (m < maxm)
2689 q = t >> m;
2690 /* The function expand_shift will choose between a shift and
2691 a sequence of additions, so the observed cost is given as
2692 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2693 op_cost = m * add_cost (speed, mode);
2694 if (shift_cost (speed, mode, m) < op_cost)
2695 op_cost = shift_cost (speed, mode, m);
2696 new_limit.cost = best_cost.cost - op_cost;
2697 new_limit.latency = best_cost.latency - op_cost;
2698 synth_mult (alg_in, q, &new_limit, mode);
2700 alg_in->cost.cost += op_cost;
2701 alg_in->cost.latency += op_cost;
2702 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2704 best_cost = alg_in->cost;
2705 std::swap (alg_in, best_alg);
2706 best_alg->log[best_alg->ops] = m;
2707 best_alg->op[best_alg->ops] = alg_shift;
2710 /* See if treating ORIG_T as a signed number yields a better
2711 sequence. Try this sequence only for a negative ORIG_T
2712 as it would be useless for a non-negative ORIG_T. */
2713 if ((HOST_WIDE_INT) orig_t < 0)
2715 /* Shift ORIG_T as follows because a right shift of a
2716 negative-valued signed type is implementation
2717 defined. */
2718 q = ~(~orig_t >> m);
2719 /* The function expand_shift will choose between a shift
2720 and a sequence of additions, so the observed cost is
2721 given as MIN (m * add_cost(speed, mode),
2722 shift_cost(speed, mode, m)). */
2723 op_cost = m * add_cost (speed, mode);
2724 if (shift_cost (speed, mode, m) < op_cost)
2725 op_cost = shift_cost (speed, mode, m);
2726 new_limit.cost = best_cost.cost - op_cost;
2727 new_limit.latency = best_cost.latency - op_cost;
2728 synth_mult (alg_in, q, &new_limit, mode);
2730 alg_in->cost.cost += op_cost;
2731 alg_in->cost.latency += op_cost;
2732 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2734 best_cost = alg_in->cost;
2735 std::swap (alg_in, best_alg);
2736 best_alg->log[best_alg->ops] = m;
2737 best_alg->op[best_alg->ops] = alg_shift;
2741 if (cache_hit)
2742 goto done;
2745 /* If we have an odd number, add or subtract one. */
2746 if ((t & 1) != 0)
2748 unsigned HOST_WIDE_INT w;
2750 do_alg_addsub_t_m2:
2751 for (w = 1; (w & t) != 0; w <<= 1)
2753 /* If T was -1, then W will be zero after the loop. This is another
2754 case where T ends with ...111. Handling this with (T + 1) and
2755 subtracting 1 produces slightly better code and results in algorithm
2756 selection much faster than treating it like the ...0111 case
2757 below. */
2758 if (w == 0
2759 || (w > 2
2760 /* Reject the case where t is 3.
2761 Thus we prefer addition in that case. */
2762 && t != 3))
2764 /* T ends with ...111. Multiply by (T + 1) and subtract T. */
2766 op_cost = add_cost (speed, mode);
2767 new_limit.cost = best_cost.cost - op_cost;
2768 new_limit.latency = best_cost.latency - op_cost;
2769 synth_mult (alg_in, t + 1, &new_limit, mode);
2771 alg_in->cost.cost += op_cost;
2772 alg_in->cost.latency += op_cost;
2773 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2775 best_cost = alg_in->cost;
2776 std::swap (alg_in, best_alg);
2777 best_alg->log[best_alg->ops] = 0;
2778 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2781 else
2783 /* T ends with ...01 or ...011. Multiply by (T - 1) and add T. */
2785 op_cost = add_cost (speed, mode);
2786 new_limit.cost = best_cost.cost - op_cost;
2787 new_limit.latency = best_cost.latency - op_cost;
2788 synth_mult (alg_in, t - 1, &new_limit, mode);
2790 alg_in->cost.cost += op_cost;
2791 alg_in->cost.latency += op_cost;
2792 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2794 best_cost = alg_in->cost;
2795 std::swap (alg_in, best_alg);
2796 best_alg->log[best_alg->ops] = 0;
2797 best_alg->op[best_alg->ops] = alg_add_t_m2;
2801 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2802 quickly with a - a * n for some appropriate constant n. */
2803 m = exact_log2 (-orig_t + 1);
2804 if (m >= 0 && m < maxm)
2806 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2807 /* If the target has a cheap shift-and-subtract insn, use
2808 that in preference to a shift insn followed by a sub insn.
2809 Assume that the shift-and-sub is "atomic" with a latency
2810 equal to its cost; otherwise assume that on superscalar
2811 hardware the shift may be executed concurrently with the
2812 earlier steps in the algorithm. */
2813 if (shiftsub1_cost (speed, mode, m) <= op_cost)
2815 op_cost = shiftsub1_cost (speed, mode, m);
2816 op_latency = op_cost;
2818 else
2819 op_latency = add_cost (speed, mode);
2821 new_limit.cost = best_cost.cost - op_cost;
2822 new_limit.latency = best_cost.latency - op_latency;
2823 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2824 &new_limit, mode);
2826 alg_in->cost.cost += op_cost;
2827 alg_in->cost.latency += op_latency;
2828 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2830 best_cost = alg_in->cost;
2831 std::swap (alg_in, best_alg);
2832 best_alg->log[best_alg->ops] = m;
2833 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2837 if (cache_hit)
2838 goto done;
2841 /* Look for factors of t of the form
2842 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2843 If we find such a factor, we can multiply by t using an algorithm that
2844 multiplies by q, shift the result by m and add/subtract it to itself.
2846 We search for large factors first and loop down, even if large factors
2847 are less probable than small; if we find a large factor we will find a
2848 good sequence quickly, and therefore be able to prune (by decreasing
2849 COST_LIMIT) the search. */
2851 do_alg_addsub_factor:
2852 for (m = floor_log2 (t - 1); m >= 2; m--)
2854 unsigned HOST_WIDE_INT d;
2856 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2857 if (t % d == 0 && t > d && m < maxm
2858 && (!cache_hit || cache_alg == alg_add_factor))
2860 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2861 if (shiftadd_cost (speed, mode, m) <= op_cost)
2862 op_cost = shiftadd_cost (speed, mode, m);
2864 op_latency = op_cost;
2867 new_limit.cost = best_cost.cost - op_cost;
2868 new_limit.latency = best_cost.latency - op_latency;
2869 synth_mult (alg_in, t / d, &new_limit, mode);
2871 alg_in->cost.cost += op_cost;
2872 alg_in->cost.latency += op_latency;
2873 if (alg_in->cost.latency < op_cost)
2874 alg_in->cost.latency = op_cost;
2875 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2877 best_cost = alg_in->cost;
2878 std::swap (alg_in, best_alg);
2879 best_alg->log[best_alg->ops] = m;
2880 best_alg->op[best_alg->ops] = alg_add_factor;
2882 /* Other factors will have been taken care of in the recursion. */
2883 break;
2886 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2887 if (t % d == 0 && t > d && m < maxm
2888 && (!cache_hit || cache_alg == alg_sub_factor))
2890 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2891 if (shiftsub0_cost (speed, mode, m) <= op_cost)
2892 op_cost = shiftsub0_cost (speed, mode, m);
2894 op_latency = op_cost;
2896 new_limit.cost = best_cost.cost - op_cost;
2897 new_limit.latency = best_cost.latency - op_latency;
2898 synth_mult (alg_in, t / d, &new_limit, mode);
2900 alg_in->cost.cost += op_cost;
2901 alg_in->cost.latency += op_latency;
2902 if (alg_in->cost.latency < op_cost)
2903 alg_in->cost.latency = op_cost;
2904 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2906 best_cost = alg_in->cost;
2907 std::swap (alg_in, best_alg);
2908 best_alg->log[best_alg->ops] = m;
2909 best_alg->op[best_alg->ops] = alg_sub_factor;
2911 break;
2914 if (cache_hit)
2915 goto done;
2917 /* Try shift-and-add (load effective address) instructions,
2918 i.e. do a*3, a*5, a*9. */
2919 if ((t & 1) != 0)
2921 do_alg_add_t2_m:
2922 q = t - 1;
2923 q = q & -q;
2924 m = exact_log2 (q);
2925 if (m >= 0 && m < maxm)
2927 op_cost = shiftadd_cost (speed, mode, m);
2928 new_limit.cost = best_cost.cost - op_cost;
2929 new_limit.latency = best_cost.latency - op_cost;
2930 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2932 alg_in->cost.cost += op_cost;
2933 alg_in->cost.latency += op_cost;
2934 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2936 best_cost = alg_in->cost;
2937 std::swap (alg_in, best_alg);
2938 best_alg->log[best_alg->ops] = m;
2939 best_alg->op[best_alg->ops] = alg_add_t2_m;
2942 if (cache_hit)
2943 goto done;
2945 do_alg_sub_t2_m:
2946 q = t + 1;
2947 q = q & -q;
2948 m = exact_log2 (q);
2949 if (m >= 0 && m < maxm)
2951 op_cost = shiftsub0_cost (speed, mode, m);
2952 new_limit.cost = best_cost.cost - op_cost;
2953 new_limit.latency = best_cost.latency - op_cost;
2954 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2956 alg_in->cost.cost += op_cost;
2957 alg_in->cost.latency += op_cost;
2958 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2960 best_cost = alg_in->cost;
2961 std::swap (alg_in, best_alg);
2962 best_alg->log[best_alg->ops] = m;
2963 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2966 if (cache_hit)
2967 goto done;
2970 done:
2971 /* If best_cost has not decreased, we have not found any algorithm. */
2972 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2974 /* We failed to find an algorithm. Record alg_impossible for
2975 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2976 we are asked to find an algorithm for T within the same or
2977 lower COST_LIMIT, we can immediately return to the
2978 caller. */
2979 entry_ptr->t = t;
2980 entry_ptr->mode = mode;
2981 entry_ptr->speed = speed;
2982 entry_ptr->alg = alg_impossible;
2983 entry_ptr->cost = *cost_limit;
2984 return;
2987 /* Cache the result. */
2988 if (!cache_hit)
2990 entry_ptr->t = t;
2991 entry_ptr->mode = mode;
2992 entry_ptr->speed = speed;
2993 entry_ptr->alg = best_alg->op[best_alg->ops];
2994 entry_ptr->cost.cost = best_cost.cost;
2995 entry_ptr->cost.latency = best_cost.latency;
2998 /* If we are getting too long a sequence for `struct algorithm'
2999 to record, make this search fail. */
3000 if (best_alg->ops == MAX_BITS_PER_WORD)
3001 return;
3003 /* Copy the algorithm from temporary space to the space at alg_out.
3004 We avoid using structure assignment because the majority of
3005 best_alg is normally undefined, and this is a critical function. */
3006 alg_out->ops = best_alg->ops + 1;
3007 alg_out->cost = best_cost;
3008 memcpy (alg_out->op, best_alg->op,
3009 alg_out->ops * sizeof *alg_out->op);
3010 memcpy (alg_out->log, best_alg->log,
3011 alg_out->ops * sizeof *alg_out->log);
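/* Worked example (illustrative only) of the kind of decomposition this
   function searches for.  For T == 10 one possible result is alg_add_t2_m
   followed by alg_shift, i.e. in C:

     unsigned t = (x << 2) + x;   // x * 5
     t <<= 1;                     // x * 10

   costing one shift-add plus one shift.  Which sequence is actually chosen
   depends on the target's add_cost/shift_cost/shiftadd_cost tables, so the
   sequence above is only one possibility.  */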
3014 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
3015 Try three variations:
3017 - a shift/add sequence based on VAL itself
3018 - a shift/add sequence based on -VAL, followed by a negation
3019 - a shift/add sequence based on VAL - 1, followed by an addition.
3021 Return true if the cheapest of these cost less than MULT_COST,
3022 describing the algorithm in *ALG and final fixup in *VARIANT. */
3024 static bool
3025 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
3026 struct algorithm *alg, enum mult_variant *variant,
3027 int mult_cost)
3029 struct algorithm alg2;
3030 struct mult_cost limit;
3031 int op_cost;
3032 bool speed = optimize_insn_for_speed_p ();
3034 /* Fail quickly for impossible bounds. */
3035 if (mult_cost < 0)
3036 return false;
3038 /* Ensure that mult_cost provides a reasonable upper bound.
3039 Any constant multiplication can be performed with less
3040 than 2 * bits additions. */
3041 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
3042 if (mult_cost > op_cost)
3043 mult_cost = op_cost;
3045 *variant = basic_variant;
3046 limit.cost = mult_cost;
3047 limit.latency = mult_cost;
3048 synth_mult (alg, val, &limit, mode);
3050 /* This works only if the inverted value actually fits in an
3051 `unsigned int' */
3052 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
3054 op_cost = neg_cost (speed, mode);
3055 if (MULT_COST_LESS (&alg->cost, mult_cost))
3057 limit.cost = alg->cost.cost - op_cost;
3058 limit.latency = alg->cost.latency - op_cost;
3060 else
3062 limit.cost = mult_cost - op_cost;
3063 limit.latency = mult_cost - op_cost;
3066 synth_mult (&alg2, -val, &limit, mode);
3067 alg2.cost.cost += op_cost;
3068 alg2.cost.latency += op_cost;
3069 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3070 *alg = alg2, *variant = negate_variant;
3073 /* This proves very useful for division-by-constant. */
3074 op_cost = add_cost (speed, mode);
3075 if (MULT_COST_LESS (&alg->cost, mult_cost))
3077 limit.cost = alg->cost.cost - op_cost;
3078 limit.latency = alg->cost.latency - op_cost;
3080 else
3082 limit.cost = mult_cost - op_cost;
3083 limit.latency = mult_cost - op_cost;
3086 synth_mult (&alg2, val - 1, &limit, mode);
3087 alg2.cost.cost += op_cost;
3088 alg2.cost.latency += op_cost;
3089 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3090 *alg = alg2, *variant = add_variant;
3092 return MULT_COST_LESS (&alg->cost, mult_cost);
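/* Illustrative sketch of the negate_variant fixup for VAL == -5:
   synth_mult is run on -VAL == 5 and the result is negated afterwards,
   which at the C level is

     int t = (x << 2) + x;   // x * 5
     t = -t;                 // x * -5

   add_variant works analogously, synthesizing VAL - 1 and finishing with a
   single addition; that is what makes this routine useful for the
   division-by-constant code.  */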
3095 /* A subroutine of expand_mult, used for constant multiplications.
3096 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3097 convenient. Use the shift/add sequence described by ALG and apply
3098 the final fixup specified by VARIANT. */
3100 static rtx
3101 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
3102 rtx target, const struct algorithm *alg,
3103 enum mult_variant variant)
3105 HOST_WIDE_INT val_so_far;
3106 rtx_insn *insn;
3107 rtx accum, tem;
3108 int opno;
3109 machine_mode nmode;
3111 /* Avoid referencing memory over and over and invalid sharing
3112 on SUBREGs. */
3113 op0 = force_reg (mode, op0);
3115 /* ACCUM starts out either as OP0 or as a zero, depending on
3116 the first operation. */
3118 if (alg->op[0] == alg_zero)
3120 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3121 val_so_far = 0;
3123 else if (alg->op[0] == alg_m)
3125 accum = copy_to_mode_reg (mode, op0);
3126 val_so_far = 1;
3128 else
3129 gcc_unreachable ();
3131 for (opno = 1; opno < alg->ops; opno++)
3133 int log = alg->log[opno];
3134 rtx shift_subtarget = optimize ? 0 : accum;
3135 rtx add_target
3136 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3137 && !optimize)
3138 ? target : 0;
3139 rtx accum_target = optimize ? 0 : accum;
3140 rtx accum_inner;
3142 switch (alg->op[opno])
3144 case alg_shift:
3145 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3146 /* REG_EQUAL note will be attached to the following insn. */
3147 emit_move_insn (accum, tem);
3148 val_so_far <<= log;
3149 break;
3151 case alg_add_t_m2:
3152 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3153 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3154 add_target ? add_target : accum_target);
3155 val_so_far += (HOST_WIDE_INT) 1 << log;
3156 break;
3158 case alg_sub_t_m2:
3159 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3160 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3161 add_target ? add_target : accum_target);
3162 val_so_far -= (HOST_WIDE_INT) 1 << log;
3163 break;
3165 case alg_add_t2_m:
3166 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3167 log, shift_subtarget, 0);
3168 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3169 add_target ? add_target : accum_target);
3170 val_so_far = (val_so_far << log) + 1;
3171 break;
3173 case alg_sub_t2_m:
3174 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3175 log, shift_subtarget, 0);
3176 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3177 add_target ? add_target : accum_target);
3178 val_so_far = (val_so_far << log) - 1;
3179 break;
3181 case alg_add_factor:
3182 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3183 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3184 add_target ? add_target : accum_target);
3185 val_so_far += val_so_far << log;
3186 break;
3188 case alg_sub_factor:
3189 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3190 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3191 (add_target
3192 ? add_target : (optimize ? 0 : tem)));
3193 val_so_far = (val_so_far << log) - val_so_far;
3194 break;
3196 default:
3197 gcc_unreachable ();
3200 if (SCALAR_INT_MODE_P (mode))
3202 /* Write a REG_EQUAL note on the last insn so that we can cse
3203 multiplication sequences. Note that if ACCUM is a SUBREG,
3204 we've set the inner register and must properly indicate that. */
3205 tem = op0, nmode = mode;
3206 accum_inner = accum;
3207 if (GET_CODE (accum) == SUBREG)
3209 accum_inner = SUBREG_REG (accum);
3210 nmode = GET_MODE (accum_inner);
3211 tem = gen_lowpart (nmode, op0);
3214 insn = get_last_insn ();
3215 set_dst_reg_note (insn, REG_EQUAL,
3216 gen_rtx_MULT (nmode, tem,
3217 gen_int_mode (val_so_far, nmode)),
3218 accum_inner);
3222 if (variant == negate_variant)
3224 val_so_far = -val_so_far;
3225 accum = expand_unop (mode, neg_optab, accum, target, 0);
3227 else if (variant == add_variant)
3229 val_so_far = val_so_far + 1;
3230 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3233 /* Compare only the bits of val and val_so_far that are significant
3234 in the result mode, to avoid sign-/zero-extension confusion. */
3235 nmode = GET_MODE_INNER (mode);
3236 val &= GET_MODE_MASK (nmode);
3237 val_so_far &= GET_MODE_MASK (nmode);
3238 gcc_assert (val == val_so_far);
3240 return accum;
3243 /* Perform a multiplication and return an rtx for the result.
3244 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3245 TARGET is a suggestion for where to store the result (an rtx).
3247 We check specially for a constant integer as OP1.
3248 If you want this check for OP0 as well, then before calling
3249 you should swap the two operands if OP0 would be constant. */
3252 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3253 int unsignedp)
3255 enum mult_variant variant;
3256 struct algorithm algorithm;
3257 rtx scalar_op1;
3258 int max_cost;
3259 bool speed = optimize_insn_for_speed_p ();
3260 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3262 if (CONSTANT_P (op0))
3263 std::swap (op0, op1);
3265 /* For vectors, there are several simplifications that can be made if
3266 all elements of the vector constant are identical. */
3267 scalar_op1 = unwrap_const_vec_duplicate (op1);
3269 if (INTEGRAL_MODE_P (mode))
3271 rtx fake_reg;
3272 HOST_WIDE_INT coeff;
3273 bool is_neg;
3274 int mode_bitsize;
3276 if (op1 == CONST0_RTX (mode))
3277 return op1;
3278 if (op1 == CONST1_RTX (mode))
3279 return op0;
3280 if (op1 == CONSTM1_RTX (mode))
3281 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3282 op0, target, 0);
3284 if (do_trapv)
3285 goto skip_synth;
3287 /* If mode is integer vector mode, check if the backend supports
3288 vector lshift (by scalar or vector) at all. If not, we can't use
3289 synthesized multiply. */
3290 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3291 && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3292 && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3293 goto skip_synth;
3295 /* These are the operations that are potentially turned into
3296 a sequence of shifts and additions. */
3297 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3299 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3300 less than or equal in size to `unsigned int' this doesn't matter.
3301 If the mode is larger than `unsigned int', then synth_mult works
3302 only if the constant value exactly fits in an `unsigned int' without
3303 any truncation. This means that multiplying by negative values does
3304 not work; results are off by 2^32 on a 32 bit machine. */
3305 if (CONST_INT_P (scalar_op1))
3307 coeff = INTVAL (scalar_op1);
3308 is_neg = coeff < 0;
3310 #if TARGET_SUPPORTS_WIDE_INT
3311 else if (CONST_WIDE_INT_P (scalar_op1))
3312 #else
3313 else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3314 #endif
3316 int shift = wi::exact_log2 (std::make_pair (scalar_op1, mode));
3317 /* Perfect power of 2 (other than 1, which is handled above). */
3318 if (shift > 0)
3319 return expand_shift (LSHIFT_EXPR, mode, op0,
3320 shift, target, unsignedp);
3321 else
3322 goto skip_synth;
3324 else
3325 goto skip_synth;
3327 /* We used to test optimize here, on the grounds that it's better to
3328 produce a smaller program when -O is not used. But this causes
3329 such a terrible slowdown sometimes that it seems better to always
3330 use synth_mult. */
3332 /* Special case powers of two. */
3333 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3334 && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3335 return expand_shift (LSHIFT_EXPR, mode, op0,
3336 floor_log2 (coeff), target, unsignedp);
3338 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3340 /* Attempt to handle multiplication of DImode values by negative
3341 coefficients, by performing the multiplication by a positive
3342 multiplier and then inverting the result. */
3343 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3345 /* It's safe to use -coeff even for INT_MIN, as the
3346 result is interpreted as an unsigned coefficient.
3347 Exclude cost of op0 from max_cost to match the cost
3348 calculation of the synth_mult. */
3349 coeff = -(unsigned HOST_WIDE_INT) coeff;
3350 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3351 mode, speed)
3352 - neg_cost (speed, mode));
3353 if (max_cost <= 0)
3354 goto skip_synth;
3356 /* Special case powers of two. */
3357 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3359 rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3360 floor_log2 (coeff), target, unsignedp);
3361 return expand_unop (mode, neg_optab, temp, target, 0);
3364 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3365 max_cost))
3367 rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3368 &algorithm, variant);
3369 return expand_unop (mode, neg_optab, temp, target, 0);
3371 goto skip_synth;
3374 /* Exclude cost of op0 from max_cost to match the cost
3375 calculation of the synth_mult. */
3376 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3377 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3378 return expand_mult_const (mode, op0, coeff, target,
3379 &algorithm, variant);
3381 skip_synth:
3383 /* Expand x*2.0 as x+x. */
3384 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3385 && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
3387 op0 = force_reg (GET_MODE (op0), op0);
3388 return expand_binop (mode, add_optab, op0, op0,
3389 target, unsignedp, OPTAB_LIB_WIDEN);
3392 /* This used to use umul_optab if unsigned, but for non-widening multiply
3393 there is no difference between signed and unsigned. */
3394 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3395 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3396 gcc_assert (op0);
3397 return op0;
3400 /* Return a cost estimate for multiplying a register by the given
3401 COEFFicient in the given MODE and SPEED. */
3404 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3406 int max_cost;
3407 struct algorithm algorithm;
3408 enum mult_variant variant;
3410 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3411 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3412 mode, speed);
3413 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3414 return algorithm.cost.cost;
3415 else
3416 return max_cost;
3419 /* Perform a widening multiplication and return an rtx for the result.
3420 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3421 TARGET is a suggestion for where to store the result (an rtx).
3422 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3423 or smul_widen_optab.
3425 We check specially for a constant integer as OP1, comparing the
3426 cost of a widening multiply against the cost of a sequence of shifts
3427 and adds. */
3430 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3431 int unsignedp, optab this_optab)
3433 bool speed = optimize_insn_for_speed_p ();
3434 rtx cop1;
3436 if (CONST_INT_P (op1)
3437 && GET_MODE (op0) != VOIDmode
3438 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3439 this_optab == umul_widen_optab))
3440 && CONST_INT_P (cop1)
3441 && (INTVAL (cop1) >= 0
3442 || HWI_COMPUTABLE_MODE_P (mode)))
3444 HOST_WIDE_INT coeff = INTVAL (cop1);
3445 int max_cost;
3446 enum mult_variant variant;
3447 struct algorithm algorithm;
3449 if (coeff == 0)
3450 return CONST0_RTX (mode);
3452 /* Special case powers of two. */
3453 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3455 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3456 return expand_shift (LSHIFT_EXPR, mode, op0,
3457 floor_log2 (coeff), target, unsignedp);
3460 /* Exclude cost of op0 from max_cost to match the cost
3461 calculation of the synth_mult. */
3462 max_cost = mul_widen_cost (speed, mode);
3463 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3464 max_cost))
3466 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3467 return expand_mult_const (mode, op0, coeff, target,
3468 &algorithm, variant);
3471 return expand_binop (mode, this_optab, op0, op1, target,
3472 unsignedp, OPTAB_LIB_WIDEN);
3475 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3476 replace division by D, and put the least significant N bits of the result
3477 in *MULTIPLIER_PTR and return the most significant bit.
3479 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3480 needed precision is in PRECISION (should be <= N).
3482 PRECISION should be as small as possible so this function can choose
3483 the multiplier more freely.
3485 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3486 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3488 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3489 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
3491 unsigned HOST_WIDE_INT
3492 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3493 unsigned HOST_WIDE_INT *multiplier_ptr,
3494 int *post_shift_ptr, int *lgup_ptr)
3496 int lgup, post_shift;
3497 int pow, pow2;
3499 /* lgup = ceil(log2(divisor)); */
3500 lgup = ceil_log2 (d);
3502 gcc_assert (lgup <= n);
3504 pow = n + lgup;
3505 pow2 = n + lgup - precision;
3507 /* mlow = 2^(N + lgup)/d */
3508 wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3509 wide_int mlow = wi::udiv_trunc (val, d);
3511 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3512 val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3513 wide_int mhigh = wi::udiv_trunc (val, d);
3515 /* If precision == N, then mlow, mhigh exceed 2^N
3516 (but they do not exceed 2^(N+1)). */
3518 /* Reduce to lowest terms. */
3519 for (post_shift = lgup; post_shift > 0; post_shift--)
3521 unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3522 HOST_BITS_PER_WIDE_INT);
3523 unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3524 HOST_BITS_PER_WIDE_INT);
3525 if (ml_lo >= mh_lo)
3526 break;
3528 mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3529 mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3532 *post_shift_ptr = post_shift;
3533 *lgup_ptr = lgup;
3534 if (n < HOST_BITS_PER_WIDE_INT)
3536 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3537 *multiplier_ptr = mhigh.to_uhwi () & mask;
3538 return mhigh.to_uhwi () >= mask;
3540 else
3542 *multiplier_ptr = mhigh.to_uhwi ();
3543 return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3547 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3548 congruent to 1 (mod 2**N). */
3550 static unsigned HOST_WIDE_INT
3551 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3553 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3555 /* The algorithm notes that the choice y = x satisfies
3556 x*y == 1 mod 2^3, since x is assumed odd.
3557 Each iteration doubles the number of bits of significance in y. */
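/* Hand-worked illustration: for x = 7 and n = 8 we start with y = 7, which
   already satisfies 7 * 7 = 49 == 1 (mod 2^3).  The first iteration gives
   y = 7 * (2 - 7 * 7) = -329 == 183 (mod 2^8), and indeed
   7 * 183 = 1281 = 5 * 256 + 1, so 183 is the inverse of 7 mod 2^8; the
   second (and last) iteration leaves y unchanged.  */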
3559 unsigned HOST_WIDE_INT mask;
3560 unsigned HOST_WIDE_INT y = x;
3561 int nbit = 3;
3563 mask = (n == HOST_BITS_PER_WIDE_INT
3564 ? ~(unsigned HOST_WIDE_INT) 0
3565 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3567 while (nbit < n)
3569 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3570 nbit *= 2;
3572 return y;
3575 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3576 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3577 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3578 to become unsigned; if UNSIGNEDP is zero, adjust the unsigned product to
3579 become signed.
3581 The result is put in TARGET if that is convenient.
3583 MODE is the mode of operation. */
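/* A sketch of the reasoning (assuming two's complement, all arithmetic
   mod 2^N with N = GET_MODE_BITSIZE (MODE)):
     highpart_unsigned (x, y) == highpart_signed (x, y)
                                 + (x < 0 ? y : 0) + (y < 0 ? x : 0).
   Each conditional term is formed branchlessly below by arithmetic-shifting
   one operand right by N - 1 (yielding 0 or -1) and ANDing with the other
   operand; the terms are added when turning a signed high part into an
   unsigned one and subtracted in the opposite direction.  */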
3586 expand_mult_highpart_adjust (machine_mode mode, rtx adj_operand, rtx op0,
3587 rtx op1, rtx target, int unsignedp)
3589 rtx tem;
3590 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3592 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3593 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3594 tem = expand_and (mode, tem, op1, NULL_RTX);
3595 adj_operand
3596 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3597 adj_operand);
3599 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3600 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3601 tem = expand_and (mode, tem, op0, NULL_RTX);
3602 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3603 target);
3605 return target;
3608 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3610 static rtx
3611 extract_high_half (machine_mode mode, rtx op)
3613 machine_mode wider_mode;
3615 if (mode == word_mode)
3616 return gen_highpart (mode, op);
3618 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3620 wider_mode = GET_MODE_WIDER_MODE (mode);
3621 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3622 GET_MODE_BITSIZE (mode), 0, 1);
3623 return convert_modes (mode, wider_mode, op, 0);
3626 /* Like expmed_mult_highpart, but only consider using a multiplication
3627 optab. OP1 is an rtx for the constant operand. */
3629 static rtx
3630 expmed_mult_highpart_optab (machine_mode mode, rtx op0, rtx op1,
3631 rtx target, int unsignedp, int max_cost)
3633 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3634 machine_mode wider_mode;
3635 optab moptab;
3636 rtx tem;
3637 int size;
3638 bool speed = optimize_insn_for_speed_p ();
3640 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3642 wider_mode = GET_MODE_WIDER_MODE (mode);
3643 size = GET_MODE_BITSIZE (mode);
3645 /* Firstly, try using a multiplication insn that only generates the needed
3646 high part of the product, and in the sign flavor of unsignedp. */
3647 if (mul_highpart_cost (speed, mode) < max_cost)
3649 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3650 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3651 unsignedp, OPTAB_DIRECT);
3652 if (tem)
3653 return tem;
3656 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3657 Need to adjust the result after the multiplication. */
3658 if (size - 1 < BITS_PER_WORD
3659 && (mul_highpart_cost (speed, mode)
3660 + 2 * shift_cost (speed, mode, size-1)
3661 + 4 * add_cost (speed, mode) < max_cost))
3663 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3664 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3665 unsignedp, OPTAB_DIRECT);
3666 if (tem)
3667 /* We used the wrong signedness. Adjust the result. */
3668 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3669 tem, unsignedp);
3672 /* Try widening multiplication. */
3673 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3674 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3675 && mul_widen_cost (speed, wider_mode) < max_cost)
3677 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3678 unsignedp, OPTAB_WIDEN);
3679 if (tem)
3680 return extract_high_half (mode, tem);
3683 /* Try widening the mode and perform a non-widening multiplication. */
3684 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3685 && size - 1 < BITS_PER_WORD
3686 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3687 < max_cost))
3689 rtx_insn *insns;
3690 rtx wop0, wop1;
3692 /* We need to widen the operands, for example to ensure the
3693 constant multiplier is correctly sign or zero extended.
3694 Use a sequence to clean up any instructions emitted by
3695 the conversions if things don't work out. */
3696 start_sequence ();
3697 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3698 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3699 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3700 unsignedp, OPTAB_WIDEN);
3701 insns = get_insns ();
3702 end_sequence ();
3704 if (tem)
3706 emit_insn (insns);
3707 return extract_high_half (mode, tem);
3711 /* Try widening multiplication of opposite signedness, and adjust. */
3712 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3713 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3714 && size - 1 < BITS_PER_WORD
3715 && (mul_widen_cost (speed, wider_mode)
3716 + 2 * shift_cost (speed, mode, size-1)
3717 + 4 * add_cost (speed, mode) < max_cost))
3719 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3720 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3721 if (tem != 0)
3723 tem = extract_high_half (mode, tem);
3724 /* We used the wrong signedness. Adjust the result. */
3725 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3726 target, unsignedp);
3730 return 0;
3733 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3734 putting the high half of the result in TARGET if that is convenient,
3735 and return where the result is. If the operation cannot be performed,
3736 0 is returned.
3738 MODE is the mode of operation and result.
3740 UNSIGNEDP nonzero means unsigned multiply.
3742 MAX_COST is the total allowed cost for the expanded RTL. */
3744 static rtx
3745 expmed_mult_highpart (machine_mode mode, rtx op0, rtx op1,
3746 rtx target, int unsignedp, int max_cost)
3748 machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3749 unsigned HOST_WIDE_INT cnst1;
3750 int extra_cost;
3751 bool sign_adjust = false;
3752 enum mult_variant variant;
3753 struct algorithm alg;
3754 rtx tem;
3755 bool speed = optimize_insn_for_speed_p ();
3757 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3758 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3759 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3761 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3763 /* We can't optimize modes wider than BITS_PER_WORD.
3764 ??? We might be able to perform double-word arithmetic if
3765 mode == word_mode, however all the cost calculations in
3766 synth_mult etc. assume single-word operations. */
3767 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3768 return expmed_mult_highpart_optab (mode, op0, op1, target,
3769 unsignedp, max_cost);
3771 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3773 /* Check whether we try to multiply by a negative constant. */
3774 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3776 sign_adjust = true;
3777 extra_cost += add_cost (speed, mode);
3780 /* See whether shift/add multiplication is cheap enough. */
3781 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3782 max_cost - extra_cost))
3784 /* See whether the specialized multiplication optabs are
3785 cheaper than the shift/add version. */
3786 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3787 alg.cost.cost + extra_cost);
3788 if (tem)
3789 return tem;
3791 tem = convert_to_mode (wider_mode, op0, unsignedp);
3792 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3793 tem = extract_high_half (mode, tem);
3795 /* Adjust result for signedness. */
3796 if (sign_adjust)
3797 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3799 return tem;
3801 return expmed_mult_highpart_optab (mode, op0, op1, target,
3802 unsignedp, max_cost);
3806 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3808 static rtx
3809 expand_smod_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3811 rtx result, temp, shift;
3812 rtx_code_label *label;
3813 int logd;
3814 int prec = GET_MODE_PRECISION (mode);
3816 logd = floor_log2 (d);
3817 result = gen_reg_rtx (mode);
3819 /* Avoid conditional branches when they're expensive. */
3820 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3821 && optimize_insn_for_speed_p ())
3823 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3824 mode, 0, -1);
3825 if (signmask)
3827 HOST_WIDE_INT masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3828 signmask = force_reg (mode, signmask);
3829 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3831 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3832 which instruction sequence to use. If logical right shifts
3833 are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3834 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
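/* A hand-worked illustration of the two branch-free sequences below for
   d = 4 (logd = 2, masklow = 3) in a 32-bit mode, with op0 = -5 and
   therefore signmask = -1:
     xor/sub form:  ((-5 ^ -1) - -1) = 5;  5 & 3 = 1;  (1 ^ -1) - -1 = -1
     shift form:    signmask >> 30 = 3;  (-5 + 3) & 3 = 2;  2 - 3 = -1
   both of which give -5 % 4 = -1, matching truncating division.  For a
   non-negative op0 signmask is 0 and both sequences reduce to
   op0 & masklow.  */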
3836 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3837 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3838 || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
3839 > COSTS_N_INSNS (2)))
3841 temp = expand_binop (mode, xor_optab, op0, signmask,
3842 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3843 temp = expand_binop (mode, sub_optab, temp, signmask,
3844 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3845 temp = expand_binop (mode, and_optab, temp,
3846 gen_int_mode (masklow, mode),
3847 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3848 temp = expand_binop (mode, xor_optab, temp, signmask,
3849 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3850 temp = expand_binop (mode, sub_optab, temp, signmask,
3851 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3853 else
3855 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3856 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3857 signmask = force_reg (mode, signmask);
3859 temp = expand_binop (mode, add_optab, op0, signmask,
3860 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3861 temp = expand_binop (mode, and_optab, temp,
3862 gen_int_mode (masklow, mode),
3863 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3864 temp = expand_binop (mode, sub_optab, temp, signmask,
3865 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3867 return temp;
3871 /* Mask contains the mode's signbit and the significant bits of the
3872 modulus. By including the signbit in the operation, many targets
3873 can avoid an explicit compare operation in the following comparison
3874 against zero. */
3875 wide_int mask = wi::mask (logd, false, prec);
3876 mask = wi::set_bit (mask, prec - 1);
3878 temp = expand_binop (mode, and_optab, op0,
3879 immed_wide_int_const (mask, mode),
3880 result, 1, OPTAB_LIB_WIDEN);
3881 if (temp != result)
3882 emit_move_insn (result, temp);
3884 label = gen_label_rtx ();
3885 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3887 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3888 0, OPTAB_LIB_WIDEN);
3890 mask = wi::mask (logd, true, prec);
3891 temp = expand_binop (mode, ior_optab, temp,
3892 immed_wide_int_const (mask, mode),
3893 result, 1, OPTAB_LIB_WIDEN);
3894 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3895 0, OPTAB_LIB_WIDEN);
3896 if (temp != result)
3897 emit_move_insn (result, temp);
3898 emit_label (label);
3899 return result;
3902 /* Expand signed division of OP0 by a power of two D in mode MODE.
3903 This routine is only called for positive values of D. */
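/* Each of the expansions below computes the standard correction for
   truncating signed division by d = 2^logd: add d - 1 to op0 when op0 is
   negative, then shift right arithmetically by logd.  A hand-worked
   example with d = 8 (logd = 3): for op0 = -17 the correction gives
   -17 + 7 = -10, and -10 >> 3 = -2, the truncated quotient of -17 / 8;
   for op0 = 40 nothing is added and 40 >> 3 = 5.  */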
3905 static rtx
3906 expand_sdiv_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3908 rtx temp;
3909 rtx_code_label *label;
3910 int logd;
3912 logd = floor_log2 (d);
3914 if (d == 2
3915 && BRANCH_COST (optimize_insn_for_speed_p (),
3916 false) >= 1)
3918 temp = gen_reg_rtx (mode);
3919 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3920 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3921 0, OPTAB_LIB_WIDEN);
3922 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3925 if (HAVE_conditional_move
3926 && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
3928 rtx temp2;
3930 start_sequence ();
3931 temp2 = copy_to_mode_reg (mode, op0);
3932 temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
3933 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3934 temp = force_reg (mode, temp);
3936 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3937 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3938 mode, temp, temp2, mode, 0);
3939 if (temp2)
3941 rtx_insn *seq = get_insns ();
3942 end_sequence ();
3943 emit_insn (seq);
3944 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3946 end_sequence ();
3949 if (BRANCH_COST (optimize_insn_for_speed_p (),
3950 false) >= 2)
3952 int ushift = GET_MODE_BITSIZE (mode) - logd;
3954 temp = gen_reg_rtx (mode);
3955 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3956 if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
3957 || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3958 > COSTS_N_INSNS (1))
3959 temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
3960 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3961 else
3962 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3963 ushift, NULL_RTX, 1);
3964 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3965 0, OPTAB_LIB_WIDEN);
3966 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3969 label = gen_label_rtx ();
3970 temp = copy_to_mode_reg (mode, op0);
3971 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3972 expand_inc (temp, gen_int_mode (d - 1, mode));
3973 emit_label (label);
3974 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3977 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3978 if that is convenient, and returning where the result is.
3979 You may request either the quotient or the remainder as the result;
3980 specify REM_FLAG nonzero to get the remainder.
3982 CODE is the expression code for which kind of division this is;
3983 it controls how rounding is done. MODE is the machine mode to use.
3984 UNSIGNEDP nonzero means do unsigned division. */
3986 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3987 and then correct it by or'ing in missing high bits
3988 if result of ANDI is nonzero.
3989 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3990 This could optimize to a bfexts instruction.
3991 But C doesn't use these operations, so their optimizations are
3992 left for later. */
3993 /* ??? For modulo, we don't actually need the highpart of the first product,
3994 the low part will do nicely. And for small divisors, the second multiply
3995 can also be a low-part only multiply or even be completely left out.
3996 E.g. to calculate the remainder of a division by 3 with a 32 bit
3997 multiply, multiply with 0x55555556 and extract the upper two bits;
3998 the result is exact for inputs up to 0x1fffffff.
3999 The input range can be reduced by using cross-sum rules.
4000 For odd divisors >= 3, the following table gives right shift counts
4001 so that if a number is shifted by an integer multiple of the given
4002 amount, the remainder stays the same:
4003 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
4004 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
4005 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
4006 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
4007 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
4009 Cross-sum rules for even numbers can be derived by leaving as many bits
4010 to the right alone as the divisor has zeros to the right.
4011 E.g. if x is an unsigned 32 bit number:
4012 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
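/* A hand-worked check of the 0x55555556 example above (illustrative):
   for x = 11 the low 32 bits of x * 0x55555556 are 0xaaaaaab2, whose top
   two bits read 0b10 = 2 = 11 mod 3; for x = 12 the low half is 8 and the
   top two bits read 0.  This works because 3 * 0x55555556 = 2^32 + 2, so
   the low half of the product is ((x mod 3) * 2^32 + 2 * x) / 3, and for
   x <= 0x1fffffff the 2 * x / 3 term is too small to push the value out of
   the 2^30-sized range selected by x mod 3.  */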
4016 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
4017 rtx op0, rtx op1, rtx target, int unsignedp)
4019 machine_mode compute_mode;
4020 rtx tquotient;
4021 rtx quotient = 0, remainder = 0;
4022 rtx_insn *last;
4023 int size;
4024 rtx_insn *insn;
4025 optab optab1, optab2;
4026 int op1_is_constant, op1_is_pow2 = 0;
4027 int max_cost, extra_cost;
4028 static HOST_WIDE_INT last_div_const = 0;
4029 bool speed = optimize_insn_for_speed_p ();
4031 op1_is_constant = CONST_INT_P (op1);
4032 if (op1_is_constant)
4034 unsigned HOST_WIDE_INT ext_op1 = UINTVAL (op1);
4035 if (unsignedp)
4036 ext_op1 &= GET_MODE_MASK (mode);
4037 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
4038 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
4042 This is the structure of expand_divmod:
4044 First comes code to fix up the operands so we can perform the operations
4045 correctly and efficiently.
4047 Second comes a switch statement with code specific for each rounding mode.
4048 For some special operands this code emits all RTL for the desired
4049 operation, for other cases, it generates only a quotient and stores it in
4050 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
4051 to indicate that it has not done anything.
4053 Last comes code that finishes the operation. If QUOTIENT is set and
4054 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
4055 QUOTIENT is not set, it is computed using trunc rounding.
4057 We try to generate special code for division and remainder when OP1 is a
4058 constant. If |OP1| = 2**n we can use shifts and some other fast
4059 operations. For other values of OP1, we compute a carefully selected
4060 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4061 by m.
4063 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4064 half of the product. Different strategies for generating the product are
4065 implemented in expmed_mult_highpart.
4067 If what we actually want is the remainder, we generate that by another
4068 by-constant multiplication and a subtraction. */
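/* For instance (a hand-worked sketch of the kind of sequence produced
   below, here for unsigned 32-bit division by 7): choose_multiplier picks
   the 33-bit multiplier m = 2^32 + 0x24924925, and because m does not fit
   in a word the "mh != 0" path of TRUNC_DIV_EXPR emits roughly
     t1 = highpart (x * 0x24924925)
     t2 = (x - t1) >> 1
     q  = (t1 + t2) >> 2
   e.g. for x = 100: t1 = 14, t2 = 43, q = 57 >> 2 = 14 = 100 / 7.  */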
4070 /* We shouldn't be called with OP1 == const1_rtx, but some of the
4071 code below will malfunction if we are, so check here and handle
4072 the special case if so. */
4073 if (op1 == const1_rtx)
4074 return rem_flag ? const0_rtx : op0;
4076 /* When dividing by -1, we could get an overflow.
4077 negv_optab can handle overflows. */
4078 if (! unsignedp && op1 == constm1_rtx)
4080 if (rem_flag)
4081 return const0_rtx;
4082 return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4083 ? negv_optab : neg_optab, op0, target, 0);
4086 if (target
4087 /* Don't use the function value register as a target
4088 since we have to read it as well as write it,
4089 and function-inlining gets confused by this. */
4090 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4091 /* Don't clobber an operand while doing a multi-step calculation. */
4092 || ((rem_flag || op1_is_constant)
4093 && (reg_mentioned_p (target, op0)
4094 || (MEM_P (op0) && MEM_P (target))))
4095 || reg_mentioned_p (target, op1)
4096 || (MEM_P (op1) && MEM_P (target))))
4097 target = 0;
4099 /* Get the mode in which to perform this computation. Normally it will
4100 be MODE, but sometimes we can't do the desired operation in MODE.
4101 If so, pick a wider mode in which we can do the operation. Convert
4102 to that mode at the start to avoid repeated conversions.
4104 First see what operations we need. These depend on the expression
4105 we are evaluating. (We assume that divxx3 insns exist under the
4106 same conditions as modxx3 insns do, and that these insns don't normally
4107 fail. If these assumptions are not correct, we may generate less
4108 efficient code in some cases.)
4110 Then see if we find a mode in which we can open-code that operation
4111 (either a division, modulus, or shift). Finally, check for the smallest
4112 mode for which we can do the operation with a library call. */
4114 /* We might want to refine this now that we have division-by-constant
4115 optimization. Since expmed_mult_highpart tries so many variants, it is
4116 not straightforward to generalize this. Maybe we should make an array
4117 of possible modes in init_expmed? Save this for GCC 2.7. */
4119 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
4120 ? (unsignedp ? lshr_optab : ashr_optab)
4121 : (unsignedp ? udiv_optab : sdiv_optab));
4122 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
4123 ? optab1
4124 : (unsignedp ? udivmod_optab : sdivmod_optab));
4126 for (compute_mode = mode; compute_mode != VOIDmode;
4127 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4128 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4129 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4130 break;
4132 if (compute_mode == VOIDmode)
4133 for (compute_mode = mode; compute_mode != VOIDmode;
4134 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4135 if (optab_libfunc (optab1, compute_mode)
4136 || optab_libfunc (optab2, compute_mode))
4137 break;
4139 /* If we still couldn't find a mode, use MODE, but expand_binop will
4140 probably die. */
4141 if (compute_mode == VOIDmode)
4142 compute_mode = mode;
4144 if (target && GET_MODE (target) == compute_mode)
4145 tquotient = target;
4146 else
4147 tquotient = gen_reg_rtx (compute_mode);
4149 size = GET_MODE_BITSIZE (compute_mode);
4150 #if 0
4151 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4152 (mode), and thereby get better code when OP1 is a constant. Do that
4153 later. It will require going over all usages of SIZE below. */
4154 size = GET_MODE_BITSIZE (mode);
4155 #endif
4157 /* Only deduct something for a REM if the last divide done was
4158 for a different constant. Then set the constant of the last
4159 divide. */
4160 max_cost = (unsignedp
4161 ? udiv_cost (speed, compute_mode)
4162 : sdiv_cost (speed, compute_mode));
4163 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4164 && INTVAL (op1) == last_div_const))
4165 max_cost -= (mul_cost (speed, compute_mode)
4166 + add_cost (speed, compute_mode));
4168 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4170 /* Now convert to the best mode to use. */
4171 if (compute_mode != mode)
4173 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4174 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4176 /* convert_modes may have placed op1 into a register, so we
4177 must recompute the following. */
4178 op1_is_constant = CONST_INT_P (op1);
4179 op1_is_pow2 = (op1_is_constant
4180 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4181 || (! unsignedp
4182 && EXACT_POWER_OF_2_OR_ZERO_P (-UINTVAL (op1))))));
4185 /* If one of the operands is a volatile MEM, copy it into a register. */
4187 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4188 op0 = force_reg (compute_mode, op0);
4189 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4190 op1 = force_reg (compute_mode, op1);
4192 /* If we need the remainder or if OP1 is constant, we need to
4193 put OP0 in a register in case it has any queued subexpressions. */
4194 if (rem_flag || op1_is_constant)
4195 op0 = force_reg (compute_mode, op0);
4197 last = get_last_insn ();
4199 /* Promote floor rounding to trunc rounding for unsigned operations. */
4200 if (unsignedp)
4202 if (code == FLOOR_DIV_EXPR)
4203 code = TRUNC_DIV_EXPR;
4204 if (code == FLOOR_MOD_EXPR)
4205 code = TRUNC_MOD_EXPR;
4206 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4207 code = TRUNC_DIV_EXPR;
4210 if (op1 != const0_rtx)
4211 switch (code)
4213 case TRUNC_MOD_EXPR:
4214 case TRUNC_DIV_EXPR:
4215 if (op1_is_constant)
4217 if (unsignedp)
4219 unsigned HOST_WIDE_INT mh, ml;
4220 int pre_shift, post_shift;
4221 int dummy;
4222 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4223 & GET_MODE_MASK (compute_mode));
4225 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4227 pre_shift = floor_log2 (d);
4228 if (rem_flag)
4230 unsigned HOST_WIDE_INT mask
4231 = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4232 remainder
4233 = expand_binop (compute_mode, and_optab, op0,
4234 gen_int_mode (mask, compute_mode),
4235 remainder, 1,
4236 OPTAB_LIB_WIDEN);
4237 if (remainder)
4238 return gen_lowpart (mode, remainder);
4240 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4241 pre_shift, tquotient, 1);
4243 else if (size <= HOST_BITS_PER_WIDE_INT)
4245 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4247 /* Most significant bit of divisor is set; emit an scc
4248 insn. */
4249 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4250 compute_mode, 1, 1);
4252 else
4254 /* Find a suitable multiplier and right shift count
4255 instead of multiplying with D. */
4257 mh = choose_multiplier (d, size, size,
4258 &ml, &post_shift, &dummy);
4260 /* If the suggested multiplier is more than SIZE bits,
4261 we can do better for even divisors, using an
4262 initial right shift. */
4263 if (mh != 0 && (d & 1) == 0)
4265 pre_shift = floor_log2 (d & -d);
4266 mh = choose_multiplier (d >> pre_shift, size,
4267 size - pre_shift,
4268 &ml, &post_shift, &dummy);
4269 gcc_assert (!mh);
4271 else
4272 pre_shift = 0;
4274 if (mh != 0)
4276 rtx t1, t2, t3, t4;
4278 if (post_shift - 1 >= BITS_PER_WORD)
4279 goto fail1;
4281 extra_cost
4282 = (shift_cost (speed, compute_mode, post_shift - 1)
4283 + shift_cost (speed, compute_mode, 1)
4284 + 2 * add_cost (speed, compute_mode));
4285 t1 = expmed_mult_highpart
4286 (compute_mode, op0,
4287 gen_int_mode (ml, compute_mode),
4288 NULL_RTX, 1, max_cost - extra_cost);
4289 if (t1 == 0)
4290 goto fail1;
4291 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4292 op0, t1),
4293 NULL_RTX);
4294 t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4295 t2, 1, NULL_RTX, 1);
4296 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4297 t1, t3),
4298 NULL_RTX);
4299 quotient = expand_shift
4300 (RSHIFT_EXPR, compute_mode, t4,
4301 post_shift - 1, tquotient, 1);
4303 else
4305 rtx t1, t2;
4307 if (pre_shift >= BITS_PER_WORD
4308 || post_shift >= BITS_PER_WORD)
4309 goto fail1;
4311 t1 = expand_shift
4312 (RSHIFT_EXPR, compute_mode, op0,
4313 pre_shift, NULL_RTX, 1);
4314 extra_cost
4315 = (shift_cost (speed, compute_mode, pre_shift)
4316 + shift_cost (speed, compute_mode, post_shift));
4317 t2 = expmed_mult_highpart
4318 (compute_mode, t1,
4319 gen_int_mode (ml, compute_mode),
4320 NULL_RTX, 1, max_cost - extra_cost);
4321 if (t2 == 0)
4322 goto fail1;
4323 quotient = expand_shift
4324 (RSHIFT_EXPR, compute_mode, t2,
4325 post_shift, tquotient, 1);
4329 else /* Too wide mode to use tricky code */
4330 break;
4332 insn = get_last_insn ();
4333 if (insn != last)
4334 set_dst_reg_note (insn, REG_EQUAL,
4335 gen_rtx_UDIV (compute_mode, op0, op1),
4336 quotient);
4338 else /* TRUNC_DIV, signed */
4340 unsigned HOST_WIDE_INT ml;
4341 int lgup, post_shift;
4342 rtx mlr;
4343 HOST_WIDE_INT d = INTVAL (op1);
4344 unsigned HOST_WIDE_INT abs_d;
4346 /* Since d might be INT_MIN, we have to cast to
4347 unsigned HOST_WIDE_INT before negating to avoid
4348 undefined signed overflow. */
4349 abs_d = (d >= 0
4350 ? (unsigned HOST_WIDE_INT) d
4351 : - (unsigned HOST_WIDE_INT) d);
4353 /* n rem d = n rem -d */
4354 if (rem_flag && d < 0)
4356 d = abs_d;
4357 op1 = gen_int_mode (abs_d, compute_mode);
4360 if (d == 1)
4361 quotient = op0;
4362 else if (d == -1)
4363 quotient = expand_unop (compute_mode, neg_optab, op0,
4364 tquotient, 0);
4365 else if (HOST_BITS_PER_WIDE_INT >= size
4366 && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4368 /* This case is not handled correctly below. */
4369 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4370 compute_mode, 1, 1);
4371 if (quotient == 0)
4372 goto fail1;
4374 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4375 && (rem_flag
4376 ? smod_pow2_cheap (speed, compute_mode)
4377 : sdiv_pow2_cheap (speed, compute_mode))
4378 /* We assume that cheap metric is true if the
4379 optab has an expander for this mode. */
4380 && ((optab_handler ((rem_flag ? smod_optab
4381 : sdiv_optab),
4382 compute_mode)
4383 != CODE_FOR_nothing)
4384 || (optab_handler (sdivmod_optab,
4385 compute_mode)
4386 != CODE_FOR_nothing)))
4388 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4390 if (rem_flag)
4392 remainder = expand_smod_pow2 (compute_mode, op0, d);
4393 if (remainder)
4394 return gen_lowpart (mode, remainder);
4397 if (sdiv_pow2_cheap (speed, compute_mode)
4398 && ((optab_handler (sdiv_optab, compute_mode)
4399 != CODE_FOR_nothing)
4400 || (optab_handler (sdivmod_optab, compute_mode)
4401 != CODE_FOR_nothing)))
4402 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4403 compute_mode, op0,
4404 gen_int_mode (abs_d,
4405 compute_mode),
4406 NULL_RTX, 0);
4407 else
4408 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4410 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4411 negate the quotient. */
4412 if (d < 0)
4414 insn = get_last_insn ();
4415 if (insn != last
4416 && abs_d < ((unsigned HOST_WIDE_INT) 1
4417 << (HOST_BITS_PER_WIDE_INT - 1)))
4418 set_dst_reg_note (insn, REG_EQUAL,
4419 gen_rtx_DIV (compute_mode, op0,
4420 gen_int_mode
4421 (abs_d,
4422 compute_mode)),
4423 quotient);
4425 quotient = expand_unop (compute_mode, neg_optab,
4426 quotient, quotient, 0);
4429 else if (size <= HOST_BITS_PER_WIDE_INT)
4431 choose_multiplier (abs_d, size, size - 1,
4432 &ml, &post_shift, &lgup);
4433 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4435 rtx t1, t2, t3;
4437 if (post_shift >= BITS_PER_WORD
4438 || size - 1 >= BITS_PER_WORD)
4439 goto fail1;
4441 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4442 + shift_cost (speed, compute_mode, size - 1)
4443 + add_cost (speed, compute_mode));
4444 t1 = expmed_mult_highpart
4445 (compute_mode, op0, gen_int_mode (ml, compute_mode),
4446 NULL_RTX, 0, max_cost - extra_cost);
4447 if (t1 == 0)
4448 goto fail1;
4449 t2 = expand_shift
4450 (RSHIFT_EXPR, compute_mode, t1,
4451 post_shift, NULL_RTX, 0);
4452 t3 = expand_shift
4453 (RSHIFT_EXPR, compute_mode, op0,
4454 size - 1, NULL_RTX, 0);
4455 if (d < 0)
4456 quotient
4457 = force_operand (gen_rtx_MINUS (compute_mode,
4458 t3, t2),
4459 tquotient);
4460 else
4461 quotient
4462 = force_operand (gen_rtx_MINUS (compute_mode,
4463 t2, t3),
4464 tquotient);
4466 else
4468 rtx t1, t2, t3, t4;
4470 if (post_shift >= BITS_PER_WORD
4471 || size - 1 >= BITS_PER_WORD)
4472 goto fail1;
4474 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4475 mlr = gen_int_mode (ml, compute_mode);
4476 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4477 + shift_cost (speed, compute_mode, size - 1)
4478 + 2 * add_cost (speed, compute_mode));
4479 t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4480 NULL_RTX, 0,
4481 max_cost - extra_cost);
4482 if (t1 == 0)
4483 goto fail1;
4484 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4485 t1, op0),
4486 NULL_RTX);
4487 t3 = expand_shift
4488 (RSHIFT_EXPR, compute_mode, t2,
4489 post_shift, NULL_RTX, 0);
4490 t4 = expand_shift
4491 (RSHIFT_EXPR, compute_mode, op0,
4492 size - 1, NULL_RTX, 0);
4493 if (d < 0)
4494 quotient
4495 = force_operand (gen_rtx_MINUS (compute_mode,
4496 t4, t3),
4497 tquotient);
4498 else
4499 quotient
4500 = force_operand (gen_rtx_MINUS (compute_mode,
4501 t3, t4),
4502 tquotient);
4505 else /* Too wide mode to use tricky code */
4506 break;
4508 insn = get_last_insn ();
4509 if (insn != last)
4510 set_dst_reg_note (insn, REG_EQUAL,
4511 gen_rtx_DIV (compute_mode, op0, op1),
4512 quotient);
4514 break;
4516 fail1:
4517 delete_insns_since (last);
4518 break;
4520 case FLOOR_DIV_EXPR:
4521 case FLOOR_MOD_EXPR:
4522 /* We will come here only for signed operations. */
4523 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4525 unsigned HOST_WIDE_INT mh, ml;
4526 int pre_shift, lgup, post_shift;
4527 HOST_WIDE_INT d = INTVAL (op1);
4529 if (d > 0)
4531 /* We could just as easily deal with negative constants here,
4532 but it does not seem worth the trouble for GCC 2.6. */
4533 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4535 pre_shift = floor_log2 (d);
4536 if (rem_flag)
4538 unsigned HOST_WIDE_INT mask
4539 = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4540 remainder = expand_binop
4541 (compute_mode, and_optab, op0,
4542 gen_int_mode (mask, compute_mode),
4543 remainder, 0, OPTAB_LIB_WIDEN);
4544 if (remainder)
4545 return gen_lowpart (mode, remainder);
4547 quotient = expand_shift
4548 (RSHIFT_EXPR, compute_mode, op0,
4549 pre_shift, tquotient, 0);
4551 else
4553 rtx t1, t2, t3, t4;
4555 mh = choose_multiplier (d, size, size - 1,
4556 &ml, &post_shift, &lgup);
4557 gcc_assert (!mh);
4559 if (post_shift < BITS_PER_WORD
4560 && size - 1 < BITS_PER_WORD)
4562 t1 = expand_shift
4563 (RSHIFT_EXPR, compute_mode, op0,
4564 size - 1, NULL_RTX, 0);
4565 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4566 NULL_RTX, 0, OPTAB_WIDEN);
4567 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4568 + shift_cost (speed, compute_mode, size - 1)
4569 + 2 * add_cost (speed, compute_mode));
4570 t3 = expmed_mult_highpart
4571 (compute_mode, t2, gen_int_mode (ml, compute_mode),
4572 NULL_RTX, 1, max_cost - extra_cost);
4573 if (t3 != 0)
4575 t4 = expand_shift
4576 (RSHIFT_EXPR, compute_mode, t3,
4577 post_shift, NULL_RTX, 1);
4578 quotient = expand_binop (compute_mode, xor_optab,
4579 t4, t1, tquotient, 0,
4580 OPTAB_WIDEN);
4585 else
4587 rtx nsign, t1, t2, t3, t4;
4588 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4589 op0, constm1_rtx), NULL_RTX);
4590 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4591 0, OPTAB_WIDEN);
4592 nsign = expand_shift
4593 (RSHIFT_EXPR, compute_mode, t2,
4594 size - 1, NULL_RTX, 0);
4595 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4596 NULL_RTX);
4597 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4598 NULL_RTX, 0);
4599 if (t4)
4601 rtx t5;
4602 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4603 NULL_RTX, 0);
4604 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4605 t4, t5),
4606 tquotient);
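/* A sketch of why the sequence above yields floor division for a negative
   constant divisor (illustrative, assuming two's complement): t2 = op0 |
   (op0 - 1) is negative exactly when op0 <= 0, so nsign is -1 for op0 <= 0
   and 0 otherwise.  Then t3 is op0 when op0 <= 0 and op0 - 1 otherwise,
   and adding ~nsign (0 or -1) to the truncating quotient t4 turns it into
   the floor.  E.g. op0 = 5, op1 = -2: nsign = 0, t3 = 4,
   t4 = trunc (4 / -2) = -2, quotient = -2 + -1 = -3 = floor (5 / -2).  */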
4611 if (quotient != 0)
4612 break;
4613 delete_insns_since (last);
4615 /* Try using an instruction that produces both the quotient and
4616 remainder, using truncation. We can easily compensate the quotient
4617 or remainder to get floor rounding, once we have the remainder.
4618 Notice that we compute also the final remainder value here,
4619 and return the result right away. */
4620 if (target == 0 || GET_MODE (target) != compute_mode)
4621 target = gen_reg_rtx (compute_mode);
4623 if (rem_flag)
4625 remainder
4626 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4627 quotient = gen_reg_rtx (compute_mode);
4629 else
4631 quotient
4632 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4633 remainder = gen_reg_rtx (compute_mode);
4636 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4637 quotient, remainder, 0))
4639 /* This could be computed with a branch-less sequence.
4640 Save that for later. */
4641 rtx tem;
4642 rtx_code_label *label = gen_label_rtx ();
4643 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4644 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4645 NULL_RTX, 0, OPTAB_WIDEN);
4646 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4647 expand_dec (quotient, const1_rtx);
4648 expand_inc (remainder, op1);
4649 emit_label (label);
4650 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4653 /* No luck with division elimination or divmod. Have to do it
4654 by conditionally adjusting op0 *and* the result. */
4656 rtx_code_label *label1, *label2, *label3, *label4, *label5;
4657 rtx adjusted_op0;
4658 rtx tem;
4660 quotient = gen_reg_rtx (compute_mode);
4661 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4662 label1 = gen_label_rtx ();
4663 label2 = gen_label_rtx ();
4664 label3 = gen_label_rtx ();
4665 label4 = gen_label_rtx ();
4666 label5 = gen_label_rtx ();
4667 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4668 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4669 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4670 quotient, 0, OPTAB_LIB_WIDEN);
4671 if (tem != quotient)
4672 emit_move_insn (quotient, tem);
4673 emit_jump_insn (targetm.gen_jump (label5));
4674 emit_barrier ();
4675 emit_label (label1);
4676 expand_inc (adjusted_op0, const1_rtx);
4677 emit_jump_insn (targetm.gen_jump (label4));
4678 emit_barrier ();
4679 emit_label (label2);
4680 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4681 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4682 quotient, 0, OPTAB_LIB_WIDEN);
4683 if (tem != quotient)
4684 emit_move_insn (quotient, tem);
4685 emit_jump_insn (targetm.gen_jump (label5));
4686 emit_barrier ();
4687 emit_label (label3);
4688 expand_dec (adjusted_op0, const1_rtx);
4689 emit_label (label4);
4690 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4691 quotient, 0, OPTAB_LIB_WIDEN);
4692 if (tem != quotient)
4693 emit_move_insn (quotient, tem);
4694 expand_dec (quotient, const1_rtx);
4695 emit_label (label5);
4697 break;
4699 case CEIL_DIV_EXPR:
4700 case CEIL_MOD_EXPR:
4701 if (unsignedp)
4703 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4705 rtx t1, t2, t3;
4706 unsigned HOST_WIDE_INT d = INTVAL (op1);
4707 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4708 floor_log2 (d), tquotient, 1);
4709 t2 = expand_binop (compute_mode, and_optab, op0,
4710 gen_int_mode (d - 1, compute_mode),
4711 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4712 t3 = gen_reg_rtx (compute_mode);
4713 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4714 compute_mode, 1, 1);
4715 if (t3 == 0)
4717 rtx_code_label *lab;
4718 lab = gen_label_rtx ();
4719 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4720 expand_inc (t1, const1_rtx);
4721 emit_label (lab);
4722 quotient = t1;
4724 else
4725 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4726 t1, t3),
4727 tquotient);
4728 break;
4731 /* Try using an instruction that produces both the quotient and
4732 remainder, using truncation. We can easily compensate the
4733 quotient or remainder to get ceiling rounding, once we have the
4734 remainder. Notice that we compute also the final remainder
4735 value here, and return the result right away. */
4736 if (target == 0 || GET_MODE (target) != compute_mode)
4737 target = gen_reg_rtx (compute_mode);
4739 if (rem_flag)
4741 remainder = (REG_P (target)
4742 ? target : gen_reg_rtx (compute_mode));
4743 quotient = gen_reg_rtx (compute_mode);
4745 else
4747 quotient = (REG_P (target)
4748 ? target : gen_reg_rtx (compute_mode));
4749 remainder = gen_reg_rtx (compute_mode);
4752 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4753 remainder, 1))
4755 /* This could be computed with a branch-less sequence.
4756 Save that for later. */
4757 rtx_code_label *label = gen_label_rtx ();
4758 do_cmp_and_jump (remainder, const0_rtx, EQ,
4759 compute_mode, label);
4760 expand_inc (quotient, const1_rtx);
4761 expand_dec (remainder, op1);
4762 emit_label (label);
4763 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4766 /* No luck with division elimination or divmod. Have to do it
4767 by conditionally adjusting op0 *and* the result. */
4769 rtx_code_label *label1, *label2;
4770 rtx adjusted_op0, tem;
4772 quotient = gen_reg_rtx (compute_mode);
4773 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4774 label1 = gen_label_rtx ();
4775 label2 = gen_label_rtx ();
4776 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4777 compute_mode, label1);
4778 emit_move_insn (quotient, const0_rtx);
4779 emit_jump_insn (targetm.gen_jump (label2));
4780 emit_barrier ();
4781 emit_label (label1);
4782 expand_dec (adjusted_op0, const1_rtx);
4783 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4784 quotient, 1, OPTAB_LIB_WIDEN);
4785 if (tem != quotient)
4786 emit_move_insn (quotient, tem);
4787 expand_inc (quotient, const1_rtx);
4788 emit_label (label2);
4791 else /* signed */
4793 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4794 && INTVAL (op1) >= 0)
4796 /* This is extremely similar to the code for the unsigned case
4797 above. For 2.7 we should merge these variants, but for
4798 2.6.1 I don't want to touch the code for unsigned since that
4799 gets used in C. The signed case will only be used by other
4800 languages (Ada). */
4802 rtx t1, t2, t3;
4803 unsigned HOST_WIDE_INT d = INTVAL (op1);
4804 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4805 floor_log2 (d), tquotient, 0);
4806 t2 = expand_binop (compute_mode, and_optab, op0,
4807 gen_int_mode (d - 1, compute_mode),
4808 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4809 t3 = gen_reg_rtx (compute_mode);
4810 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4811 compute_mode, 1, 1);
4812 if (t3 == 0)
4814 rtx_code_label *lab;
4815 lab = gen_label_rtx ();
4816 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4817 expand_inc (t1, const1_rtx);
4818 emit_label (lab);
4819 quotient = t1;
4821 else
4822 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4823 t1, t3),
4824 tquotient);
4825 break;
4828 /* Try using an instruction that produces both the quotient and
4829 remainder, using truncation. We can easily compensate the
4830 quotient or remainder to get ceiling rounding, once we have the
4831 remainder. Notice that we compute also the final remainder
4832 value here, and return the result right away. */
4833 if (target == 0 || GET_MODE (target) != compute_mode)
4834 target = gen_reg_rtx (compute_mode);
4835 if (rem_flag)
4837 remainder = (REG_P (target)
4838 ? target : gen_reg_rtx (compute_mode));
4839 quotient = gen_reg_rtx (compute_mode);
4841 else
4843 quotient = (REG_P (target)
4844 ? target : gen_reg_rtx (compute_mode));
4845 remainder = gen_reg_rtx (compute_mode);
4848 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4849 remainder, 0))
4851 /* This could be computed with a branch-less sequence.
4852 Save that for later. */
4853 rtx tem;
4854 rtx_code_label *label = gen_label_rtx ();
4855 do_cmp_and_jump (remainder, const0_rtx, EQ,
4856 compute_mode, label);
4857 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4858 NULL_RTX, 0, OPTAB_WIDEN);
4859 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4860 expand_inc (quotient, const1_rtx);
4861 expand_dec (remainder, op1);
4862 emit_label (label);
4863 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4866 /* No luck with division elimination or divmod. Have to do it
4867 by conditionally adjusting op0 *and* the result. */
4869 rtx_code_label *label1, *label2, *label3, *label4, *label5;
4870 rtx adjusted_op0;
4871 rtx tem;
4873 quotient = gen_reg_rtx (compute_mode);
4874 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4875 label1 = gen_label_rtx ();
4876 label2 = gen_label_rtx ();
4877 label3 = gen_label_rtx ();
4878 label4 = gen_label_rtx ();
4879 label5 = gen_label_rtx ();
4880 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4881 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4882 compute_mode, label1);
4883 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4884 quotient, 0, OPTAB_LIB_WIDEN);
4885 if (tem != quotient)
4886 emit_move_insn (quotient, tem);
4887 emit_jump_insn (targetm.gen_jump (label5));
4888 emit_barrier ();
4889 emit_label (label1);
4890 expand_dec (adjusted_op0, const1_rtx);
4891 emit_jump_insn (targetm.gen_jump (label4));
4892 emit_barrier ();
4893 emit_label (label2);
4894 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4895 compute_mode, label3);
4896 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4897 quotient, 0, OPTAB_LIB_WIDEN);
4898 if (tem != quotient)
4899 emit_move_insn (quotient, tem);
4900 emit_jump_insn (targetm.gen_jump (label5));
4901 emit_barrier ();
4902 emit_label (label3);
4903 expand_inc (adjusted_op0, const1_rtx);
4904 emit_label (label4);
4905 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4906 quotient, 0, OPTAB_LIB_WIDEN);
4907 if (tem != quotient)
4908 emit_move_insn (quotient, tem);
4909 expand_inc (quotient, const1_rtx);
4910 emit_label (label5);
4913 break;
4915 case EXACT_DIV_EXPR:
4916 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4918 HOST_WIDE_INT d = INTVAL (op1);
4919 unsigned HOST_WIDE_INT ml;
4920 int pre_shift;
4921 rtx t1;
4923 pre_shift = floor_log2 (d & -d);
4924 ml = invert_mod2n (d >> pre_shift, size);
4925 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4926 pre_shift, NULL_RTX, unsignedp);
4927 quotient = expand_mult (compute_mode, t1,
4928 gen_int_mode (ml, compute_mode),
4929 NULL_RTX, 1);
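/* Exact division relies on the divisor's inverse modulo 2^size: after
   shifting out the trailing zero bits that d (and hence op0) is known to
   have, multiplying by invert_mod2n (d >> pre_shift, size) recovers the
   exact quotient.  A hand-worked illustration in an 8-bit mode: dividing
   161 = 7 * 23 by 7 multiplies by 183, the inverse of 7 mod 2^8, and
   161 * 183 = 29463 == 23 (mod 256).  */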
4931 insn = get_last_insn ();
4932 set_dst_reg_note (insn, REG_EQUAL,
4933 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4934 compute_mode, op0, op1),
4935 quotient);
4937 break;
4939 case ROUND_DIV_EXPR:
4940 case ROUND_MOD_EXPR:
4941 if (unsignedp)
4943 rtx tem;
4944 rtx_code_label *label;
4945 label = gen_label_rtx ();
4946 quotient = gen_reg_rtx (compute_mode);
4947 remainder = gen_reg_rtx (compute_mode);
4948 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4950 rtx tem;
4951 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4952 quotient, 1, OPTAB_LIB_WIDEN);
4953 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4954 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4955 remainder, 1, OPTAB_LIB_WIDEN);
4957 tem = plus_constant (compute_mode, op1, -1);
4958 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4959 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4960 expand_inc (quotient, const1_rtx);
4961 expand_dec (remainder, op1);
4962 emit_label (label);
4964 else
4966 rtx abs_rem, abs_op1, tem, mask;
4967 rtx_code_label *label;
4968 label = gen_label_rtx ();
4969 quotient = gen_reg_rtx (compute_mode);
4970 remainder = gen_reg_rtx (compute_mode);
4971 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4973 rtx tem;
4974 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4975 quotient, 0, OPTAB_LIB_WIDEN);
4976 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4977 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4978 remainder, 0, OPTAB_LIB_WIDEN);
4980 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4981 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4982 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4983 1, NULL_RTX, 1);
4984 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4985 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4986 NULL_RTX, 0, OPTAB_WIDEN);
4987 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4988 size - 1, NULL_RTX, 0);
4989 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4990 NULL_RTX, 0, OPTAB_WIDEN);
4991 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4992 NULL_RTX, 0, OPTAB_WIDEN);
4993 expand_inc (quotient, tem);
4994 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4995 NULL_RTX, 0, OPTAB_WIDEN);
4996 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4997 NULL_RTX, 0, OPTAB_WIDEN);
4998 expand_dec (remainder, tem);
4999 emit_label (label);
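/* A note on the trick used above (illustrative): (v ^ mask) - mask yields
   v when mask is 0 and -v when mask is -1, and mask here is the sign of
   op0 ^ op1 spread across the word.  So the quotient is bumped by +1 or -1
   (matching the sign of the true quotient) and the remainder adjusted by
   the correspondingly signed op1, but only when 2 * |remainder| >= |op1|,
   i.e. when round-to-nearest moves away from the truncated result.  */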
5001 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5003 default:
5004 gcc_unreachable ();
5007 if (quotient == 0)
5009 if (target && GET_MODE (target) != compute_mode)
5010 target = 0;
5012 if (rem_flag)
5014 /* Try to produce the remainder without producing the quotient.
5015 If we seem to have a divmod pattern that does not require widening,
5016 don't try widening here. We should really have a WIDEN argument
5017 to expand_twoval_binop, since what we'd really like to do here is
5018 1) try a mod insn in compute_mode
5019 2) try a divmod insn in compute_mode
5020 3) try a div insn in compute_mode and multiply-subtract to get
5021 remainder
5022 4) try the same things with widening allowed. */
5023 remainder
5024 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5025 op0, op1, target,
5026 unsignedp,
5027 ((optab_handler (optab2, compute_mode)
5028 != CODE_FOR_nothing)
5029 ? OPTAB_DIRECT : OPTAB_WIDEN));
5030 if (remainder == 0)
5032 /* No luck there. Can we do remainder and divide at once
5033 without a library call? */
5034 remainder = gen_reg_rtx (compute_mode);
5035 if (! expand_twoval_binop ((unsignedp
5036 ? udivmod_optab
5037 : sdivmod_optab),
5038 op0, op1,
5039 NULL_RTX, remainder, unsignedp))
5040 remainder = 0;
5043 if (remainder)
5044 return gen_lowpart (mode, remainder);
5047 /* Produce the quotient. Try a quotient insn, but not a library call.
5048 If we have a divmod in this mode, use it in preference to widening
5049 the div (for this test we assume it will not fail). Note that optab2
5050 is set to the one of the two optabs that the call below will use. */
5051 quotient
5052 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5053 op0, op1, rem_flag ? NULL_RTX : target,
5054 unsignedp,
5055 ((optab_handler (optab2, compute_mode)
5056 != CODE_FOR_nothing)
5057 ? OPTAB_DIRECT : OPTAB_WIDEN));
5059 if (quotient == 0)
5061 /* No luck there. Try a quotient-and-remainder insn,
5062 keeping the quotient alone. */
5063 quotient = gen_reg_rtx (compute_mode);
5064 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5065 op0, op1,
5066 quotient, NULL_RTX, unsignedp))
5068 quotient = 0;
5069 if (! rem_flag)
5070 /* Still no luck. If we are not computing the remainder,
5071 use a library call for the quotient. */
5072 quotient = sign_expand_binop (compute_mode,
5073 udiv_optab, sdiv_optab,
5074 op0, op1, target,
5075 unsignedp, OPTAB_LIB_WIDEN);
5080 if (rem_flag)
5082 if (target && GET_MODE (target) != compute_mode)
5083 target = 0;
5085 if (quotient == 0)
5087 /* No divide instruction either. Use library for remainder. */
5088 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5089 op0, op1, target,
5090 unsignedp, OPTAB_LIB_WIDEN);
5091 /* No remainder function. Try a quotient-and-remainder
5092 function, keeping the remainder. */
5093 if (!remainder)
5095 remainder = gen_reg_rtx (compute_mode);
5096 if (!expand_twoval_binop_libfunc
5097 (unsignedp ? udivmod_optab : sdivmod_optab,
5098 op0, op1,
5099 NULL_RTX, remainder,
5100 unsignedp ? UMOD : MOD))
5101 remainder = NULL_RTX;
5104 else
5106 /* We divided. Now finish doing X - Y * (X / Y). */
5107 remainder = expand_mult (compute_mode, quotient, op1,
5108 NULL_RTX, unsignedp);
5109 remainder = expand_binop (compute_mode, sub_optab, op0,
5110 remainder, target, unsignedp,
5111 OPTAB_LIB_WIDEN);
5115 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5118 /* Return a tree node with data type TYPE, describing the value of X.
5119 Usually this is a VAR_DECL, if there is no obvious better choice.
5120 X may be an expression, however we only support those expressions
5121 generated by loop.c. */
5123 tree
5124 make_tree (tree type, rtx x)
5126 tree t;
5128 switch (GET_CODE (x))
5130 case CONST_INT:
5131 case CONST_WIDE_INT:
5132 t = wide_int_to_tree (type, std::make_pair (x, TYPE_MODE (type)));
5133 return t;
5135 case CONST_DOUBLE:
5136 STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5137 if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5138 t = wide_int_to_tree (type,
5139 wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5140 HOST_BITS_PER_WIDE_INT * 2));
5141 else
5142 t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5144 return t;
5146 case CONST_VECTOR:
5148 int units = CONST_VECTOR_NUNITS (x);
5149 tree itype = TREE_TYPE (type);
5150 tree *elts;
5151 int i;
5153 /* Build a tree with vector elements. */
5154 elts = XALLOCAVEC (tree, units);
5155 for (i = units - 1; i >= 0; --i)
5157 rtx elt = CONST_VECTOR_ELT (x, i);
5158 elts[i] = make_tree (itype, elt);
5161 return build_vector (type, elts);
5164 case PLUS:
5165 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5166 make_tree (type, XEXP (x, 1)));
5168 case MINUS:
5169 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5170 make_tree (type, XEXP (x, 1)));
5172 case NEG:
5173 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5175 case MULT:
5176 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5177 make_tree (type, XEXP (x, 1)));
5179 case ASHIFT:
5180 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5181 make_tree (type, XEXP (x, 1)));
5183 case LSHIFTRT:
5184 t = unsigned_type_for (type);
5185 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5186 make_tree (t, XEXP (x, 0)),
5187 make_tree (type, XEXP (x, 1))));
5189 case ASHIFTRT:
5190 t = signed_type_for (type);
5191 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5192 make_tree (t, XEXP (x, 0)),
5193 make_tree (type, XEXP (x, 1))));
5195 case DIV:
5196 if (TREE_CODE (type) != REAL_TYPE)
5197 t = signed_type_for (type);
5198 else
5199 t = type;
5201 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5202 make_tree (t, XEXP (x, 0)),
5203 make_tree (t, XEXP (x, 1))));
5204 case UDIV:
5205 t = unsigned_type_for (type);
5206 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5207 make_tree (t, XEXP (x, 0)),
5208 make_tree (t, XEXP (x, 1))));
5210 case SIGN_EXTEND:
5211 case ZERO_EXTEND:
5212 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5213 GET_CODE (x) == ZERO_EXTEND);
5214 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5216 case CONST:
5217 return make_tree (type, XEXP (x, 0));
5219 case SYMBOL_REF:
5220 t = SYMBOL_REF_DECL (x);
5221 if (t)
5222 return fold_convert (type, build_fold_addr_expr (t));
5223 /* else fall through. */
5225 default:
5226 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5228 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5229 address mode to pointer mode. */
5230 if (POINTER_TYPE_P (type))
5231 x = convert_memory_address_addr_space
5232 (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5234 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5235 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5236 t->decl_with_rtl.rtl = x;
5238 return t;
5242 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5243 and returning TARGET.
5245 If TARGET is 0, a pseudo-register or constant is returned. */
5248 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5250 rtx tem = 0;
5252 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5253 tem = simplify_binary_operation (AND, mode, op0, op1);
5254 if (tem == 0)
5255 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5257 if (target == 0)
5258 target = tem;
5259 else if (tem != target)
5260 emit_move_insn (target, tem);
5261 return target;
5264 /* Helper function for emit_store_flag. */
5266 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5267 machine_mode mode, machine_mode compare_mode,
5268 int unsignedp, rtx x, rtx y, int normalizep,
5269 machine_mode target_mode)
5271 struct expand_operand ops[4];
5272 rtx op0, comparison, subtarget;
5273 rtx_insn *last;
5274 machine_mode result_mode = targetm.cstore_mode (icode);
5276 last = get_last_insn ();
5277 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5278 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5279 if (!x || !y)
5281 delete_insns_since (last);
5282 return NULL_RTX;
5285 if (target_mode == VOIDmode)
5286 target_mode = result_mode;
5287 if (!target)
5288 target = gen_reg_rtx (target_mode);
5290 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5292 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5293 create_fixed_operand (&ops[1], comparison);
5294 create_fixed_operand (&ops[2], x);
5295 create_fixed_operand (&ops[3], y);
5296 if (!maybe_expand_insn (icode, 4, ops))
5298 delete_insns_since (last);
5299 return NULL_RTX;
5301 subtarget = ops[0].value;
5303 /* If we are converting to a wider mode, first convert to
5304 TARGET_MODE, then normalize. This produces better combining
5305 opportunities on machines that have a SIGN_EXTRACT when we are
5306 testing a single bit. This mostly benefits the 68k.
5308 If STORE_FLAG_VALUE does not have the sign bit set when
5309 interpreted in MODE, we can do this conversion as unsigned, which
5310 is usually more efficient. */
5311 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5313 convert_move (target, subtarget,
5314 val_signbit_known_clear_p (result_mode,
5315 STORE_FLAG_VALUE));
5316 op0 = target;
5317 result_mode = target_mode;
5319 else
5320 op0 = subtarget;
5322 /* If we want to keep subexpressions around, don't reuse our last
5323 target. */
5324 if (optimize)
5325 subtarget = 0;
5327 /* Now normalize to the proper value in MODE. Sometimes we don't
5328 have to do anything. */
5329 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5330 ;
5331 /* STORE_FLAG_VALUE might be the most negative number, so write
5332 the comparison this way to avoid a compile-time warning. */
5333 else if (- normalizep == STORE_FLAG_VALUE)
5334 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5336 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5337 it hard to use a value of just the sign bit due to ANSI integer
5338 constant typing rules. */
5339 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5340 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5341 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5342 normalizep == 1);
5343 else
5345 gcc_assert (STORE_FLAG_VALUE & 1);
5347 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5348 if (normalizep == -1)
5349 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5352 /* If we were converting to a smaller mode, do the conversion now. */
5353 if (target_mode != result_mode)
5355 convert_move (target, op0, 0);
5356 return target;
5358 else
5359 return op0;
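/* Editorial aside -- not part of expmed.c.  Plain-C sketches (helper names are
   illustrative, 32-bit int assumed) of the three normalizations emit_cstore
   applies to a raw scc result, which is either 0 or STORE_FLAG_VALUE:
     - STORE_FLAG_VALUE == -1, caller wants 0/1:            negate;
     - STORE_FLAG_VALUE == sign bit only, caller wants 0/1:  logical shift by 31;
     - STORE_FLAG_VALUE odd, caller wants 0/1:               mask with 1
       (and negate afterwards if 0/-1 was requested).  */

static int norm_from_minus1 (int raw)    { return -raw; }
static int norm_from_signbit (int raw)   { return (int) ((unsigned) raw >> 31); }
static int norm_from_odd_value (int raw) { return raw & 1; }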
5363 /* A subroutine of emit_store_flag only including "tricks" that do not
5364 need a recursive call. These are kept separate to avoid infinite
5365 loops. */
5367 static rtx
5368 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5369 machine_mode mode, int unsignedp, int normalizep,
5370 machine_mode target_mode)
5372 rtx subtarget;
5373 enum insn_code icode;
5374 machine_mode compare_mode;
5375 enum mode_class mclass;
5376 enum rtx_code scode;
5378 if (unsignedp)
5379 code = unsigned_condition (code);
5380 scode = swap_condition (code);
5382 /* If one operand is constant, make it the second one. Only do this
5383 if the other operand is not constant as well. */
5385 if (swap_commutative_operands_p (op0, op1))
5387 std::swap (op0, op1);
5388 code = swap_condition (code);
5391 if (mode == VOIDmode)
5392 mode = GET_MODE (op0);
5394 /* For some comparisons with 1 and -1, we can convert this to
5395 comparisons with zero. This will often produce more opportunities for
5396 store-flag insns. */
5398 switch (code)
5400 case LT:
5401 if (op1 == const1_rtx)
5402 op1 = const0_rtx, code = LE;
5403 break;
5404 case LE:
5405 if (op1 == constm1_rtx)
5406 op1 = const0_rtx, code = LT;
5407 break;
5408 case GE:
5409 if (op1 == const1_rtx)
5410 op1 = const0_rtx, code = GT;
5411 break;
5412 case GT:
5413 if (op1 == constm1_rtx)
5414 op1 = const0_rtx, code = GE;
5415 break;
5416 case GEU:
5417 if (op1 == const1_rtx)
5418 op1 = const0_rtx, code = NE;
5419 break;
5420 case LTU:
5421 if (op1 == const1_rtx)
5422 op1 = const0_rtx, code = EQ;
5423 break;
5424 default:
5425 break;
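/* Editorial aside -- not part of expmed.c.  The switch above relies on these
   integer identities, which hold because the operands are integral:
     x <  1   <=>  x <= 0          x <= -1  <=>  x <  0
     x >= 1   <=>  x >  0          x >  -1  <=>  x >= 0
     x >=u 1  <=>  x != 0          x <u 1   <=>  x == 0
   (the unsigned cases follow because 0 is the only unsigned value below 1).  */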
5428 /* If we are comparing a double-word integer with zero or -1, we can
5429 convert the comparison into one involving a single word. */
5430 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5431 && GET_MODE_CLASS (mode) == MODE_INT
5432 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5434 rtx tem;
5435 if ((code == EQ || code == NE)
5436 && (op1 == const0_rtx || op1 == constm1_rtx))
5438 rtx op00, op01;
5440 /* Do a logical OR or AND of the two words and compare the
5441 result. */
5442 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5443 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5444 tem = expand_binop (word_mode,
5445 op1 == const0_rtx ? ior_optab : and_optab,
5446 op00, op01, NULL_RTX, unsignedp,
5447 OPTAB_DIRECT);
5449 if (tem != 0)
5450 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5451 unsignedp, normalizep);
5453 else if ((code == LT || code == GE) && op1 == const0_rtx)
5455 rtx op0h;
5457 /* If testing the sign bit, can just test on high word. */
5458 op0h = simplify_gen_subreg (word_mode, op0, mode,
5459 subreg_highpart_offset (word_mode,
5460 mode));
5461 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5462 unsignedp, normalizep);
5464 else
5465 tem = NULL_RTX;
5467 if (tem)
5469 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5470 return tem;
5471 if (!target)
5472 target = gen_reg_rtx (target_mode);
5474 convert_move (target, tem,
5475 !val_signbit_known_set_p (word_mode,
5476 (normalizep ? normalizep
5477 : STORE_FLAG_VALUE)));
5478 return target;
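/* Editorial aside -- not part of expmed.c.  Plain-C rendering of the
   double-word reductions above, for a value split into two 32-bit words on a
   32-bit target ("lo"/"hi" are illustrative names for the subwords):
     x == 0   <=>  (lo | hi) == 0
     x == -1  <=>  (lo & hi) == -1
     x <  0   <=>  hi < 0          (only the sign word matters)  */

static int dw_eq_zero (unsigned lo, unsigned hi)   { return (lo | hi) == 0; }
static int dw_eq_minus1 (unsigned lo, unsigned hi) { return (lo & hi) == 0xffffffffu; }
static int dw_lt_zero (unsigned lo, int hi)        { (void) lo; return hi < 0; }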
5482 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5483 complement of A (for GE) and shifting the sign bit to the low bit. */
5484 if (op1 == const0_rtx && (code == LT || code == GE)
5485 && GET_MODE_CLASS (mode) == MODE_INT
5486 && (normalizep || STORE_FLAG_VALUE == 1
5487 || val_signbit_p (mode, STORE_FLAG_VALUE)))
5489 subtarget = target;
5491 if (!target)
5492 target_mode = mode;
5494 /* If the result is to be wider than OP0, it is best to convert it
5495 first. If it is to be narrower, it is *incorrect* to convert it
5496 first. */
5497 else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5499 op0 = convert_modes (target_mode, mode, op0, 0);
5500 mode = target_mode;
5503 if (target_mode != mode)
5504 subtarget = 0;
5506 if (code == GE)
5507 op0 = expand_unop (mode, one_cmpl_optab, op0,
5508 ((STORE_FLAG_VALUE == 1 || normalizep)
5509 ? 0 : subtarget), 0);
5511 if (STORE_FLAG_VALUE == 1 || normalizep)
5512 /* If we are supposed to produce a 0/1 value, we want to do
5513 a logical shift from the sign bit to the low-order bit; for
5514 a -1/0 value, we do an arithmetic shift. */
5515 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5516 GET_MODE_BITSIZE (mode) - 1,
5517 subtarget, normalizep != -1);
5519 if (mode != target_mode)
5520 op0 = convert_modes (target_mode, mode, op0, 0);
5522 return op0;
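/* Editorial aside -- not part of expmed.c.  The LT/GE special case above in
   plain C for a 32-bit int: the sign bit *is* the answer, so a logical shift
   (plus a complement for GE) yields the 0/1 form, while an arithmetic shift
   would yield 0/-1 instead.  Helper names are illustrative.  */

static unsigned lt_zero (int x)  { return (unsigned) x >> 31; }   /* x <  0 */
static unsigned ge_zero (int x)  { return (unsigned) ~x >> 31; }  /* x >= 0 */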
5525 mclass = GET_MODE_CLASS (mode);
5526 for (compare_mode = mode; compare_mode != VOIDmode;
5527 compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5529 machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5530 icode = optab_handler (cstore_optab, optab_mode);
5531 if (icode != CODE_FOR_nothing)
5533 do_pending_stack_adjust ();
5534 rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5535 unsignedp, op0, op1, normalizep, target_mode);
5536 if (tem)
5537 return tem;
5539 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5541 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5542 unsignedp, op1, op0, normalizep, target_mode);
5543 if (tem)
5544 return tem;
5546 break;
5550 return 0;
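/* Editorial aside -- not part of expmed.c.  The mode loop above probes
   cstore_optab starting at MODE and moving to successively wider modes until a
   target pattern exists; for floating-point modes it also retries with the
   operands and the swapped condition (scode), since "a < b" and "b > a" are the
   same test and a target may only provide one form:  */

static int swapped_form (double a, double b) { return b > a; }   /* == (a < b) */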
5553 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5554 and storing in TARGET. Normally return TARGET.
5555 Return 0 if that cannot be done.
5557 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5558 it is VOIDmode, they cannot both be CONST_INT.
5560 UNSIGNEDP is for the case where we have to widen the operands
5561 to perform the operation. It says to use zero-extension.
5563 NORMALIZEP is 1 if we should convert the result to be either zero
5564 or one. NORMALIZEP is -1 if we should convert the result to be
5565 either zero or -1. If NORMALIZEP is zero, the result will be left
5566 "raw" out of the scc insn. */
5568 rtx
5569 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5570 machine_mode mode, int unsignedp, int normalizep)
5572 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5573 enum rtx_code rcode;
5574 rtx subtarget;
5575 rtx tem, trueval;
5576 rtx_insn *last;
5578 /* If we compare constants, we shouldn't use a store-flag operation,
5579 but a constant load. We can get there via the vanilla route that
5580 usually generates a compare-branch sequence, but will in this case
5581 fold the comparison to a constant, and thus elide the branch. */
5582 if (CONSTANT_P (op0) && CONSTANT_P (op1))
5583 return NULL_RTX;
5585 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5586 target_mode);
5587 if (tem)
5588 return tem;
5590 /* If we reached here, we can't do this with a scc insn, however there
5591 are some comparisons that can be done in other ways. Don't do any
5592 of these cases if branches are very cheap. */
5593 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5594 return 0;
5596 /* See what we need to return. We can only return a 1, -1, or the
5597 sign bit. */
5599 if (normalizep == 0)
5601 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5602 normalizep = STORE_FLAG_VALUE;
5604 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5605 ;
5606 else
5607 return 0;
5610 last = get_last_insn ();
5612 /* If optimizing, use different pseudo registers for each insn, instead
5613 of reusing the same pseudo. This leads to better CSE, but slows
5614 down the compiler, since there are more pseudos. */
5615 subtarget = (!optimize
5616 && (target_mode == mode)) ? target : NULL_RTX;
5617 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5619 /* For floating-point comparisons, try the reverse comparison or try
5620 changing the "orderedness" of the comparison. */
5621 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5623 enum rtx_code first_code;
5624 bool and_them;
5626 rcode = reverse_condition_maybe_unordered (code);
5627 if (can_compare_p (rcode, mode, ccp_store_flag)
5628 && (code == ORDERED || code == UNORDERED
5629 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5630 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5632 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5633 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5635 /* For the reverse comparison, use either an addition or a XOR. */
5636 if (want_add
5637 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5638 optimize_insn_for_speed_p ()) == 0)
5640 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5641 STORE_FLAG_VALUE, target_mode);
5642 if (tem)
5643 return expand_binop (target_mode, add_optab, tem,
5644 gen_int_mode (normalizep, target_mode),
5645 target, 0, OPTAB_WIDEN);
5647 else if (!want_add
5648 && rtx_cost (trueval, mode, XOR, 1,
5649 optimize_insn_for_speed_p ()) == 0)
5651 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5652 normalizep, target_mode);
5653 if (tem)
5654 return expand_binop (target_mode, xor_optab, tem, trueval,
5655 target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5659 delete_insns_since (last);
5661 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5662 if (code == ORDERED || code == UNORDERED)
5663 return 0;
5665 and_them = split_comparison (code, mode, &first_code, &code);
5667 /* If there are no NaNs, the first comparison should always fall through.
5668 Effectively change the comparison to the other one. */
5669 if (!HONOR_NANS (mode))
5671 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5672 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5673 target_mode);
5676 if (!HAVE_conditional_move)
5677 return 0;
5679 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5680 conditional move. */
5681 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5682 normalizep, target_mode);
5683 if (tem == 0)
5684 return 0;
5686 if (and_them)
5687 tem = emit_conditional_move (target, code, op0, op1, mode,
5688 tem, const0_rtx, GET_MODE (tem), 0);
5689 else
5690 tem = emit_conditional_move (target, code, op0, op1, mode,
5691 trueval, tem, GET_MODE (tem), 0);
5693 if (tem == 0)
5694 delete_insns_since (last);
5695 return tem;
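/* Editorial aside -- not part of expmed.c.  The add/xor fix-up used above (and
   again for the integer reverse comparison below) in plain C, assuming
   STORE_FLAG_VALUE == 1 and using int operands for concreteness.  The *reverse*
   of the wanted comparison is computed as 0/1 and then adjusted, which is free
   whenever the add or xor itself costs nothing:
     want (a < b) as 0/-1:  rev = (a >= b);  result = rev + (-1);
     want (a < b) as 0/1:   rev = (a >= b);  result = rev ^ 1;  */

static int lt_as_minus1 (int a, int b) { return (a >= b) + (-1); }
static int lt_as_1      (int a, int b) { return (a >= b) ^ 1; }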
5698 /* The remaining tricks only apply to integer comparisons. */
5700 if (GET_MODE_CLASS (mode) != MODE_INT)
5701 return 0;
5703 /* If this is an equality comparison of integers, we can try to exclusive-or
5704 (or subtract) the two operands and use a recursive call to try the
5705 comparison with zero. Don't do any of these cases if branches are
5706 very cheap. */
5708 if ((code == EQ || code == NE) && op1 != const0_rtx)
5710 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5711 OPTAB_WIDEN);
5713 if (tem == 0)
5714 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5715 OPTAB_WIDEN);
5716 if (tem != 0)
5717 tem = emit_store_flag (target, code, tem, const0_rtx,
5718 mode, unsignedp, normalizep);
5719 if (tem != 0)
5720 return tem;
5722 delete_insns_since (last);
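/* Editorial aside -- not part of expmed.c.  The recursion above rests on the
   identities  a == b  <=>  (a ^ b) == 0  and  a != b  <=>  (a ^ b) != 0
   (subtraction serves equally well), which turn any equality test into a
   comparison against zero -- the only form the tricks further below handle:  */

static int eq_via_xor (unsigned a, unsigned b) { return (a ^ b) == 0; }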
5725 /* For integer comparisons, try the reverse comparison. However, for
5726 small X and if we'd have to extend anyway, implementing "X != 0"
5727 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5728 rcode = reverse_condition (code);
5729 if (can_compare_p (rcode, mode, ccp_store_flag)
5730 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5731 && code == NE
5732 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5733 && op1 == const0_rtx))
5735 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5736 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5738 /* Again, for the reverse comparison, use either an addition or a XOR. */
5739 if (want_add
5740 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5741 optimize_insn_for_speed_p ()) == 0)
5743 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5744 STORE_FLAG_VALUE, target_mode);
5745 if (tem != 0)
5746 tem = expand_binop (target_mode, add_optab, tem,
5747 gen_int_mode (normalizep, target_mode),
5748 target, 0, OPTAB_WIDEN);
5750 else if (!want_add
5751 && rtx_cost (trueval, mode, XOR, 1,
5752 optimize_insn_for_speed_p ()) == 0)
5754 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5755 normalizep, target_mode);
5756 if (tem != 0)
5757 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5758 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5761 if (tem != 0)
5762 return tem;
5763 delete_insns_since (last);
5766 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5767 the constant zero. Reject all other comparisons at this point. Only
5768 do LE and GT if branches are expensive since they are expensive on
5769 2-operand machines. */
5771 if (op1 != const0_rtx
5772 || (code != EQ && code != NE
5773 && (BRANCH_COST (optimize_insn_for_speed_p (),
5774 false) <= 1 || (code != LE && code != GT))))
5775 return 0;
5777 /* Try to put the result of the comparison in the sign bit. Assume we can't
5778 do the necessary operation below. */
5780 tem = 0;
5782 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5783 the sign bit set. */
5785 if (code == LE)
5787 /* This is destructive, so SUBTARGET can't be OP0. */
5788 if (rtx_equal_p (subtarget, op0))
5789 subtarget = 0;
5791 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5792 OPTAB_WIDEN);
5793 if (tem)
5794 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5795 OPTAB_WIDEN);
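/* Editorial aside -- not part of expmed.c.  The A <= 0 trick above in plain C
   for a 32-bit int: if A is positive, both A and A-1 are non-negative and the
   OR has a clear sign bit; if A is zero, A-1 wraps to all-ones; if A is
   negative, its own sign bit carries through.  Unsigned arithmetic below avoids
   the signed-overflow corner at A == INT_MIN.  */

static unsigned
le_zero_signbit (int a)
{
  unsigned u = (unsigned) a;
  return (u | (u - 1u)) >> 31;       /* 1 iff a <= 0 */
}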
5798 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5799 number of bits in the mode of OP0, minus one. */
5801 if (code == GT)
5803 if (rtx_equal_p (subtarget, op0))
5804 subtarget = 0;
5806 tem = expand_shift (RSHIFT_EXPR, mode, op0,
5807 GET_MODE_BITSIZE (mode) - 1,
5808 subtarget, 0);
5809 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5810 OPTAB_WIDEN);
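/* Editorial aside -- not part of expmed.c.  The A > 0 trick above in plain C for
   a 32-bit int: A >> 31 (arithmetic) is 0 for A >= 0 and -1 for A < 0, so
   (A >> 31) - A is negative exactly when A > 0.  The subtraction wraps modulo
   2**32, so even A == INT_MIN comes out right.  */

static unsigned
gt_zero_signbit (int a)
{
  unsigned sign = (unsigned) (a >> 31);   /* 0 or 0xffffffff (GCC: arithmetic) */
  return (sign - (unsigned) a) >> 31;     /* 1 iff a > 0 */
}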
5813 if (code == EQ || code == NE)
5815 /* For EQ or NE, one way to do the comparison is to apply an operation
5816 that converts the operand into a positive number if it is nonzero
5817 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5818 for NE we negate. This puts the result in the sign bit. Then we
5819 normalize with a shift, if needed.
5821 Two operations that can do the above actions are ABS and FFS, so try
5822 them. If that doesn't work, and MODE is smaller than a full word,
5823 we can use zero-extension to the wider mode (an unsigned conversion)
5824 as the operation. */
5826 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5827 that is compensated by the subsequent overflow when subtracting
5828 one / negating. */
5830 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5831 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5832 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5833 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5834 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5836 tem = convert_modes (word_mode, mode, op0, 1);
5837 mode = word_mode;
5840 if (tem != 0)
5842 if (code == EQ)
5843 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5844 0, OPTAB_WIDEN);
5845 else
5846 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5849 /* If we couldn't do it that way, for NE we can "or" the two's complement
5850 of the value with itself. For EQ, we take the one's complement of
5851 that "or", which is an extra insn, so we only handle EQ if branches
5852 are expensive. */
5854 if (tem == 0
5855 && (code == NE
5856 || BRANCH_COST (optimize_insn_for_speed_p (),
5857 false) > 1))
5859 if (rtx_equal_p (subtarget, op0))
5860 subtarget = 0;
5862 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5863 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5864 OPTAB_WIDEN);
5866 if (tem && code == EQ)
5867 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
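/* Editorial aside -- not part of expmed.c.  Plain-C versions of the two sign-bit
   encodings used above for a 32-bit int:
     via ABS:     abs(a) - 1  is negative  iff a == 0   (EQ)
                  -abs(a)     is negative  iff a != 0   (NE)
     via neg|or:  (-a | a)    is negative  iff a != 0   (NE; complement for EQ).
   The INT_MIN corner works out in both encodings because the wraparound leaves
   the sign bit with the right value, as the ABS comment above notes.  */

static unsigned
ne_zero_signbit (int a)
{
  unsigned u = (unsigned) a;
  return ((0u - u) | u) >> 31;       /* 1 iff a != 0 */
}

static unsigned
eq_zero_signbit (int a)
{
  unsigned u = (unsigned) a;
  return (~((0u - u) | u)) >> 31;    /* 1 iff a == 0 */
}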
5871 if (tem && normalizep)
5872 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5873 GET_MODE_BITSIZE (mode) - 1,
5874 subtarget, normalizep == 1);
5876 if (tem)
5878 if (!target)
5879 ;
5880 else if (GET_MODE (tem) != target_mode)
5882 convert_move (target, tem, 0);
5883 tem = target;
5885 else if (!subtarget)
5887 emit_move_insn (target, tem);
5888 tem = target;
5891 else
5892 delete_insns_since (last);
5894 return tem;
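/* Editorial aside -- not part of expmed.c.  A sketch of how a caller might use
   emit_store_flag to expand "r = (a < b)" into a 0/1 pseudo.  "a_rtx", "b_rtx"
   and "dest" are hypothetical operands; the rest is the interface documented
   above.  A zero return means no branch-free sequence was found, and the caller
   falls back to a compare-and-jump sequence or to emit_store_flag_force below,
   which never fails.  */
#if 0   /* illustrative only */
  rtx res = emit_store_flag (dest, LT, a_rtx, b_rtx, SImode,
                             /*unsignedp=*/0, /*normalizep=*/1);
  if (res == 0)
    res = emit_store_flag_force (dest, LT, a_rtx, b_rtx, SImode, 0, 1);
#endif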
5897 /* Like emit_store_flag, but always succeeds. */
5899 rtx
5900 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5901 machine_mode mode, int unsignedp, int normalizep)
5903 rtx tem;
5904 rtx_code_label *label;
5905 rtx trueval, falseval;
5907 /* First see if emit_store_flag can do the job. */
5908 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5909 if (tem != 0)
5910 return tem;
5912 if (!target)
5913 target = gen_reg_rtx (word_mode);
5915 /* If this failed, we have to do this with set/compare/jump/set code.
5916 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
5917 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5918 if (code == NE
5919 && GET_MODE_CLASS (mode) == MODE_INT
5920 && REG_P (target)
5921 && op0 == target
5922 && op1 == const0_rtx)
5924 label = gen_label_rtx ();
5925 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
5926 NULL_RTX, NULL, label, -1);
5927 emit_move_insn (target, trueval);
5928 emit_label (label);
5929 return target;
5932 if (!REG_P (target)
5933 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5934 target = gen_reg_rtx (GET_MODE (target));
5936 /* Jump in the right direction if the target cannot implement CODE
5937 but can jump on its reverse condition. */
5938 falseval = const0_rtx;
5939 if (! can_compare_p (code, mode, ccp_jump)
5940 && (! FLOAT_MODE_P (mode)
5941 || code == ORDERED || code == UNORDERED
5942 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5943 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5945 enum rtx_code rcode;
5946 if (FLOAT_MODE_P (mode))
5947 rcode = reverse_condition_maybe_unordered (code);
5948 else
5949 rcode = reverse_condition (code);
5951 /* Canonicalize to UNORDERED for the libcall. */
5952 if (can_compare_p (rcode, mode, ccp_jump)
5953 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5955 falseval = trueval;
5956 trueval = const0_rtx;
5957 code = rcode;
5961 emit_move_insn (target, trueval);
5962 label = gen_label_rtx ();
5963 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
5964 label, -1);
5966 emit_move_insn (target, falseval);
5967 emit_label (label);
5969 return target;
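/* Editorial aside -- not part of expmed.c.  The control flow of the
   set/compare/jump/set fallback above, written as plain C for the default
   trueval/falseval of 1 and 0 and an LT comparison (helper name illustrative):
       target = 1;  if (op0 < op1) goto done;  target = 0;  done:
   When the machine can only branch on the reverse condition, the two constants
   are swapped and the reversed condition is emitted instead.  */

static int
store_flag_fallback_shape (int op0, int op1)
{
  int target = 1;          /* emit_move_insn (target, trueval)        */
  if (op0 < op1)           /* do_compare_rtx_and_jump (..., label)    */
    goto done;
  target = 0;              /* emit_move_insn (target, falseval)       */
 done:
  return target;           /* emit_label (label); return target       */
}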
5972 /* Perform possibly multi-word comparison and conditional jump to LABEL
5973 if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE. This is
5974 now a thin wrapper around do_compare_rtx_and_jump. */
5976 static void
5977 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
5978 rtx_code_label *label)
5980 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5981 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
5982 NULL, label, -1);