gcc/expmed.c
1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987-2015 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "diagnostic-core.h"
27 #include "rtl.h"
28 #include "hash-set.h"
29 #include "machmode.h"
30 #include "vec.h"
31 #include "double-int.h"
32 #include "input.h"
33 #include "alias.h"
34 #include "symtab.h"
35 #include "wide-int.h"
36 #include "inchash.h"
37 #include "tree.h"
38 #include "fold-const.h"
39 #include "stor-layout.h"
40 #include "tm_p.h"
41 #include "flags.h"
42 #include "insn-config.h"
43 #include "hashtab.h"
44 #include "hard-reg-set.h"
45 #include "function.h"
46 #include "statistics.h"
47 #include "real.h"
48 #include "fixed-value.h"
49 #include "expmed.h"
50 #include "dojump.h"
51 #include "explow.h"
52 #include "calls.h"
53 #include "emit-rtl.h"
54 #include "varasm.h"
55 #include "stmt.h"
56 #include "expr.h"
57 #include "insn-codes.h"
58 #include "optabs.h"
59 #include "recog.h"
60 #include "langhooks.h"
61 #include "predict.h"
62 #include "basic-block.h"
63 #include "df.h"
64 #include "target.h"
66 struct target_expmed default_target_expmed;
67 #if SWITCHABLE_TARGET
68 struct target_expmed *this_target_expmed = &default_target_expmed;
69 #endif
71 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
72 unsigned HOST_WIDE_INT,
73 unsigned HOST_WIDE_INT,
74 unsigned HOST_WIDE_INT,
75 rtx, bool);
76 static void store_fixed_bit_field_1 (rtx, unsigned HOST_WIDE_INT,
77 unsigned HOST_WIDE_INT,
78 rtx, bool);
79 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
80 unsigned HOST_WIDE_INT,
81 unsigned HOST_WIDE_INT,
82 unsigned HOST_WIDE_INT,
83 rtx, bool);
84 static rtx extract_fixed_bit_field (machine_mode, rtx,
85 unsigned HOST_WIDE_INT,
86 unsigned HOST_WIDE_INT, rtx, int, bool);
87 static rtx extract_fixed_bit_field_1 (machine_mode, rtx,
88 unsigned HOST_WIDE_INT,
89 unsigned HOST_WIDE_INT, rtx, int, bool);
90 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
91 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
92 unsigned HOST_WIDE_INT, int, bool);
93 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
94 static rtx expand_smod_pow2 (machine_mode, rtx, HOST_WIDE_INT);
95 static rtx expand_sdiv_pow2 (machine_mode, rtx, HOST_WIDE_INT);
97 /* Return a constant integer mask value of mode MODE with BITSIZE ones
98 followed by BITPOS zeros, or the complement of that if COMPLEMENT.
99 The mask is truncated if necessary to the width of mode MODE. The
100 mask is zero-extended if BITSIZE+BITPOS is too small for MODE. */
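/* For example, mask_rtx (SImode, 4, 3, false) yields the SImode constant
   0x70 (three one bits starting at bit 4); passing true for COMPLEMENT
   yields its 32-bit complement, 0xffffff8f.  */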
102 static inline rtx
103 mask_rtx (machine_mode mode, int bitpos, int bitsize, bool complement)
105 return immed_wide_int_const
106 (wi::shifted_mask (bitpos, bitsize, complement,
107 GET_MODE_PRECISION (mode)), mode);
 110 /* Test whether a value is zero or a power of two.  */
111 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
112 (((x) & ((x) - (unsigned HOST_WIDE_INT) 1)) == 0)
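/* For example, 8 & 7 == 0 and 0 & (0 - 1) == 0, so 8 and 0 pass the test,
   while 12 & 11 == 8, so 12 does not.  */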
114 struct init_expmed_rtl
116 rtx reg;
117 rtx plus;
118 rtx neg;
119 rtx mult;
120 rtx sdiv;
121 rtx udiv;
122 rtx sdiv_32;
123 rtx smod_32;
124 rtx wide_mult;
125 rtx wide_lshr;
126 rtx wide_trunc;
127 rtx shift;
128 rtx shift_mult;
129 rtx shift_add;
130 rtx shift_sub0;
131 rtx shift_sub1;
132 rtx zext;
133 rtx trunc;
135 rtx pow2[MAX_BITS_PER_WORD];
136 rtx cint[MAX_BITS_PER_WORD];
139 static void
140 init_expmed_one_conv (struct init_expmed_rtl *all, machine_mode to_mode,
141 machine_mode from_mode, bool speed)
143 int to_size, from_size;
144 rtx which;
146 to_size = GET_MODE_PRECISION (to_mode);
147 from_size = GET_MODE_PRECISION (from_mode);
 149   /* Most partial integers have a precision less than that of the "full"
 150      integer required for storage.  In case one doesn't, for
 151      comparison purposes here, reduce the bit size by one in that
 152      case.  */
153 if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
154 && exact_log2 (to_size) != -1)
155 to_size --;
156 if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
157 && exact_log2 (from_size) != -1)
158 from_size --;
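/* For example, a partial-integer mode whose precision happened to be
   exactly 32 would be compared as 31 bits here, so a conversion between it
   and the 32-bit full integer mode is still costed below as an extension
   or truncation rather than as a conversion between equal widths.  */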
160 /* Assume cost of zero-extend and sign-extend is the same. */
161 which = (to_size < from_size ? all->trunc : all->zext);
163 PUT_MODE (all->reg, from_mode);
164 set_convert_cost (to_mode, from_mode, speed, set_src_cost (which, speed));
167 static void
168 init_expmed_one_mode (struct init_expmed_rtl *all,
169 machine_mode mode, int speed)
171 int m, n, mode_bitsize;
172 machine_mode mode_from;
174 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
176 PUT_MODE (all->reg, mode);
177 PUT_MODE (all->plus, mode);
178 PUT_MODE (all->neg, mode);
179 PUT_MODE (all->mult, mode);
180 PUT_MODE (all->sdiv, mode);
181 PUT_MODE (all->udiv, mode);
182 PUT_MODE (all->sdiv_32, mode);
183 PUT_MODE (all->smod_32, mode);
184 PUT_MODE (all->wide_trunc, mode);
185 PUT_MODE (all->shift, mode);
186 PUT_MODE (all->shift_mult, mode);
187 PUT_MODE (all->shift_add, mode);
188 PUT_MODE (all->shift_sub0, mode);
189 PUT_MODE (all->shift_sub1, mode);
190 PUT_MODE (all->zext, mode);
191 PUT_MODE (all->trunc, mode);
193 set_add_cost (speed, mode, set_src_cost (all->plus, speed));
194 set_neg_cost (speed, mode, set_src_cost (all->neg, speed));
195 set_mul_cost (speed, mode, set_src_cost (all->mult, speed));
196 set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, speed));
197 set_udiv_cost (speed, mode, set_src_cost (all->udiv, speed));
199 set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, speed)
200 <= 2 * add_cost (speed, mode)));
201 set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, speed)
202 <= 4 * add_cost (speed, mode)));
204 set_shift_cost (speed, mode, 0, 0);
206 int cost = add_cost (speed, mode);
207 set_shiftadd_cost (speed, mode, 0, cost);
208 set_shiftsub0_cost (speed, mode, 0, cost);
209 set_shiftsub1_cost (speed, mode, 0, cost);
212 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
213 for (m = 1; m < n; m++)
215 XEXP (all->shift, 1) = all->cint[m];
216 XEXP (all->shift_mult, 1) = all->pow2[m];
218 set_shift_cost (speed, mode, m, set_src_cost (all->shift, speed));
219 set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, speed));
220 set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, speed));
221 set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, speed));
224 if (SCALAR_INT_MODE_P (mode))
226 for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
227 mode_from = (machine_mode)(mode_from + 1))
228 init_expmed_one_conv (all, mode, mode_from, speed);
230 if (GET_MODE_CLASS (mode) == MODE_INT)
232 machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
233 if (wider_mode != VOIDmode)
235 PUT_MODE (all->zext, wider_mode);
236 PUT_MODE (all->wide_mult, wider_mode);
237 PUT_MODE (all->wide_lshr, wider_mode);
238 XEXP (all->wide_lshr, 1) = GEN_INT (mode_bitsize);
240 set_mul_widen_cost (speed, wider_mode,
241 set_src_cost (all->wide_mult, speed));
242 set_mul_highpart_cost (speed, mode,
243 set_src_cost (all->wide_trunc, speed));
248 void
249 init_expmed (void)
251 struct init_expmed_rtl all;
252 machine_mode mode = QImode;
253 int m, speed;
255 memset (&all, 0, sizeof all);
256 for (m = 1; m < MAX_BITS_PER_WORD; m++)
258 all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
259 all.cint[m] = GEN_INT (m);
262 /* Avoid using hard regs in ways which may be unsupported. */
263 all.reg = gen_rtx_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
264 all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
265 all.neg = gen_rtx_NEG (mode, all.reg);
266 all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
267 all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
268 all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
269 all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
270 all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
271 all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
272 all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
273 all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
274 all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
275 all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
276 all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
277 all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
278 all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
279 all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
280 all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
282 for (speed = 0; speed < 2; speed++)
284 crtl->maybe_hot_insn_p = speed;
285 set_zero_cost (speed, set_src_cost (const0_rtx, speed));
287 for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
288 mode = (machine_mode)(mode + 1))
289 init_expmed_one_mode (&all, mode, speed);
291 if (MIN_MODE_PARTIAL_INT != VOIDmode)
292 for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
293 mode = (machine_mode)(mode + 1))
294 init_expmed_one_mode (&all, mode, speed);
296 if (MIN_MODE_VECTOR_INT != VOIDmode)
297 for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
298 mode = (machine_mode)(mode + 1))
299 init_expmed_one_mode (&all, mode, speed);
302 if (alg_hash_used_p ())
304 struct alg_hash_entry *p = alg_hash_entry_ptr (0);
305 memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
307 else
308 set_alg_hash_used_p (true);
309 default_rtl_profile ();
311 ggc_free (all.trunc);
312 ggc_free (all.shift_sub1);
313 ggc_free (all.shift_sub0);
314 ggc_free (all.shift_add);
315 ggc_free (all.shift_mult);
316 ggc_free (all.shift);
317 ggc_free (all.wide_trunc);
318 ggc_free (all.wide_lshr);
319 ggc_free (all.wide_mult);
320 ggc_free (all.zext);
321 ggc_free (all.smod_32);
322 ggc_free (all.sdiv_32);
323 ggc_free (all.udiv);
324 ggc_free (all.sdiv);
325 ggc_free (all.mult);
326 ggc_free (all.neg);
327 ggc_free (all.plus);
328 ggc_free (all.reg);
331 /* Return an rtx representing minus the value of X.
332 MODE is the intended mode of the result,
333 useful if X is a CONST_INT. */
336 negate_rtx (machine_mode mode, rtx x)
338 rtx result = simplify_unary_operation (NEG, mode, x, mode);
340 if (result == 0)
341 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
343 return result;
346 /* Return an rtx representing value of X with reverse storage order.
347 MODE is the intended mode of the result,
348 useful if X is a CONST_INT. */
351 flip_storage_order (enum machine_mode mode, rtx x)
353 enum machine_mode int_mode;
354 rtx result;
356 if (mode == QImode)
357 return x;
359 if (SCALAR_INT_MODE_P (mode))
360 int_mode = mode;
361 else
363 int_mode = int_mode_for_mode (mode);
364 gcc_assert (int_mode != BLKmode);
365 x = gen_lowpart (int_mode, x);
368 result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
369 if (result == 0)
370 result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
372 if (int_mode != mode)
373 result = gen_lowpart (mode, result);
375 return result;
378 /* Adjust bitfield memory MEM so that it points to the first unit of mode
379 MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
380 If MODE is BLKmode, return a reference to every byte in the bitfield.
381 Set *NEW_BITNUM to the bit position of the field within the new memory. */
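/* For example, narrowing a byte-based MEM to HImode with BITNUM == 37
   gives *NEW_BITNUM == 5 and a byte offset of 4, i.e. the 16-bit unit
   covering bits 32-47 of the original reference.  */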
383 static rtx
384 narrow_bit_field_mem (rtx mem, machine_mode mode,
385 unsigned HOST_WIDE_INT bitsize,
386 unsigned HOST_WIDE_INT bitnum,
387 unsigned HOST_WIDE_INT *new_bitnum)
389 if (mode == BLKmode)
391 *new_bitnum = bitnum % BITS_PER_UNIT;
392 HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
393 HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
394 / BITS_PER_UNIT);
395 return adjust_bitfield_address_size (mem, mode, offset, size);
397 else
399 unsigned int unit = GET_MODE_BITSIZE (mode);
400 *new_bitnum = bitnum % unit;
401 HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
402 return adjust_bitfield_address (mem, mode, offset);
406 /* The caller wants to perform insertion or extraction PATTERN on a
407 bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
408 BITREGION_START and BITREGION_END are as for store_bit_field
409 and FIELDMODE is the natural mode of the field.
411 Search for a mode that is compatible with the memory access
412 restrictions and (where applicable) with a register insertion or
413 extraction. Return the new memory on success, storing the adjusted
414 bit position in *NEW_BITNUM. Return null otherwise. */
416 static rtx
417 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
418 rtx op0, HOST_WIDE_INT bitsize,
419 HOST_WIDE_INT bitnum,
420 unsigned HOST_WIDE_INT bitregion_start,
421 unsigned HOST_WIDE_INT bitregion_end,
422 machine_mode fieldmode,
423 unsigned HOST_WIDE_INT *new_bitnum)
425 bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
426 bitregion_end, MEM_ALIGN (op0),
427 MEM_VOLATILE_P (op0));
428 machine_mode best_mode;
429 if (iter.next_mode (&best_mode))
431 /* We can use a memory in BEST_MODE. See whether this is true for
432 any wider modes. All other things being equal, we prefer to
433 use the widest mode possible because it tends to expose more
434 CSE opportunities. */
435 if (!iter.prefer_smaller_modes ())
437 /* Limit the search to the mode required by the corresponding
438 register insertion or extraction instruction, if any. */
439 machine_mode limit_mode = word_mode;
440 extraction_insn insn;
441 if (get_best_reg_extraction_insn (&insn, pattern,
442 GET_MODE_BITSIZE (best_mode),
443 fieldmode))
444 limit_mode = insn.field_mode;
446 machine_mode wider_mode;
447 while (iter.next_mode (&wider_mode)
448 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
449 best_mode = wider_mode;
451 return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
452 new_bitnum);
454 return NULL_RTX;
457 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
458 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
459 offset is then BITNUM / BITS_PER_UNIT. */
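/* For example, with 32-bit words and 8-bit units, an 8-bit field at bit 32
   of a DImode value is a lowpart subreg on a little-endian target (BITNUM
   is word-aligned), while on a big-endian target an 8-bit field at bit 56
   is, since BITNUM + BITSIZE reaches the end of the DImode value.  */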
461 static bool
462 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
463 unsigned HOST_WIDE_INT bitsize,
464 machine_mode struct_mode)
466 if (BYTES_BIG_ENDIAN)
467 return (bitnum % BITS_PER_UNIT == 0
468 && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
469 || (bitnum + bitsize) % BITS_PER_WORD == 0));
470 else
471 return bitnum % BITS_PER_WORD == 0;
474 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
475 containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
476 Return false if the access would touch memory outside the range
477 BITREGION_START to BITREGION_END for conformance to the C++ memory
478 model. */
480 static bool
481 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
482 unsigned HOST_WIDE_INT bitnum,
483 machine_mode fieldmode,
484 unsigned HOST_WIDE_INT bitregion_start,
485 unsigned HOST_WIDE_INT bitregion_end)
487 unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
489 /* -fstrict-volatile-bitfields must be enabled and we must have a
490 volatile MEM. */
491 if (!MEM_P (op0)
492 || !MEM_VOLATILE_P (op0)
493 || flag_strict_volatile_bitfields <= 0)
494 return false;
496 /* Non-integral modes likely only happen with packed structures.
497 Punt. */
498 if (!SCALAR_INT_MODE_P (fieldmode))
499 return false;
501 /* The bit size must not be larger than the field mode, and
502 the field mode must not be larger than a word. */
503 if (bitsize > modesize || modesize > BITS_PER_WORD)
504 return false;
506 /* Check for cases of unaligned fields that must be split. */
507 if (bitnum % modesize + bitsize > modesize)
508 return false;
510 /* The memory must be sufficiently aligned for a MODESIZE access.
 511      This condition guarantees that the memory access will not
512 touch anything after the end of the structure. */
513 if (MEM_ALIGN (op0) < modesize)
514 return false;
516 /* Check for cases where the C++ memory model applies. */
517 if (bitregion_end != 0
518 && (bitnum - bitnum % modesize < bitregion_start
519 || bitnum - bitnum % modesize + modesize - 1 > bitregion_end))
520 return false;
522 return true;
525 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
526 bit number BITNUM can be treated as a simple value of mode MODE. */
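/* For example, a 32-bit field that starts on a byte boundary in a MEM
   aligned to at least 32 bits can be accessed with a single SImode load or
   store, so it satisfies this predicate.  */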
528 static bool
529 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
530 unsigned HOST_WIDE_INT bitnum, machine_mode mode)
532 return (MEM_P (op0)
533 && bitnum % BITS_PER_UNIT == 0
534 && bitsize == GET_MODE_BITSIZE (mode)
535 && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
536 || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
537 && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
540 /* Try to use instruction INSV to store VALUE into a field of OP0.
541 BITSIZE and BITNUM are as for store_bit_field. */
543 static bool
544 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
545 unsigned HOST_WIDE_INT bitsize,
546 unsigned HOST_WIDE_INT bitnum,
547 rtx value)
549 struct expand_operand ops[4];
550 rtx value1;
551 rtx xop0 = op0;
552 rtx_insn *last = get_last_insn ();
553 bool copy_back = false;
555 machine_mode op_mode = insv->field_mode;
556 unsigned int unit = GET_MODE_BITSIZE (op_mode);
557 if (bitsize == 0 || bitsize > unit)
558 return false;
560 if (MEM_P (xop0))
561 /* Get a reference to the first byte of the field. */
562 xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
563 &bitnum);
564 else
566 /* Convert from counting within OP0 to counting in OP_MODE. */
567 if (BYTES_BIG_ENDIAN)
568 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
570 /* If xop0 is a register, we need it in OP_MODE
571 to make it acceptable to the format of insv. */
572 if (GET_CODE (xop0) == SUBREG)
573 /* We can't just change the mode, because this might clobber op0,
574 and we will need the original value of op0 if insv fails. */
575 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
576 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
577 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
580 /* If the destination is a paradoxical subreg such that we need a
581 truncate to the inner mode, perform the insertion on a temporary and
582 truncate the result to the original destination. Note that we can't
583 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
584 X) 0)) is (reg:N X). */
585 if (GET_CODE (xop0) == SUBREG
586 && REG_P (SUBREG_REG (xop0))
587 && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
588 op_mode))
590 rtx tem = gen_reg_rtx (op_mode);
591 emit_move_insn (tem, xop0);
592 xop0 = tem;
593 copy_back = true;
 596   /* There is a similar overflow check at the start of store_bit_field_1,
 597      but it only handles the case where the field lies completely
 598      outside the register.  Here the field may lie partially in the
 599      register, so we need to adjust BITSIZE for that partial overflow.
 600      Without this fix, pr48335-2.c breaks on big-endian targets whose
 601      architectures have a bit-insert instruction, such as arm and
 602      aarch64.  */
603 if (bitsize + bitnum > unit && bitnum < unit)
605 warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
606 "destination object, data truncated into %wu-bit",
607 bitsize, unit - bitnum);
608 bitsize = unit - bitnum;
611 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
612 "backwards" from the size of the unit we are inserting into.
613 Otherwise, we count bits from the most significant on a
614 BYTES/BITS_BIG_ENDIAN machine. */
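/* For example, with a 32-bit OP_MODE, an 8-bit field at bit 4 becomes
   bit 20 (32 - 8 - 4), counted from the opposite end of the unit.  */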
616 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
617 bitnum = unit - bitsize - bitnum;
619 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
620 value1 = value;
621 if (GET_MODE (value) != op_mode)
623 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
625 /* Optimization: Don't bother really extending VALUE
626 if it has all the bits we will actually use. However,
627 if we must narrow it, be sure we do it correctly. */
629 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
631 rtx tmp;
633 tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
634 if (! tmp)
635 tmp = simplify_gen_subreg (op_mode,
636 force_reg (GET_MODE (value),
637 value1),
638 GET_MODE (value), 0);
639 value1 = tmp;
641 else
642 value1 = gen_lowpart (op_mode, value1);
644 else if (CONST_INT_P (value))
645 value1 = gen_int_mode (INTVAL (value), op_mode);
646 else
647 /* Parse phase is supposed to make VALUE's data type
648 match that of the component reference, which is a type
649 at least as wide as the field; so VALUE should have
650 a mode that corresponds to that type. */
651 gcc_assert (CONSTANT_P (value));
654 create_fixed_operand (&ops[0], xop0);
655 create_integer_operand (&ops[1], bitsize);
656 create_integer_operand (&ops[2], bitnum);
657 create_input_operand (&ops[3], value1, op_mode);
658 if (maybe_expand_insn (insv->icode, 4, ops))
660 if (copy_back)
661 convert_move (op0, xop0, true);
662 return true;
664 delete_insns_since (last);
665 return false;
668 /* A subroutine of store_bit_field, with the same arguments. Return true
669 if the operation could be implemented.
671 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
672 no other way of implementing the operation. If FALLBACK_P is false,
673 return false instead. */
675 static bool
676 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
677 unsigned HOST_WIDE_INT bitnum,
678 unsigned HOST_WIDE_INT bitregion_start,
679 unsigned HOST_WIDE_INT bitregion_end,
680 machine_mode fieldmode,
681 rtx value, bool reverse, bool fallback_p)
683 rtx op0 = str_rtx;
684 rtx orig_value;
686 while (GET_CODE (op0) == SUBREG)
688 /* The following line once was done only if WORDS_BIG_ENDIAN,
689 but I think that is a mistake. WORDS_BIG_ENDIAN is
690 meaningful at a much higher level; when structures are copied
691 between memory and regs, the higher-numbered regs
692 always get higher addresses. */
693 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
694 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
695 int byte_offset = 0;
697 /* Paradoxical subregs need special handling on big endian machines. */
698 if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
700 int difference = inner_mode_size - outer_mode_size;
702 if (WORDS_BIG_ENDIAN)
703 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
704 if (BYTES_BIG_ENDIAN)
705 byte_offset += difference % UNITS_PER_WORD;
707 else
708 byte_offset = SUBREG_BYTE (op0);
710 bitnum += byte_offset * BITS_PER_UNIT;
711 op0 = SUBREG_REG (op0);
714 /* No action is needed if the target is a register and if the field
715 lies completely outside that register. This can occur if the source
716 code contains an out-of-bounds access to a small array. */
717 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
718 return true;
720 /* Use vec_set patterns for inserting parts of vectors whenever
721 available. */
722 if (VECTOR_MODE_P (GET_MODE (op0))
723 && !MEM_P (op0)
724 && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
725 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
726 && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
727 && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
729 struct expand_operand ops[3];
730 machine_mode outermode = GET_MODE (op0);
731 machine_mode innermode = GET_MODE_INNER (outermode);
732 enum insn_code icode = optab_handler (vec_set_optab, outermode);
733 int pos = bitnum / GET_MODE_BITSIZE (innermode);
735 create_fixed_operand (&ops[0], op0);
736 create_input_operand (&ops[1], value, innermode);
737 create_integer_operand (&ops[2], pos);
738 if (maybe_expand_insn (icode, 3, ops))
739 return true;
742 /* If the target is a register, overwriting the entire object, or storing
743 a full-word or multi-word field can be done with just a SUBREG. */
744 if (!MEM_P (op0)
745 && bitsize == GET_MODE_BITSIZE (fieldmode)
746 && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
747 || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
749 /* Use the subreg machinery either to narrow OP0 to the required
750 words or to cope with mode punning between equal-sized modes.
751 In the latter case, use subreg on the rhs side, not lhs. */
752 rtx sub;
754 if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
756 sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
757 if (sub)
759 if (reverse)
760 sub = flip_storage_order (GET_MODE (op0), sub);
761 emit_move_insn (op0, sub);
762 return true;
765 else
767 sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
768 bitnum / BITS_PER_UNIT);
769 if (sub)
771 if (reverse)
772 value = flip_storage_order (fieldmode, value);
773 emit_move_insn (sub, value);
774 return true;
779 /* If the target is memory, storing any naturally aligned field can be
780 done with a simple store. For targets that support fast unaligned
781 memory, any naturally sized, unit aligned field can be done directly. */
782 if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
784 op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
785 if (reverse)
786 value = flip_storage_order (fieldmode, value);
787 emit_move_insn (op0, value);
788 return true;
791 /* Make sure we are playing with integral modes. Pun with subregs
792 if we aren't. This must come after the entire register case above,
793 since that case is valid for any mode. The following cases are only
794 valid for integral modes. */
796 machine_mode imode = int_mode_for_mode (GET_MODE (op0));
797 if (imode != GET_MODE (op0))
799 if (MEM_P (op0))
800 op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
801 else
803 gcc_assert (imode != BLKmode);
804 op0 = gen_lowpart (imode, op0);
809 /* Storing an lsb-aligned field in a register
810 can be done with a movstrict instruction. */
812 if (!MEM_P (op0)
813 && !reverse
814 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
815 && bitsize == GET_MODE_BITSIZE (fieldmode)
816 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
818 struct expand_operand ops[2];
819 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
820 rtx arg0 = op0;
821 unsigned HOST_WIDE_INT subreg_off;
823 if (GET_CODE (arg0) == SUBREG)
825 /* Else we've got some float mode source being extracted into
826 a different float mode destination -- this combination of
827 subregs results in Severe Tire Damage. */
828 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
829 || GET_MODE_CLASS (fieldmode) == MODE_INT
830 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
831 arg0 = SUBREG_REG (arg0);
834 subreg_off = bitnum / BITS_PER_UNIT;
835 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
837 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
839 create_fixed_operand (&ops[0], arg0);
840 /* Shrink the source operand to FIELDMODE. */
841 create_convert_operand_to (&ops[1], value, fieldmode, false);
842 if (maybe_expand_insn (icode, 2, ops))
843 return true;
847 /* Handle fields bigger than a word. */
849 if (bitsize > BITS_PER_WORD)
851 /* Here we transfer the words of the field
852 in the order least significant first.
853 This is because the most significant word is the one which may
854 be less than full.
855 However, only do that if the value is not BLKmode. */
857 const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
858 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
859 unsigned int i;
860 rtx_insn *last;
862 /* This is the mode we must force value to, so that there will be enough
863 subwords to extract. Note that fieldmode will often (always?) be
864 VOIDmode, because that is what store_field uses to indicate that this
865 is a bit field, but passing VOIDmode to operand_subword_force
866 is not allowed. */
867 fieldmode = GET_MODE (value);
868 if (fieldmode == VOIDmode)
869 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
871 last = get_last_insn ();
872 for (i = 0; i < nwords; i++)
874 /* If I is 0, use the low-order word in both field and target;
875 if I is 1, use the next to lowest word; and so on. */
876 unsigned int wordnum = (backwards
877 ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
878 - i - 1
879 : i);
880 unsigned int bit_offset = (backwards ^ reverse
881 ? MAX ((int) bitsize - ((int) i + 1)
882 * BITS_PER_WORD,
884 : (int) i * BITS_PER_WORD);
885 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
886 unsigned HOST_WIDE_INT new_bitsize =
887 MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
889 /* If the remaining chunk doesn't have full wordsize we have
890 to make sure that for big endian machines the higher order
891 bits are used. */
892 if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
893 value_word = simplify_expand_binop (word_mode, lshr_optab,
894 value_word,
895 GEN_INT (BITS_PER_WORD
896 - new_bitsize),
897 NULL_RTX, true,
898 OPTAB_LIB_WIDEN);
900 if (!store_bit_field_1 (op0, new_bitsize,
901 bitnum + bit_offset,
902 bitregion_start, bitregion_end,
903 word_mode,
904 value_word, reverse, fallback_p))
906 delete_insns_since (last);
907 return false;
910 return true;
913 /* If VALUE has a floating-point or complex mode, access it as an
914 integer of the corresponding size. This can occur on a machine
915 with 64 bit registers that uses SFmode for float. It can also
916 occur for unaligned float or complex fields. */
917 orig_value = value;
918 if (GET_MODE (value) != VOIDmode
919 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
920 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
922 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
923 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
926 /* If OP0 is a multi-word register, narrow it to the affected word.
927 If the region spans two words, defer to store_split_bit_field. */
928 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
930 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
931 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
932 gcc_assert (op0);
933 bitnum %= BITS_PER_WORD;
934 if (bitnum + bitsize > BITS_PER_WORD)
936 if (!fallback_p)
937 return false;
939 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
940 bitregion_end, value, reverse);
941 return true;
945 /* From here on we can assume that the field to be stored in fits
946 within a word. If the destination is a register, it too fits
947 in a word. */
949 extraction_insn insv;
950 if (!MEM_P (op0)
951 && !reverse
952 && get_best_reg_extraction_insn (&insv, EP_insv,
953 GET_MODE_BITSIZE (GET_MODE (op0)),
954 fieldmode)
955 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
956 return true;
958 /* If OP0 is a memory, try copying it to a register and seeing if a
959 cheap register alternative is available. */
960 if (MEM_P (op0) && !reverse)
962 if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
963 fieldmode)
964 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
965 return true;
967 rtx_insn *last = get_last_insn ();
969 /* Try loading part of OP0 into a register, inserting the bitfield
970 into that, and then copying the result back to OP0. */
971 unsigned HOST_WIDE_INT bitpos;
972 rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
973 bitregion_start, bitregion_end,
974 fieldmode, &bitpos);
975 if (xop0)
977 rtx tempreg = copy_to_reg (xop0);
978 if (store_bit_field_1 (tempreg, bitsize, bitpos,
979 bitregion_start, bitregion_end,
980 fieldmode, orig_value, reverse, false))
982 emit_move_insn (xop0, tempreg);
983 return true;
985 delete_insns_since (last);
989 if (!fallback_p)
990 return false;
992 store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
993 bitregion_end, value, reverse);
994 return true;
997 /* Generate code to store value from rtx VALUE
998 into a bit-field within structure STR_RTX
999 containing BITSIZE bits starting at bit BITNUM.
1001 BITREGION_START is bitpos of the first bitfield in this region.
1002 BITREGION_END is the bitpos of the ending bitfield in this region.
1003    These two fields are 0 if the C++ memory model does not apply,
1004 or we are not interested in keeping track of bitfield regions.
1006 FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1008 If REVERSE is true, the store is to be done in reverse order. */
1010 void
1011 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1012 unsigned HOST_WIDE_INT bitnum,
1013 unsigned HOST_WIDE_INT bitregion_start,
1014 unsigned HOST_WIDE_INT bitregion_end,
1015 machine_mode fieldmode,
1016 rtx value, bool reverse)
1018 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1019 if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode,
1020 bitregion_start, bitregion_end))
1022 /* Storing of a full word can be done with a simple store.
1023 We know here that the field can be accessed with one single
1024 instruction. For targets that support unaligned memory,
1025 an unaligned access may be necessary. */
1026 if (bitsize == GET_MODE_BITSIZE (fieldmode))
1028 str_rtx = adjust_bitfield_address (str_rtx, fieldmode,
1029 bitnum / BITS_PER_UNIT);
1030 if (reverse)
1031 value = flip_storage_order (fieldmode, value);
1032 gcc_assert (bitnum % BITS_PER_UNIT == 0);
1033 emit_move_insn (str_rtx, value);
1035 else
1037 rtx temp;
1039 str_rtx = narrow_bit_field_mem (str_rtx, fieldmode, bitsize, bitnum,
1040 &bitnum);
1041 gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (fieldmode));
1042 temp = copy_to_reg (str_rtx);
1043 if (!store_bit_field_1 (temp, bitsize, bitnum, 0, 0,
1044 fieldmode, value, reverse, true))
1045 gcc_unreachable ();
1047 emit_move_insn (str_rtx, temp);
1050 return;
1053 /* Under the C++0x memory model, we must not touch bits outside the
1054 bit region. Adjust the address to start at the beginning of the
1055 bit region. */
1056 if (MEM_P (str_rtx) && bitregion_start > 0)
1058 machine_mode bestmode;
1059 HOST_WIDE_INT offset, size;
1061 gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
1063 offset = bitregion_start / BITS_PER_UNIT;
1064 bitnum -= bitregion_start;
1065 size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
1066 bitregion_end -= bitregion_start;
1067 bitregion_start = 0;
1068 bestmode = get_best_mode (bitsize, bitnum,
1069 bitregion_start, bitregion_end,
1070 MEM_ALIGN (str_rtx), VOIDmode,
1071 MEM_VOLATILE_P (str_rtx));
1072 str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
1075 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1076 bitregion_start, bitregion_end,
1077 fieldmode, value, reverse, true))
1078 gcc_unreachable ();
1081 /* Use shifts and boolean operations to store VALUE into a bit field of
1082 width BITSIZE in OP0, starting at bit BITNUM.
1084 If REVERSE is true, the store is to be done in reverse order. */
1086 static void
1087 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1088 unsigned HOST_WIDE_INT bitnum,
1089 unsigned HOST_WIDE_INT bitregion_start,
1090 unsigned HOST_WIDE_INT bitregion_end,
1091 rtx value, bool reverse)
1093 /* There is a case not handled here:
1094 a structure with a known alignment of just a halfword
1095 and a field split across two aligned halfwords within the structure.
1096 Or likewise a structure with a known alignment of just a byte
1097 and a field split across two bytes.
1098 Such cases are not supposed to be able to occur. */
1100 if (MEM_P (op0))
1102 machine_mode mode = GET_MODE (op0);
1103 if (GET_MODE_BITSIZE (mode) == 0
1104 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
1105 mode = word_mode;
1106 mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1107 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
1109 if (mode == VOIDmode)
1111 /* The only way this should occur is if the field spans word
1112 boundaries. */
1113 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
1114 bitregion_end, value, reverse);
1115 return;
1118 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1121 store_fixed_bit_field_1 (op0, bitsize, bitnum, value, reverse);
1124 /* Helper function for store_fixed_bit_field, stores
1125 the bit field always using the MODE of OP0. */
1127 static void
1128 store_fixed_bit_field_1 (rtx op0, unsigned HOST_WIDE_INT bitsize,
1129 unsigned HOST_WIDE_INT bitnum,
1130 rtx value, bool reverse)
1132 machine_mode mode;
1133 rtx temp;
1134 int all_zero = 0;
1135 int all_one = 0;
1137 mode = GET_MODE (op0);
1138 gcc_assert (SCALAR_INT_MODE_P (mode));
1140 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1141 for invalid input, such as f5 from gcc.dg/pr48335-2.c. */
1143 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1144 /* BITNUM is the distance between our msb
1145 and that of the containing datum.
1146 Convert it to the distance from the lsb. */
1147 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1149 /* Now BITNUM is always the distance between our lsb
1150 and that of OP0. */
1152 /* Shift VALUE left by BITNUM bits. If VALUE is not constant,
1153 we must first convert its mode to MODE. */
1155 if (CONST_INT_P (value))
1157 unsigned HOST_WIDE_INT v = UINTVAL (value);
1159 if (bitsize < HOST_BITS_PER_WIDE_INT)
1160 v &= ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1;
1162 if (v == 0)
1163 all_zero = 1;
1164 else if ((bitsize < HOST_BITS_PER_WIDE_INT
1165 && v == ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1)
1166 || (bitsize == HOST_BITS_PER_WIDE_INT
1167 && v == (unsigned HOST_WIDE_INT) -1))
1168 all_one = 1;
1170 value = lshift_value (mode, v, bitnum);
1172 else
1174 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1175 && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1177 if (GET_MODE (value) != mode)
1178 value = convert_to_mode (mode, value, 1);
1180 if (must_and)
1181 value = expand_binop (mode, and_optab, value,
1182 mask_rtx (mode, 0, bitsize, 0),
1183 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1184 if (bitnum > 0)
1185 value = expand_shift (LSHIFT_EXPR, mode, value,
1186 bitnum, NULL_RTX, 1);
1189 if (reverse)
1190 value = flip_storage_order (mode, value);
1192 /* Now clear the chosen bits in OP0,
1193 except that if VALUE is -1 we need not bother. */
1194 /* We keep the intermediates in registers to allow CSE to combine
1195 consecutive bitfield assignments. */
1197 temp = force_reg (mode, op0);
1199 if (! all_one)
1201 rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
1202 if (reverse)
1203 mask = flip_storage_order (mode, mask);
1204 temp = expand_binop (mode, and_optab, temp, mask,
1205 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1206 temp = force_reg (mode, temp);
1209 /* Now logical-or VALUE into OP0, unless it is zero. */
1211 if (! all_zero)
1213 temp = expand_binop (mode, ior_optab, temp, value,
1214 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1215 temp = force_reg (mode, temp);
1218 if (op0 != temp)
1220 op0 = copy_rtx (op0);
1221 emit_move_insn (op0, temp);
1225 /* Store a bit field that is split across multiple accessible memory objects.
1227 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1228 BITSIZE is the field width; BITPOS the position of its first bit
1229 (within the word).
1230 VALUE is the value to store.
1232 If REVERSE is true, the store is to be done in reverse order.
1234 This does not yet handle fields wider than BITS_PER_WORD. */
1236 static void
1237 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1238 unsigned HOST_WIDE_INT bitpos,
1239 unsigned HOST_WIDE_INT bitregion_start,
1240 unsigned HOST_WIDE_INT bitregion_end,
1241 rtx value, bool reverse)
1243 unsigned int unit, total_bits, bitsdone = 0;
1245   /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1246 much at a time. */
1247 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1248 unit = BITS_PER_WORD;
1249 else
1250 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1252 /* If OP0 is a memory with a mode, then UNIT must not be larger than
1253 OP0's mode as well. Otherwise, store_fixed_bit_field will call us
1254 again, and we will mutually recurse forever. */
1255 if (MEM_P (op0) && GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1256 unit = MIN (unit, GET_MODE_BITSIZE (GET_MODE (op0)));
1258 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1259 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1260 that VALUE might be a floating-point constant. */
1261 if (CONSTANT_P (value) && !CONST_INT_P (value))
1263 rtx word = gen_lowpart_common (word_mode, value);
1265 if (word && (value != word))
1266 value = word;
1267 else
1268 value = gen_lowpart_common (word_mode,
1269 force_reg (GET_MODE (value) != VOIDmode
1270 ? GET_MODE (value)
1271 : word_mode, value));
1274 total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1276 while (bitsdone < bitsize)
1278 unsigned HOST_WIDE_INT thissize;
1279 unsigned HOST_WIDE_INT thispos;
1280 unsigned HOST_WIDE_INT offset;
1281 rtx part, word;
1283 offset = (bitpos + bitsdone) / unit;
1284 thispos = (bitpos + bitsdone) % unit;
1286 /* When region of bytes we can touch is restricted, decrease
1287 UNIT close to the end of the region as needed. If op0 is a REG
1288 or SUBREG of REG, don't do this, as there can't be data races
1289 on a register and we can expand shorter code in some cases. */
1290 if (bitregion_end
1291 && unit > BITS_PER_UNIT
1292 && bitpos + bitsdone - thispos + unit > bitregion_end + 1
1293 && !REG_P (op0)
1294 && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1296 unit = unit / 2;
1297 continue;
1300 /* THISSIZE must not overrun a word boundary. Otherwise,
1301 store_fixed_bit_field will call us again, and we will mutually
1302 recurse forever. */
1303 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1304 thissize = MIN (thissize, unit - thispos);
1306 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1308 /* Fetch successively less significant portions. */
1309 if (CONST_INT_P (value))
1310 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1311 >> (bitsize - bitsdone - thissize))
1312 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1313 /* Likewise, but the source is little-endian. */
1314 else if (reverse)
1315 part = extract_fixed_bit_field (word_mode, value, thissize,
1316 bitsize - bitsdone - thissize,
1317 NULL_RTX, 1, false);
1318 else
1320 int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1321 /* The args are chosen so that the last part includes the
1322 lsb. Give extract_bit_field the value it needs (with
1323 endianness compensation) to fetch the piece we want. */
1324 part = extract_fixed_bit_field (word_mode, value, thissize,
1325 total_bits - bitsize + bitsdone,
1326 NULL_RTX, 1, false);
1329 else
1331 /* Fetch successively more significant portions. */
1332 if (CONST_INT_P (value))
1333 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1334 >> bitsdone)
1335 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1336 /* Likewise, but the source is big-endian. */
1337 else if (reverse)
1338 part = extract_fixed_bit_field (word_mode, value, thissize,
1339 total_bits - bitsdone - thissize,
1340 NULL_RTX, 1, false);
1341 else
1342 part = extract_fixed_bit_field (word_mode, value, thissize,
1343 bitsdone, NULL_RTX, 1, false);
1346 /* If OP0 is a register, then handle OFFSET here.
1348 When handling multiword bitfields, extract_bit_field may pass
1349 down a word_mode SUBREG of a larger REG for a bitfield that actually
1350 crosses a word boundary. Thus, for a SUBREG, we must find
1351 the current word starting from the base register. */
1352 if (GET_CODE (op0) == SUBREG)
1354 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD)
1355 + (offset * unit / BITS_PER_WORD);
1356 machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
1357 if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1358 word = word_offset ? const0_rtx : op0;
1359 else
1360 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1361 GET_MODE (SUBREG_REG (op0)));
1362 offset &= BITS_PER_WORD / unit - 1;
1364 else if (REG_P (op0))
1366 machine_mode op0_mode = GET_MODE (op0);
1367 if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1368 word = offset ? const0_rtx : op0;
1369 else
1370 word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
1371 GET_MODE (op0));
1372 offset &= BITS_PER_WORD / unit - 1;
1374 else
1375 word = op0;
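/* For example, in the REG and SUBREG cases above, with UNIT == 8 and
   32-bit words, OFFSET has been masked to the range 0..3, the index of
   the byte within the selected word.  */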
1377 /* OFFSET is in UNITs, and UNIT is in bits. If WORD is const0_rtx,
1378 it is just an out-of-bounds access. Ignore it. */
1379 if (word != const0_rtx)
1380 store_fixed_bit_field (word, thissize, offset * unit + thispos,
1381 bitregion_start, bitregion_end, part,
1382 reverse);
1383 bitsdone += thissize;
1387 /* A subroutine of extract_bit_field_1 that converts return value X
1388 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1389 to extract_bit_field. */
1391 static rtx
1392 convert_extracted_bit_field (rtx x, machine_mode mode,
1393 machine_mode tmode, bool unsignedp)
1395 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1396 return x;
1398 /* If the x mode is not a scalar integral, first convert to the
1399 integer mode of that size and then access it as a floating-point
1400 value via a SUBREG. */
1401 if (!SCALAR_INT_MODE_P (tmode))
1403 machine_mode smode;
1405 smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1406 x = convert_to_mode (smode, x, unsignedp);
1407 x = force_reg (smode, x);
1408 return gen_lowpart (tmode, x);
1411 return convert_to_mode (tmode, x, unsignedp);
1414 /* Try to use an ext(z)v pattern to extract a field from OP0.
1415 Return the extracted value on success, otherwise return null.
1416 EXT_MODE is the mode of the extraction and the other arguments
1417 are as for extract_bit_field. */
1419 static rtx
1420 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1421 unsigned HOST_WIDE_INT bitsize,
1422 unsigned HOST_WIDE_INT bitnum,
1423 int unsignedp, rtx target,
1424 machine_mode mode, machine_mode tmode)
1426 struct expand_operand ops[4];
1427 rtx spec_target = target;
1428 rtx spec_target_subreg = 0;
1429 machine_mode ext_mode = extv->field_mode;
1430 unsigned unit = GET_MODE_BITSIZE (ext_mode);
1432 if (bitsize == 0 || unit < bitsize)
1433 return NULL_RTX;
1435 if (MEM_P (op0))
1436 /* Get a reference to the first byte of the field. */
1437 op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1438 &bitnum);
1439 else
1441 /* Convert from counting within OP0 to counting in EXT_MODE. */
1442 if (BYTES_BIG_ENDIAN)
1443 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1445 /* If op0 is a register, we need it in EXT_MODE to make it
1446 acceptable to the format of ext(z)v. */
1447 if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1448 return NULL_RTX;
1449 if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1450 op0 = gen_lowpart_SUBREG (ext_mode, op0);
1453 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1454 "backwards" from the size of the unit we are extracting from.
1455 Otherwise, we count bits from the most significant on a
1456 BYTES/BITS_BIG_ENDIAN machine. */
1458 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1459 bitnum = unit - bitsize - bitnum;
1461 if (target == 0)
1462 target = spec_target = gen_reg_rtx (tmode);
1464 if (GET_MODE (target) != ext_mode)
1466 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1467 between the mode of the extraction (word_mode) and the target
1468 mode. Instead, create a temporary and use convert_move to set
1469 the target. */
1470 if (REG_P (target)
1471 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1473 target = gen_lowpart (ext_mode, target);
1474 if (GET_MODE_PRECISION (ext_mode)
1475 > GET_MODE_PRECISION (GET_MODE (spec_target)))
1476 spec_target_subreg = target;
1478 else
1479 target = gen_reg_rtx (ext_mode);
1482 create_output_operand (&ops[0], target, ext_mode);
1483 create_fixed_operand (&ops[1], op0);
1484 create_integer_operand (&ops[2], bitsize);
1485 create_integer_operand (&ops[3], bitnum);
1486 if (maybe_expand_insn (extv->icode, 4, ops))
1488 target = ops[0].value;
1489 if (target == spec_target)
1490 return target;
1491 if (target == spec_target_subreg)
1492 return spec_target;
1493 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1495 return NULL_RTX;
1498 /* A subroutine of extract_bit_field, with the same arguments.
1499 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1500 if we can find no other means of implementing the operation.
1501    If FALLBACK_P is false, return NULL instead.  */
1503 static rtx
1504 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1505 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1506 machine_mode mode, machine_mode tmode,
1507 bool reverse, bool fallback_p)
1509 rtx op0 = str_rtx;
1510 machine_mode int_mode;
1511 machine_mode mode1;
1513 if (tmode == VOIDmode)
1514 tmode = mode;
1516 while (GET_CODE (op0) == SUBREG)
1518 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1519 op0 = SUBREG_REG (op0);
1522 /* If we have an out-of-bounds access to a register, just return an
1523 uninitialized register of the required mode. This can occur if the
1524 source code contains an out-of-bounds access to a small array. */
1525 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1526 return gen_reg_rtx (tmode);
1528 if (REG_P (op0)
1529 && mode == GET_MODE (op0)
1530 && bitnum == 0
1531 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1533 if (reverse)
1534 op0 = flip_storage_order (mode, op0);
1535 /* We're trying to extract a full register from itself. */
1536 return op0;
1539 /* See if we can get a better vector mode before extracting. */
1540 if (VECTOR_MODE_P (GET_MODE (op0))
1541 && !MEM_P (op0)
1542 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1544 machine_mode new_mode;
1546 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1547 new_mode = MIN_MODE_VECTOR_FLOAT;
1548 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1549 new_mode = MIN_MODE_VECTOR_FRACT;
1550 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1551 new_mode = MIN_MODE_VECTOR_UFRACT;
1552 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1553 new_mode = MIN_MODE_VECTOR_ACCUM;
1554 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1555 new_mode = MIN_MODE_VECTOR_UACCUM;
1556 else
1557 new_mode = MIN_MODE_VECTOR_INT;
1559 for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1560 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1561 && targetm.vector_mode_supported_p (new_mode))
1562 break;
1563 if (new_mode != VOIDmode)
1564 op0 = gen_lowpart (new_mode, op0);
1567 /* Use vec_extract patterns for extracting parts of vectors whenever
1568 available. */
1569 if (VECTOR_MODE_P (GET_MODE (op0))
1570 && !MEM_P (op0)
1571 && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1572 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1573 == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1575 struct expand_operand ops[3];
1576 machine_mode outermode = GET_MODE (op0);
1577 machine_mode innermode = GET_MODE_INNER (outermode);
1578 enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1579 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1581 create_output_operand (&ops[0], target, innermode);
1582 create_input_operand (&ops[1], op0, outermode);
1583 create_integer_operand (&ops[2], pos);
1584 if (maybe_expand_insn (icode, 3, ops))
1586 target = ops[0].value;
1587 if (GET_MODE (target) != mode)
1588 return gen_lowpart (tmode, target);
1589 return target;
1593 /* Make sure we are playing with integral modes. Pun with subregs
1594 if we aren't. */
1596 machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1597 if (imode != GET_MODE (op0))
1599 if (MEM_P (op0))
1600 op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
1601 else if (imode != BLKmode)
1603 op0 = gen_lowpart (imode, op0);
1605 /* If we got a SUBREG, force it into a register since we
1606 aren't going to be able to do another SUBREG on it. */
1607 if (GET_CODE (op0) == SUBREG)
1608 op0 = force_reg (imode, op0);
1610 else if (REG_P (op0))
1612 rtx reg, subreg;
1613 imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1614 MODE_INT);
1615 reg = gen_reg_rtx (imode);
1616 subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1617 emit_move_insn (subreg, op0);
1618 op0 = reg;
1619 bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1621 else
1623 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1624 rtx mem = assign_stack_temp (GET_MODE (op0), size);
1625 emit_move_insn (mem, op0);
1626 op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1631 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1632 If that's wrong, the solution is to test for it and set TARGET to 0
1633 if needed. */
1635 /* Get the mode of the field to use for atomic access or subreg
1636 conversion. */
1637 mode1 = mode;
1638 if (SCALAR_INT_MODE_P (tmode))
1640 machine_mode try_mode = mode_for_size (bitsize,
1641 GET_MODE_CLASS (tmode), 0);
1642 if (try_mode != BLKmode)
1643 mode1 = try_mode;
1645 gcc_assert (mode1 != BLKmode);
1647 /* Extraction of a full MODE1 value can be done with a subreg as long
1648 as the least significant bit of the value is the least significant
1649 bit of either OP0 or a word of OP0. */
1650 if (!MEM_P (op0)
1651 && !reverse
1652 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1653 && bitsize == GET_MODE_BITSIZE (mode1)
1654 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1656 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1657 bitnum / BITS_PER_UNIT);
1658 if (sub)
1659 return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1662 /* Extraction of a full MODE1 value can be done with a load as long as
1663 the field is on a byte boundary and is sufficiently aligned. */
1664 if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1666 op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1667 if (reverse)
1668 op0 = flip_storage_order (mode1, op0);
1669 return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1672 /* Handle fields bigger than a word. */
1674 if (bitsize > BITS_PER_WORD)
1676 /* Here we transfer the words of the field
1677 in the order least significant first.
1678 This is because the most significant word is the one which may
1679 be less than full. */
1681 const bool backwards = WORDS_BIG_ENDIAN;
1682 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1683 unsigned int i;
1684 rtx_insn *last;
1686 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1687 target = gen_reg_rtx (mode);
1689 /* Indicate for flow that the entire target reg is being set. */
1690 emit_clobber (target);
1692 last = get_last_insn ();
1693 for (i = 0; i < nwords; i++)
1695 /* If I is 0, use the low-order word in both field and target;
1696 if I is 1, use the next to lowest word; and so on. */
1697 /* Word number in TARGET to use. */
1698 unsigned int wordnum
1699 = (backwards
1700 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1701 : i);
1702 /* Offset from start of field in OP0. */
1703 unsigned int bit_offset = (backwards ^ reverse
1704 ? MAX ((int) bitsize - ((int) i + 1)
1705 * BITS_PER_WORD,
1707 : (int) i * BITS_PER_WORD);
1708 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1709 rtx result_part
1710 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1711 bitsize - i * BITS_PER_WORD),
1712 bitnum + bit_offset, 1, target_part,
1713 mode, word_mode, reverse, fallback_p);
1715 gcc_assert (target_part);
1716 if (!result_part)
1718 delete_insns_since (last);
1719 return NULL;
1722 if (result_part != target_part)
1723 emit_move_insn (target_part, result_part);
1726 if (unsignedp)
1728 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1729 need to be zero'd out. */
1730 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1732 unsigned int i, total_words;
1734 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1735 for (i = nwords; i < total_words; i++)
1736 emit_move_insn
1737 (operand_subword (target,
1738 backwards ? total_words - i - 1 : i,
1739 1, VOIDmode),
1740 const0_rtx);
1742 return target;
1745 /* Signed bit field: sign-extend with two arithmetic shifts. */
1746 target = expand_shift (LSHIFT_EXPR, mode, target,
1747 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1748 return expand_shift (RSHIFT_EXPR, mode, target,
1749 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1752 /* If OP0 is a multi-word register, narrow it to the affected word.
1753 If the region spans two words, defer to extract_split_bit_field. */
1754 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1756 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1757 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1758 bitnum %= BITS_PER_WORD;
1759 if (bitnum + bitsize > BITS_PER_WORD)
1761 if (!fallback_p)
1762 return NULL_RTX;
1763 target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp,
1764 reverse);
1765 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1769 /* From here on we know the desired field is smaller than a word.
1770 If OP0 is a register, it too fits within a word. */
1771 enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1772 extraction_insn extv;
1773 if (!MEM_P (op0)
1774 && !reverse
1775 /* ??? We could limit the structure size to the part of OP0 that
1776 contains the field, with appropriate checks for endianness
1777 and TRULY_NOOP_TRUNCATION. */
1778 && get_best_reg_extraction_insn (&extv, pattern,
1779 GET_MODE_BITSIZE (GET_MODE (op0)),
1780 tmode))
1782 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1783 unsignedp, target, mode,
1784 tmode);
1785 if (result)
1786 return result;
1789 /* If OP0 is a memory, try copying it to a register and seeing if a
1790 cheap register alternative is available. */
1791   if (MEM_P (op0) && !reverse)
1793 if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1794 tmode))
1796 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1797 bitnum, unsignedp,
1798 target, mode,
1799 tmode);
1800 if (result)
1801 return result;
1804 rtx_insn *last = get_last_insn ();
1806 /* Try loading part of OP0 into a register and extracting the
1807 bitfield from that. */
1808 unsigned HOST_WIDE_INT bitpos;
1809 rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1810 0, 0, tmode, &bitpos);
1811 if (xop0)
1813 xop0 = copy_to_reg (xop0);
1814 rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1815 unsignedp, target,
1816 mode, tmode, reverse, false);
1817 if (result)
1818 return result;
1819 delete_insns_since (last);
1823 if (!fallback_p)
1824 return NULL;
1826 /* Find a correspondingly-sized integer field, so we can apply
1827 shifts and masks to it. */
1828 int_mode = int_mode_for_mode (tmode);
1829 if (int_mode == BLKmode)
1830 int_mode = int_mode_for_mode (mode);
1831 /* Should probably push op0 out to memory and then do a load. */
1832 gcc_assert (int_mode != BLKmode);
1834 target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum,
1835 target, unsignedp, reverse);
1836 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1839 /* Generate code to extract a byte-field from STR_RTX
1840 containing BITSIZE bits, starting at BITNUM,
1841 and put it in TARGET if possible (if TARGET is nonzero).
1842 Regardless of TARGET, we return the rtx for where the value is placed.
1844 STR_RTX is the structure containing the byte (a REG or MEM).
1845 UNSIGNEDP is nonzero if this is an unsigned bit field.
1846 MODE is the natural mode of the field value once extracted.
1847 TMODE is the mode the caller would like the value to have;
1848 but the value may be returned with type MODE instead.
1850 If REVERSE is true, the extraction is to be done in reverse order.
1852 If a TARGET is specified and we can store in it at no extra cost,
1853 we do so, and return TARGET.
1854 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1855 if they are equally easy. */
1858 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1859 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1860 machine_mode mode, machine_mode tmode, bool reverse)
1862 machine_mode mode1;
1864 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1865 if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1866 mode1 = GET_MODE (str_rtx);
1867 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1868 mode1 = GET_MODE (target);
1869 else
1870 mode1 = tmode;
1872 if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, mode1, 0, 0))
1874 /* Extraction of a full MODE1 value can be done with a simple load.
1875 We know here that the field can be accessed with one single
1876 instruction. For targets that support unaligned memory,
1877 an unaligned access may be necessary. */
1878 if (bitsize == GET_MODE_BITSIZE (mode1))
1880 rtx result = adjust_bitfield_address (str_rtx, mode1,
1881 bitnum / BITS_PER_UNIT);
1882 if (reverse)
1883 result = flip_storage_order (mode1, result);
1884 gcc_assert (bitnum % BITS_PER_UNIT == 0);
1885 return convert_extracted_bit_field (result, mode, tmode, unsignedp);
1888 str_rtx = narrow_bit_field_mem (str_rtx, mode1, bitsize, bitnum,
1889 &bitnum);
1890 gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (mode1));
1891 str_rtx = copy_to_reg (str_rtx);
1894 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1895 target, mode, tmode, reverse, true);
1898 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1899 from bit BITNUM of OP0.
1901 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1902 If REVERSE is true, the extraction is to be done in reverse order.
1904 If TARGET is nonzero, attempts to store the value there
1905 and return TARGET, but this is not guaranteed.
1906 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
1908 static rtx
1909 extract_fixed_bit_field (machine_mode tmode, rtx op0,
1910 unsigned HOST_WIDE_INT bitsize,
1911 unsigned HOST_WIDE_INT bitnum, rtx target,
1912 int unsignedp, bool reverse)
1914 if (MEM_P (op0))
1916 machine_mode mode
1917 = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), word_mode,
1918 MEM_VOLATILE_P (op0));
1920 if (mode == VOIDmode)
1921 /* The only way this should occur is if the field spans word
1922 boundaries. */
1923 return extract_split_bit_field (op0, bitsize, bitnum, unsignedp,
1924 reverse);
1926 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1929 return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
1930 target, unsignedp, reverse);
1933 /* Helper function for extract_fixed_bit_field; it extracts
1934    the bit field, always using the mode of OP0.  */
1936 static rtx
1937 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0,
1938 unsigned HOST_WIDE_INT bitsize,
1939 unsigned HOST_WIDE_INT bitnum, rtx target,
1940 int unsignedp, bool reverse)
1942 machine_mode mode = GET_MODE (op0);
1943 gcc_assert (SCALAR_INT_MODE_P (mode));
1945 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1946    for invalid input, such as extracting the equivalent of f5 from
1947 gcc.dg/pr48335-2.c. */
1949 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1950 /* BITNUM is the distance between our msb and that of OP0.
1951 Convert it to the distance from the lsb. */
1952 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1954 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
1955 We have reduced the big-endian case to the little-endian case. */
1956 if (reverse)
1957 op0 = flip_storage_order (mode, op0);
1959 if (unsignedp)
1961 if (bitnum)
1963 /* If the field does not already start at the lsb,
1964 shift it so it does. */
1965 /* Maybe propagate the target for the shift. */
1966 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1967 if (tmode != mode)
1968 subtarget = 0;
1969 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
1971 /* Convert the value to the desired mode. */
1972 if (mode != tmode)
1973 op0 = convert_to_mode (tmode, op0, 1);
1975 /* Unless the msb of the field used to be the msb when we shifted,
1976 mask out the upper bits. */
1978 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
1979 return expand_binop (GET_MODE (op0), and_optab, op0,
1980 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1981 target, 1, OPTAB_LIB_WIDEN);
1982 return op0;
1985 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1986 then arithmetic-shift its lsb to the lsb of the word. */
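  /* For instance, an 8-bit signed field whose least significant bit is
     bit 4 of OP0 is extracted in HImode by a left shift of 4 followed by
     an arithmetic right shift of 8.  */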
1987 op0 = force_reg (mode, op0);
1989 /* Find the narrowest integer mode that contains the field. */
1991 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1992 mode = GET_MODE_WIDER_MODE (mode))
1993 if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
1995 op0 = convert_to_mode (mode, op0, 0);
1996 break;
1999 if (mode != tmode)
2000 target = 0;
2002 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2004 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2005 /* Maybe propagate the target for the shift. */
2006 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2007 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2010 return expand_shift (RSHIFT_EXPR, mode, op0,
2011 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
2014 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2015 VALUE << BITPOS. */
2017 static rtx
2018 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2019 int bitpos)
2021 return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2024 /* Extract a bit field that is split across two words
2025 and return an RTX for the result.
2027 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2028 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2029 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2031 If REVERSE is true, the extraction is to be done in reverse order. */
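/* For instance, with a 32-bit word size, a 20-bit field starting at bit 24
   is read as the top 8 bits of the first word plus the low 12 bits of the
   second; on a little-endian (and non-reversed) target the second part is
   shifted left by 8 before the two parts are ORed together.  */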
2033 static rtx
2034 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
2035 unsigned HOST_WIDE_INT bitpos, int unsignedp,
2036 bool reverse)
2038 unsigned int unit;
2039 unsigned int bitsdone = 0;
2040 rtx result = NULL_RTX;
2041 int first = 1;
2043 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2044 much at a time. */
2045 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2046 unit = BITS_PER_WORD;
2047 else
2048 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2050 while (bitsdone < bitsize)
2052 unsigned HOST_WIDE_INT thissize;
2053 rtx part, word;
2054 unsigned HOST_WIDE_INT thispos;
2055 unsigned HOST_WIDE_INT offset;
2057 offset = (bitpos + bitsdone) / unit;
2058 thispos = (bitpos + bitsdone) % unit;
2060 /* THISSIZE must not overrun a word boundary. Otherwise,
2061 extract_fixed_bit_field will call us again, and we will mutually
2062 recurse forever. */
2063 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2064 thissize = MIN (thissize, unit - thispos);
2066 /* If OP0 is a register, then handle OFFSET here.
2068 When handling multiword bitfields, extract_bit_field may pass
2069 down a word_mode SUBREG of a larger REG for a bitfield that actually
2070 crosses a word boundary. Thus, for a SUBREG, we must find
2071 the current word starting from the base register. */
2072 if (GET_CODE (op0) == SUBREG)
2074 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
2075 word = operand_subword_force (SUBREG_REG (op0), word_offset,
2076 GET_MODE (SUBREG_REG (op0)));
2077 offset = 0;
2079 else if (REG_P (op0))
2081 word = operand_subword_force (op0, offset, GET_MODE (op0));
2082 offset = 0;
2084 else
2085 word = op0;
2087 /* Extract the parts in bit-counting order,
2088 whose meaning is determined by BYTES_PER_UNIT.
2089 OFFSET is in UNITs, and UNIT is in bits. */
2090 part = extract_fixed_bit_field (word_mode, word, thissize,
2091 offset * unit + thispos, 0, 1, reverse);
2092 bitsdone += thissize;
2094 /* Shift this part into place for the result. */
2095 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2097 if (bitsize != bitsdone)
2098 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2099 bitsize - bitsdone, 0, 1);
2101 else
2103 if (bitsdone != thissize)
2104 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2105 bitsdone - thissize, 0, 1);
2108 if (first)
2109 result = part;
2110 else
2111 /* Combine the parts with bitwise or. This works
2112 because we extracted each part as an unsigned bit field. */
2113 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2114 OPTAB_LIB_WIDEN);
2116 first = 0;
2119 /* Unsigned bit field: we are done. */
2120 if (unsignedp)
2121 return result;
2122 /* Signed bit field: sign-extend with two arithmetic shifts. */
2123 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2124 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2125 return expand_shift (RSHIFT_EXPR, word_mode, result,
2126 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2129 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2130 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2131 MODE, fill the upper bits with zeros. Fail if the layout of either
2132 mode is unknown (as for CC modes) or if the extraction would involve
2133 unprofitable mode punning. Return the value on success, otherwise
2134 return null.
2136 This is different from gen_lowpart* in these respects:
2138 - the returned value must always be considered an rvalue
2140 - when MODE is wider than SRC_MODE, the extraction involves
2141 a zero extension
2143 - when MODE is smaller than SRC_MODE, the extraction involves
2144 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2146 In other words, this routine performs a computation, whereas the
2147 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2148 operations. */
2151 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2153 machine_mode int_mode, src_int_mode;
2155 if (mode == src_mode)
2156 return src;
2158 if (CONSTANT_P (src))
2160 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2161 fails, it will happily create (subreg (symbol_ref)) or similar
2162 invalid SUBREGs. */
2163 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2164 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2165 if (ret)
2166 return ret;
2168 if (GET_MODE (src) == VOIDmode
2169 || !validate_subreg (mode, src_mode, src, byte))
2170 return NULL_RTX;
2172 src = force_reg (GET_MODE (src), src);
2173 return gen_rtx_SUBREG (mode, src, byte);
2176 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2177 return NULL_RTX;
2179 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2180 && MODES_TIEABLE_P (mode, src_mode))
2182 rtx x = gen_lowpart_common (mode, src);
2183 if (x)
2184 return x;
2187 src_int_mode = int_mode_for_mode (src_mode);
2188 int_mode = int_mode_for_mode (mode);
2189 if (src_int_mode == BLKmode || int_mode == BLKmode)
2190 return NULL_RTX;
2192 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2193 return NULL_RTX;
2194 if (!MODES_TIEABLE_P (int_mode, mode))
2195 return NULL_RTX;
2197 src = gen_lowpart (src_int_mode, src);
2198 src = convert_modes (int_mode, src_int_mode, src, true);
2199 src = gen_lowpart (mode, src);
2200 return src;
2203 /* Add INC into TARGET. */
2205 void
2206 expand_inc (rtx target, rtx inc)
2208 rtx value = expand_binop (GET_MODE (target), add_optab,
2209 target, inc,
2210 target, 0, OPTAB_LIB_WIDEN);
2211 if (value != target)
2212 emit_move_insn (target, value);
2215 /* Subtract DEC from TARGET. */
2217 void
2218 expand_dec (rtx target, rtx dec)
2220 rtx value = expand_binop (GET_MODE (target), sub_optab,
2221 target, dec,
2222 target, 0, OPTAB_LIB_WIDEN);
2223 if (value != target)
2224 emit_move_insn (target, value);
2227 /* Output a shift instruction for expression code CODE,
2228 with SHIFTED being the rtx for the value to shift,
2229 and AMOUNT the rtx for the amount to shift by.
2230 Store the result in the rtx TARGET, if that is convenient.
2231 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2232 Return the rtx for where the value is. */
2234 static rtx
2235 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2236 rtx amount, rtx target, int unsignedp)
2238 rtx op1, temp = 0;
2239 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2240 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2241 optab lshift_optab = ashl_optab;
2242 optab rshift_arith_optab = ashr_optab;
2243 optab rshift_uns_optab = lshr_optab;
2244 optab lrotate_optab = rotl_optab;
2245 optab rrotate_optab = rotr_optab;
2246 machine_mode op1_mode;
2247 machine_mode scalar_mode = mode;
2248 int attempt;
2249 bool speed = optimize_insn_for_speed_p ();
2251 if (VECTOR_MODE_P (mode))
2252 scalar_mode = GET_MODE_INNER (mode);
2253 op1 = amount;
2254 op1_mode = GET_MODE (op1);
2256 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2257 shift amount is a vector, use the vector/vector shift patterns. */
2258 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2260 lshift_optab = vashl_optab;
2261 rshift_arith_optab = vashr_optab;
2262 rshift_uns_optab = vlshr_optab;
2263 lrotate_optab = vrotl_optab;
2264 rrotate_optab = vrotr_optab;
2267   /* We previously detected shift counts computed by NEGATE_EXPR and
2268      shifted in the other direction, but that does not work on all
2269      machines.  */
2271 if (SHIFT_COUNT_TRUNCATED)
2273 if (CONST_INT_P (op1)
2274 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2275 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2276 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2277 % GET_MODE_BITSIZE (scalar_mode));
2278 else if (GET_CODE (op1) == SUBREG
2279 && subreg_lowpart_p (op1)
2280 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2281 && SCALAR_INT_MODE_P (GET_MODE (op1)))
2282 op1 = SUBREG_REG (op1);
2285 /* Canonicalize rotates by constant amount. If op1 is bitsize / 2,
2286 prefer left rotation, if op1 is from bitsize / 2 + 1 to
2287 bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2288 amount instead. */
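  /* For instance, in SImode a left rotate by 25 becomes a right rotate
     by 7.  */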
2289 if (rotate
2290 && CONST_INT_P (op1)
2291 && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2292 GET_MODE_BITSIZE (scalar_mode) - 1))
2294 op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2295 left = !left;
2296 code = left ? LROTATE_EXPR : RROTATE_EXPR;
2299   /* Rotation of 16-bit values by 8 bits is effectively equivalent to a bswaphi.
2300 Note that this is not the case for bigger values. For instance a rotation
2301 of 0x01020304 by 16 bits gives 0x03040102 which is different from
2302 0x04030201 (bswapsi). */
2303 if (rotate
2304 && CONST_INT_P (op1)
2305 && INTVAL (op1) == BITS_PER_UNIT
2306 && GET_MODE_SIZE (scalar_mode) == 2
2307 && optab_handler (bswap_optab, HImode) != CODE_FOR_nothing)
2308 return expand_unop (HImode, bswap_optab, shifted, NULL_RTX,
2309 unsignedp);
2311 if (op1 == const0_rtx)
2312 return shifted;
2314   /* Check whether it's cheaper to implement a left shift by a constant
2315 bit count by a sequence of additions. */
2316 if (code == LSHIFT_EXPR
2317 && CONST_INT_P (op1)
2318 && INTVAL (op1) > 0
2319 && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2320 && INTVAL (op1) < MAX_BITS_PER_WORD
2321 && (shift_cost (speed, mode, INTVAL (op1))
2322 > INTVAL (op1) * add_cost (speed, mode))
2323 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2325 int i;
2326 for (i = 0; i < INTVAL (op1); i++)
2328 temp = force_reg (mode, shifted);
2329 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2330 unsignedp, OPTAB_LIB_WIDEN);
2332 return shifted;
2335 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2337 enum optab_methods methods;
2339 if (attempt == 0)
2340 methods = OPTAB_DIRECT;
2341 else if (attempt == 1)
2342 methods = OPTAB_WIDEN;
2343 else
2344 methods = OPTAB_LIB_WIDEN;
2346 if (rotate)
2348 /* Widening does not work for rotation. */
2349 if (methods == OPTAB_WIDEN)
2350 continue;
2351 else if (methods == OPTAB_LIB_WIDEN)
2353 /* If we have been unable to open-code this by a rotation,
2354 do it as the IOR of two shifts. I.e., to rotate A
2355 by N bits, compute
2356 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2357 where C is the bitsize of A.
2359 It is theoretically possible that the target machine might
2360 not be able to perform either shift and hence we would
2361 be making two libcalls rather than just the one for the
2362 shift (similarly if IOR could not be done). We will allow
2363 this extremely unlikely lossage to avoid complicating the
2364 code below. */
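	      /* E.g. rotating the SImode value 0x12345678 left by 8 yields
		 (0x12345678 << 8) | (0x12345678 >> 24) = 0x34567812.  */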
2366 rtx subtarget = target == shifted ? 0 : target;
2367 rtx new_amount, other_amount;
2368 rtx temp1;
2370 new_amount = op1;
2371 if (op1 == const0_rtx)
2372 return shifted;
2373 else if (CONST_INT_P (op1))
2374 other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode)
2375 - INTVAL (op1));
2376 else
2378 other_amount
2379 = simplify_gen_unary (NEG, GET_MODE (op1),
2380 op1, GET_MODE (op1));
2381 HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2382 other_amount
2383 = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2384 gen_int_mode (mask, GET_MODE (op1)));
2387 shifted = force_reg (mode, shifted);
2389 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2390 mode, shifted, new_amount, 0, 1);
2391 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2392 mode, shifted, other_amount,
2393 subtarget, 1);
2394 return expand_binop (mode, ior_optab, temp, temp1, target,
2395 unsignedp, methods);
2398 temp = expand_binop (mode,
2399 left ? lrotate_optab : rrotate_optab,
2400 shifted, op1, target, unsignedp, methods);
2402 else if (unsignedp)
2403 temp = expand_binop (mode,
2404 left ? lshift_optab : rshift_uns_optab,
2405 shifted, op1, target, unsignedp, methods);
2407 /* Do arithmetic shifts.
2408 Also, if we are going to widen the operand, we can just as well
2409 use an arithmetic right-shift instead of a logical one. */
2410 if (temp == 0 && ! rotate
2411 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2413 enum optab_methods methods1 = methods;
2415 /* If trying to widen a log shift to an arithmetic shift,
2416 don't accept an arithmetic shift of the same size. */
2417 if (unsignedp)
2418 methods1 = OPTAB_MUST_WIDEN;
2420 /* Arithmetic shift */
2422 temp = expand_binop (mode,
2423 left ? lshift_optab : rshift_arith_optab,
2424 shifted, op1, target, unsignedp, methods1);
2427 /* We used to try extzv here for logical right shifts, but that was
2428 only useful for one machine, the VAX, and caused poor code
2429 generation there for lshrdi3, so the code was deleted and a
2430 define_expand for lshrsi3 was added to vax.md. */
2433 gcc_assert (temp);
2434 return temp;
2437 /* Output a shift instruction for expression code CODE,
2438 with SHIFTED being the rtx for the value to shift,
2439 and AMOUNT the amount to shift by.
2440 Store the result in the rtx TARGET, if that is convenient.
2441 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2442 Return the rtx for where the value is. */
2445 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2446 int amount, rtx target, int unsignedp)
2448 return expand_shift_1 (code, mode,
2449 shifted, GEN_INT (amount), target, unsignedp);
2452 /* Output a shift instruction for expression code CODE,
2453 with SHIFTED being the rtx for the value to shift,
2454 and AMOUNT the tree for the amount to shift by.
2455 Store the result in the rtx TARGET, if that is convenient.
2456 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2457 Return the rtx for where the value is. */
2460 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2461 tree amount, rtx target, int unsignedp)
2463 return expand_shift_1 (code, mode,
2464 shifted, expand_normal (amount), target, unsignedp);
2468 /* Indicates the type of fixup needed after a constant multiplication.
2469 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2470 the result should be negated, and ADD_VARIANT means that the
2471 multiplicand should be added to the result. */
2472 enum mult_variant {basic_variant, negate_variant, add_variant};
2474 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2475 const struct mult_cost *, machine_mode mode);
2476 static bool choose_mult_variant (machine_mode, HOST_WIDE_INT,
2477 struct algorithm *, enum mult_variant *, int);
2478 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2479 const struct algorithm *, enum mult_variant);
2480 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2481 static rtx extract_high_half (machine_mode, rtx);
2482 static rtx expmed_mult_highpart (machine_mode, rtx, rtx, rtx, int, int);
2483 static rtx expmed_mult_highpart_optab (machine_mode, rtx, rtx, rtx,
2484 int, int);
2485 /* Compute and return the best algorithm for multiplying by T.
2486    The algorithm must cost less than COST_LIMIT.
2487    If retval.cost >= COST_LIMIT, no algorithm was found and all
2488    other fields of the returned struct are undefined.
2489 MODE is the machine mode of the multiplication. */
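/* For instance, multiplication by 10 might be synthesized as
   ((x << 2) + x) << 1, i.e. the op sequence alg_m, alg_add_t2_m (log 2),
   alg_shift (log 1).  */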
2491 static void
2492 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2493 const struct mult_cost *cost_limit, machine_mode mode)
2495 int m;
2496 struct algorithm *alg_in, *best_alg;
2497 struct mult_cost best_cost;
2498 struct mult_cost new_limit;
2499 int op_cost, op_latency;
2500 unsigned HOST_WIDE_INT orig_t = t;
2501 unsigned HOST_WIDE_INT q;
2502 int maxm, hash_index;
2503 bool cache_hit = false;
2504 enum alg_code cache_alg = alg_zero;
2505 bool speed = optimize_insn_for_speed_p ();
2506 machine_mode imode;
2507 struct alg_hash_entry *entry_ptr;
2509 /* Indicate that no algorithm is yet found. If no algorithm
2510 is found, this value will be returned and indicate failure. */
2511 alg_out->cost.cost = cost_limit->cost + 1;
2512 alg_out->cost.latency = cost_limit->latency + 1;
2514 if (cost_limit->cost < 0
2515 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2516 return;
2518 /* Be prepared for vector modes. */
2519 imode = GET_MODE_INNER (mode);
2520 if (imode == VOIDmode)
2521 imode = mode;
2523 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2525 /* Restrict the bits of "t" to the multiplication's mode. */
2526 t &= GET_MODE_MASK (imode);
2528 /* t == 1 can be done in zero cost. */
2529 if (t == 1)
2531 alg_out->ops = 1;
2532 alg_out->cost.cost = 0;
2533 alg_out->cost.latency = 0;
2534 alg_out->op[0] = alg_m;
2535 return;
2538 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2539 fail now. */
2540 if (t == 0)
2542 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2543 return;
2544 else
2546 alg_out->ops = 1;
2547 alg_out->cost.cost = zero_cost (speed);
2548 alg_out->cost.latency = zero_cost (speed);
2549 alg_out->op[0] = alg_zero;
2550 return;
2554 /* We'll be needing a couple extra algorithm structures now. */
2556 alg_in = XALLOCA (struct algorithm);
2557 best_alg = XALLOCA (struct algorithm);
2558 best_cost = *cost_limit;
2560 /* Compute the hash index. */
2561 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2563 /* See if we already know what to do for T. */
2564 entry_ptr = alg_hash_entry_ptr (hash_index);
2565 if (entry_ptr->t == t
2566       && entry_ptr->mode == mode
2568 && entry_ptr->speed == speed
2569 && entry_ptr->alg != alg_unknown)
2571 cache_alg = entry_ptr->alg;
2573 if (cache_alg == alg_impossible)
2575 /* The cache tells us that it's impossible to synthesize
2576 multiplication by T within entry_ptr->cost. */
2577 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2578 /* COST_LIMIT is at least as restrictive as the one
2579 recorded in the hash table, in which case we have no
2580 hope of synthesizing a multiplication. Just
2581 return. */
2582 return;
2584 /* If we get here, COST_LIMIT is less restrictive than the
2585 one recorded in the hash table, so we may be able to
2586 synthesize a multiplication. Proceed as if we didn't
2587 have the cache entry. */
2589 else
2591 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2592 /* The cached algorithm shows that this multiplication
2593 requires more cost than COST_LIMIT. Just return. This
2594 way, we don't clobber this cache entry with
2595 alg_impossible but retain useful information. */
2596 return;
2598 cache_hit = true;
2600 switch (cache_alg)
2602 case alg_shift:
2603 goto do_alg_shift;
2605 case alg_add_t_m2:
2606 case alg_sub_t_m2:
2607 goto do_alg_addsub_t_m2;
2609 case alg_add_factor:
2610 case alg_sub_factor:
2611 goto do_alg_addsub_factor;
2613 case alg_add_t2_m:
2614 goto do_alg_add_t2_m;
2616 case alg_sub_t2_m:
2617 goto do_alg_sub_t2_m;
2619 default:
2620 gcc_unreachable ();
2625 /* If we have a group of zero bits at the low-order part of T, try
2626 multiplying by the remaining bits and then doing a shift. */
2628 if ((t & 1) == 0)
2630 do_alg_shift:
2631 m = floor_log2 (t & -t); /* m = number of low zero bits */
2632 if (m < maxm)
2634 q = t >> m;
2635 /* The function expand_shift will choose between a shift and
2636 a sequence of additions, so the observed cost is given as
2637 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2638 op_cost = m * add_cost (speed, mode);
2639 if (shift_cost (speed, mode, m) < op_cost)
2640 op_cost = shift_cost (speed, mode, m);
2641 new_limit.cost = best_cost.cost - op_cost;
2642 new_limit.latency = best_cost.latency - op_cost;
2643 synth_mult (alg_in, q, &new_limit, mode);
2645 alg_in->cost.cost += op_cost;
2646 alg_in->cost.latency += op_cost;
2647 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2649 best_cost = alg_in->cost;
2650 std::swap (alg_in, best_alg);
2651 best_alg->log[best_alg->ops] = m;
2652 best_alg->op[best_alg->ops] = alg_shift;
2655 /* See if treating ORIG_T as a signed number yields a better
2656 sequence. Try this sequence only for a negative ORIG_T
2657 as it would be useless for a non-negative ORIG_T. */
2658 if ((HOST_WIDE_INT) orig_t < 0)
2660 /* Shift ORIG_T as follows because a right shift of a
2661 negative-valued signed type is implementation
2662 defined. */
2663 q = ~(~orig_t >> m);
2664 /* The function expand_shift will choose between a shift
2665 and a sequence of additions, so the observed cost is
2666 given as MIN (m * add_cost(speed, mode),
2667 shift_cost(speed, mode, m)). */
2668 op_cost = m * add_cost (speed, mode);
2669 if (shift_cost (speed, mode, m) < op_cost)
2670 op_cost = shift_cost (speed, mode, m);
2671 new_limit.cost = best_cost.cost - op_cost;
2672 new_limit.latency = best_cost.latency - op_cost;
2673 synth_mult (alg_in, q, &new_limit, mode);
2675 alg_in->cost.cost += op_cost;
2676 alg_in->cost.latency += op_cost;
2677 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2679 best_cost = alg_in->cost;
2680 std::swap (alg_in, best_alg);
2681 best_alg->log[best_alg->ops] = m;
2682 best_alg->op[best_alg->ops] = alg_shift;
2686 if (cache_hit)
2687 goto done;
2690 /* If we have an odd number, add or subtract one. */
2691 if ((t & 1) != 0)
2693 unsigned HOST_WIDE_INT w;
2695 do_alg_addsub_t_m2:
2696 for (w = 1; (w & t) != 0; w <<= 1)
2698 /* If T was -1, then W will be zero after the loop. This is another
2699 	 case where T ends with ...111.  Handling it by multiplying by (T + 1)
2700 	 and subtracting the multiplicand produces slightly better code and
2701 	 makes algorithm selection much faster than treating it like the
2702 	 ...0111 case below.  */
2703 if (w == 0
2704 || (w > 2
2705 /* Reject the case where t is 3.
2706 Thus we prefer addition in that case. */
2707 && t != 3))
2709 	  /* T ends with ...111.  Multiply by (T + 1) and subtract the multiplicand.  */
2711 op_cost = add_cost (speed, mode);
2712 new_limit.cost = best_cost.cost - op_cost;
2713 new_limit.latency = best_cost.latency - op_cost;
2714 synth_mult (alg_in, t + 1, &new_limit, mode);
2716 alg_in->cost.cost += op_cost;
2717 alg_in->cost.latency += op_cost;
2718 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2720 best_cost = alg_in->cost;
2721 std::swap (alg_in, best_alg);
2722 best_alg->log[best_alg->ops] = 0;
2723 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2726 else
2728 	  /* T ends with ...01 or ...011.  Multiply by (T - 1) and add the multiplicand.  */
2730 op_cost = add_cost (speed, mode);
2731 new_limit.cost = best_cost.cost - op_cost;
2732 new_limit.latency = best_cost.latency - op_cost;
2733 synth_mult (alg_in, t - 1, &new_limit, mode);
2735 alg_in->cost.cost += op_cost;
2736 alg_in->cost.latency += op_cost;
2737 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2739 best_cost = alg_in->cost;
2740 std::swap (alg_in, best_alg);
2741 best_alg->log[best_alg->ops] = 0;
2742 best_alg->op[best_alg->ops] = alg_add_t_m2;
2746 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2747 quickly with a - a * n for some appropriate constant n. */
2748 m = exact_log2 (-orig_t + 1);
2749 if (m >= 0 && m < maxm)
2751 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2752 /* If the target has a cheap shift-and-subtract insn use
2753 that in preference to a shift insn followed by a sub insn.
2754 Assume that the shift-and-sub is "atomic" with a latency
2755 	 equal to its cost, otherwise assume that on superscalar
2756 hardware the shift may be executed concurrently with the
2757 earlier steps in the algorithm. */
2758 if (shiftsub1_cost (speed, mode, m) <= op_cost)
2760 op_cost = shiftsub1_cost (speed, mode, m);
2761 op_latency = op_cost;
2763 else
2764 op_latency = add_cost (speed, mode);
2766 new_limit.cost = best_cost.cost - op_cost;
2767 new_limit.latency = best_cost.latency - op_latency;
2768 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2769 &new_limit, mode);
2771 alg_in->cost.cost += op_cost;
2772 alg_in->cost.latency += op_latency;
2773 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2775 best_cost = alg_in->cost;
2776 std::swap (alg_in, best_alg);
2777 best_alg->log[best_alg->ops] = m;
2778 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2782 if (cache_hit)
2783 goto done;
2786 /* Look for factors of t of the form
2787 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2788 If we find such a factor, we can multiply by t using an algorithm that
2789 multiplies by q, shift the result by m and add/subtract it to itself.
2791 We search for large factors first and loop down, even if large factors
2792 are less probable than small; if we find a large factor we will find a
2793 good sequence quickly, and therefore be able to prune (by decreasing
2794 COST_LIMIT) the search. */
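  /* For instance, t = 45 factors as (2**2 + 1) * (2**3 + 1), so x*45 can
     be computed with two shift-and-add steps: (x << 2) + x gives 5*x and
     (5*x << 3) + 5*x gives 45*x.  */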
2796 do_alg_addsub_factor:
2797 for (m = floor_log2 (t - 1); m >= 2; m--)
2799 unsigned HOST_WIDE_INT d;
2801 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2802 if (t % d == 0 && t > d && m < maxm
2803 && (!cache_hit || cache_alg == alg_add_factor))
2805 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2806 if (shiftadd_cost (speed, mode, m) <= op_cost)
2807 op_cost = shiftadd_cost (speed, mode, m);
2809 op_latency = op_cost;
2812 new_limit.cost = best_cost.cost - op_cost;
2813 new_limit.latency = best_cost.latency - op_latency;
2814 synth_mult (alg_in, t / d, &new_limit, mode);
2816 alg_in->cost.cost += op_cost;
2817 alg_in->cost.latency += op_latency;
2818 if (alg_in->cost.latency < op_cost)
2819 alg_in->cost.latency = op_cost;
2820 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2822 best_cost = alg_in->cost;
2823 std::swap (alg_in, best_alg);
2824 best_alg->log[best_alg->ops] = m;
2825 best_alg->op[best_alg->ops] = alg_add_factor;
2827 /* Other factors will have been taken care of in the recursion. */
2828 break;
2831 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2832 if (t % d == 0 && t > d && m < maxm
2833 && (!cache_hit || cache_alg == alg_sub_factor))
2835 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2836 if (shiftsub0_cost (speed, mode, m) <= op_cost)
2837 op_cost = shiftsub0_cost (speed, mode, m);
2839 op_latency = op_cost;
2841 new_limit.cost = best_cost.cost - op_cost;
2842 new_limit.latency = best_cost.latency - op_latency;
2843 synth_mult (alg_in, t / d, &new_limit, mode);
2845 alg_in->cost.cost += op_cost;
2846 alg_in->cost.latency += op_latency;
2847 if (alg_in->cost.latency < op_cost)
2848 alg_in->cost.latency = op_cost;
2849 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2851 best_cost = alg_in->cost;
2852 std::swap (alg_in, best_alg);
2853 best_alg->log[best_alg->ops] = m;
2854 best_alg->op[best_alg->ops] = alg_sub_factor;
2856 break;
2859 if (cache_hit)
2860 goto done;
2862 /* Try shift-and-add (load effective address) instructions,
2863 i.e. do a*3, a*5, a*9. */
2864 if ((t & 1) != 0)
2866 do_alg_add_t2_m:
2867 q = t - 1;
2868 q = q & -q;
2869 m = exact_log2 (q);
2870 if (m >= 0 && m < maxm)
2872 op_cost = shiftadd_cost (speed, mode, m);
2873 new_limit.cost = best_cost.cost - op_cost;
2874 new_limit.latency = best_cost.latency - op_cost;
2875 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2877 alg_in->cost.cost += op_cost;
2878 alg_in->cost.latency += op_cost;
2879 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2881 best_cost = alg_in->cost;
2882 std::swap (alg_in, best_alg);
2883 best_alg->log[best_alg->ops] = m;
2884 best_alg->op[best_alg->ops] = alg_add_t2_m;
2887 if (cache_hit)
2888 goto done;
2890 do_alg_sub_t2_m:
2891 q = t + 1;
2892 q = q & -q;
2893 m = exact_log2 (q);
2894 if (m >= 0 && m < maxm)
2896 op_cost = shiftsub0_cost (speed, mode, m);
2897 new_limit.cost = best_cost.cost - op_cost;
2898 new_limit.latency = best_cost.latency - op_cost;
2899 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2901 alg_in->cost.cost += op_cost;
2902 alg_in->cost.latency += op_cost;
2903 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2905 best_cost = alg_in->cost;
2906 std::swap (alg_in, best_alg);
2907 best_alg->log[best_alg->ops] = m;
2908 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2911 if (cache_hit)
2912 goto done;
2915 done:
2916 /* If best_cost has not decreased, we have not found any algorithm. */
2917 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2919 /* We failed to find an algorithm. Record alg_impossible for
2920 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2921 we are asked to find an algorithm for T within the same or
2922 lower COST_LIMIT, we can immediately return to the
2923 caller. */
2924 entry_ptr->t = t;
2925 entry_ptr->mode = mode;
2926 entry_ptr->speed = speed;
2927 entry_ptr->alg = alg_impossible;
2928 entry_ptr->cost = *cost_limit;
2929 return;
2932 /* Cache the result. */
2933 if (!cache_hit)
2935 entry_ptr->t = t;
2936 entry_ptr->mode = mode;
2937 entry_ptr->speed = speed;
2938 entry_ptr->alg = best_alg->op[best_alg->ops];
2939 entry_ptr->cost.cost = best_cost.cost;
2940 entry_ptr->cost.latency = best_cost.latency;
2943   /* If the sequence we found is too long for `struct algorithm'
2944      to record, make this search fail.  */
2945 if (best_alg->ops == MAX_BITS_PER_WORD)
2946 return;
2948 /* Copy the algorithm from temporary space to the space at alg_out.
2949 We avoid using structure assignment because the majority of
2950 best_alg is normally undefined, and this is a critical function. */
2951 alg_out->ops = best_alg->ops + 1;
2952 alg_out->cost = best_cost;
2953 memcpy (alg_out->op, best_alg->op,
2954 alg_out->ops * sizeof *alg_out->op);
2955 memcpy (alg_out->log, best_alg->log,
2956 alg_out->ops * sizeof *alg_out->log);
2959 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2960 Try three variations:
2962 - a shift/add sequence based on VAL itself
2963 - a shift/add sequence based on -VAL, followed by a negation
2964 - a shift/add sequence based on VAL - 1, followed by an addition.
2966 Return true if the cheapest of these cost less than MULT_COST,
2967 describing the algorithm in *ALG and final fixup in *VARIANT. */
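/* For instance, multiplying by -5 might use the negate_variant
   (synthesize x*5 as (x << 2) + x, then negate), while multiplying by 17
   might use the add_variant (synthesize x*16 as x << 4, then add x).  */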
2969 static bool
2970 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
2971 struct algorithm *alg, enum mult_variant *variant,
2972 int mult_cost)
2974 struct algorithm alg2;
2975 struct mult_cost limit;
2976 int op_cost;
2977 bool speed = optimize_insn_for_speed_p ();
2979 /* Fail quickly for impossible bounds. */
2980 if (mult_cost < 0)
2981 return false;
2983 /* Ensure that mult_cost provides a reasonable upper bound.
2984      Any constant multiplication can be performed with fewer
2985 than 2 * bits additions. */
2986 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
2987 if (mult_cost > op_cost)
2988 mult_cost = op_cost;
2990 *variant = basic_variant;
2991 limit.cost = mult_cost;
2992 limit.latency = mult_cost;
2993 synth_mult (alg, val, &limit, mode);
2995 /* This works only if the inverted value actually fits in an
2996      `unsigned int'.  */
2997 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
2999 op_cost = neg_cost (speed, mode);
3000 if (MULT_COST_LESS (&alg->cost, mult_cost))
3002 limit.cost = alg->cost.cost - op_cost;
3003 limit.latency = alg->cost.latency - op_cost;
3005 else
3007 limit.cost = mult_cost - op_cost;
3008 limit.latency = mult_cost - op_cost;
3011 synth_mult (&alg2, -val, &limit, mode);
3012 alg2.cost.cost += op_cost;
3013 alg2.cost.latency += op_cost;
3014 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3015 *alg = alg2, *variant = negate_variant;
3018 /* This proves very useful for division-by-constant. */
3019 op_cost = add_cost (speed, mode);
3020 if (MULT_COST_LESS (&alg->cost, mult_cost))
3022 limit.cost = alg->cost.cost - op_cost;
3023 limit.latency = alg->cost.latency - op_cost;
3025 else
3027 limit.cost = mult_cost - op_cost;
3028 limit.latency = mult_cost - op_cost;
3031 synth_mult (&alg2, val - 1, &limit, mode);
3032 alg2.cost.cost += op_cost;
3033 alg2.cost.latency += op_cost;
3034 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3035 *alg = alg2, *variant = add_variant;
3037 return MULT_COST_LESS (&alg->cost, mult_cost);
3040 /* A subroutine of expand_mult, used for constant multiplications.
3041 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3042 convenient. Use the shift/add sequence described by ALG and apply
3043 the final fixup specified by VARIANT. */
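/* For instance, with VAL = 10 and the algorithm alg_m, alg_add_t2_m (log 2),
   alg_shift (log 1), this emits accum = op0; accum = (accum << 2) + op0;
   accum = accum << 1, while VAL_SO_FAR tracks 1, 5, 10 so that the final
   consistency check passes.  */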
3045 static rtx
3046 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
3047 rtx target, const struct algorithm *alg,
3048 enum mult_variant variant)
3050 HOST_WIDE_INT val_so_far;
3051 rtx_insn *insn;
3052 rtx accum, tem;
3053 int opno;
3054 machine_mode nmode;
3056 /* Avoid referencing memory over and over and invalid sharing
3057 on SUBREGs. */
3058 op0 = force_reg (mode, op0);
3060 /* ACCUM starts out either as OP0 or as a zero, depending on
3061 the first operation. */
3063 if (alg->op[0] == alg_zero)
3065 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3066 val_so_far = 0;
3068 else if (alg->op[0] == alg_m)
3070 accum = copy_to_mode_reg (mode, op0);
3071 val_so_far = 1;
3073 else
3074 gcc_unreachable ();
3076 for (opno = 1; opno < alg->ops; opno++)
3078 int log = alg->log[opno];
3079 rtx shift_subtarget = optimize ? 0 : accum;
3080 rtx add_target
3081 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3082 && !optimize)
3083 ? target : 0;
3084 rtx accum_target = optimize ? 0 : accum;
3085 rtx accum_inner;
3087 switch (alg->op[opno])
3089 case alg_shift:
3090 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3091 /* REG_EQUAL note will be attached to the following insn. */
3092 emit_move_insn (accum, tem);
3093 val_so_far <<= log;
3094 break;
3096 case alg_add_t_m2:
3097 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3098 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3099 add_target ? add_target : accum_target);
3100 val_so_far += (HOST_WIDE_INT) 1 << log;
3101 break;
3103 case alg_sub_t_m2:
3104 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3105 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3106 add_target ? add_target : accum_target);
3107 val_so_far -= (HOST_WIDE_INT) 1 << log;
3108 break;
3110 case alg_add_t2_m:
3111 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3112 log, shift_subtarget, 0);
3113 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3114 add_target ? add_target : accum_target);
3115 val_so_far = (val_so_far << log) + 1;
3116 break;
3118 case alg_sub_t2_m:
3119 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3120 log, shift_subtarget, 0);
3121 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3122 add_target ? add_target : accum_target);
3123 val_so_far = (val_so_far << log) - 1;
3124 break;
3126 case alg_add_factor:
3127 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3128 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3129 add_target ? add_target : accum_target);
3130 val_so_far += val_so_far << log;
3131 break;
3133 case alg_sub_factor:
3134 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3135 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3136 (add_target
3137 ? add_target : (optimize ? 0 : tem)));
3138 val_so_far = (val_so_far << log) - val_so_far;
3139 break;
3141 default:
3142 gcc_unreachable ();
3145 if (SCALAR_INT_MODE_P (mode))
3147 /* Write a REG_EQUAL note on the last insn so that we can cse
3148 multiplication sequences. Note that if ACCUM is a SUBREG,
3149 we've set the inner register and must properly indicate that. */
3150 tem = op0, nmode = mode;
3151 accum_inner = accum;
3152 if (GET_CODE (accum) == SUBREG)
3154 accum_inner = SUBREG_REG (accum);
3155 nmode = GET_MODE (accum_inner);
3156 tem = gen_lowpart (nmode, op0);
3159 insn = get_last_insn ();
3160 set_dst_reg_note (insn, REG_EQUAL,
3161 gen_rtx_MULT (nmode, tem,
3162 gen_int_mode (val_so_far, nmode)),
3163 accum_inner);
3167 if (variant == negate_variant)
3169 val_so_far = -val_so_far;
3170 accum = expand_unop (mode, neg_optab, accum, target, 0);
3172 else if (variant == add_variant)
3174 val_so_far = val_so_far + 1;
3175 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3178 /* Compare only the bits of val and val_so_far that are significant
3179 in the result mode, to avoid sign-/zero-extension confusion. */
3180 nmode = GET_MODE_INNER (mode);
3181 if (nmode == VOIDmode)
3182 nmode = mode;
3183 val &= GET_MODE_MASK (nmode);
3184 val_so_far &= GET_MODE_MASK (nmode);
3185 gcc_assert (val == val_so_far);
3187 return accum;
3190 /* Perform a multiplication and return an rtx for the result.
3191 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3192 TARGET is a suggestion for where to store the result (an rtx).
3194 We check specially for a constant integer as OP1.
3195 If you want this check for OP0 as well, then before calling
3196 you should swap the two operands if OP0 would be constant. */
3199 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3200 int unsignedp)
3202 enum mult_variant variant;
3203 struct algorithm algorithm;
3204 rtx scalar_op1;
3205 int max_cost;
3206 bool speed = optimize_insn_for_speed_p ();
3207 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3209 if (CONSTANT_P (op0))
3210 std::swap (op0, op1);
3212 /* For vectors, there are several simplifications that can be made if
3213 all elements of the vector constant are identical. */
3214 scalar_op1 = op1;
3215 if (GET_CODE (op1) == CONST_VECTOR)
3217 int i, n = CONST_VECTOR_NUNITS (op1);
3218 scalar_op1 = CONST_VECTOR_ELT (op1, 0);
3219 for (i = 1; i < n; ++i)
3220 if (!rtx_equal_p (scalar_op1, CONST_VECTOR_ELT (op1, i)))
3221 goto skip_scalar;
3224 if (INTEGRAL_MODE_P (mode))
3226 rtx fake_reg;
3227 HOST_WIDE_INT coeff;
3228 bool is_neg;
3229 int mode_bitsize;
3231 if (op1 == CONST0_RTX (mode))
3232 return op1;
3233 if (op1 == CONST1_RTX (mode))
3234 return op0;
3235 if (op1 == CONSTM1_RTX (mode))
3236 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3237 op0, target, 0);
3239 if (do_trapv)
3240 goto skip_synth;
3242 /* If mode is integer vector mode, check if the backend supports
3243 vector lshift (by scalar or vector) at all. If not, we can't use
3244 	 a synthesized multiply.  */
3245 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3246 && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3247 && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3248 goto skip_synth;
3250 /* These are the operations that are potentially turned into
3251 a sequence of shifts and additions. */
3252 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3254 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3255 less than or equal in size to `unsigned int' this doesn't matter.
3256 If the mode is larger than `unsigned int', then synth_mult works
3257 only if the constant value exactly fits in an `unsigned int' without
3258 any truncation. This means that multiplying by negative values does
3259 not work; results are off by 2^32 on a 32 bit machine. */
3260 if (CONST_INT_P (scalar_op1))
3262 coeff = INTVAL (scalar_op1);
3263 is_neg = coeff < 0;
3265 #if TARGET_SUPPORTS_WIDE_INT
3266 else if (CONST_WIDE_INT_P (scalar_op1))
3267 #else
3268 else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3269 #endif
3271 int shift = wi::exact_log2 (std::make_pair (scalar_op1, mode));
3272 /* Perfect power of 2 (other than 1, which is handled above). */
3273 if (shift > 0)
3274 return expand_shift (LSHIFT_EXPR, mode, op0,
3275 shift, target, unsignedp);
3276 else
3277 goto skip_synth;
3279 else
3280 goto skip_synth;
3282 /* We used to test optimize here, on the grounds that it's better to
3283 produce a smaller program when -O is not used. But this causes
3284 such a terrible slowdown sometimes that it seems better to always
3285 use synth_mult. */
3287 /* Special case powers of two. */
3288 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3289 && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3290 return expand_shift (LSHIFT_EXPR, mode, op0,
3291 floor_log2 (coeff), target, unsignedp);
3293 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3295 /* Attempt to handle multiplication of DImode values by negative
3296 coefficients, by performing the multiplication by a positive
3297 multiplier and then inverting the result. */
3298 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3300 	  /* It's safe to use -coeff even for INT_MIN, as the
3301 result is interpreted as an unsigned coefficient.
3302 Exclude cost of op0 from max_cost to match the cost
3303 calculation of the synth_mult. */
3304 coeff = -(unsigned HOST_WIDE_INT) coeff;
3305 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed)
3306 - neg_cost (speed, mode));
3307 if (max_cost <= 0)
3308 goto skip_synth;
3310 /* Special case powers of two. */
3311 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3313 rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3314 floor_log2 (coeff), target, unsignedp);
3315 return expand_unop (mode, neg_optab, temp, target, 0);
3318 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3319 max_cost))
3321 rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3322 &algorithm, variant);
3323 return expand_unop (mode, neg_optab, temp, target, 0);
3325 goto skip_synth;
3328 /* Exclude cost of op0 from max_cost to match the cost
3329 calculation of the synth_mult. */
3330 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
3331 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3332 return expand_mult_const (mode, op0, coeff, target,
3333 &algorithm, variant);
3335 skip_synth:
3337 /* Expand x*2.0 as x+x. */
3338 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1))
3340 REAL_VALUE_TYPE d;
3341 REAL_VALUE_FROM_CONST_DOUBLE (d, scalar_op1);
3343 if (REAL_VALUES_EQUAL (d, dconst2))
3345 op0 = force_reg (GET_MODE (op0), op0);
3346 return expand_binop (mode, add_optab, op0, op0,
3347 target, unsignedp, OPTAB_LIB_WIDEN);
3350 skip_scalar:
3352 /* This used to use umul_optab if unsigned, but for non-widening multiply
3353 there is no difference between signed and unsigned. */
3354 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3355 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3356 gcc_assert (op0);
3357 return op0;
3360 /* Return a cost estimate for multiplying a register by the given
3361 COEFFicient in the given MODE and SPEED. */
3364 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3366 int max_cost;
3367 struct algorithm algorithm;
3368 enum mult_variant variant;
3370 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3371 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg), speed);
3372 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3373 return algorithm.cost.cost;
3374 else
3375 return max_cost;
3378 /* Perform a widening multiplication and return an rtx for the result.
3379 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3380 TARGET is a suggestion for where to store the result (an rtx).
3381 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3382 or smul_widen_optab.
3384 We check specially for a constant integer as OP1, comparing the
3385 cost of a widening multiply against the cost of a sequence of shifts
3386 and adds. */
3389 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3390 int unsignedp, optab this_optab)
3392 bool speed = optimize_insn_for_speed_p ();
3393 rtx cop1;
3395 if (CONST_INT_P (op1)
3396 && GET_MODE (op0) != VOIDmode
3397 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3398 this_optab == umul_widen_optab))
3399 && CONST_INT_P (cop1)
3400 && (INTVAL (cop1) >= 0
3401 || HWI_COMPUTABLE_MODE_P (mode)))
3403 HOST_WIDE_INT coeff = INTVAL (cop1);
3404 int max_cost;
3405 enum mult_variant variant;
3406 struct algorithm algorithm;
3408 if (coeff == 0)
3409 return CONST0_RTX (mode);
3411 /* Special case powers of two. */
3412 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3414 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3415 return expand_shift (LSHIFT_EXPR, mode, op0,
3416 floor_log2 (coeff), target, unsignedp);
3419 /* Exclude cost of op0 from max_cost to match the cost
3420 calculation of the synth_mult. */
3421 max_cost = mul_widen_cost (speed, mode);
3422 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3423 max_cost))
3425 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3426 return expand_mult_const (mode, op0, coeff, target,
3427 &algorithm, variant);
3430 return expand_binop (mode, this_optab, op0, op1, target,
3431 unsignedp, OPTAB_LIB_WIDEN);
3434 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3435 replace division by D, and put the least significant N bits of the result
3436 in *MULTIPLIER_PTR and return the most significant bit.
3438 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3439 needed precision is in PRECISION (should be <= N).
3441 PRECISION should be as small as possible so this function can choose
3442    the multiplier more freely.
3444 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3445 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3447 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3448 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
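/* For instance, with D = 7, N = 32 and PRECISION = 32 this computes
   lgup = 3 and mhigh = floor ((2**35 + 2**3) / 7) = 0x124924925, so it
   stores 0x24924925 in *MULTIPLIER_PTR, 3 in *POST_SHIFT_PTR and returns
   1 (the bit above the low 32 bits); the full 33-bit multiplier then
   satisfies x/7 == (x * 0x124924925) >> 35, i.e. the high 32-bit half of
   the product shifted right by the post-shift of 3, for every 32-bit
   unsigned x.  */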
3450 unsigned HOST_WIDE_INT
3451 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3452 unsigned HOST_WIDE_INT *multiplier_ptr,
3453 int *post_shift_ptr, int *lgup_ptr)
3455 int lgup, post_shift;
3456 int pow, pow2;
3458 /* lgup = ceil(log2(divisor)); */
3459 lgup = ceil_log2 (d);
3461 gcc_assert (lgup <= n);
3463 pow = n + lgup;
3464 pow2 = n + lgup - precision;
3466 /* mlow = 2^(N + lgup)/d */
3467 wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3468 wide_int mlow = wi::udiv_trunc (val, d);
3470 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3471 val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3472 wide_int mhigh = wi::udiv_trunc (val, d);
3474 /* If precision == N, then mlow, mhigh exceed 2^N
3475 (but they do not exceed 2^(N+1)). */
3477 /* Reduce to lowest terms. */
3478 for (post_shift = lgup; post_shift > 0; post_shift--)
3480 unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3481 HOST_BITS_PER_WIDE_INT);
3482 unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3483 HOST_BITS_PER_WIDE_INT);
3484 if (ml_lo >= mh_lo)
3485 break;
3487 mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3488 mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3491 *post_shift_ptr = post_shift;
3492 *lgup_ptr = lgup;
3493 if (n < HOST_BITS_PER_WIDE_INT)
3495 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3496 *multiplier_ptr = mhigh.to_uhwi () & mask;
3497 return mhigh.to_uhwi () >= mask;
3499 else
3501 *multiplier_ptr = mhigh.to_uhwi ();
3502 return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3506 /* Compute the inverse of X mod 2**N, i.e., find Y such that X * Y is
3507 congruent to 1 (mod 2**N). */
3509 static unsigned HOST_WIDE_INT
3510 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3512 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3514 /* The algorithm notes that the choice y = x satisfies
3515 x*y == 1 mod 2^3, since x is assumed odd.
3516 Each iteration doubles the number of bits of significance in y. */
3518 unsigned HOST_WIDE_INT mask;
3519 unsigned HOST_WIDE_INT y = x;
3520 int nbit = 3;
3522 mask = (n == HOST_BITS_PER_WIDE_INT
3523 ? ~(unsigned HOST_WIDE_INT) 0
3524 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3526 while (nbit < n)
3528 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3529 nbit *= 2;
3531 return y;
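/* For instance, with X == 7 and N == 8 the initial guess y == 7 is already
   correct mod 2^3 (7 * 7 == 49 == 1 mod 8).  One iteration gives
   y == 7 * (2 - 49) == -329 == 183 (mod 256), which is correct mod 2^6,
   and a second iteration leaves it at 183, the inverse mod 2^8:
   7 * 183 == 1281 == 5 * 256 + 1.  */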
3534 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3535 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3536 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3537 to become unsigned; if UNSIGNEDP is zero, adjust the unsigned product to
3538 become signed.
3540 The result is put in TARGET if that is convenient.
3542 MODE is the mode of operation. */
3545 expand_mult_highpart_adjust (machine_mode mode, rtx adj_operand, rtx op0,
3546 rtx op1, rtx target, int unsignedp)
3548 rtx tem;
3549 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3551 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3552 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3553 tem = expand_and (mode, tem, op1, NULL_RTX);
3554 adj_operand
3555 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3556 adj_operand);
3558 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3559 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3560 tem = expand_and (mode, tem, op0, NULL_RTX);
3561 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3562 target);
3564 return target;
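/* The identity being used: if SA and SB are the sign bits of OP0 and OP1
   in an N-bit mode then, modulo 2^N,

       uhigh (op0, op1) == shigh (op0, op1) + (SA ? op1 : 0) + (SB ? op0 : 0)

   and the signed high part is the unsigned one minus the same two terms.
   A 32-bit sketch of the UNSIGNEDP direction (illustrative only; uhigh and
   shigh are not real functions here):

       uint32_t adj = shigh;
       adj += (uint32_t) ((a >> 31) & b);
       adj += (uint32_t) ((b >> 31) & a);

   where a and b are int32_t and the arithmetic right shift replicates the
   sign bit, just as the expand_shift/expand_and pairs above do.  */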
3567 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3569 static rtx
3570 extract_high_half (machine_mode mode, rtx op)
3572 machine_mode wider_mode;
3574 if (mode == word_mode)
3575 return gen_highpart (mode, op);
3577 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3579 wider_mode = GET_MODE_WIDER_MODE (mode);
3580 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3581 GET_MODE_BITSIZE (mode), 0, 1);
3582 return convert_modes (mode, wider_mode, op, 0);
3585 /* Like expmed_mult_highpart, but only consider using a multiplication
3586 optab. OP1 is an rtx for the constant operand. */
3588 static rtx
3589 expmed_mult_highpart_optab (machine_mode mode, rtx op0, rtx op1,
3590 rtx target, int unsignedp, int max_cost)
3592 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3593 machine_mode wider_mode;
3594 optab moptab;
3595 rtx tem;
3596 int size;
3597 bool speed = optimize_insn_for_speed_p ();
3599 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3601 wider_mode = GET_MODE_WIDER_MODE (mode);
3602 size = GET_MODE_BITSIZE (mode);
3604 /* Firstly, try using a multiplication insn that only generates the needed
3605 high part of the product, with the signedness given by UNSIGNEDP. */
3606 if (mul_highpart_cost (speed, mode) < max_cost)
3608 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3609 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3610 unsignedp, OPTAB_DIRECT);
3611 if (tem)
3612 return tem;
3615 /* Secondly, the same as above, but using the sign flavor opposite of UNSIGNEDP.
3616 We then need to adjust the result after the multiplication. */
3617 if (size - 1 < BITS_PER_WORD
3618 && (mul_highpart_cost (speed, mode)
3619 + 2 * shift_cost (speed, mode, size-1)
3620 + 4 * add_cost (speed, mode) < max_cost))
3622 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3623 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3624 unsignedp, OPTAB_DIRECT);
3625 if (tem)
3626 /* We used the wrong signedness. Adjust the result. */
3627 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3628 tem, unsignedp);
3631 /* Try widening multiplication. */
3632 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3633 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3634 && mul_widen_cost (speed, wider_mode) < max_cost)
3636 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3637 unsignedp, OPTAB_WIDEN);
3638 if (tem)
3639 return extract_high_half (mode, tem);
3642 /* Try widening the mode and perform a non-widening multiplication. */
3643 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3644 && size - 1 < BITS_PER_WORD
3645 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3646 < max_cost))
3648 rtx_insn *insns;
3649 rtx wop0, wop1;
3651 /* We need to widen the operands, for example to ensure the
3652 constant multiplier is correctly sign or zero extended.
3653 Use a sequence to clean up any instructions emitted by
3654 the conversions if things don't work out. */
3655 start_sequence ();
3656 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3657 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3658 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3659 unsignedp, OPTAB_WIDEN);
3660 insns = get_insns ();
3661 end_sequence ();
3663 if (tem)
3665 emit_insn (insns);
3666 return extract_high_half (mode, tem);
3670 /* Try widening multiplication of opposite signedness, and adjust. */
3671 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3672 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3673 && size - 1 < BITS_PER_WORD
3674 && (mul_widen_cost (speed, wider_mode)
3675 + 2 * shift_cost (speed, mode, size-1)
3676 + 4 * add_cost (speed, mode) < max_cost))
3678 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3679 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3680 if (tem != 0)
3682 tem = extract_high_half (mode, tem);
3683 /* We used the wrong signedness. Adjust the result. */
3684 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3685 target, unsignedp);
3689 return 0;
3692 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3693 putting the high half of the result in TARGET if that is convenient,
3694 and return where the result is. If the operation cannot be performed,
3695 0 is returned.
3697 MODE is the mode of operation and result.
3699 UNSIGNEDP nonzero means unsigned multiply.
3701 MAX_COST is the total allowed cost for the expanded RTL. */
3703 static rtx
3704 expmed_mult_highpart (machine_mode mode, rtx op0, rtx op1,
3705 rtx target, int unsignedp, int max_cost)
3707 machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3708 unsigned HOST_WIDE_INT cnst1;
3709 int extra_cost;
3710 bool sign_adjust = false;
3711 enum mult_variant variant;
3712 struct algorithm alg;
3713 rtx tem;
3714 bool speed = optimize_insn_for_speed_p ();
3716 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3717 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3718 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3720 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3722 /* We can't optimize modes wider than BITS_PER_WORD.
3723 ??? We might be able to perform double-word arithmetic if
3724 mode == word_mode, however all the cost calculations in
3725 synth_mult etc. assume single-word operations. */
3726 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3727 return expmed_mult_highpart_optab (mode, op0, op1, target,
3728 unsignedp, max_cost);
3730 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3732 /* Check whether we try to multiply by a negative constant. */
3733 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3735 sign_adjust = true;
3736 extra_cost += add_cost (speed, mode);
3739 /* See whether shift/add multiplication is cheap enough. */
3740 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3741 max_cost - extra_cost))
3743 /* See whether the specialized multiplication optabs are
3744 cheaper than the shift/add version. */
3745 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3746 alg.cost.cost + extra_cost);
3747 if (tem)
3748 return tem;
3750 tem = convert_to_mode (wider_mode, op0, unsignedp);
3751 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3752 tem = extract_high_half (mode, tem);
3754 /* Adjust result for signedness. */
3755 if (sign_adjust)
3756 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3758 return tem;
3760 return expmed_mult_highpart_optab (mode, op0, op1, target,
3761 unsignedp, max_cost);
3765 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3767 static rtx
3768 expand_smod_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3770 rtx result, temp, shift;
3771 rtx_code_label *label;
3772 int logd;
3773 int prec = GET_MODE_PRECISION (mode);
3775 logd = floor_log2 (d);
3776 result = gen_reg_rtx (mode);
3778 /* Avoid conditional branches when they're expensive. */
3779 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3780 && optimize_insn_for_speed_p ())
3782 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3783 mode, 0, -1);
3784 if (signmask)
3786 HOST_WIDE_INT masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3787 signmask = force_reg (mode, signmask);
3788 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3790 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3791 which instruction sequence to use. If logical right shifts
3792 are expensive then use 2 XORs, 2 SUBs and an AND; otherwise
3793 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
3795 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3796 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3797 || (set_src_cost (temp, optimize_insn_for_speed_p ())
3798 > COSTS_N_INSNS (2)))
3800 temp = expand_binop (mode, xor_optab, op0, signmask,
3801 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3802 temp = expand_binop (mode, sub_optab, temp, signmask,
3803 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3804 temp = expand_binop (mode, and_optab, temp,
3805 gen_int_mode (masklow, mode),
3806 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3807 temp = expand_binop (mode, xor_optab, temp, signmask,
3808 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3809 temp = expand_binop (mode, sub_optab, temp, signmask,
3810 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3812 else
3814 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3815 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3816 signmask = force_reg (mode, signmask);
3818 temp = expand_binop (mode, add_optab, op0, signmask,
3819 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3820 temp = expand_binop (mode, and_optab, temp,
3821 gen_int_mode (masklow, mode),
3822 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3823 temp = expand_binop (mode, sub_optab, temp, signmask,
3824 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3826 return temp;
3830 /* Mask contains the mode's signbit and the significant bits of the
3831 modulus. By including the signbit in the operation, many targets
3832 can avoid an explicit compare operation in the following comparison
3833 against zero. */
3834 wide_int mask = wi::mask (logd, false, prec);
3835 mask = wi::set_bit (mask, prec - 1);
3837 temp = expand_binop (mode, and_optab, op0,
3838 immed_wide_int_const (mask, mode),
3839 result, 1, OPTAB_LIB_WIDEN);
3840 if (temp != result)
3841 emit_move_insn (result, temp);
3843 label = gen_label_rtx ();
3844 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3846 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3847 0, OPTAB_LIB_WIDEN);
3849 mask = wi::mask (logd, true, prec);
3850 temp = expand_binop (mode, ior_optab, temp,
3851 immed_wide_int_const (mask, mode),
3852 result, 1, OPTAB_LIB_WIDEN);
3853 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3854 0, OPTAB_LIB_WIDEN);
3855 if (temp != result)
3856 emit_move_insn (result, temp);
3857 emit_label (label);
3858 return result;
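/* Both branch-free variants above compute the truncating (C-style)
   remainder.  For 32-bit x, d == 2^logd, mask == d - 1 and
   sign == (x < 0 ? -1 : 0) they correspond roughly to

       r = ((x ^ sign) - sign) & mask;
       r = (r ^ sign) - sign;

   for the 2-XOR/2-SUB/AND form, and to

       t = (uint32_t) sign >> (32 - logd);
       r = ((x + t) & mask) - t;

   when a logical right shift is cheap (t is d - 1 exactly when x is
   negative).  E.g. -5 % 4: t == 3 and ((-5 + 3) & 3) - 3 == -1.
   Illustrative sketches only.  */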
3861 /* Expand signed division of OP0 by a power of two D in mode MODE.
3862 This routine is only called for positive values of D. */
3864 static rtx
3865 expand_sdiv_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3867 rtx temp;
3868 rtx_code_label *label;
3869 int logd;
3871 logd = floor_log2 (d);
3873 if (d == 2
3874 && BRANCH_COST (optimize_insn_for_speed_p (),
3875 false) >= 1)
3877 temp = gen_reg_rtx (mode);
3878 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3879 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3880 0, OPTAB_LIB_WIDEN);
3881 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3884 #ifdef HAVE_conditional_move
3885 if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3886 >= 2)
3888 rtx temp2;
3890 start_sequence ();
3891 temp2 = copy_to_mode_reg (mode, op0);
3892 temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
3893 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3894 temp = force_reg (mode, temp);
3896 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3897 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3898 mode, temp, temp2, mode, 0);
3899 if (temp2)
3901 rtx_insn *seq = get_insns ();
3902 end_sequence ();
3903 emit_insn (seq);
3904 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3906 end_sequence ();
3908 #endif
3910 if (BRANCH_COST (optimize_insn_for_speed_p (),
3911 false) >= 2)
3913 int ushift = GET_MODE_BITSIZE (mode) - logd;
3915 temp = gen_reg_rtx (mode);
3916 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3917 if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
3918 || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3919 > COSTS_N_INSNS (1))
3920 temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
3921 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3922 else
3923 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3924 ushift, NULL_RTX, 1);
3925 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3926 0, OPTAB_LIB_WIDEN);
3927 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3930 label = gen_label_rtx ();
3931 temp = copy_to_mode_reg (mode, op0);
3932 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3933 expand_inc (temp, gen_int_mode (d - 1, mode));
3934 emit_label (label);
3935 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
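/* All of the paths above compute the truncating signed quotient by adding
   d - 1 to the dividend only when it is negative and then shifting right
   arithmetically.  A branch-free 32-bit sketch (assuming arithmetic right
   shifts on signed types):

       q = (x + ((x >> 31) & (d - 1))) >> logd;

   e.g. x == -7, d == 4 gives (-7 + 3) >> 2 == -1, while 7 >> 2 == 1
   directly.  The d == 2 special case adds the sign bit itself:
   q == (x + (x < 0)) >> 1.  */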
3938 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3939 if that is convenient, and returning where the result is.
3940 You may request either the quotient or the remainder as the result;
3941 specify REM_FLAG nonzero to get the remainder.
3943 CODE is the expression code for which kind of division this is;
3944 it controls how rounding is done. MODE is the machine mode to use.
3945 UNSIGNEDP nonzero means do unsigned division. */
3947 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3948 and then correct it by or'ing in missing high bits
3949 if result of ANDI is nonzero.
3950 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3951 This could optimize to a bfexts instruction.
3952 But C doesn't use these operations, so their optimizations are
3953 left for later. */
3954 /* ??? For modulo, we don't actually need the highpart of the first product,
3955 the low part will do nicely. And for small divisors, the second multiply
3956 can also be a low-part only multiply or even be completely left out.
3957 E.g. to calculate the remainder of a division by 3 with a 32 bit
3958 multiply, multiply with 0x55555556 and extract the upper two bits;
3959 the result is exact for inputs up to 0x1fffffff.
3960 The input range can be reduced by using cross-sum rules.
3961 For odd divisors >= 3, the following table gives right shift counts
3962 so that if a number is shifted by an integer multiple of the given
3963 amount, the remainder stays the same:
3964 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3965 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3966 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3967 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3968 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3970 Cross-sum rules for even numbers can be derived by leaving as many bits
3971 to the right alone as the divisor has zeros to the right.
3972 E.g. if x is an unsigned 32 bit number:
3973 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
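/* A 32-bit C sanity check of the divide-by-3 remainder trick above
   (0x55555556 is (2^32 + 2) / 3, so the top two bits of the low half of
   the product carry the remainder):

       r = (x * 0x55555556u) >> 30;

   gives x % 3 for all x up to 0x1fffffff and first goes wrong at
   x == 0x20000000, where it returns 3.  */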
3977 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
3978 rtx op0, rtx op1, rtx target, int unsignedp)
3980 machine_mode compute_mode;
3981 rtx tquotient;
3982 rtx quotient = 0, remainder = 0;
3983 rtx_insn *last;
3984 int size;
3985 rtx_insn *insn;
3986 optab optab1, optab2;
3987 int op1_is_constant, op1_is_pow2 = 0;
3988 int max_cost, extra_cost;
3989 static HOST_WIDE_INT last_div_const = 0;
3990 bool speed = optimize_insn_for_speed_p ();
3992 op1_is_constant = CONST_INT_P (op1);
3993 if (op1_is_constant)
3995 unsigned HOST_WIDE_INT ext_op1 = UINTVAL (op1);
3996 if (unsignedp)
3997 ext_op1 &= GET_MODE_MASK (mode);
3998 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3999 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
4003 This is the structure of expand_divmod:
4005 First comes code to fix up the operands so we can perform the operations
4006 correctly and efficiently.
4008 Second comes a switch statement with code specific for each rounding mode.
4009 For some special operands this code emits all RTL for the desired
4010 operation, for other cases, it generates only a quotient and stores it in
4011 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
4012 to indicate that it has not done anything.
4014 Last comes code that finishes the operation. If QUOTIENT is set and
4015 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
4016 QUOTIENT is not set, it is computed using trunc rounding.
4018 We try to generate special code for division and remainder when OP1 is a
4019 constant. If |OP1| = 2**n we can use shifts and some other fast
4020 operations. For other values of OP1, we compute a carefully selected
4021 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4022 by m.
4024 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4025 half of the product. Different strategies for generating the product are
4026 implemented in expmed_mult_highpart.
4028 If what we actually want is the remainder, we generate that by another
4029 by-constant multiplication and a subtraction. */
4031 /* We shouldn't be called with OP1 == const1_rtx, but some of the
4032 code below will malfunction if we are, so check here and handle
4033 the special case if so. */
4034 if (op1 == const1_rtx)
4035 return rem_flag ? const0_rtx : op0;
4037 /* When dividing by -1, we could get an overflow.
4038 negv_optab can handle overflows. */
4039 if (! unsignedp && op1 == constm1_rtx)
4041 if (rem_flag)
4042 return const0_rtx;
4043 return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4044 ? negv_optab : neg_optab, op0, target, 0);
4047 if (target
4048 /* Don't use the function value register as a target
4049 since we have to read it as well as write it,
4050 and function-inlining gets confused by this. */
4051 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4052 /* Don't clobber an operand while doing a multi-step calculation. */
4053 || ((rem_flag || op1_is_constant)
4054 && (reg_mentioned_p (target, op0)
4055 || (MEM_P (op0) && MEM_P (target))))
4056 || reg_mentioned_p (target, op1)
4057 || (MEM_P (op1) && MEM_P (target))))
4058 target = 0;
4060 /* Get the mode in which to perform this computation. Normally it will
4061 be MODE, but sometimes we can't do the desired operation in MODE.
4062 If so, pick a wider mode in which we can do the operation. Convert
4063 to that mode at the start to avoid repeated conversions.
4065 First see what operations we need. These depend on the expression
4066 we are evaluating. (We assume that divxx3 insns exist under the
4067 same conditions as modxx3 insns and that these insns don't normally
4068 fail. If these assumptions are not correct, we may generate less
4069 efficient code in some cases.)
4071 Then see if we find a mode in which we can open-code that operation
4072 (either a division, modulus, or shift). Finally, check for the smallest
4073 mode for which we can do the operation with a library call. */
4075 /* We might want to refine this now that we have division-by-constant
4076 optimization. Since expmed_mult_highpart tries so many variants, it is
4077 not straightforward to generalize this. Maybe we should make an array
4078 of possible modes in init_expmed? Save this for GCC 2.7. */
4080 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
4081 ? (unsignedp ? lshr_optab : ashr_optab)
4082 : (unsignedp ? udiv_optab : sdiv_optab));
4083 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
4084 ? optab1
4085 : (unsignedp ? udivmod_optab : sdivmod_optab));
4087 for (compute_mode = mode; compute_mode != VOIDmode;
4088 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4089 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4090 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4091 break;
4093 if (compute_mode == VOIDmode)
4094 for (compute_mode = mode; compute_mode != VOIDmode;
4095 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4096 if (optab_libfunc (optab1, compute_mode)
4097 || optab_libfunc (optab2, compute_mode))
4098 break;
4100 /* If we still couldn't find a mode, use MODE, but expand_binop will
4101 probably die. */
4102 if (compute_mode == VOIDmode)
4103 compute_mode = mode;
4105 if (target && GET_MODE (target) == compute_mode)
4106 tquotient = target;
4107 else
4108 tquotient = gen_reg_rtx (compute_mode);
4110 size = GET_MODE_BITSIZE (compute_mode);
4111 #if 0
4112 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4113 (mode), and thereby get better code when OP1 is a constant. Do that
4114 later. It will require going over all usages of SIZE below. */
4115 size = GET_MODE_BITSIZE (mode);
4116 #endif
4118 /* Only deduct something for a REM if the last divide done was
4119 for a different constant. Then set the constant of the last
4120 divide. */
4121 max_cost = (unsignedp
4122 ? udiv_cost (speed, compute_mode)
4123 : sdiv_cost (speed, compute_mode));
4124 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4125 && INTVAL (op1) == last_div_const))
4126 max_cost -= (mul_cost (speed, compute_mode)
4127 + add_cost (speed, compute_mode));
4129 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4131 /* Now convert to the best mode to use. */
4132 if (compute_mode != mode)
4134 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4135 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4137 /* convert_modes may have placed op1 into a register, so we
4138 must recompute the following. */
4139 op1_is_constant = CONST_INT_P (op1);
4140 op1_is_pow2 = (op1_is_constant
4141 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4142 || (! unsignedp
4143 && EXACT_POWER_OF_2_OR_ZERO_P (-UINTVAL (op1))))));
4146 /* If one of the operands is a volatile MEM, copy it into a register. */
4148 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4149 op0 = force_reg (compute_mode, op0);
4150 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4151 op1 = force_reg (compute_mode, op1);
4153 /* If we need the remainder or if OP1 is constant, we need to
4154 put OP0 in a register in case it has any queued subexpressions. */
4155 if (rem_flag || op1_is_constant)
4156 op0 = force_reg (compute_mode, op0);
4158 last = get_last_insn ();
4160 /* Promote floor rounding to trunc rounding for unsigned operations. */
4161 if (unsignedp)
4163 if (code == FLOOR_DIV_EXPR)
4164 code = TRUNC_DIV_EXPR;
4165 if (code == FLOOR_MOD_EXPR)
4166 code = TRUNC_MOD_EXPR;
4167 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4168 code = TRUNC_DIV_EXPR;
4171 if (op1 != const0_rtx)
4172 switch (code)
4174 case TRUNC_MOD_EXPR:
4175 case TRUNC_DIV_EXPR:
4176 if (op1_is_constant)
4178 if (unsignedp)
4180 unsigned HOST_WIDE_INT mh, ml;
4181 int pre_shift, post_shift;
4182 int dummy;
4183 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4184 & GET_MODE_MASK (compute_mode));
4186 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4188 pre_shift = floor_log2 (d);
4189 if (rem_flag)
4191 unsigned HOST_WIDE_INT mask
4192 = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4193 remainder
4194 = expand_binop (compute_mode, and_optab, op0,
4195 gen_int_mode (mask, compute_mode),
4196 remainder, 1,
4197 OPTAB_LIB_WIDEN);
4198 if (remainder)
4199 return gen_lowpart (mode, remainder);
4201 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4202 pre_shift, tquotient, 1);
4204 else if (size <= HOST_BITS_PER_WIDE_INT)
4206 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4208 /* Most significant bit of divisor is set; emit an scc
4209 insn. */
4210 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4211 compute_mode, 1, 1);
4213 else
4215 /* Find a suitable multiplier and right shift count
4216 instead of multiplying with D. */
4218 mh = choose_multiplier (d, size, size,
4219 &ml, &post_shift, &dummy);
4221 /* If the suggested multiplier is more than SIZE bits,
4222 we can do better for even divisors, using an
4223 initial right shift. */
4224 if (mh != 0 && (d & 1) == 0)
4226 pre_shift = floor_log2 (d & -d);
4227 mh = choose_multiplier (d >> pre_shift, size,
4228 size - pre_shift,
4229 &ml, &post_shift, &dummy);
4230 gcc_assert (!mh);
4232 else
4233 pre_shift = 0;
4235 if (mh != 0)
4237 rtx t1, t2, t3, t4;
4239 if (post_shift - 1 >= BITS_PER_WORD)
4240 goto fail1;
4242 extra_cost
4243 = (shift_cost (speed, compute_mode, post_shift - 1)
4244 + shift_cost (speed, compute_mode, 1)
4245 + 2 * add_cost (speed, compute_mode));
4246 t1 = expmed_mult_highpart
4247 (compute_mode, op0,
4248 gen_int_mode (ml, compute_mode),
4249 NULL_RTX, 1, max_cost - extra_cost);
4250 if (t1 == 0)
4251 goto fail1;
4252 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4253 op0, t1),
4254 NULL_RTX);
4255 t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4256 t2, 1, NULL_RTX, 1);
4257 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4258 t1, t3),
4259 NULL_RTX);
4260 quotient = expand_shift
4261 (RSHIFT_EXPR, compute_mode, t4,
4262 post_shift - 1, tquotient, 1);
4264 else
4266 rtx t1, t2;
4268 if (pre_shift >= BITS_PER_WORD
4269 || post_shift >= BITS_PER_WORD)
4270 goto fail1;
4272 t1 = expand_shift
4273 (RSHIFT_EXPR, compute_mode, op0,
4274 pre_shift, NULL_RTX, 1);
4275 extra_cost
4276 = (shift_cost (speed, compute_mode, pre_shift)
4277 + shift_cost (speed, compute_mode, post_shift));
4278 t2 = expmed_mult_highpart
4279 (compute_mode, t1,
4280 gen_int_mode (ml, compute_mode),
4281 NULL_RTX, 1, max_cost - extra_cost);
4282 if (t2 == 0)
4283 goto fail1;
4284 quotient = expand_shift
4285 (RSHIFT_EXPR, compute_mode, t2,
4286 post_shift, tquotient, 1);
4290 else /* Mode too wide to use the tricky code */
4291 break;
4293 insn = get_last_insn ();
4294 if (insn != last)
4295 set_dst_reg_note (insn, REG_EQUAL,
4296 gen_rtx_UDIV (compute_mode, op0, op1),
4297 quotient);
4299 else /* TRUNC_DIV, signed */
4301 unsigned HOST_WIDE_INT ml;
4302 int lgup, post_shift;
4303 rtx mlr;
4304 HOST_WIDE_INT d = INTVAL (op1);
4305 unsigned HOST_WIDE_INT abs_d;
4307 /* Since d might be INT_MIN, we have to cast to
4308 unsigned HOST_WIDE_INT before negating to avoid
4309 undefined signed overflow. */
4310 abs_d = (d >= 0
4311 ? (unsigned HOST_WIDE_INT) d
4312 : - (unsigned HOST_WIDE_INT) d);
4314 /* n rem d = n rem -d */
4315 if (rem_flag && d < 0)
4317 d = abs_d;
4318 op1 = gen_int_mode (abs_d, compute_mode);
4321 if (d == 1)
4322 quotient = op0;
4323 else if (d == -1)
4324 quotient = expand_unop (compute_mode, neg_optab, op0,
4325 tquotient, 0);
4326 else if (HOST_BITS_PER_WIDE_INT >= size
4327 && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4329 /* This case is not handled correctly below. */
4330 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4331 compute_mode, 1, 1);
4332 if (quotient == 0)
4333 goto fail1;
4335 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4336 && (rem_flag
4337 ? smod_pow2_cheap (speed, compute_mode)
4338 : sdiv_pow2_cheap (speed, compute_mode))
4339 /* We assume that the cheap metric is true if the
4340 optab has an expander for this mode. */
4341 && ((optab_handler ((rem_flag ? smod_optab
4342 : sdiv_optab),
4343 compute_mode)
4344 != CODE_FOR_nothing)
4345 || (optab_handler (sdivmod_optab,
4346 compute_mode)
4347 != CODE_FOR_nothing)))
4349 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4351 if (rem_flag)
4353 remainder = expand_smod_pow2 (compute_mode, op0, d);
4354 if (remainder)
4355 return gen_lowpart (mode, remainder);
4358 if (sdiv_pow2_cheap (speed, compute_mode)
4359 && ((optab_handler (sdiv_optab, compute_mode)
4360 != CODE_FOR_nothing)
4361 || (optab_handler (sdivmod_optab, compute_mode)
4362 != CODE_FOR_nothing)))
4363 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4364 compute_mode, op0,
4365 gen_int_mode (abs_d,
4366 compute_mode),
4367 NULL_RTX, 0);
4368 else
4369 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4371 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4372 negate the quotient. */
4373 if (d < 0)
4375 insn = get_last_insn ();
4376 if (insn != last
4377 && abs_d < ((unsigned HOST_WIDE_INT) 1
4378 << (HOST_BITS_PER_WIDE_INT - 1)))
4379 set_dst_reg_note (insn, REG_EQUAL,
4380 gen_rtx_DIV (compute_mode, op0,
4381 gen_int_mode
4382 (abs_d,
4383 compute_mode)),
4384 quotient);
4386 quotient = expand_unop (compute_mode, neg_optab,
4387 quotient, quotient, 0);
4390 else if (size <= HOST_BITS_PER_WIDE_INT)
4392 choose_multiplier (abs_d, size, size - 1,
4393 &ml, &post_shift, &lgup);
4394 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4396 rtx t1, t2, t3;
4398 if (post_shift >= BITS_PER_WORD
4399 || size - 1 >= BITS_PER_WORD)
4400 goto fail1;
4402 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4403 + shift_cost (speed, compute_mode, size - 1)
4404 + add_cost (speed, compute_mode));
4405 t1 = expmed_mult_highpart
4406 (compute_mode, op0, gen_int_mode (ml, compute_mode),
4407 NULL_RTX, 0, max_cost - extra_cost);
4408 if (t1 == 0)
4409 goto fail1;
4410 t2 = expand_shift
4411 (RSHIFT_EXPR, compute_mode, t1,
4412 post_shift, NULL_RTX, 0);
4413 t3 = expand_shift
4414 (RSHIFT_EXPR, compute_mode, op0,
4415 size - 1, NULL_RTX, 0);
4416 if (d < 0)
4417 quotient
4418 = force_operand (gen_rtx_MINUS (compute_mode,
4419 t3, t2),
4420 tquotient);
4421 else
4422 quotient
4423 = force_operand (gen_rtx_MINUS (compute_mode,
4424 t2, t3),
4425 tquotient);
4427 else
4429 rtx t1, t2, t3, t4;
4431 if (post_shift >= BITS_PER_WORD
4432 || size - 1 >= BITS_PER_WORD)
4433 goto fail1;
4435 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4436 mlr = gen_int_mode (ml, compute_mode);
4437 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4438 + shift_cost (speed, compute_mode, size - 1)
4439 + 2 * add_cost (speed, compute_mode));
4440 t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4441 NULL_RTX, 0,
4442 max_cost - extra_cost);
4443 if (t1 == 0)
4444 goto fail1;
4445 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4446 t1, op0),
4447 NULL_RTX);
4448 t3 = expand_shift
4449 (RSHIFT_EXPR, compute_mode, t2,
4450 post_shift, NULL_RTX, 0);
4451 t4 = expand_shift
4452 (RSHIFT_EXPR, compute_mode, op0,
4453 size - 1, NULL_RTX, 0);
4454 if (d < 0)
4455 quotient
4456 = force_operand (gen_rtx_MINUS (compute_mode,
4457 t4, t3),
4458 tquotient);
4459 else
4460 quotient
4461 = force_operand (gen_rtx_MINUS (compute_mode,
4462 t3, t4),
4463 tquotient);
4466 else /* Mode too wide to use the tricky code */
4467 break;
4469 insn = get_last_insn ();
4470 if (insn != last)
4471 set_dst_reg_note (insn, REG_EQUAL,
4472 gen_rtx_DIV (compute_mode, op0, op1),
4473 quotient);
4475 break;
4477 fail1:
4478 delete_insns_since (last);
4479 break;
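/* As an illustration of the signed path above: for division by 7 in a
   32-bit mode, choose_multiplier (7, 32, 31, ...) yields ml == 0x92492493
   and post_shift == 2.  Because ml has its top bit set, the second branch
   sign-extends it and adds OP0 back in; a C sketch (assuming
   two's-complement int32_t and arithmetic right shifts) is

       int32_t m  = (int32_t) 0x92492493u;
       int32_t t1 = (int32_t) (((int64_t) x * m) >> 32);
       int32_t q  = ((t1 + x) >> 2) - (x >> 31);

   which truncates towards zero, e.g. -20 / 7 == -2.  */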
4481 case FLOOR_DIV_EXPR:
4482 case FLOOR_MOD_EXPR:
4483 /* We will come here only for signed operations. */
4484 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4486 unsigned HOST_WIDE_INT mh, ml;
4487 int pre_shift, lgup, post_shift;
4488 HOST_WIDE_INT d = INTVAL (op1);
4490 if (d > 0)
4492 /* We could just as easily deal with negative constants here,
4493 but it does not seem worth the trouble for GCC 2.6. */
4494 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4496 pre_shift = floor_log2 (d);
4497 if (rem_flag)
4499 unsigned HOST_WIDE_INT mask
4500 = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4501 remainder = expand_binop
4502 (compute_mode, and_optab, op0,
4503 gen_int_mode (mask, compute_mode),
4504 remainder, 0, OPTAB_LIB_WIDEN);
4505 if (remainder)
4506 return gen_lowpart (mode, remainder);
4508 quotient = expand_shift
4509 (RSHIFT_EXPR, compute_mode, op0,
4510 pre_shift, tquotient, 0);
4512 else
4514 rtx t1, t2, t3, t4;
4516 mh = choose_multiplier (d, size, size - 1,
4517 &ml, &post_shift, &lgup);
4518 gcc_assert (!mh);
4520 if (post_shift < BITS_PER_WORD
4521 && size - 1 < BITS_PER_WORD)
4523 t1 = expand_shift
4524 (RSHIFT_EXPR, compute_mode, op0,
4525 size - 1, NULL_RTX, 0);
4526 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4527 NULL_RTX, 0, OPTAB_WIDEN);
4528 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4529 + shift_cost (speed, compute_mode, size - 1)
4530 + 2 * add_cost (speed, compute_mode));
4531 t3 = expmed_mult_highpart
4532 (compute_mode, t2, gen_int_mode (ml, compute_mode),
4533 NULL_RTX, 1, max_cost - extra_cost);
4534 if (t3 != 0)
4536 t4 = expand_shift
4537 (RSHIFT_EXPR, compute_mode, t3,
4538 post_shift, NULL_RTX, 1);
4539 quotient = expand_binop (compute_mode, xor_optab,
4540 t4, t1, tquotient, 0,
4541 OPTAB_WIDEN);
4546 else
4548 rtx nsign, t1, t2, t3, t4;
4549 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4550 op0, constm1_rtx), NULL_RTX);
4551 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4552 0, OPTAB_WIDEN);
4553 nsign = expand_shift
4554 (RSHIFT_EXPR, compute_mode, t2,
4555 size - 1, NULL_RTX, 0);
4556 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4557 NULL_RTX);
4558 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4559 NULL_RTX, 0);
4560 if (t4)
4562 rtx t5;
4563 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4564 NULL_RTX, 0);
4565 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4566 t4, t5),
4567 tquotient);
4572 if (quotient != 0)
4573 break;
4574 delete_insns_since (last);
4576 /* Try using an instruction that produces both the quotient and
4577 remainder, using truncation. We can easily compensate the quotient
4578 or remainder to get floor rounding, once we have the remainder.
4579 Notice that we compute also the final remainder value here,
4580 and return the result right away. */
4581 if (target == 0 || GET_MODE (target) != compute_mode)
4582 target = gen_reg_rtx (compute_mode);
4584 if (rem_flag)
4586 remainder
4587 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4588 quotient = gen_reg_rtx (compute_mode);
4590 else
4592 quotient
4593 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4594 remainder = gen_reg_rtx (compute_mode);
4597 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4598 quotient, remainder, 0))
4600 /* This could be computed with a branch-less sequence.
4601 Save that for later. */
4602 rtx tem;
4603 rtx_code_label *label = gen_label_rtx ();
4604 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4605 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4606 NULL_RTX, 0, OPTAB_WIDEN);
4607 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4608 expand_dec (quotient, const1_rtx);
4609 expand_inc (remainder, op1);
4610 emit_label (label);
4611 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4614 /* No luck with division elimination or divmod. Have to do it
4615 by conditionally adjusting op0 *and* the result. */
4617 rtx_code_label *label1, *label2, *label3, *label4, *label5;
4618 rtx adjusted_op0;
4619 rtx tem;
4621 quotient = gen_reg_rtx (compute_mode);
4622 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4623 label1 = gen_label_rtx ();
4624 label2 = gen_label_rtx ();
4625 label3 = gen_label_rtx ();
4626 label4 = gen_label_rtx ();
4627 label5 = gen_label_rtx ();
4628 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4629 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4630 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4631 quotient, 0, OPTAB_LIB_WIDEN);
4632 if (tem != quotient)
4633 emit_move_insn (quotient, tem);
4634 emit_jump_insn (gen_jump (label5));
4635 emit_barrier ();
4636 emit_label (label1);
4637 expand_inc (adjusted_op0, const1_rtx);
4638 emit_jump_insn (gen_jump (label4));
4639 emit_barrier ();
4640 emit_label (label2);
4641 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4642 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4643 quotient, 0, OPTAB_LIB_WIDEN);
4644 if (tem != quotient)
4645 emit_move_insn (quotient, tem);
4646 emit_jump_insn (gen_jump (label5));
4647 emit_barrier ();
4648 emit_label (label3);
4649 expand_dec (adjusted_op0, const1_rtx);
4650 emit_label (label4);
4651 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4652 quotient, 0, OPTAB_LIB_WIDEN);
4653 if (tem != quotient)
4654 emit_move_insn (quotient, tem);
4655 expand_dec (quotient, const1_rtx);
4656 emit_label (label5);
4658 break;
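/* The adjustment used throughout this case: floor rounding differs from
   truncation only when the remainder is nonzero and the operands have
   opposite signs; then the quotient is one smaller and the remainder is
   shifted by the divisor.  As a C sketch:

       q = a / b;  r = a % b;
       if (r != 0 && (a ^ b) < 0)
         q -= 1, r += b;

   e.g. -7 / 2 truncates to -3 remainder -1, and floor rounding turns that
   into -4 remainder 1.  */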
4660 case CEIL_DIV_EXPR:
4661 case CEIL_MOD_EXPR:
4662 if (unsignedp)
4664 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4666 rtx t1, t2, t3;
4667 unsigned HOST_WIDE_INT d = INTVAL (op1);
4668 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4669 floor_log2 (d), tquotient, 1);
4670 t2 = expand_binop (compute_mode, and_optab, op0,
4671 gen_int_mode (d - 1, compute_mode),
4672 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4673 t3 = gen_reg_rtx (compute_mode);
4674 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4675 compute_mode, 1, 1);
4676 if (t3 == 0)
4678 rtx_code_label *lab;
4679 lab = gen_label_rtx ();
4680 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4681 expand_inc (t1, const1_rtx);
4682 emit_label (lab);
4683 quotient = t1;
4685 else
4686 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4687 t1, t3),
4688 tquotient);
4689 break;
4692 /* Try using an instruction that produces both the quotient and
4693 remainder, using truncation. We can easily compensate the
4694 quotient or remainder to get ceiling rounding, once we have the
4695 remainder. Notice that we compute also the final remainder
4696 value here, and return the result right away. */
4697 if (target == 0 || GET_MODE (target) != compute_mode)
4698 target = gen_reg_rtx (compute_mode);
4700 if (rem_flag)
4702 remainder = (REG_P (target)
4703 ? target : gen_reg_rtx (compute_mode));
4704 quotient = gen_reg_rtx (compute_mode);
4706 else
4708 quotient = (REG_P (target)
4709 ? target : gen_reg_rtx (compute_mode));
4710 remainder = gen_reg_rtx (compute_mode);
4713 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4714 remainder, 1))
4716 /* This could be computed with a branch-less sequence.
4717 Save that for later. */
4718 rtx_code_label *label = gen_label_rtx ();
4719 do_cmp_and_jump (remainder, const0_rtx, EQ,
4720 compute_mode, label);
4721 expand_inc (quotient, const1_rtx);
4722 expand_dec (remainder, op1);
4723 emit_label (label);
4724 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4727 /* No luck with division elimination or divmod. Have to do it
4728 by conditionally adjusting op0 *and* the result. */
4730 rtx_code_label *label1, *label2;
4731 rtx adjusted_op0, tem;
4733 quotient = gen_reg_rtx (compute_mode);
4734 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4735 label1 = gen_label_rtx ();
4736 label2 = gen_label_rtx ();
4737 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4738 compute_mode, label1);
4739 emit_move_insn (quotient, const0_rtx);
4740 emit_jump_insn (gen_jump (label2));
4741 emit_barrier ();
4742 emit_label (label1);
4743 expand_dec (adjusted_op0, const1_rtx);
4744 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4745 quotient, 1, OPTAB_LIB_WIDEN);
4746 if (tem != quotient)
4747 emit_move_insn (quotient, tem);
4748 expand_inc (quotient, const1_rtx);
4749 emit_label (label2);
4752 else /* signed */
4754 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4755 && INTVAL (op1) >= 0)
4757 /* This is extremely similar to the code for the unsigned case
4758 above. For 2.7 we should merge these variants, but for
4759 2.6.1 I don't want to touch the code for unsigned since that
4761 gets used in C. The signed case will only be used by other
4761 languages (Ada). */
4763 rtx t1, t2, t3;
4764 unsigned HOST_WIDE_INT d = INTVAL (op1);
4765 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4766 floor_log2 (d), tquotient, 0);
4767 t2 = expand_binop (compute_mode, and_optab, op0,
4768 gen_int_mode (d - 1, compute_mode),
4769 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4770 t3 = gen_reg_rtx (compute_mode);
4771 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4772 compute_mode, 1, 1);
4773 if (t3 == 0)
4775 rtx_code_label *lab;
4776 lab = gen_label_rtx ();
4777 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4778 expand_inc (t1, const1_rtx);
4779 emit_label (lab);
4780 quotient = t1;
4782 else
4783 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4784 t1, t3),
4785 tquotient);
4786 break;
4789 /* Try using an instruction that produces both the quotient and
4790 remainder, using truncation. We can easily compensate the
4791 quotient or remainder to get ceiling rounding, once we have the
4792 remainder. Notice that we compute also the final remainder
4793 value here, and return the result right away. */
4794 if (target == 0 || GET_MODE (target) != compute_mode)
4795 target = gen_reg_rtx (compute_mode);
4796 if (rem_flag)
4798 remainder = (REG_P (target)
4799 ? target : gen_reg_rtx (compute_mode));
4800 quotient = gen_reg_rtx (compute_mode);
4802 else
4804 quotient = (REG_P (target)
4805 ? target : gen_reg_rtx (compute_mode));
4806 remainder = gen_reg_rtx (compute_mode);
4809 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4810 remainder, 0))
4812 /* This could be computed with a branch-less sequence.
4813 Save that for later. */
4814 rtx tem;
4815 rtx_code_label *label = gen_label_rtx ();
4816 do_cmp_and_jump (remainder, const0_rtx, EQ,
4817 compute_mode, label);
4818 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4819 NULL_RTX, 0, OPTAB_WIDEN);
4820 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4821 expand_inc (quotient, const1_rtx);
4822 expand_dec (remainder, op1);
4823 emit_label (label);
4824 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4827 /* No luck with division elimination or divmod. Have to do it
4828 by conditionally adjusting op0 *and* the result. */
4830 rtx_code_label *label1, *label2, *label3, *label4, *label5;
4831 rtx adjusted_op0;
4832 rtx tem;
4834 quotient = gen_reg_rtx (compute_mode);
4835 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4836 label1 = gen_label_rtx ();
4837 label2 = gen_label_rtx ();
4838 label3 = gen_label_rtx ();
4839 label4 = gen_label_rtx ();
4840 label5 = gen_label_rtx ();
4841 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4842 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4843 compute_mode, label1);
4844 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4845 quotient, 0, OPTAB_LIB_WIDEN);
4846 if (tem != quotient)
4847 emit_move_insn (quotient, tem);
4848 emit_jump_insn (gen_jump (label5));
4849 emit_barrier ();
4850 emit_label (label1);
4851 expand_dec (adjusted_op0, const1_rtx);
4852 emit_jump_insn (gen_jump (label4));
4853 emit_barrier ();
4854 emit_label (label2);
4855 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4856 compute_mode, label3);
4857 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4858 quotient, 0, OPTAB_LIB_WIDEN);
4859 if (tem != quotient)
4860 emit_move_insn (quotient, tem);
4861 emit_jump_insn (gen_jump (label5));
4862 emit_barrier ();
4863 emit_label (label3);
4864 expand_inc (adjusted_op0, const1_rtx);
4865 emit_label (label4);
4866 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4867 quotient, 0, OPTAB_LIB_WIDEN);
4868 if (tem != quotient)
4869 emit_move_insn (quotient, tem);
4870 expand_inc (quotient, const1_rtx);
4871 emit_label (label5);
4874 break;
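/* Ceiling rounding is the mirror image: the quotient grows by one whenever
   a nonzero remainder would otherwise be dropped.  For the unsigned
   power-of-two path above that is simply

       q = (x >> logd) + ((x & (d - 1)) != 0);

   and after a truncating divmod the generic fixup is q += 1, r -= d
   whenever r != 0 (in the signed case only when the operands have the same
   sign, which is what the XOR test above checks).  Sketches only.  */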
4876 case EXACT_DIV_EXPR:
4877 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4879 HOST_WIDE_INT d = INTVAL (op1);
4880 unsigned HOST_WIDE_INT ml;
4881 int pre_shift;
4882 rtx t1;
4884 pre_shift = floor_log2 (d & -d);
4885 ml = invert_mod2n (d >> pre_shift, size);
4886 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4887 pre_shift, NULL_RTX, unsignedp);
4888 quotient = expand_mult (compute_mode, t1,
4889 gen_int_mode (ml, compute_mode),
4890 NULL_RTX, 1);
4892 insn = get_last_insn ();
4893 set_dst_reg_note (insn, REG_EQUAL,
4894 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4895 compute_mode, op0, op1),
4896 quotient);
4898 break;
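/* Exact division needs no high-part multiply at all: for an odd divisor,
   the low product with its inverse mod 2^N already is the quotient.  For
   example 7 * 0xB6DB6DB7 == 5 * 2^32 + 1, so in 32-bit arithmetic

       q = x * 0xB6DB6DB7u;

   gives x / 7 whenever x is a multiple of 7 (e.g. 21 * 0xB6DB6DB7 == 3
   mod 2^32).  An even divisor such as 28 is handled by the pre_shift
   above: shift right by 2 first, then multiply by the inverse of 7.  */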
4900 case ROUND_DIV_EXPR:
4901 case ROUND_MOD_EXPR:
4902 if (unsignedp)
4904 rtx tem;
4905 rtx_code_label *label;
4906 label = gen_label_rtx ();
4907 quotient = gen_reg_rtx (compute_mode);
4908 remainder = gen_reg_rtx (compute_mode);
4909 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4911 rtx tem;
4912 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4913 quotient, 1, OPTAB_LIB_WIDEN);
4914 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4915 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4916 remainder, 1, OPTAB_LIB_WIDEN);
4918 tem = plus_constant (compute_mode, op1, -1);
4919 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4920 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4921 expand_inc (quotient, const1_rtx);
4922 expand_dec (remainder, op1);
4923 emit_label (label);
4925 else
4927 rtx abs_rem, abs_op1, tem, mask;
4928 rtx_code_label *label;
4929 label = gen_label_rtx ();
4930 quotient = gen_reg_rtx (compute_mode);
4931 remainder = gen_reg_rtx (compute_mode);
4932 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4934 rtx tem;
4935 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4936 quotient, 0, OPTAB_LIB_WIDEN);
4937 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4938 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4939 remainder, 0, OPTAB_LIB_WIDEN);
4941 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4942 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4943 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4944 1, NULL_RTX, 1);
4945 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4946 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4947 NULL_RTX, 0, OPTAB_WIDEN);
4948 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4949 size - 1, NULL_RTX, 0);
4950 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4951 NULL_RTX, 0, OPTAB_WIDEN);
4952 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4953 NULL_RTX, 0, OPTAB_WIDEN);
4954 expand_inc (quotient, tem);
4955 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4956 NULL_RTX, 0, OPTAB_WIDEN);
4957 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4958 NULL_RTX, 0, OPTAB_WIDEN);
4959 expand_dec (remainder, tem);
4960 emit_label (label);
4962 return gen_lowpart (mode, rem_flag ? remainder : quotient);
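/* The unsigned variant above rounds to nearest by comparing the remainder
   against (op1 - 1) >> 1: e.g. 10 ROUND_DIV 4 first gives q == 2, r == 2,
   and since 2 > 1 it is adjusted to q == 3, r == -2, i.e. ties are rounded
   away from zero.  The signed variant makes the same decision on absolute
   values (2 * |r| >= |op1|) and then applies the sign of op0 ^ op1 through
   the mask sequence above.  */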
4964 default:
4965 gcc_unreachable ();
4968 if (quotient == 0)
4970 if (target && GET_MODE (target) != compute_mode)
4971 target = 0;
4973 if (rem_flag)
4975 /* Try to produce the remainder without producing the quotient.
4976 If we seem to have a divmod pattern that does not require widening,
4977 don't try widening here. We should really have a WIDEN argument
4978 to expand_twoval_binop, since what we'd really like to do here is
4979 1) try a mod insn in compute_mode
4980 2) try a divmod insn in compute_mode
4981 3) try a div insn in compute_mode and multiply-subtract to get
4982 remainder
4983 4) try the same things with widening allowed. */
4984 remainder
4985 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4986 op0, op1, target,
4987 unsignedp,
4988 ((optab_handler (optab2, compute_mode)
4989 != CODE_FOR_nothing)
4990 ? OPTAB_DIRECT : OPTAB_WIDEN));
4991 if (remainder == 0)
4993 /* No luck there. Can we do remainder and divide at once
4994 without a library call? */
4995 remainder = gen_reg_rtx (compute_mode);
4996 if (! expand_twoval_binop ((unsignedp
4997 ? udivmod_optab
4998 : sdivmod_optab),
4999 op0, op1,
5000 NULL_RTX, remainder, unsignedp))
5001 remainder = 0;
5004 if (remainder)
5005 return gen_lowpart (mode, remainder);
5008 /* Produce the quotient. Try a quotient insn, but not a library call.
5009 If we have a divmod in this mode, use it in preference to widening
5010 the div (for this test we assume it will not fail). Note that optab2
5011 is set to whichever of the two optabs the call below will use. */
5012 quotient
5013 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5014 op0, op1, rem_flag ? NULL_RTX : target,
5015 unsignedp,
5016 ((optab_handler (optab2, compute_mode)
5017 != CODE_FOR_nothing)
5018 ? OPTAB_DIRECT : OPTAB_WIDEN));
5020 if (quotient == 0)
5022 /* No luck there. Try a quotient-and-remainder insn,
5023 keeping the quotient alone. */
5024 quotient = gen_reg_rtx (compute_mode);
5025 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5026 op0, op1,
5027 quotient, NULL_RTX, unsignedp))
5029 quotient = 0;
5030 if (! rem_flag)
5031 /* Still no luck. If we are not computing the remainder,
5032 use a library call for the quotient. */
5033 quotient = sign_expand_binop (compute_mode,
5034 udiv_optab, sdiv_optab,
5035 op0, op1, target,
5036 unsignedp, OPTAB_LIB_WIDEN);
5041 if (rem_flag)
5043 if (target && GET_MODE (target) != compute_mode)
5044 target = 0;
5046 if (quotient == 0)
5048 /* No divide instruction either. Use library for remainder. */
5049 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5050 op0, op1, target,
5051 unsignedp, OPTAB_LIB_WIDEN);
5052 /* No remainder function. Try a quotient-and-remainder
5053 function, keeping the remainder. */
5054 if (!remainder)
5056 remainder = gen_reg_rtx (compute_mode);
5057 if (!expand_twoval_binop_libfunc
5058 (unsignedp ? udivmod_optab : sdivmod_optab,
5059 op0, op1,
5060 NULL_RTX, remainder,
5061 unsignedp ? UMOD : MOD))
5062 remainder = NULL_RTX;
5065 else
5067 /* We divided. Now finish doing X - Y * (X / Y). */
5068 remainder = expand_mult (compute_mode, quotient, op1,
5069 NULL_RTX, unsignedp);
5070 remainder = expand_binop (compute_mode, sub_optab, op0,
5071 remainder, target, unsignedp,
5072 OPTAB_LIB_WIDEN);
5076 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5079 /* Return a tree node with data type TYPE, describing the value of X.
5080 Usually this is a VAR_DECL, if there is no obvious better choice.
5081 X may be an expression; however, we only support those expressions
5082 generated by loop.c. */
5084 tree
5085 make_tree (tree type, rtx x)
5087 tree t;
5089 switch (GET_CODE (x))
5091 case CONST_INT:
5092 case CONST_WIDE_INT:
5093 t = wide_int_to_tree (type, std::make_pair (x, TYPE_MODE (type)));
5094 return t;
5096 case CONST_DOUBLE:
5097 STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5098 if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5099 t = wide_int_to_tree (type,
5100 wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5101 HOST_BITS_PER_WIDE_INT * 2));
5102 else
5104 REAL_VALUE_TYPE d;
5106 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
5107 t = build_real (type, d);
5110 return t;
5112 case CONST_VECTOR:
5114 int units = CONST_VECTOR_NUNITS (x);
5115 tree itype = TREE_TYPE (type);
5116 tree *elts;
5117 int i;
5119 /* Build a tree with vector elements. */
5120 elts = XALLOCAVEC (tree, units);
5121 for (i = units - 1; i >= 0; --i)
5123 rtx elt = CONST_VECTOR_ELT (x, i);
5124 elts[i] = make_tree (itype, elt);
5127 return build_vector (type, elts);
5130 case PLUS:
5131 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5132 make_tree (type, XEXP (x, 1)));
5134 case MINUS:
5135 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5136 make_tree (type, XEXP (x, 1)));
5138 case NEG:
5139 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5141 case MULT:
5142 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5143 make_tree (type, XEXP (x, 1)));
5145 case ASHIFT:
5146 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5147 make_tree (type, XEXP (x, 1)));
5149 case LSHIFTRT:
5150 t = unsigned_type_for (type);
5151 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5152 make_tree (t, XEXP (x, 0)),
5153 make_tree (type, XEXP (x, 1))));
5155 case ASHIFTRT:
5156 t = signed_type_for (type);
5157 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5158 make_tree (t, XEXP (x, 0)),
5159 make_tree (type, XEXP (x, 1))));
5161 case DIV:
5162 if (TREE_CODE (type) != REAL_TYPE)
5163 t = signed_type_for (type);
5164 else
5165 t = type;
5167 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5168 make_tree (t, XEXP (x, 0)),
5169 make_tree (t, XEXP (x, 1))));
5170 case UDIV:
5171 t = unsigned_type_for (type);
5172 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5173 make_tree (t, XEXP (x, 0)),
5174 make_tree (t, XEXP (x, 1))));
5176 case SIGN_EXTEND:
5177 case ZERO_EXTEND:
5178 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5179 GET_CODE (x) == ZERO_EXTEND);
5180 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5182 case CONST:
5183 return make_tree (type, XEXP (x, 0));
5185 case SYMBOL_REF:
5186 t = SYMBOL_REF_DECL (x);
5187 if (t)
5188 return fold_convert (type, build_fold_addr_expr (t));
5189 /* else fall through. */
5191 default:
5192 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5194 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5195 address mode to pointer mode. */
5196 if (POINTER_TYPE_P (type))
5197 x = convert_memory_address_addr_space
5198 (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5200 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5201 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5202 t->decl_with_rtl.rtl = x;
5204 return t;
5208 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5209 and returning TARGET.
5211 If TARGET is 0, a pseudo-register or constant is returned. */
5214 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5216 rtx tem = 0;
5218 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5219 tem = simplify_binary_operation (AND, mode, op0, op1);
5220 if (tem == 0)
5221 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5223 if (target == 0)
5224 target = tem;
5225 else if (tem != target)
5226 emit_move_insn (target, tem);
5227 return target;
5230 /* Helper function for emit_store_flag. */
5232 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5233 machine_mode mode, machine_mode compare_mode,
5234 int unsignedp, rtx x, rtx y, int normalizep,
5235 machine_mode target_mode)
5237 struct expand_operand ops[4];
5238 rtx op0, comparison, subtarget;
5239 rtx_insn *last;
5240 machine_mode result_mode = targetm.cstore_mode (icode);
5242 last = get_last_insn ();
5243 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5244 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5245 if (!x || !y)
5247 delete_insns_since (last);
5248 return NULL_RTX;
5251 if (target_mode == VOIDmode)
5252 target_mode = result_mode;
5253 if (!target)
5254 target = gen_reg_rtx (target_mode);
5256 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5258 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5259 create_fixed_operand (&ops[1], comparison);
5260 create_fixed_operand (&ops[2], x);
5261 create_fixed_operand (&ops[3], y);
5262 if (!maybe_expand_insn (icode, 4, ops))
5264 delete_insns_since (last);
5265 return NULL_RTX;
5267 subtarget = ops[0].value;
5269 /* If we are converting to a wider mode, first convert to
5270 TARGET_MODE, then normalize. This produces better combining
5271 opportunities on machines that have a SIGN_EXTRACT when we are
5272 testing a single bit. This mostly benefits the 68k.
5274 If STORE_FLAG_VALUE does not have the sign bit set when
5275 interpreted in MODE, we can do this conversion as unsigned, which
5276 is usually more efficient. */
5277 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5279 convert_move (target, subtarget,
5280 val_signbit_known_clear_p (result_mode,
5281 STORE_FLAG_VALUE));
5282 op0 = target;
5283 result_mode = target_mode;
5285 else
5286 op0 = subtarget;
5288 /* If we want to keep subexpressions around, don't reuse our last
5289 target. */
5290 if (optimize)
5291 subtarget = 0;
5293 /* Now normalize to the proper value in MODE. Sometimes we don't
5294 have to do anything. */
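/* Illustration, assuming a hypothetical target whose scc pattern produces
   STORE_FLAG_VALUE == -1: a request for NORMALIZEP == 1 takes the negation
   branch below (-NORMALIZEP == STORE_FLAG_VALUE), mapping the raw -1/0
   result to 1/0.  If STORE_FLAG_VALUE were instead just the sign bit
   (e.g. 0x80000000 in SImode), the shift branch moves that bit down to
   bit 0, giving 0/1.  */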
5295 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5297 /* STORE_FLAG_VALUE might be the most negative number, so write
5298 the comparison this way to avoid a compile-time warning. */
5299 else if (- normalizep == STORE_FLAG_VALUE)
5300 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5302 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5303 it hard to use a value of just the sign bit due to ANSI integer
5304 constant typing rules. */
5305 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5306 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5307 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5308 normalizep == 1);
5309 else
5311 gcc_assert (STORE_FLAG_VALUE & 1);
5313 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5314 if (normalizep == -1)
5315 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5318 /* If we were converting to a smaller mode, do the conversion now. */
5319 if (target_mode != result_mode)
5321 convert_move (target, op0, 0);
5322 return target;
5324 else
5325 return op0;
5329 /* A subroutine of emit_store_flag only including "tricks" that do not
5330 need a recursive call. These are kept separate to avoid infinite
5331 loops. */
5333 static rtx
5334 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5335 machine_mode mode, int unsignedp, int normalizep,
5336 machine_mode target_mode)
5338 rtx subtarget;
5339 enum insn_code icode;
5340 machine_mode compare_mode;
5341 enum mode_class mclass;
5342 enum rtx_code scode;
5343 rtx tem;
5345 if (unsignedp)
5346 code = unsigned_condition (code);
5347 scode = swap_condition (code);
5349 /* If one operand is constant, make it the second one. Only do this
5350 if the other operand is not constant as well. */
5352 if (swap_commutative_operands_p (op0, op1))
5354 tem = op0;
5355 op0 = op1;
5356 op1 = tem;
5357 code = swap_condition (code);
5360 if (mode == VOIDmode)
5361 mode = GET_MODE (op0);
5363 /* For some comparisons with 1 and -1, we can convert this to
5364 comparisons with zero. This will often produce more opportunities for
5365 store-flag insns. */
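/* Examples of the rewrites below: "x < 1" becomes "x <= 0" and "x > -1"
   becomes "x >= 0"; for unsigned operands, "x >= 1" becomes "x != 0" and
   "x < 1" becomes "x == 0", since no unsigned value is below zero.  */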
5367 switch (code)
5369 case LT:
5370 if (op1 == const1_rtx)
5371 op1 = const0_rtx, code = LE;
5372 break;
5373 case LE:
5374 if (op1 == constm1_rtx)
5375 op1 = const0_rtx, code = LT;
5376 break;
5377 case GE:
5378 if (op1 == const1_rtx)
5379 op1 = const0_rtx, code = GT;
5380 break;
5381 case GT:
5382 if (op1 == constm1_rtx)
5383 op1 = const0_rtx, code = GE;
5384 break;
5385 case GEU:
5386 if (op1 == const1_rtx)
5387 op1 = const0_rtx, code = NE;
5388 break;
5389 case LTU:
5390 if (op1 == const1_rtx)
5391 op1 = const0_rtx, code = EQ;
5392 break;
5393 default:
5394 break;
5397 /* If we are comparing a double-word integer with zero or -1, we can
5398 convert the comparison into one involving a single word. */
5399 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5400 && GET_MODE_CLASS (mode) == MODE_INT
5401 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5403 if ((code == EQ || code == NE)
5404 && (op1 == const0_rtx || op1 == constm1_rtx))
5406 rtx op00, op01;
5408 /* Do a logical OR or AND of the two words and compare the
5409 result. */
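/* E.g. with a DImode X on a 32-bit-word target, "X == 0" holds exactly when
   (low_word | high_word) == 0, and "X == -1" exactly when
   (low_word & high_word) == -1, so a single word_mode store-flag suffices.  */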
5410 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5411 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5412 tem = expand_binop (word_mode,
5413 op1 == const0_rtx ? ior_optab : and_optab,
5414 op00, op01, NULL_RTX, unsignedp,
5415 OPTAB_DIRECT);
5417 if (tem != 0)
5418 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5419 unsignedp, normalizep);
5421 else if ((code == LT || code == GE) && op1 == const0_rtx)
5423 rtx op0h;
5425 /* If testing the sign bit, can just test on high word. */
5426 op0h = simplify_gen_subreg (word_mode, op0, mode,
5427 subreg_highpart_offset (word_mode,
5428 mode));
5429 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5430 unsignedp, normalizep);
5432 else
5433 tem = NULL_RTX;
5435 if (tem)
5437 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5438 return tem;
5439 if (!target)
5440 target = gen_reg_rtx (target_mode);
5442 convert_move (target, tem,
5443 !val_signbit_known_set_p (word_mode,
5444 (normalizep ? normalizep
5445 : STORE_FLAG_VALUE)));
5446 return target;
5450 /* If this is A < 0 or A >= 0, we can do this by taking the one's
5451 complement of A (for GE) and shifting the sign bit to the low bit. */
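/* Worked example in an 8-bit mode: for A < 0 with A = -3 (0xfd), a logical
   right shift of A by 7 yields 1, and A = 5 yields 0.  For A >= 0 the one's
   complement is shifted instead: ~5 = 0xfa yields 1, ~(-3) = 0x02 yields 0.  */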
5452 if (op1 == const0_rtx && (code == LT || code == GE)
5453 && GET_MODE_CLASS (mode) == MODE_INT
5454 && (normalizep || STORE_FLAG_VALUE == 1
5455 || val_signbit_p (mode, STORE_FLAG_VALUE)))
5457 subtarget = target;
5459 if (!target)
5460 target_mode = mode;
5462 /* If the result is to be wider than OP0, it is best to convert it
5463 first. If it is to be narrower, it is *incorrect* to convert it
5464 first. */
5465 else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5467 op0 = convert_modes (target_mode, mode, op0, 0);
5468 mode = target_mode;
5471 if (target_mode != mode)
5472 subtarget = 0;
5474 if (code == GE)
5475 op0 = expand_unop (mode, one_cmpl_optab, op0,
5476 ((STORE_FLAG_VALUE == 1 || normalizep)
5477 ? 0 : subtarget), 0);
5479 if (STORE_FLAG_VALUE == 1 || normalizep)
5480 /* If we are supposed to produce a 0/1 value, we want to do
5481 a logical shift from the sign bit to the low-order bit; for
5482 a -1/0 value, we do an arithmetic shift. */
5483 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5484 GET_MODE_BITSIZE (mode) - 1,
5485 subtarget, normalizep != -1);
5487 if (mode != target_mode)
5488 op0 = convert_modes (target_mode, mode, op0, 0);
5490 return op0;
5493 mclass = GET_MODE_CLASS (mode);
5494 for (compare_mode = mode; compare_mode != VOIDmode;
5495 compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5497 machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5498 icode = optab_handler (cstore_optab, optab_mode);
5499 if (icode != CODE_FOR_nothing)
5501 do_pending_stack_adjust ();
5502 tem = emit_cstore (target, icode, code, mode, compare_mode,
5503 unsignedp, op0, op1, normalizep, target_mode);
5504 if (tem)
5505 return tem;
5507 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5509 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5510 unsignedp, op1, op0, normalizep, target_mode);
5511 if (tem)
5512 return tem;
5514 break;
5518 return 0;
5521 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5522 and storing in TARGET. Normally return TARGET.
5523 Return 0 if that cannot be done.
5525 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5526 it is VOIDmode, they cannot both be CONST_INT.
5528 UNSIGNEDP is for the case where we have to widen the operands
5529 to perform the operation. It says to use zero-extension.
5531 NORMALIZEP is 1 if we should convert the result to be either zero
5532 or one. NORMALIZEP is -1 if we should convert the result to be
5533 either zero or -1. If NORMALIZEP is zero, the result will be left
5534 "raw" out of the scc insn. */
5536 rtx
5537 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5538 machine_mode mode, int unsignedp, int normalizep)
5540 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5541 enum rtx_code rcode;
5542 rtx subtarget;
5543 rtx tem, trueval;
5544 rtx_insn *last;
5546 /* If we compare constants, we shouldn't use a store-flag operation,
5547 but a constant load. We can get there via the vanilla route that
5548 usually generates a compare-branch sequence, but will in this case
5549 fold the comparison to a constant, and thus elide the branch. */
5550 if (CONSTANT_P (op0) && CONSTANT_P (op1))
5551 return NULL_RTX;
5553 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5554 target_mode);
5555 if (tem)
5556 return tem;
5558 /* If we reached here, we can't do this with a scc insn, however there
5559 are some comparisons that can be done in other ways. Don't do any
5560 of these cases if branches are very cheap. */
5561 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5562 return 0;
5564 /* See what we need to return. We can only return a 1, -1, or the
5565 sign bit. */
5567 if (normalizep == 0)
5569 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5570 normalizep = STORE_FLAG_VALUE;
5572 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5574 else
5575 return 0;
5578 last = get_last_insn ();
5580 /* If optimizing, use different pseudo registers for each insn, instead
5581 of reusing the same pseudo. This leads to better CSE, but slows
5582 down the compiler, since there are more pseudos. */
5583 subtarget = (!optimize
5584 && (target_mode == mode)) ? target : NULL_RTX;
5585 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5587 /* For floating-point comparisons, try the reverse comparison or try
5588 changing the "orderedness" of the comparison. */
5589 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5591 enum rtx_code first_code;
5592 bool and_them;
5594 rcode = reverse_condition_maybe_unordered (code);
5595 if (can_compare_p (rcode, mode, ccp_store_flag)
5596 && (code == ORDERED || code == UNORDERED
5597 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5598 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5600 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5601 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5603 /* For the reverse comparison, use either an addition or a XOR. */
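/* Illustration, assuming STORE_FLAG_VALUE == 1 and NORMALIZEP == -1: the
   reverse comparison yields 1 when the original comparison is false, so
   adding -1 maps that 1/0 result to the wanted 0/-1.  In the XOR case the
   reverse comparison is normalized to the true value itself, so XORing with
   it yields 0 when the original is false and the true value when it holds.  */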
5604 if (want_add
5605 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5606 optimize_insn_for_speed_p ()) == 0)
5608 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5609 STORE_FLAG_VALUE, target_mode);
5610 if (tem)
5611 return expand_binop (target_mode, add_optab, tem,
5612 gen_int_mode (normalizep, target_mode),
5613 target, 0, OPTAB_WIDEN);
5615 else if (!want_add
5616 && rtx_cost (trueval, XOR, 1,
5617 optimize_insn_for_speed_p ()) == 0)
5619 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5620 normalizep, target_mode);
5621 if (tem)
5622 return expand_binop (target_mode, xor_optab, tem, trueval,
5623 target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5627 delete_insns_since (last);
5629 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5630 if (code == ORDERED || code == UNORDERED)
5631 return 0;
5633 and_them = split_comparison (code, mode, &first_code, &code);
5635 /* If there are no NaNs, the first comparison should always fall through.
5636 Effectively change the comparison to the other one. */
5637 if (!HONOR_NANS (mode))
5639 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5640 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5641 target_mode);
5644 #ifdef HAVE_conditional_move
5645 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5646 conditional move. */
5647 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5648 normalizep, target_mode);
5649 if (tem == 0)
5650 return 0;
5652 if (and_them)
5653 tem = emit_conditional_move (target, code, op0, op1, mode,
5654 tem, const0_rtx, GET_MODE (tem), 0);
5655 else
5656 tem = emit_conditional_move (target, code, op0, op1, mode,
5657 trueval, tem, GET_MODE (tem), 0);
5659 if (tem == 0)
5660 delete_insns_since (last);
5661 return tem;
5662 #else
5663 return 0;
5664 #endif
5667 /* The remaining tricks only apply to integer comparisons. */
5669 if (GET_MODE_CLASS (mode) != MODE_INT)
5670 return 0;
5672 /* If this is an equality comparison of integers, we can try to exclusive-or
5673 (or subtract) the two operands and use a recursive call to try the
5674 comparison with zero. Don't do any of these cases if branches are
5675 very cheap. */
5677 if ((code == EQ || code == NE) && op1 != const0_rtx)
5679 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5680 OPTAB_WIDEN);
5682 if (tem == 0)
5683 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5684 OPTAB_WIDEN);
5685 if (tem != 0)
5686 tem = emit_store_flag (target, code, tem, const0_rtx,
5687 mode, unsignedp, normalizep);
5688 if (tem != 0)
5689 return tem;
5691 delete_insns_since (last);
5694 /* For integer comparisons, try the reverse comparison. However, for
5695 small X, if we would have to extend it anyway, implementing "X != 0"
5696 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5697 rcode = reverse_condition (code);
5698 if (can_compare_p (rcode, mode, ccp_store_flag)
5699 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5700 && code == NE
5701 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5702 && op1 == const0_rtx))
5704 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5705 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5707 /* Again, for the reverse comparison, use either an addition or a XOR. */
5708 if (want_add
5709 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5710 optimize_insn_for_speed_p ()) == 0)
5712 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5713 STORE_FLAG_VALUE, target_mode);
5714 if (tem != 0)
5715 tem = expand_binop (target_mode, add_optab, tem,
5716 gen_int_mode (normalizep, target_mode),
5717 target, 0, OPTAB_WIDEN);
5719 else if (!want_add
5720 && rtx_cost (trueval, XOR, 1,
5721 optimize_insn_for_speed_p ()) == 0)
5723 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5724 normalizep, target_mode);
5725 if (tem != 0)
5726 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5727 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5730 if (tem != 0)
5731 return tem;
5732 delete_insns_since (last);
5735 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5736 the constant zero. Reject all other comparisons at this point. Only
5737 do LE and GT if branches are expensive since they are expensive on
5738 2-operand machines. */
5740 if (op1 != const0_rtx
5741 || (code != EQ && code != NE
5742 && (BRANCH_COST (optimize_insn_for_speed_p (),
5743 false) <= 1 || (code != LE && code != GT))))
5744 return 0;
5746 /* Try to put the result of the comparison in the sign bit. Assume we can't
5747 do the necessary operation below. */
5749 tem = 0;
5751 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5752 the sign bit set. */
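/* E.g. in an 8-bit mode: A = 3 gives 3 | 2 = 3, sign bit clear, so A <= 0 is
   false; A = 0 gives 0 | -1 = -1 and A = -4 gives -4 | -5 = -1, both with the
   sign bit set, so A <= 0 holds.  */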
5754 if (code == LE)
5756 /* This is destructive, so SUBTARGET can't be OP0. */
5757 if (rtx_equal_p (subtarget, op0))
5758 subtarget = 0;
5760 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5761 OPTAB_WIDEN);
5762 if (tem)
5763 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5764 OPTAB_WIDEN);
5767 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5768 number of bits in the mode of OP0, minus one. */
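/* E.g. in an 8-bit mode BITS is 7: A = 5 gives (5 >> 7) - 5 = -5, sign bit
   set, so A > 0 holds; A = 0 gives 0 and A = -3 gives -1 - (-3) = 2, sign bit
   clear, so A > 0 is false.  The shift is arithmetic, so -3 >> 7 is -1.  */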
5770 if (code == GT)
5772 if (rtx_equal_p (subtarget, op0))
5773 subtarget = 0;
5775 tem = expand_shift (RSHIFT_EXPR, mode, op0,
5776 GET_MODE_BITSIZE (mode) - 1,
5777 subtarget, 0);
5778 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5779 OPTAB_WIDEN);
5782 if (code == EQ || code == NE)
5784 /* For EQ or NE, one way to do the comparison is to apply an operation
5785 that converts the operand into a positive number if it is nonzero
5786 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5787 for NE we negate. This puts the result in the sign bit. Then we
5788 normalize with a shift, if needed.
5790 Two operations that can do the above actions are ABS and FFS, so try
5791 them. If that doesn't work, and MODE is smaller than a full word,
5792 we can use zero-extension to the wider mode (an unsigned conversion)
5793 as the operation. */
5795 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5796 that is compensated by the subsequent overflow when subtracting
5797 one / negating. */
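/* Illustration with ABS: for A = 7, ABS gives 7; subtracting 1 for EQ leaves
   6 (sign clear, so A == 0 is false), while negating for NE leaves -7 (sign
   set, so A != 0 holds).  For A = 0 the results are -1 and 0 respectively.
   For A = INT_MIN, ABS is still INT_MIN, but INT_MIN - 1 wraps to INT_MAX
   and -INT_MIN wraps to INT_MIN, which is still the correct sign bit in each
   case.  */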
5799 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5800 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5801 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5802 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5803 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5805 tem = convert_modes (word_mode, mode, op0, 1);
5806 mode = word_mode;
5809 if (tem != 0)
5811 if (code == EQ)
5812 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5813 0, OPTAB_WIDEN);
5814 else
5815 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5818 /* If we couldn't do it that way, for NE we can "or" the two's complement
5819 of the value with itself. For EQ, we take the one's complement of
5820 that "or", which is an extra insn, so we only handle EQ if branches
5821 are expensive. */
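/* E.g. in an 8-bit mode: A = 5 gives -5 | 5 = 0xff, sign bit set, so A != 0
   holds; A = 0 gives 0 | 0 = 0.  For EQ the extra one's complement flips
   these to 0 and -1 respectively.  */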
5823 if (tem == 0
5824 && (code == NE
5825 || BRANCH_COST (optimize_insn_for_speed_p (),
5826 false) > 1))
5828 if (rtx_equal_p (subtarget, op0))
5829 subtarget = 0;
5831 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5832 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5833 OPTAB_WIDEN);
5835 if (tem && code == EQ)
5836 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5840 if (tem && normalizep)
5841 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5842 GET_MODE_BITSIZE (mode) - 1,
5843 subtarget, normalizep == 1);
5845 if (tem)
5847 if (!target)
5849 else if (GET_MODE (tem) != target_mode)
5851 convert_move (target, tem, 0);
5852 tem = target;
5854 else if (!subtarget)
5856 emit_move_insn (target, tem);
5857 tem = target;
5860 else
5861 delete_insns_since (last);
5863 return tem;
5866 /* Like emit_store_flag, but always succeeds. */
5868 rtx
5869 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5870 machine_mode mode, int unsignedp, int normalizep)
5872 rtx tem;
5873 rtx_code_label *label;
5874 rtx trueval, falseval;
5876 /* First see if emit_store_flag can do the job. */
5877 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5878 if (tem != 0)
5879 return tem;
5881 if (!target)
5882 target = gen_reg_rtx (word_mode);
5884 /* If this failed, we have to do this with set/compare/jump/set code.
5885 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
5886 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5887 if (code == NE
5888 && GET_MODE_CLASS (mode) == MODE_INT
5889 && REG_P (target)
5890 && op0 == target
5891 && op1 == const0_rtx)
5893 label = gen_label_rtx ();
5894 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5895 mode, NULL_RTX, NULL_RTX, label, -1);
5896 emit_move_insn (target, trueval);
5897 emit_label (label);
5898 return target;
5901 if (!REG_P (target)
5902 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5903 target = gen_reg_rtx (GET_MODE (target));
5905 /* Jump in the right direction if the target cannot implement CODE
5906 but can jump on its reverse condition. */
5907 falseval = const0_rtx;
5908 if (! can_compare_p (code, mode, ccp_jump)
5909 && (! FLOAT_MODE_P (mode)
5910 || code == ORDERED || code == UNORDERED
5911 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5912 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5914 enum rtx_code rcode;
5915 if (FLOAT_MODE_P (mode))
5916 rcode = reverse_condition_maybe_unordered (code);
5917 else
5918 rcode = reverse_condition (code);
5920 /* Canonicalize to UNORDERED for the libcall. */
5921 if (can_compare_p (rcode, mode, ccp_jump)
5922 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5924 falseval = trueval;
5925 trueval = const0_rtx;
5926 code = rcode;
5930 emit_move_insn (target, trueval);
5931 label = gen_label_rtx ();
5932 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5933 NULL_RTX, label, -1);
5935 emit_move_insn (target, falseval);
5936 emit_label (label);
5938 return target;
5941 /* Perform possibly multi-word comparison and conditional jump to LABEL
5942 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
5943 now a thin wrapper around do_compare_rtx_and_jump. */
5945 static void
5946 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
5947 rtx_code_label *label)
5949 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5950 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5951 NULL_RTX, NULL_RTX, label, -1);