1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987-2015 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "diagnostic-core.h"
27 #include "rtl.h"
28 #include "input.h"
29 #include "alias.h"
30 #include "symtab.h"
31 #include "tree.h"
32 #include "fold-const.h"
33 #include "stor-layout.h"
34 #include "tm_p.h"
35 #include "flags.h"
36 #include "insn-config.h"
37 #include "hard-reg-set.h"
38 #include "function.h"
39 #include "expmed.h"
40 #include "dojump.h"
41 #include "explow.h"
42 #include "calls.h"
43 #include "emit-rtl.h"
44 #include "varasm.h"
45 #include "stmt.h"
46 #include "expr.h"
47 #include "insn-codes.h"
48 #include "optabs.h"
49 #include "recog.h"
50 #include "langhooks.h"
51 #include "predict.h"
52 #include "basic-block.h"
53 #include "df.h"
54 #include "target.h"
56 struct target_expmed default_target_expmed;
57 #if SWITCHABLE_TARGET
58 struct target_expmed *this_target_expmed = &default_target_expmed;
59 #endif
61 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
62 unsigned HOST_WIDE_INT,
63 unsigned HOST_WIDE_INT,
64 unsigned HOST_WIDE_INT,
65 rtx);
66 static void store_fixed_bit_field_1 (rtx, unsigned HOST_WIDE_INT,
67 unsigned HOST_WIDE_INT,
68 rtx);
69 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
70 unsigned HOST_WIDE_INT,
71 unsigned HOST_WIDE_INT,
72 unsigned HOST_WIDE_INT,
73 rtx);
74 static rtx extract_fixed_bit_field (machine_mode, rtx,
75 unsigned HOST_WIDE_INT,
76 unsigned HOST_WIDE_INT, rtx, int);
77 static rtx extract_fixed_bit_field_1 (machine_mode, rtx,
78 unsigned HOST_WIDE_INT,
79 unsigned HOST_WIDE_INT, rtx, int);
80 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
81 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
82 unsigned HOST_WIDE_INT, int);
83 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
84 static rtx expand_smod_pow2 (machine_mode, rtx, HOST_WIDE_INT);
85 static rtx expand_sdiv_pow2 (machine_mode, rtx, HOST_WIDE_INT);
87 /* Return a constant integer mask value of mode MODE with BITSIZE ones
88 followed by BITPOS zeros, or the complement of that if COMPLEMENT.
89 The mask is truncated if necessary to the width of mode MODE. The
90 mask is zero-extended if BITSIZE+BITPOS is too small for MODE. */
92 static inline rtx
93 mask_rtx (machine_mode mode, int bitpos, int bitsize, bool complement)
95 return immed_wide_int_const
96 (wi::shifted_mask (bitpos, bitsize, complement,
97 GET_MODE_PRECISION (mode)), mode);
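/* For example (illustrative values only): with a 32-bit MODE,
   mask_rtx (mode, 8, 4, false) yields the constant 0xf00 (four ones
   starting at bit 8), and mask_rtx (mode, 8, 4, true) yields its
   complement 0xfffff0ff.  */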
/* Test whether a value is zero or a power of two. */
101 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
102 (((x) & ((x) - (unsigned HOST_WIDE_INT) 1)) == 0)
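/* This relies on the usual bit trick: subtracting 1 from a power of two
   clears its single set bit and sets every bit below it, so
   x & (x - 1) is zero exactly when x is zero or has one bit set.
   E.g. 8 & 7 == 0, while 6 & 5 == 4 != 0.  */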
104 struct init_expmed_rtl
106 rtx reg;
107 rtx plus;
108 rtx neg;
109 rtx mult;
110 rtx sdiv;
111 rtx udiv;
112 rtx sdiv_32;
113 rtx smod_32;
114 rtx wide_mult;
115 rtx wide_lshr;
116 rtx wide_trunc;
117 rtx shift;
118 rtx shift_mult;
119 rtx shift_add;
120 rtx shift_sub0;
121 rtx shift_sub1;
122 rtx zext;
123 rtx trunc;
125 rtx pow2[MAX_BITS_PER_WORD];
126 rtx cint[MAX_BITS_PER_WORD];
129 static void
130 init_expmed_one_conv (struct init_expmed_rtl *all, machine_mode to_mode,
131 machine_mode from_mode, bool speed)
133 int to_size, from_size;
134 rtx which;
136 to_size = GET_MODE_PRECISION (to_mode);
137 from_size = GET_MODE_PRECISION (from_mode);
/* Most partial integers have a precision less than the "full"
   integer they require for storage.  In case one doesn't, for
   comparison purposes here, reduce the bit size by one.  */
143 if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
144 && exact_log2 (to_size) != -1)
145 to_size --;
146 if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
147 && exact_log2 (from_size) != -1)
148 from_size --;
150 /* Assume cost of zero-extend and sign-extend is the same. */
151 which = (to_size < from_size ? all->trunc : all->zext);
153 PUT_MODE (all->reg, from_mode);
154 set_convert_cost (to_mode, from_mode, speed, set_src_cost (which, speed));
157 static void
158 init_expmed_one_mode (struct init_expmed_rtl *all,
159 machine_mode mode, int speed)
161 int m, n, mode_bitsize;
162 machine_mode mode_from;
164 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
166 PUT_MODE (all->reg, mode);
167 PUT_MODE (all->plus, mode);
168 PUT_MODE (all->neg, mode);
169 PUT_MODE (all->mult, mode);
170 PUT_MODE (all->sdiv, mode);
171 PUT_MODE (all->udiv, mode);
172 PUT_MODE (all->sdiv_32, mode);
173 PUT_MODE (all->smod_32, mode);
174 PUT_MODE (all->wide_trunc, mode);
175 PUT_MODE (all->shift, mode);
176 PUT_MODE (all->shift_mult, mode);
177 PUT_MODE (all->shift_add, mode);
178 PUT_MODE (all->shift_sub0, mode);
179 PUT_MODE (all->shift_sub1, mode);
180 PUT_MODE (all->zext, mode);
181 PUT_MODE (all->trunc, mode);
183 set_add_cost (speed, mode, set_src_cost (all->plus, speed));
184 set_neg_cost (speed, mode, set_src_cost (all->neg, speed));
185 set_mul_cost (speed, mode, set_src_cost (all->mult, speed));
186 set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, speed));
187 set_udiv_cost (speed, mode, set_src_cost (all->udiv, speed));
189 set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, speed)
190 <= 2 * add_cost (speed, mode)));
191 set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, speed)
192 <= 4 * add_cost (speed, mode)));
194 set_shift_cost (speed, mode, 0, 0);
196 int cost = add_cost (speed, mode);
197 set_shiftadd_cost (speed, mode, 0, cost);
198 set_shiftsub0_cost (speed, mode, 0, cost);
199 set_shiftsub1_cost (speed, mode, 0, cost);
202 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
203 for (m = 1; m < n; m++)
205 XEXP (all->shift, 1) = all->cint[m];
206 XEXP (all->shift_mult, 1) = all->pow2[m];
208 set_shift_cost (speed, mode, m, set_src_cost (all->shift, speed));
209 set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, speed));
210 set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, speed));
211 set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, speed));
214 if (SCALAR_INT_MODE_P (mode))
216 for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
217 mode_from = (machine_mode)(mode_from + 1))
218 init_expmed_one_conv (all, mode, mode_from, speed);
220 if (GET_MODE_CLASS (mode) == MODE_INT)
222 machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
223 if (wider_mode != VOIDmode)
225 PUT_MODE (all->zext, wider_mode);
226 PUT_MODE (all->wide_mult, wider_mode);
227 PUT_MODE (all->wide_lshr, wider_mode);
228 XEXP (all->wide_lshr, 1) = GEN_INT (mode_bitsize);
230 set_mul_widen_cost (speed, wider_mode,
231 set_src_cost (all->wide_mult, speed));
232 set_mul_highpart_cost (speed, mode,
233 set_src_cost (all->wide_trunc, speed));
238 void
239 init_expmed (void)
241 struct init_expmed_rtl all;
242 machine_mode mode = QImode;
243 int m, speed;
245 memset (&all, 0, sizeof all);
246 for (m = 1; m < MAX_BITS_PER_WORD; m++)
248 all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
249 all.cint[m] = GEN_INT (m);
252 /* Avoid using hard regs in ways which may be unsupported. */
253 all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
254 all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
255 all.neg = gen_rtx_NEG (mode, all.reg);
256 all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
257 all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
258 all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
259 all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
260 all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
261 all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
262 all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
263 all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
264 all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
265 all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
266 all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
267 all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
268 all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
269 all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
270 all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
272 for (speed = 0; speed < 2; speed++)
274 crtl->maybe_hot_insn_p = speed;
275 set_zero_cost (speed, set_src_cost (const0_rtx, speed));
277 for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
278 mode = (machine_mode)(mode + 1))
279 init_expmed_one_mode (&all, mode, speed);
281 if (MIN_MODE_PARTIAL_INT != VOIDmode)
282 for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
283 mode = (machine_mode)(mode + 1))
284 init_expmed_one_mode (&all, mode, speed);
286 if (MIN_MODE_VECTOR_INT != VOIDmode)
287 for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
288 mode = (machine_mode)(mode + 1))
289 init_expmed_one_mode (&all, mode, speed);
292 if (alg_hash_used_p ())
294 struct alg_hash_entry *p = alg_hash_entry_ptr (0);
295 memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
297 else
298 set_alg_hash_used_p (true);
299 default_rtl_profile ();
301 ggc_free (all.trunc);
302 ggc_free (all.shift_sub1);
303 ggc_free (all.shift_sub0);
304 ggc_free (all.shift_add);
305 ggc_free (all.shift_mult);
306 ggc_free (all.shift);
307 ggc_free (all.wide_trunc);
308 ggc_free (all.wide_lshr);
309 ggc_free (all.wide_mult);
310 ggc_free (all.zext);
311 ggc_free (all.smod_32);
312 ggc_free (all.sdiv_32);
313 ggc_free (all.udiv);
314 ggc_free (all.sdiv);
315 ggc_free (all.mult);
316 ggc_free (all.neg);
317 ggc_free (all.plus);
318 ggc_free (all.reg);
321 /* Return an rtx representing minus the value of X.
322 MODE is the intended mode of the result,
323 useful if X is a CONST_INT. */
326 negate_rtx (machine_mode mode, rtx x)
328 rtx result = simplify_unary_operation (NEG, mode, x, mode);
330 if (result == 0)
331 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
333 return result;
336 /* Adjust bitfield memory MEM so that it points to the first unit of mode
337 MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
338 If MODE is BLKmode, return a reference to every byte in the bitfield.
339 Set *NEW_BITNUM to the bit position of the field within the new memory. */
341 static rtx
342 narrow_bit_field_mem (rtx mem, machine_mode mode,
343 unsigned HOST_WIDE_INT bitsize,
344 unsigned HOST_WIDE_INT bitnum,
345 unsigned HOST_WIDE_INT *new_bitnum)
347 if (mode == BLKmode)
349 *new_bitnum = bitnum % BITS_PER_UNIT;
350 HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
351 HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
352 / BITS_PER_UNIT);
353 return adjust_bitfield_address_size (mem, mode, offset, size);
355 else
357 unsigned int unit = GET_MODE_BITSIZE (mode);
358 *new_bitnum = bitnum % unit;
359 HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
360 return adjust_bitfield_address (mem, mode, offset);
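/* For instance (illustrative numbers only), narrowing to a 32-bit
   integer mode with BITNUM == 37 gives *NEW_BITNUM == 5 and advances
   the address by (37 - 5) / BITS_PER_UNIT == 4 bytes, assuming
   8-bit units.  */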
364 /* The caller wants to perform insertion or extraction PATTERN on a
365 bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
366 BITREGION_START and BITREGION_END are as for store_bit_field
367 and FIELDMODE is the natural mode of the field.
369 Search for a mode that is compatible with the memory access
370 restrictions and (where applicable) with a register insertion or
371 extraction. Return the new memory on success, storing the adjusted
372 bit position in *NEW_BITNUM. Return null otherwise. */
374 static rtx
375 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
376 rtx op0, HOST_WIDE_INT bitsize,
377 HOST_WIDE_INT bitnum,
378 unsigned HOST_WIDE_INT bitregion_start,
379 unsigned HOST_WIDE_INT bitregion_end,
380 machine_mode fieldmode,
381 unsigned HOST_WIDE_INT *new_bitnum)
383 bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
384 bitregion_end, MEM_ALIGN (op0),
385 MEM_VOLATILE_P (op0));
386 machine_mode best_mode;
387 if (iter.next_mode (&best_mode))
389 /* We can use a memory in BEST_MODE. See whether this is true for
390 any wider modes. All other things being equal, we prefer to
391 use the widest mode possible because it tends to expose more
392 CSE opportunities. */
393 if (!iter.prefer_smaller_modes ())
395 /* Limit the search to the mode required by the corresponding
396 register insertion or extraction instruction, if any. */
397 machine_mode limit_mode = word_mode;
398 extraction_insn insn;
399 if (get_best_reg_extraction_insn (&insn, pattern,
400 GET_MODE_BITSIZE (best_mode),
401 fieldmode))
402 limit_mode = insn.field_mode;
404 machine_mode wider_mode;
405 while (iter.next_mode (&wider_mode)
406 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
407 best_mode = wider_mode;
409 return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
410 new_bitnum);
412 return NULL_RTX;
415 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
416 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
417 offset is then BITNUM / BITS_PER_UNIT. */
419 static bool
420 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
421 unsigned HOST_WIDE_INT bitsize,
422 machine_mode struct_mode)
424 if (BYTES_BIG_ENDIAN)
425 return (bitnum % BITS_PER_UNIT == 0
426 && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
427 || (bitnum + bitsize) % BITS_PER_WORD == 0));
428 else
429 return bitnum % BITS_PER_WORD == 0;
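/* As an illustration, with 32-bit words and a 32-bit STRUCT_MODE:
   on a little-endian target bits [0, 16) form a lowpart, since the
   field starts at bit 0 of a word; on a big-endian target bits
   [16, 32) do, since the field ends at the end of the structure.  */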
432 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
433 containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
434 Return false if the access would touch memory outside the range
435 BITREGION_START to BITREGION_END for conformance to the C++ memory
436 model. */
438 static bool
439 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
440 unsigned HOST_WIDE_INT bitnum,
441 machine_mode fieldmode,
442 unsigned HOST_WIDE_INT bitregion_start,
443 unsigned HOST_WIDE_INT bitregion_end)
445 unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
447 /* -fstrict-volatile-bitfields must be enabled and we must have a
448 volatile MEM. */
449 if (!MEM_P (op0)
450 || !MEM_VOLATILE_P (op0)
451 || flag_strict_volatile_bitfields <= 0)
452 return false;
454 /* Non-integral modes likely only happen with packed structures.
455 Punt. */
456 if (!SCALAR_INT_MODE_P (fieldmode))
457 return false;
459 /* The bit size must not be larger than the field mode, and
460 the field mode must not be larger than a word. */
461 if (bitsize > modesize || modesize > BITS_PER_WORD)
462 return false;
464 /* Check for cases of unaligned fields that must be split. */
465 if (bitnum % modesize + bitsize > modesize)
466 return false;
468 /* The memory must be sufficiently aligned for a MODESIZE access.
This condition guarantees that the memory access will not
470 touch anything after the end of the structure. */
471 if (MEM_ALIGN (op0) < modesize)
472 return false;
474 /* Check for cases where the C++ memory model applies. */
475 if (bitregion_end != 0
476 && (bitnum - bitnum % modesize < bitregion_start
477 || bitnum - bitnum % modesize + modesize - 1 > bitregion_end))
478 return false;
480 return true;
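/* A minimal sketch of the kind of access this covers (hypothetical
   user code):

     struct S { volatile unsigned int f : 8; };

   With -fstrict-volatile-bitfields, reads and writes of s.f are done
   with a single access in the declared type's mode (typically SImode
   here) whenever the checks above succeed.  */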
483 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
484 bit number BITNUM can be treated as a simple value of mode MODE. */
486 static bool
487 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
488 unsigned HOST_WIDE_INT bitnum, machine_mode mode)
490 return (MEM_P (op0)
491 && bitnum % BITS_PER_UNIT == 0
492 && bitsize == GET_MODE_BITSIZE (mode)
493 && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
494 || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
495 && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
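/* E.g. a 32-bit field starting at bit 64 of a suitably aligned MEM
   satisfies this and can be accessed with one SImode load or store at
   byte offset 8 (illustrative, assuming 32-bit SImode and 8-bit
   units).  */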
498 /* Try to use instruction INSV to store VALUE into a field of OP0.
499 BITSIZE and BITNUM are as for store_bit_field. */
501 static bool
502 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
503 unsigned HOST_WIDE_INT bitsize,
504 unsigned HOST_WIDE_INT bitnum,
505 rtx value)
507 struct expand_operand ops[4];
508 rtx value1;
509 rtx xop0 = op0;
510 rtx_insn *last = get_last_insn ();
511 bool copy_back = false;
513 machine_mode op_mode = insv->field_mode;
514 unsigned int unit = GET_MODE_BITSIZE (op_mode);
515 if (bitsize == 0 || bitsize > unit)
516 return false;
518 if (MEM_P (xop0))
519 /* Get a reference to the first byte of the field. */
520 xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
521 &bitnum);
522 else
524 /* Convert from counting within OP0 to counting in OP_MODE. */
525 if (BYTES_BIG_ENDIAN)
526 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
528 /* If xop0 is a register, we need it in OP_MODE
529 to make it acceptable to the format of insv. */
530 if (GET_CODE (xop0) == SUBREG)
531 /* We can't just change the mode, because this might clobber op0,
532 and we will need the original value of op0 if insv fails. */
533 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
534 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
535 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
538 /* If the destination is a paradoxical subreg such that we need a
539 truncate to the inner mode, perform the insertion on a temporary and
540 truncate the result to the original destination. Note that we can't
541 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
542 X) 0)) is (reg:N X). */
543 if (GET_CODE (xop0) == SUBREG
544 && REG_P (SUBREG_REG (xop0))
545 && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
546 op_mode))
548 rtx tem = gen_reg_rtx (op_mode);
549 emit_move_insn (tem, xop0);
550 xop0 = tem;
551 copy_back = true;
/* There is a similar overflow check at the start of store_bit_field_1,
   but it only handles the case where the field lies completely
   outside the register.  Here the field may also lie partially in
   the register, so adjust BITSIZE for that partial overlap.  Without
   this fix, pr48335-2.c would break on big-endian targets that have
   a bit-insert instruction, such as arm and aarch64.  */
561 if (bitsize + bitnum > unit && bitnum < unit)
563 warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
564 "destination object, data truncated into %wu-bit",
565 bitsize, unit - bitnum);
566 bitsize = unit - bitnum;
569 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
570 "backwards" from the size of the unit we are inserting into.
571 Otherwise, we count bits from the most significant on a
572 BYTES/BITS_BIG_ENDIAN machine. */
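  /* Illustration: with UNIT == 32, BITSIZE == 8 and BITNUM == 4, the
     conversion below yields BITNUM == 32 - 8 - 4 == 20, i.e. the same
     field counted from the other end of the unit.  */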
574 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
575 bitnum = unit - bitsize - bitnum;
577 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
578 value1 = value;
579 if (GET_MODE (value) != op_mode)
581 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
583 /* Optimization: Don't bother really extending VALUE
584 if it has all the bits we will actually use. However,
585 if we must narrow it, be sure we do it correctly. */
587 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
589 rtx tmp;
591 tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
592 if (! tmp)
593 tmp = simplify_gen_subreg (op_mode,
594 force_reg (GET_MODE (value),
595 value1),
596 GET_MODE (value), 0);
597 value1 = tmp;
599 else
600 value1 = gen_lowpart (op_mode, value1);
602 else if (CONST_INT_P (value))
603 value1 = gen_int_mode (INTVAL (value), op_mode);
604 else
605 /* Parse phase is supposed to make VALUE's data type
606 match that of the component reference, which is a type
607 at least as wide as the field; so VALUE should have
608 a mode that corresponds to that type. */
609 gcc_assert (CONSTANT_P (value));
612 create_fixed_operand (&ops[0], xop0);
613 create_integer_operand (&ops[1], bitsize);
614 create_integer_operand (&ops[2], bitnum);
615 create_input_operand (&ops[3], value1, op_mode);
616 if (maybe_expand_insn (insv->icode, 4, ops))
618 if (copy_back)
619 convert_move (op0, xop0, true);
620 return true;
622 delete_insns_since (last);
623 return false;
626 /* A subroutine of store_bit_field, with the same arguments. Return true
627 if the operation could be implemented.
629 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
630 no other way of implementing the operation. If FALLBACK_P is false,
631 return false instead. */
633 static bool
634 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
635 unsigned HOST_WIDE_INT bitnum,
636 unsigned HOST_WIDE_INT bitregion_start,
637 unsigned HOST_WIDE_INT bitregion_end,
638 machine_mode fieldmode,
639 rtx value, bool fallback_p)
641 rtx op0 = str_rtx;
642 rtx orig_value;
644 while (GET_CODE (op0) == SUBREG)
646 /* The following line once was done only if WORDS_BIG_ENDIAN,
647 but I think that is a mistake. WORDS_BIG_ENDIAN is
648 meaningful at a much higher level; when structures are copied
649 between memory and regs, the higher-numbered regs
650 always get higher addresses. */
651 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
652 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
653 int byte_offset = 0;
655 /* Paradoxical subregs need special handling on big endian machines. */
656 if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
658 int difference = inner_mode_size - outer_mode_size;
660 if (WORDS_BIG_ENDIAN)
661 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
662 if (BYTES_BIG_ENDIAN)
663 byte_offset += difference % UNITS_PER_WORD;
665 else
666 byte_offset = SUBREG_BYTE (op0);
668 bitnum += byte_offset * BITS_PER_UNIT;
669 op0 = SUBREG_REG (op0);
672 /* No action is needed if the target is a register and if the field
673 lies completely outside that register. This can occur if the source
674 code contains an out-of-bounds access to a small array. */
675 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
676 return true;
678 /* Use vec_set patterns for inserting parts of vectors whenever
679 available. */
680 if (VECTOR_MODE_P (GET_MODE (op0))
681 && !MEM_P (op0)
682 && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
683 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
684 && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
685 && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
687 struct expand_operand ops[3];
688 machine_mode outermode = GET_MODE (op0);
689 machine_mode innermode = GET_MODE_INNER (outermode);
690 enum insn_code icode = optab_handler (vec_set_optab, outermode);
691 int pos = bitnum / GET_MODE_BITSIZE (innermode);
693 create_fixed_operand (&ops[0], op0);
694 create_input_operand (&ops[1], value, innermode);
695 create_integer_operand (&ops[2], pos);
696 if (maybe_expand_insn (icode, 3, ops))
697 return true;
700 /* If the target is a register, overwriting the entire object, or storing
701 a full-word or multi-word field can be done with just a SUBREG. */
702 if (!MEM_P (op0)
703 && bitsize == GET_MODE_BITSIZE (fieldmode)
704 && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
705 || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
707 /* Use the subreg machinery either to narrow OP0 to the required
708 words or to cope with mode punning between equal-sized modes.
In the latter case, use a subreg on the rhs, not the lhs. */
710 rtx sub;
712 if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
714 sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
715 if (sub)
717 emit_move_insn (op0, sub);
718 return true;
721 else
723 sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
724 bitnum / BITS_PER_UNIT);
725 if (sub)
727 emit_move_insn (sub, value);
728 return true;
733 /* If the target is memory, storing any naturally aligned field can be
734 done with a simple store. For targets that support fast unaligned
735 memory, any naturally sized, unit aligned field can be done directly. */
736 if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
738 op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
739 emit_move_insn (op0, value);
740 return true;
743 /* Make sure we are playing with integral modes. Pun with subregs
744 if we aren't. This must come after the entire register case above,
745 since that case is valid for any mode. The following cases are only
746 valid for integral modes. */
748 machine_mode imode = int_mode_for_mode (GET_MODE (op0));
749 if (imode != GET_MODE (op0))
751 if (MEM_P (op0))
752 op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
753 else
755 gcc_assert (imode != BLKmode);
756 op0 = gen_lowpart (imode, op0);
761 /* Storing an lsb-aligned field in a register
762 can be done with a movstrict instruction. */
764 if (!MEM_P (op0)
765 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
766 && bitsize == GET_MODE_BITSIZE (fieldmode)
767 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
769 struct expand_operand ops[2];
770 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
771 rtx arg0 = op0;
772 unsigned HOST_WIDE_INT subreg_off;
774 if (GET_CODE (arg0) == SUBREG)
776 /* Else we've got some float mode source being extracted into
777 a different float mode destination -- this combination of
778 subregs results in Severe Tire Damage. */
779 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
780 || GET_MODE_CLASS (fieldmode) == MODE_INT
781 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
782 arg0 = SUBREG_REG (arg0);
785 subreg_off = bitnum / BITS_PER_UNIT;
786 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
788 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
790 create_fixed_operand (&ops[0], arg0);
791 /* Shrink the source operand to FIELDMODE. */
792 create_convert_operand_to (&ops[1], value, fieldmode, false);
793 if (maybe_expand_insn (icode, 2, ops))
794 return true;
798 /* Handle fields bigger than a word. */
800 if (bitsize > BITS_PER_WORD)
802 /* Here we transfer the words of the field
803 in the order least significant first.
804 This is because the most significant word is the one which may
805 be less than full.
806 However, only do that if the value is not BLKmode. */
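      /* For example (illustrative sizes only), storing a 40-bit field
	 with 32-bit words takes two iterations: word 0 carries the 32
	 low-order bits and word 1 the remaining 8, so only the most
	 significant word is partial.  */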
808 unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
809 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
810 unsigned int i;
811 rtx_insn *last;
813 /* This is the mode we must force value to, so that there will be enough
814 subwords to extract. Note that fieldmode will often (always?) be
815 VOIDmode, because that is what store_field uses to indicate that this
816 is a bit field, but passing VOIDmode to operand_subword_force
817 is not allowed. */
818 fieldmode = GET_MODE (value);
819 if (fieldmode == VOIDmode)
820 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
822 last = get_last_insn ();
823 for (i = 0; i < nwords; i++)
825 /* If I is 0, use the low-order word in both field and target;
826 if I is 1, use the next to lowest word; and so on. */
827 unsigned int wordnum = (backwards
828 ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
829 - i - 1
830 : i);
831 unsigned int bit_offset = (backwards
832 ? MAX ((int) bitsize - ((int) i + 1)
833 * BITS_PER_WORD,
835 : (int) i * BITS_PER_WORD);
836 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
837 unsigned HOST_WIDE_INT new_bitsize =
838 MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
/* If the remaining chunk doesn't have a full word's worth of bits,
   make sure that the higher-order bits are used on big-endian
   machines.  */
843 if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
844 value_word = simplify_expand_binop (word_mode, lshr_optab,
845 value_word,
846 GEN_INT (BITS_PER_WORD
847 - new_bitsize),
848 NULL_RTX, true,
849 OPTAB_LIB_WIDEN);
851 if (!store_bit_field_1 (op0, new_bitsize,
852 bitnum + bit_offset,
853 bitregion_start, bitregion_end,
854 word_mode,
855 value_word, fallback_p))
857 delete_insns_since (last);
858 return false;
861 return true;
864 /* If VALUE has a floating-point or complex mode, access it as an
865 integer of the corresponding size. This can occur on a machine
866 with 64 bit registers that uses SFmode for float. It can also
867 occur for unaligned float or complex fields. */
868 orig_value = value;
869 if (GET_MODE (value) != VOIDmode
870 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
871 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
873 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
874 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
877 /* If OP0 is a multi-word register, narrow it to the affected word.
878 If the region spans two words, defer to store_split_bit_field. */
879 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
881 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
882 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
883 gcc_assert (op0);
884 bitnum %= BITS_PER_WORD;
885 if (bitnum + bitsize > BITS_PER_WORD)
887 if (!fallback_p)
888 return false;
890 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
891 bitregion_end, value);
892 return true;
896 /* From here on we can assume that the field to be stored in fits
897 within a word. If the destination is a register, it too fits
898 in a word. */
900 extraction_insn insv;
901 if (!MEM_P (op0)
902 && get_best_reg_extraction_insn (&insv, EP_insv,
903 GET_MODE_BITSIZE (GET_MODE (op0)),
904 fieldmode)
905 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
906 return true;
908 /* If OP0 is a memory, try copying it to a register and seeing if a
909 cheap register alternative is available. */
910 if (MEM_P (op0))
912 if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
913 fieldmode)
914 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
915 return true;
917 rtx_insn *last = get_last_insn ();
919 /* Try loading part of OP0 into a register, inserting the bitfield
920 into that, and then copying the result back to OP0. */
921 unsigned HOST_WIDE_INT bitpos;
922 rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
923 bitregion_start, bitregion_end,
924 fieldmode, &bitpos);
925 if (xop0)
927 rtx tempreg = copy_to_reg (xop0);
928 if (store_bit_field_1 (tempreg, bitsize, bitpos,
929 bitregion_start, bitregion_end,
930 fieldmode, orig_value, false))
932 emit_move_insn (xop0, tempreg);
933 return true;
935 delete_insns_since (last);
939 if (!fallback_p)
940 return false;
942 store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
943 bitregion_end, value);
944 return true;
947 /* Generate code to store value from rtx VALUE
948 into a bit-field within structure STR_RTX
949 containing BITSIZE bits starting at bit BITNUM.
951 BITREGION_START is bitpos of the first bitfield in this region.
952 BITREGION_END is the bitpos of the ending bitfield in this region.
953 These two fields are 0, if the C++ memory model does not apply,
954 or we are not interested in keeping track of bitfield regions.
956 FIELDMODE is the machine-mode of the FIELD_DECL node for this field. */
958 void
959 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
960 unsigned HOST_WIDE_INT bitnum,
961 unsigned HOST_WIDE_INT bitregion_start,
962 unsigned HOST_WIDE_INT bitregion_end,
963 machine_mode fieldmode,
964 rtx value)
966 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
967 if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode,
968 bitregion_start, bitregion_end))
970 /* Storing of a full word can be done with a simple store.
971 We know here that the field can be accessed with one single
972 instruction. For targets that support unaligned memory,
973 an unaligned access may be necessary. */
974 if (bitsize == GET_MODE_BITSIZE (fieldmode))
976 str_rtx = adjust_bitfield_address (str_rtx, fieldmode,
977 bitnum / BITS_PER_UNIT);
978 gcc_assert (bitnum % BITS_PER_UNIT == 0);
979 emit_move_insn (str_rtx, value);
981 else
983 rtx temp;
985 str_rtx = narrow_bit_field_mem (str_rtx, fieldmode, bitsize, bitnum,
986 &bitnum);
987 gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (fieldmode));
988 temp = copy_to_reg (str_rtx);
989 if (!store_bit_field_1 (temp, bitsize, bitnum, 0, 0,
990 fieldmode, value, true))
991 gcc_unreachable ();
993 emit_move_insn (str_rtx, temp);
996 return;
999 /* Under the C++0x memory model, we must not touch bits outside the
1000 bit region. Adjust the address to start at the beginning of the
1001 bit region. */
1002 if (MEM_P (str_rtx) && bitregion_start > 0)
1004 machine_mode bestmode;
1005 HOST_WIDE_INT offset, size;
1007 gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
1009 offset = bitregion_start / BITS_PER_UNIT;
1010 bitnum -= bitregion_start;
1011 size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
1012 bitregion_end -= bitregion_start;
1013 bitregion_start = 0;
1014 bestmode = get_best_mode (bitsize, bitnum,
1015 bitregion_start, bitregion_end,
1016 MEM_ALIGN (str_rtx), VOIDmode,
1017 MEM_VOLATILE_P (str_rtx));
1018 str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
1021 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1022 bitregion_start, bitregion_end,
1023 fieldmode, value, true))
1024 gcc_unreachable ();
1027 /* Use shifts and boolean operations to store VALUE into a bit field of
1028 width BITSIZE in OP0, starting at bit BITNUM. */
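/* In essence this computes, in MODE arithmetic (a sketch that ignores
   the endianness adjustments and constant-VALUE shortcuts handled
   below):

     op0 = (op0 & ~(mask << bitnum)) | ((value & mask) << bitnum);

   where MASK has BITSIZE low-order ones.  */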
1030 static void
1031 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1032 unsigned HOST_WIDE_INT bitnum,
1033 unsigned HOST_WIDE_INT bitregion_start,
1034 unsigned HOST_WIDE_INT bitregion_end,
1035 rtx value)
1037 /* There is a case not handled here:
1038 a structure with a known alignment of just a halfword
1039 and a field split across two aligned halfwords within the structure.
1040 Or likewise a structure with a known alignment of just a byte
1041 and a field split across two bytes.
1042 Such cases are not supposed to be able to occur. */
1044 if (MEM_P (op0))
1046 machine_mode mode = GET_MODE (op0);
1047 if (GET_MODE_BITSIZE (mode) == 0
1048 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
1049 mode = word_mode;
1050 mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1051 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
1053 if (mode == VOIDmode)
1055 /* The only way this should occur is if the field spans word
1056 boundaries. */
1057 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
1058 bitregion_end, value);
1059 return;
1062 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1065 store_fixed_bit_field_1 (op0, bitsize, bitnum, value);
1068 /* Helper function for store_fixed_bit_field, stores
1069 the bit field always using the MODE of OP0. */
1071 static void
1072 store_fixed_bit_field_1 (rtx op0, unsigned HOST_WIDE_INT bitsize,
1073 unsigned HOST_WIDE_INT bitnum,
1074 rtx value)
1076 machine_mode mode;
1077 rtx temp;
1078 int all_zero = 0;
1079 int all_one = 0;
1081 mode = GET_MODE (op0);
1082 gcc_assert (SCALAR_INT_MODE_P (mode));
1084 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1085 for invalid input, such as f5 from gcc.dg/pr48335-2.c. */
1087 if (BYTES_BIG_ENDIAN)
1088 /* BITNUM is the distance between our msb
1089 and that of the containing datum.
1090 Convert it to the distance from the lsb. */
1091 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1093 /* Now BITNUM is always the distance between our lsb
1094 and that of OP0. */
1096 /* Shift VALUE left by BITNUM bits. If VALUE is not constant,
1097 we must first convert its mode to MODE. */
1099 if (CONST_INT_P (value))
1101 unsigned HOST_WIDE_INT v = UINTVAL (value);
1103 if (bitsize < HOST_BITS_PER_WIDE_INT)
1104 v &= ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1;
1106 if (v == 0)
1107 all_zero = 1;
1108 else if ((bitsize < HOST_BITS_PER_WIDE_INT
1109 && v == ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1)
1110 || (bitsize == HOST_BITS_PER_WIDE_INT
1111 && v == (unsigned HOST_WIDE_INT) -1))
1112 all_one = 1;
1114 value = lshift_value (mode, v, bitnum);
1116 else
1118 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1119 && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1121 if (GET_MODE (value) != mode)
1122 value = convert_to_mode (mode, value, 1);
1124 if (must_and)
1125 value = expand_binop (mode, and_optab, value,
1126 mask_rtx (mode, 0, bitsize, 0),
1127 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1128 if (bitnum > 0)
1129 value = expand_shift (LSHIFT_EXPR, mode, value,
1130 bitnum, NULL_RTX, 1);
1133 /* Now clear the chosen bits in OP0,
1134 except that if VALUE is -1 we need not bother. */
1135 /* We keep the intermediates in registers to allow CSE to combine
1136 consecutive bitfield assignments. */
1138 temp = force_reg (mode, op0);
1140 if (! all_one)
1142 temp = expand_binop (mode, and_optab, temp,
1143 mask_rtx (mode, bitnum, bitsize, 1),
1144 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1145 temp = force_reg (mode, temp);
1148 /* Now logical-or VALUE into OP0, unless it is zero. */
1150 if (! all_zero)
1152 temp = expand_binop (mode, ior_optab, temp, value,
1153 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1154 temp = force_reg (mode, temp);
1157 if (op0 != temp)
1159 op0 = copy_rtx (op0);
1160 emit_move_insn (op0, temp);
1164 /* Store a bit field that is split across multiple accessible memory objects.
1166 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1167 BITSIZE is the field width; BITPOS the position of its first bit
1168 (within the word).
1169 VALUE is the value to store.
1171 This does not yet handle fields wider than BITS_PER_WORD. */
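/* A worked example (illustrative only): with 32-bit words, storing an
   8-bit field at BITPOS == 28 is done in two pieces; the first pass
   stores 4 bits at bit 28 of the first word, the second stores the
   remaining 4 bits at bit 0 of the next word.  */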
1173 static void
1174 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1175 unsigned HOST_WIDE_INT bitpos,
1176 unsigned HOST_WIDE_INT bitregion_start,
1177 unsigned HOST_WIDE_INT bitregion_end,
1178 rtx value)
1180 unsigned int unit;
1181 unsigned int bitsdone = 0;
1183 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1184 much at a time. */
1185 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1186 unit = BITS_PER_WORD;
1187 else
1188 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1190 /* If OP0 is a memory with a mode, then UNIT must not be larger than
1191 OP0's mode as well. Otherwise, store_fixed_bit_field will call us
1192 again, and we will mutually recurse forever. */
1193 if (MEM_P (op0) && GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1194 unit = MIN (unit, GET_MODE_BITSIZE (GET_MODE (op0)));
1196 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1197 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1198 that VALUE might be a floating-point constant. */
1199 if (CONSTANT_P (value) && !CONST_INT_P (value))
1201 rtx word = gen_lowpart_common (word_mode, value);
1203 if (word && (value != word))
1204 value = word;
1205 else
1206 value = gen_lowpart_common (word_mode,
1207 force_reg (GET_MODE (value) != VOIDmode
1208 ? GET_MODE (value)
1209 : word_mode, value));
1212 while (bitsdone < bitsize)
1214 unsigned HOST_WIDE_INT thissize;
1215 rtx part, word;
1216 unsigned HOST_WIDE_INT thispos;
1217 unsigned HOST_WIDE_INT offset;
1219 offset = (bitpos + bitsdone) / unit;
1220 thispos = (bitpos + bitsdone) % unit;
/* When the region of bytes we can touch is restricted, decrease
1223 UNIT close to the end of the region as needed. If op0 is a REG
1224 or SUBREG of REG, don't do this, as there can't be data races
1225 on a register and we can expand shorter code in some cases. */
1226 if (bitregion_end
1227 && unit > BITS_PER_UNIT
1228 && bitpos + bitsdone - thispos + unit > bitregion_end + 1
1229 && !REG_P (op0)
1230 && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1232 unit = unit / 2;
1233 continue;
1236 /* THISSIZE must not overrun a word boundary. Otherwise,
1237 store_fixed_bit_field will call us again, and we will mutually
1238 recurse forever. */
1239 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1240 thissize = MIN (thissize, unit - thispos);
1242 if (BYTES_BIG_ENDIAN)
1244 /* Fetch successively less significant portions. */
1245 if (CONST_INT_P (value))
1246 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1247 >> (bitsize - bitsdone - thissize))
1248 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1249 else
1251 int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1252 /* The args are chosen so that the last part includes the
1253 lsb. Give extract_bit_field the value it needs (with
1254 endianness compensation) to fetch the piece we want. */
1255 part = extract_fixed_bit_field (word_mode, value, thissize,
1256 total_bits - bitsize + bitsdone,
1257 NULL_RTX, 1);
1260 else
1262 /* Fetch successively more significant portions. */
1263 if (CONST_INT_P (value))
1264 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1265 >> bitsdone)
1266 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1267 else
1268 part = extract_fixed_bit_field (word_mode, value, thissize,
1269 bitsdone, NULL_RTX, 1);
1272 /* If OP0 is a register, then handle OFFSET here.
1274 When handling multiword bitfields, extract_bit_field may pass
1275 down a word_mode SUBREG of a larger REG for a bitfield that actually
1276 crosses a word boundary. Thus, for a SUBREG, we must find
1277 the current word starting from the base register. */
1278 if (GET_CODE (op0) == SUBREG)
1280 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD)
1281 + (offset * unit / BITS_PER_WORD);
1282 machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
1283 if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1284 word = word_offset ? const0_rtx : op0;
1285 else
1286 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1287 GET_MODE (SUBREG_REG (op0)));
1288 offset &= BITS_PER_WORD / unit - 1;
1290 else if (REG_P (op0))
1292 machine_mode op0_mode = GET_MODE (op0);
1293 if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1294 word = offset ? const0_rtx : op0;
1295 else
1296 word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
1297 GET_MODE (op0));
1298 offset &= BITS_PER_WORD / unit - 1;
1300 else
1301 word = op0;
1303 /* OFFSET is in UNITs, and UNIT is in bits. If WORD is const0_rtx,
1304 it is just an out-of-bounds access. Ignore it. */
1305 if (word != const0_rtx)
1306 store_fixed_bit_field (word, thissize, offset * unit + thispos,
1307 bitregion_start, bitregion_end, part);
1308 bitsdone += thissize;
1312 /* A subroutine of extract_bit_field_1 that converts return value X
1313 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1314 to extract_bit_field. */
1316 static rtx
1317 convert_extracted_bit_field (rtx x, machine_mode mode,
1318 machine_mode tmode, bool unsignedp)
1320 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1321 return x;
1323 /* If the x mode is not a scalar integral, first convert to the
1324 integer mode of that size and then access it as a floating-point
1325 value via a SUBREG. */
1326 if (!SCALAR_INT_MODE_P (tmode))
1328 machine_mode smode;
1330 smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1331 x = convert_to_mode (smode, x, unsignedp);
1332 x = force_reg (smode, x);
1333 return gen_lowpart (tmode, x);
1336 return convert_to_mode (tmode, x, unsignedp);
1339 /* Try to use an ext(z)v pattern to extract a field from OP0.
1340 Return the extracted value on success, otherwise return null.
1341 EXT_MODE is the mode of the extraction and the other arguments
1342 are as for extract_bit_field. */
1344 static rtx
1345 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1346 unsigned HOST_WIDE_INT bitsize,
1347 unsigned HOST_WIDE_INT bitnum,
1348 int unsignedp, rtx target,
1349 machine_mode mode, machine_mode tmode)
1351 struct expand_operand ops[4];
1352 rtx spec_target = target;
1353 rtx spec_target_subreg = 0;
1354 machine_mode ext_mode = extv->field_mode;
1355 unsigned unit = GET_MODE_BITSIZE (ext_mode);
1357 if (bitsize == 0 || unit < bitsize)
1358 return NULL_RTX;
1360 if (MEM_P (op0))
1361 /* Get a reference to the first byte of the field. */
1362 op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1363 &bitnum);
1364 else
1366 /* Convert from counting within OP0 to counting in EXT_MODE. */
1367 if (BYTES_BIG_ENDIAN)
1368 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1370 /* If op0 is a register, we need it in EXT_MODE to make it
1371 acceptable to the format of ext(z)v. */
1372 if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1373 return NULL_RTX;
1374 if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1375 op0 = gen_lowpart_SUBREG (ext_mode, op0);
1378 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1379 "backwards" from the size of the unit we are extracting from.
1380 Otherwise, we count bits from the most significant on a
1381 BYTES/BITS_BIG_ENDIAN machine. */
1383 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1384 bitnum = unit - bitsize - bitnum;
1386 if (target == 0)
1387 target = spec_target = gen_reg_rtx (tmode);
1389 if (GET_MODE (target) != ext_mode)
1391 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1392 between the mode of the extraction (word_mode) and the target
1393 mode. Instead, create a temporary and use convert_move to set
1394 the target. */
1395 if (REG_P (target)
1396 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1398 target = gen_lowpart (ext_mode, target);
1399 if (GET_MODE_PRECISION (ext_mode)
1400 > GET_MODE_PRECISION (GET_MODE (spec_target)))
1401 spec_target_subreg = target;
1403 else
1404 target = gen_reg_rtx (ext_mode);
1407 create_output_operand (&ops[0], target, ext_mode);
1408 create_fixed_operand (&ops[1], op0);
1409 create_integer_operand (&ops[2], bitsize);
1410 create_integer_operand (&ops[3], bitnum);
1411 if (maybe_expand_insn (extv->icode, 4, ops))
1413 target = ops[0].value;
1414 if (target == spec_target)
1415 return target;
1416 if (target == spec_target_subreg)
1417 return spec_target;
1418 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1420 return NULL_RTX;
1423 /* A subroutine of extract_bit_field, with the same arguments.
1424 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1425 if we can find no other means of implementing the operation.
If FALLBACK_P is false, return NULL instead. */
1428 static rtx
1429 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1430 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1431 machine_mode mode, machine_mode tmode,
1432 bool fallback_p)
1434 rtx op0 = str_rtx;
1435 machine_mode int_mode;
1436 machine_mode mode1;
1438 if (tmode == VOIDmode)
1439 tmode = mode;
1441 while (GET_CODE (op0) == SUBREG)
1443 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1444 op0 = SUBREG_REG (op0);
1447 /* If we have an out-of-bounds access to a register, just return an
1448 uninitialized register of the required mode. This can occur if the
1449 source code contains an out-of-bounds access to a small array. */
1450 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1451 return gen_reg_rtx (tmode);
1453 if (REG_P (op0)
1454 && mode == GET_MODE (op0)
1455 && bitnum == 0
1456 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1458 /* We're trying to extract a full register from itself. */
1459 return op0;
1462 /* See if we can get a better vector mode before extracting. */
1463 if (VECTOR_MODE_P (GET_MODE (op0))
1464 && !MEM_P (op0)
1465 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1467 machine_mode new_mode;
1469 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1470 new_mode = MIN_MODE_VECTOR_FLOAT;
1471 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1472 new_mode = MIN_MODE_VECTOR_FRACT;
1473 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1474 new_mode = MIN_MODE_VECTOR_UFRACT;
1475 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1476 new_mode = MIN_MODE_VECTOR_ACCUM;
1477 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1478 new_mode = MIN_MODE_VECTOR_UACCUM;
1479 else
1480 new_mode = MIN_MODE_VECTOR_INT;
1482 for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1483 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1484 && targetm.vector_mode_supported_p (new_mode))
1485 break;
1486 if (new_mode != VOIDmode)
1487 op0 = gen_lowpart (new_mode, op0);
1490 /* Use vec_extract patterns for extracting parts of vectors whenever
1491 available. */
1492 if (VECTOR_MODE_P (GET_MODE (op0))
1493 && !MEM_P (op0)
1494 && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1495 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1496 == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1498 struct expand_operand ops[3];
1499 machine_mode outermode = GET_MODE (op0);
1500 machine_mode innermode = GET_MODE_INNER (outermode);
1501 enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1502 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1504 create_output_operand (&ops[0], target, innermode);
1505 create_input_operand (&ops[1], op0, outermode);
1506 create_integer_operand (&ops[2], pos);
1507 if (maybe_expand_insn (icode, 3, ops))
1509 target = ops[0].value;
1510 if (GET_MODE (target) != mode)
1511 return gen_lowpart (tmode, target);
1512 return target;
1516 /* Make sure we are playing with integral modes. Pun with subregs
1517 if we aren't. */
1519 machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1520 if (imode != GET_MODE (op0))
1522 if (MEM_P (op0))
1523 op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
1524 else if (imode != BLKmode)
1526 op0 = gen_lowpart (imode, op0);
1528 /* If we got a SUBREG, force it into a register since we
1529 aren't going to be able to do another SUBREG on it. */
1530 if (GET_CODE (op0) == SUBREG)
1531 op0 = force_reg (imode, op0);
1533 else if (REG_P (op0))
1535 rtx reg, subreg;
1536 imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1537 MODE_INT);
1538 reg = gen_reg_rtx (imode);
1539 subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1540 emit_move_insn (subreg, op0);
1541 op0 = reg;
1542 bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1544 else
1546 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1547 rtx mem = assign_stack_temp (GET_MODE (op0), size);
1548 emit_move_insn (mem, op0);
1549 op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1554 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1555 If that's wrong, the solution is to test for it and set TARGET to 0
1556 if needed. */
1558 /* Get the mode of the field to use for atomic access or subreg
1559 conversion. */
1560 mode1 = mode;
1561 if (SCALAR_INT_MODE_P (tmode))
1563 machine_mode try_mode = mode_for_size (bitsize,
1564 GET_MODE_CLASS (tmode), 0);
1565 if (try_mode != BLKmode)
1566 mode1 = try_mode;
1568 gcc_assert (mode1 != BLKmode);
1570 /* Extraction of a full MODE1 value can be done with a subreg as long
1571 as the least significant bit of the value is the least significant
1572 bit of either OP0 or a word of OP0. */
1573 if (!MEM_P (op0)
1574 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1575 && bitsize == GET_MODE_BITSIZE (mode1)
1576 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1578 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1579 bitnum / BITS_PER_UNIT);
1580 if (sub)
1581 return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1584 /* Extraction of a full MODE1 value can be done with a load as long as
1585 the field is on a byte boundary and is sufficiently aligned. */
1586 if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1588 op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1589 return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1592 /* Handle fields bigger than a word. */
1594 if (bitsize > BITS_PER_WORD)
1596 /* Here we transfer the words of the field
1597 in the order least significant first.
1598 This is because the most significant word is the one which may
1599 be less than full. */
1601 unsigned int backwards = WORDS_BIG_ENDIAN;
1602 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1603 unsigned int i;
1604 rtx_insn *last;
1606 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1607 target = gen_reg_rtx (mode);
1609 /* In case we're about to clobber a base register or something
1610 (see gcc.c-torture/execute/20040625-1.c). */
1611 if (reg_mentioned_p (target, str_rtx))
1612 target = gen_reg_rtx (mode);
1614 /* Indicate for flow that the entire target reg is being set. */
1615 emit_clobber (target);
1617 last = get_last_insn ();
1618 for (i = 0; i < nwords; i++)
1620 /* If I is 0, use the low-order word in both field and target;
1621 if I is 1, use the next to lowest word; and so on. */
1622 /* Word number in TARGET to use. */
1623 unsigned int wordnum
1624 = (backwards
1625 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1626 : i);
1627 /* Offset from start of field in OP0. */
1628 unsigned int bit_offset = (backwards
1629 ? MAX ((int) bitsize - ((int) i + 1)
1630 * BITS_PER_WORD,
1632 : (int) i * BITS_PER_WORD);
1633 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1634 rtx result_part
1635 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1636 bitsize - i * BITS_PER_WORD),
1637 bitnum + bit_offset, 1, target_part,
1638 mode, word_mode, fallback_p);
1640 gcc_assert (target_part);
1641 if (!result_part)
1643 delete_insns_since (last);
1644 return NULL;
1647 if (result_part != target_part)
1648 emit_move_insn (target_part, result_part);
1651 if (unsignedp)
1653 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1654 need to be zero'd out. */
1655 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1657 unsigned int i, total_words;
1659 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1660 for (i = nwords; i < total_words; i++)
1661 emit_move_insn
1662 (operand_subword (target,
1663 backwards ? total_words - i - 1 : i,
1664 1, VOIDmode),
1665 const0_rtx);
1667 return target;
1670 /* Signed bit field: sign-extend with two arithmetic shifts. */
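      /* E.g. for a 5-bit field in a 32-bit MODE: shift left by 27 so
	 the field's sign bit becomes bit 31, then shift right
	 arithmetically by 27 to replicate it through the upper bits.  */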
1671 target = expand_shift (LSHIFT_EXPR, mode, target,
1672 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1673 return expand_shift (RSHIFT_EXPR, mode, target,
1674 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1677 /* If OP0 is a multi-word register, narrow it to the affected word.
1678 If the region spans two words, defer to extract_split_bit_field. */
1679 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1681 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1682 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1683 bitnum %= BITS_PER_WORD;
1684 if (bitnum + bitsize > BITS_PER_WORD)
1686 if (!fallback_p)
1687 return NULL_RTX;
1688 target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1689 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1693 /* From here on we know the desired field is smaller than a word.
1694 If OP0 is a register, it too fits within a word. */
1695 enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1696 extraction_insn extv;
1697 if (!MEM_P (op0)
1698 /* ??? We could limit the structure size to the part of OP0 that
1699 contains the field, with appropriate checks for endianness
1700 and TRULY_NOOP_TRUNCATION. */
1701 && get_best_reg_extraction_insn (&extv, pattern,
1702 GET_MODE_BITSIZE (GET_MODE (op0)),
1703 tmode))
1705 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1706 unsignedp, target, mode,
1707 tmode);
1708 if (result)
1709 return result;
1712 /* If OP0 is a memory, try copying it to a register and seeing if a
1713 cheap register alternative is available. */
1714 if (MEM_P (op0))
1716 if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1717 tmode))
1719 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1720 bitnum, unsignedp,
1721 target, mode,
1722 tmode);
1723 if (result)
1724 return result;
1727 rtx_insn *last = get_last_insn ();
1729 /* Try loading part of OP0 into a register and extracting the
1730 bitfield from that. */
1731 unsigned HOST_WIDE_INT bitpos;
1732 rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1733 0, 0, tmode, &bitpos);
1734 if (xop0)
1736 xop0 = copy_to_reg (xop0);
1737 rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1738 unsignedp, target,
1739 mode, tmode, false);
1740 if (result)
1741 return result;
1742 delete_insns_since (last);
1746 if (!fallback_p)
1747 return NULL;
1749 /* Find a correspondingly-sized integer field, so we can apply
1750 shifts and masks to it. */
1751 int_mode = int_mode_for_mode (tmode);
1752 if (int_mode == BLKmode)
1753 int_mode = int_mode_for_mode (mode);
1754 /* Should probably push op0 out to memory and then do a load. */
1755 gcc_assert (int_mode != BLKmode);
1757 target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum,
1758 target, unsignedp);
1759 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1762 /* Generate code to extract a bit-field from STR_RTX
1763 containing BITSIZE bits, starting at BITNUM,
1764 and put it in TARGET if possible (if TARGET is nonzero).
1765 Regardless of TARGET, we return the rtx for where the value is placed.
1767 STR_RTX is the structure containing the byte (a REG or MEM).
1768 UNSIGNEDP is nonzero if this is an unsigned bit field.
1769 MODE is the natural mode of the field value once extracted.
1770 TMODE is the mode the caller would like the value to have;
1771 but the value may be returned with type MODE instead.
1773 If a TARGET is specified and we can store in it at no extra cost,
1774 we do so, and return TARGET.
1775 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1776 if they are equally easy. */
1778 rtx
1779 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1780 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1781 machine_mode mode, machine_mode tmode)
1783 machine_mode mode1;
1785 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1786 if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1787 mode1 = GET_MODE (str_rtx);
1788 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1789 mode1 = GET_MODE (target);
1790 else
1791 mode1 = tmode;
1793 if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, mode1, 0, 0))
1795 /* Extraction of a full MODE1 value can be done with a simple load.
1796 We know here that the field can be accessed with one single
1797 instruction. For targets that support unaligned memory,
1798 an unaligned access may be necessary. */
1799 if (bitsize == GET_MODE_BITSIZE (mode1))
1801 rtx result = adjust_bitfield_address (str_rtx, mode1,
1802 bitnum / BITS_PER_UNIT);
1803 gcc_assert (bitnum % BITS_PER_UNIT == 0);
1804 return convert_extracted_bit_field (result, mode, tmode, unsignedp);
1807 str_rtx = narrow_bit_field_mem (str_rtx, mode1, bitsize, bitnum,
1808 &bitnum);
1809 gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (mode1));
1810 str_rtx = copy_to_reg (str_rtx);
1813 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1814 target, mode, tmode, true);
1817 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1818 from bit BITNUM of OP0.
1820 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1821 If TARGET is nonzero, attempts to store the value there
1822 and return TARGET, but this is not guaranteed.
1823 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
1825 static rtx
1826 extract_fixed_bit_field (machine_mode tmode, rtx op0,
1827 unsigned HOST_WIDE_INT bitsize,
1828 unsigned HOST_WIDE_INT bitnum, rtx target,
1829 int unsignedp)
1831 if (MEM_P (op0))
1833 machine_mode mode
1834 = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), word_mode,
1835 MEM_VOLATILE_P (op0));
1837 if (mode == VOIDmode)
1838 /* The only way this should occur is if the field spans word
1839 boundaries. */
1840 return extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1842 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1845 return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
1846 target, unsignedp);
1849 /* Helper function for extract_fixed_bit_field, extracts
1850 the bit field always using the MODE of OP0. */
1852 static rtx
1853 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0,
1854 unsigned HOST_WIDE_INT bitsize,
1855 unsigned HOST_WIDE_INT bitnum, rtx target,
1856 int unsignedp)
1858 machine_mode mode = GET_MODE (op0);
1859 gcc_assert (SCALAR_INT_MODE_P (mode));
1861 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1862 for invalid input, such as extract equivalent of f5 from
1863 gcc.dg/pr48335-2.c. */
1865 if (BYTES_BIG_ENDIAN)
1866 /* BITNUM is the distance between our msb and that of OP0.
1867 Convert it to the distance from the lsb. */
1868 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1870 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
1871 We have reduced the big-endian case to the little-endian case. */
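/* For example, with a 32-bit OP0, BITSIZE == 8 and a big-endian BITNUM of 8
   (the field occupies the second byte counting from the msb), the conversion
   gives 32 - 8 - 8 == 16: the field's lsb sits 16 bits above OP0's lsb.  */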
1873 if (unsignedp)
1875 if (bitnum)
1877 /* If the field does not already start at the lsb,
1878 shift it so it does. */
1879 /* Maybe propagate the target for the shift. */
1880 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1881 if (tmode != mode)
1882 subtarget = 0;
1883 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
1885 /* Convert the value to the desired mode. */
1886 if (mode != tmode)
1887 op0 = convert_to_mode (tmode, op0, 1);
1889 /* Unless the msb of the field used to be the msb when we shifted,
1890 mask out the upper bits. */
1892 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
1893 return expand_binop (GET_MODE (op0), and_optab, op0,
1894 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1895 target, 1, OPTAB_LIB_WIDEN);
1896 return op0;
1899 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1900 then arithmetic-shift its lsb to the lsb of the word. */
1901 op0 = force_reg (mode, op0);
1903 /* Find the narrowest integer mode that contains the field. */
1905 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1906 mode = GET_MODE_WIDER_MODE (mode))
1907 if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
1909 op0 = convert_to_mode (mode, op0, 0);
1910 break;
1913 if (mode != tmode)
1914 target = 0;
1916 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
1918 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
1919 /* Maybe propagate the target for the shift. */
1920 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1921 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1924 return expand_shift (RSHIFT_EXPR, mode, op0,
1925 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1928 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1929 VALUE << BITPOS. */
1931 static rtx
1932 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
1933 int bitpos)
1935 return immed_wide_int_const (wi::lshift (value, bitpos), mode);
1938 /* Extract a bit field that is split across two words
1939 and return an RTX for the result.
1941 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1942 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1943 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend. */
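/* Worked example, assuming 32-bit words on a little-endian target
   (!BYTES_BIG_ENDIAN): a 16-bit field at BITPOS 24 has its low 8 bits in
   bits 24..31 of word 0 and its high 8 bits in bits 0..7 of word 1.  The
   loop below extracts each part zero-extended, shifts the second part left
   by 8, and IORs the parts together; a final pair of shifts sign-extends
   the result when UNSIGNEDP is zero.  */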
1945 static rtx
1946 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1947 unsigned HOST_WIDE_INT bitpos, int unsignedp)
1949 unsigned int unit;
1950 unsigned int bitsdone = 0;
1951 rtx result = NULL_RTX;
1952 int first = 1;
1954 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1955 much at a time. */
1956 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1957 unit = BITS_PER_WORD;
1958 else
1959 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1961 while (bitsdone < bitsize)
1963 unsigned HOST_WIDE_INT thissize;
1964 rtx part, word;
1965 unsigned HOST_WIDE_INT thispos;
1966 unsigned HOST_WIDE_INT offset;
1968 offset = (bitpos + bitsdone) / unit;
1969 thispos = (bitpos + bitsdone) % unit;
1971 /* THISSIZE must not overrun a word boundary. Otherwise,
1972 extract_fixed_bit_field will call us again, and we will mutually
1973 recurse forever. */
1974 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1975 thissize = MIN (thissize, unit - thispos);
1977 /* If OP0 is a register, then handle OFFSET here.
1979 When handling multiword bitfields, extract_bit_field may pass
1980 down a word_mode SUBREG of a larger REG for a bitfield that actually
1981 crosses a word boundary. Thus, for a SUBREG, we must find
1982 the current word starting from the base register. */
1983 if (GET_CODE (op0) == SUBREG)
1985 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1986 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1987 GET_MODE (SUBREG_REG (op0)));
1988 offset = 0;
1990 else if (REG_P (op0))
1992 word = operand_subword_force (op0, offset, GET_MODE (op0));
1993 offset = 0;
1995 else
1996 word = op0;
1998 /* Extract the parts in bit-counting order,
1999 whose meaning is determined by BYTES_PER_UNIT.
2000 OFFSET is in UNITs, and UNIT is in bits. */
2001 part = extract_fixed_bit_field (word_mode, word, thissize,
2002 offset * unit + thispos, 0, 1);
2003 bitsdone += thissize;
2005 /* Shift this part into place for the result. */
2006 if (BYTES_BIG_ENDIAN)
2008 if (bitsize != bitsdone)
2009 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2010 bitsize - bitsdone, 0, 1);
2012 else
2014 if (bitsdone != thissize)
2015 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2016 bitsdone - thissize, 0, 1);
2019 if (first)
2020 result = part;
2021 else
2022 /* Combine the parts with bitwise or. This works
2023 because we extracted each part as an unsigned bit field. */
2024 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2025 OPTAB_LIB_WIDEN);
2027 first = 0;
2030 /* Unsigned bit field: we are done. */
2031 if (unsignedp)
2032 return result;
2033 /* Signed bit field: sign-extend with two arithmetic shifts. */
2034 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2035 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2036 return expand_shift (RSHIFT_EXPR, word_mode, result,
2037 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2040 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2041 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2042 MODE, fill the upper bits with zeros. Fail if the layout of either
2043 mode is unknown (as for CC modes) or if the extraction would involve
2044 unprofitable mode punning. Return the value on success, otherwise
2045 return null.
2047 This is different from gen_lowpart* in these respects:
2049 - the returned value must always be considered an rvalue
2051 - when MODE is wider than SRC_MODE, the extraction involves
2052 a zero extension
2054 - when MODE is smaller than SRC_MODE, the extraction involves
2055 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2057 In other words, this routine performs a computation, whereas the
2058 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2059 operations. */
2061 rtx
2062 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2064 machine_mode int_mode, src_int_mode;
2066 if (mode == src_mode)
2067 return src;
2069 if (CONSTANT_P (src))
2071 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2072 fails, it will happily create (subreg (symbol_ref)) or similar
2073 invalid SUBREGs. */
2074 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2075 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2076 if (ret)
2077 return ret;
2079 if (GET_MODE (src) == VOIDmode
2080 || !validate_subreg (mode, src_mode, src, byte))
2081 return NULL_RTX;
2083 src = force_reg (GET_MODE (src), src);
2084 return gen_rtx_SUBREG (mode, src, byte);
2087 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2088 return NULL_RTX;
2090 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2091 && MODES_TIEABLE_P (mode, src_mode))
2093 rtx x = gen_lowpart_common (mode, src);
2094 if (x)
2095 return x;
2098 src_int_mode = int_mode_for_mode (src_mode);
2099 int_mode = int_mode_for_mode (mode);
2100 if (src_int_mode == BLKmode || int_mode == BLKmode)
2101 return NULL_RTX;
2103 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2104 return NULL_RTX;
2105 if (!MODES_TIEABLE_P (int_mode, mode))
2106 return NULL_RTX;
2108 src = gen_lowpart (src_int_mode, src);
2109 src = convert_modes (int_mode, src_int_mode, src, true);
2110 src = gen_lowpart (mode, src);
2111 return src;
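/* As a rough C-level analogy (a sketch assuming 32-bit SFmode and SImode),
   extract_low_bits (SImode, SFmode, x) behaves like

       float f = ...;
       uint32_t bits;
       memcpy (&bits, &f, sizeof bits);

   i.e. an rvalue reinterpretation of the low bits, except that it works on
   rtl and may return NULL_RTX instead of forcing an unprofitable punning.  */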
2114 /* Add INC into TARGET. */
2116 void
2117 expand_inc (rtx target, rtx inc)
2119 rtx value = expand_binop (GET_MODE (target), add_optab,
2120 target, inc,
2121 target, 0, OPTAB_LIB_WIDEN);
2122 if (value != target)
2123 emit_move_insn (target, value);
2126 /* Subtract DEC from TARGET. */
2128 void
2129 expand_dec (rtx target, rtx dec)
2131 rtx value = expand_binop (GET_MODE (target), sub_optab,
2132 target, dec,
2133 target, 0, OPTAB_LIB_WIDEN);
2134 if (value != target)
2135 emit_move_insn (target, value);
2138 /* Output a shift instruction for expression code CODE,
2139 with SHIFTED being the rtx for the value to shift,
2140 and AMOUNT the rtx for the amount to shift by.
2141 Store the result in the rtx TARGET, if that is convenient.
2142 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2143 Return the rtx for where the value is. */
2145 static rtx
2146 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2147 rtx amount, rtx target, int unsignedp)
2149 rtx op1, temp = 0;
2150 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2151 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2152 optab lshift_optab = ashl_optab;
2153 optab rshift_arith_optab = ashr_optab;
2154 optab rshift_uns_optab = lshr_optab;
2155 optab lrotate_optab = rotl_optab;
2156 optab rrotate_optab = rotr_optab;
2157 machine_mode op1_mode;
2158 machine_mode scalar_mode = mode;
2159 int attempt;
2160 bool speed = optimize_insn_for_speed_p ();
2162 if (VECTOR_MODE_P (mode))
2163 scalar_mode = GET_MODE_INNER (mode);
2164 op1 = amount;
2165 op1_mode = GET_MODE (op1);
2167 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2168 shift amount is a vector, use the vector/vector shift patterns. */
2169 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2171 lshift_optab = vashl_optab;
2172 rshift_arith_optab = vashr_optab;
2173 rshift_uns_optab = vlshr_optab;
2174 lrotate_optab = vrotl_optab;
2175 rrotate_optab = vrotr_optab;
2178 /* Previously detected shift-counts computed by NEGATE_EXPR
2179 and shifted in the other direction; but that does not work
2180 on all machines. */
2182 if (SHIFT_COUNT_TRUNCATED)
2184 if (CONST_INT_P (op1)
2185 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2186 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2187 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2188 % GET_MODE_BITSIZE (scalar_mode));
2189 else if (GET_CODE (op1) == SUBREG
2190 && subreg_lowpart_p (op1)
2191 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2192 && SCALAR_INT_MODE_P (GET_MODE (op1)))
2193 op1 = SUBREG_REG (op1);
2196 /* Canonicalize rotates by constant amount. If op1 is bitsize / 2,
2197 prefer left rotation, if op1 is from bitsize / 2 + 1 to
2198 bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2199 amount instead. */
2200 if (rotate
2201 && CONST_INT_P (op1)
2202 && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2203 GET_MODE_BITSIZE (scalar_mode) - 1))
2205 op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2206 left = !left;
2207 code = left ? LROTATE_EXPR : RROTATE_EXPR;
2210 /* Rotation of 16-bit values by 8 bits is effectively equivalent to a bswaphi.
2211 Note that this is not the case for bigger values. For instance a rotation
2212 of 0x01020304 by 16 bits gives 0x03040102 which is different from
2213 0x04030201 (bswapsi). */
2214 if (rotate
2215 && CONST_INT_P (op1)
2216 && INTVAL (op1) == BITS_PER_UNIT
2217 && GET_MODE_SIZE (scalar_mode) == 2
2218 && optab_handler (bswap_optab, HImode) != CODE_FOR_nothing)
2219 return expand_unop (HImode, bswap_optab, shifted, NULL_RTX,
2220 unsignedp);
2222 if (op1 == const0_rtx)
2223 return shifted;
2225 /* Check whether it's cheaper to implement a left shift by a constant
2226 bit count as a sequence of additions. */
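/* For instance, x << 2 can be computed as two self-additions, t = x + x
   followed by t = t + t; this path is taken when two adds are cheaper on
   the target than a single shift by 2.  */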
2227 if (code == LSHIFT_EXPR
2228 && CONST_INT_P (op1)
2229 && INTVAL (op1) > 0
2230 && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2231 && INTVAL (op1) < MAX_BITS_PER_WORD
2232 && (shift_cost (speed, mode, INTVAL (op1))
2233 > INTVAL (op1) * add_cost (speed, mode))
2234 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2236 int i;
2237 for (i = 0; i < INTVAL (op1); i++)
2239 temp = force_reg (mode, shifted);
2240 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2241 unsignedp, OPTAB_LIB_WIDEN);
2243 return shifted;
2246 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2248 enum optab_methods methods;
2250 if (attempt == 0)
2251 methods = OPTAB_DIRECT;
2252 else if (attempt == 1)
2253 methods = OPTAB_WIDEN;
2254 else
2255 methods = OPTAB_LIB_WIDEN;
2257 if (rotate)
2259 /* Widening does not work for rotation. */
2260 if (methods == OPTAB_WIDEN)
2261 continue;
2262 else if (methods == OPTAB_LIB_WIDEN)
2264 /* If we have been unable to open-code this by a rotation,
2265 do it as the IOR of two shifts. I.e., to rotate A
2266 by N bits, compute
2267 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2268 where C is the bitsize of A.
2270 It is theoretically possible that the target machine might
2271 not be able to perform either shift and hence we would
2272 be making two libcalls rather than just the one for the
2273 shift (similarly if IOR could not be done). We will allow
2274 this extremely unlikely lossage to avoid complicating the
2275 code below. */
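/* Concretely, for a 32-bit A rotated left by N this emits
   (A << N) | ((unsigned) A >> ((-N) & 31)); e.g. A = 0x01020304, N = 8
   gives 0x02030400 | 0x00000001 = 0x02030401.  Masking -N with C - 1 keeps
   the right-shift count within [0, C - 1] even for N == 0, avoiding an
   undefined shift by C.  */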
2277 rtx subtarget = target == shifted ? 0 : target;
2278 rtx new_amount, other_amount;
2279 rtx temp1;
2281 new_amount = op1;
2282 if (op1 == const0_rtx)
2283 return shifted;
2284 else if (CONST_INT_P (op1))
2285 other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode)
2286 - INTVAL (op1));
2287 else
2289 other_amount
2290 = simplify_gen_unary (NEG, GET_MODE (op1),
2291 op1, GET_MODE (op1));
2292 HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2293 other_amount
2294 = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2295 gen_int_mode (mask, GET_MODE (op1)));
2298 shifted = force_reg (mode, shifted);
2300 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2301 mode, shifted, new_amount, 0, 1);
2302 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2303 mode, shifted, other_amount,
2304 subtarget, 1);
2305 return expand_binop (mode, ior_optab, temp, temp1, target,
2306 unsignedp, methods);
2309 temp = expand_binop (mode,
2310 left ? lrotate_optab : rrotate_optab,
2311 shifted, op1, target, unsignedp, methods);
2313 else if (unsignedp)
2314 temp = expand_binop (mode,
2315 left ? lshift_optab : rshift_uns_optab,
2316 shifted, op1, target, unsignedp, methods);
2318 /* Do arithmetic shifts.
2319 Also, if we are going to widen the operand, we can just as well
2320 use an arithmetic right-shift instead of a logical one. */
2321 if (temp == 0 && ! rotate
2322 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2324 enum optab_methods methods1 = methods;
2326 /* If trying to widen a log shift to an arithmetic shift,
2327 don't accept an arithmetic shift of the same size. */
2328 if (unsignedp)
2329 methods1 = OPTAB_MUST_WIDEN;
2331 /* Arithmetic shift */
2333 temp = expand_binop (mode,
2334 left ? lshift_optab : rshift_arith_optab,
2335 shifted, op1, target, unsignedp, methods1);
2338 /* We used to try extzv here for logical right shifts, but that was
2339 only useful for one machine, the VAX, and caused poor code
2340 generation there for lshrdi3, so the code was deleted and a
2341 define_expand for lshrsi3 was added to vax.md. */
2344 gcc_assert (temp);
2345 return temp;
2348 /* Output a shift instruction for expression code CODE,
2349 with SHIFTED being the rtx for the value to shift,
2350 and AMOUNT the amount to shift by.
2351 Store the result in the rtx TARGET, if that is convenient.
2352 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2353 Return the rtx for where the value is. */
2355 rtx
2356 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2357 int amount, rtx target, int unsignedp)
2359 return expand_shift_1 (code, mode,
2360 shifted, GEN_INT (amount), target, unsignedp);
2363 /* Output a shift instruction for expression code CODE,
2364 with SHIFTED being the rtx for the value to shift,
2365 and AMOUNT the tree for the amount to shift by.
2366 Store the result in the rtx TARGET, if that is convenient.
2367 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2368 Return the rtx for where the value is. */
2370 rtx
2371 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2372 tree amount, rtx target, int unsignedp)
2374 return expand_shift_1 (code, mode,
2375 shifted, expand_normal (amount), target, unsignedp);
2379 /* Indicates the type of fixup needed after a constant multiplication.
2380 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2381 the result should be negated, and ADD_VARIANT means that the
2382 multiplicand should be added to the result. */
2383 enum mult_variant {basic_variant, negate_variant, add_variant};
2385 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2386 const struct mult_cost *, machine_mode mode);
2387 static bool choose_mult_variant (machine_mode, HOST_WIDE_INT,
2388 struct algorithm *, enum mult_variant *, int);
2389 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2390 const struct algorithm *, enum mult_variant);
2391 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2392 static rtx extract_high_half (machine_mode, rtx);
2393 static rtx expmed_mult_highpart (machine_mode, rtx, rtx, rtx, int, int);
2394 static rtx expmed_mult_highpart_optab (machine_mode, rtx, rtx, rtx,
2395 int, int);
2396 /* Compute and return the best algorithm for multiplying by T.
2397 The algorithm must cost less than COST_LIMIT.
2398 If retval.cost >= COST_LIMIT, no algorithm was found and all
2399 other fields of the returned struct are undefined.
2400 MODE is the machine mode of the multiplication. */
2402 static void
2403 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2404 const struct mult_cost *cost_limit, machine_mode mode)
2406 int m;
2407 struct algorithm *alg_in, *best_alg;
2408 struct mult_cost best_cost;
2409 struct mult_cost new_limit;
2410 int op_cost, op_latency;
2411 unsigned HOST_WIDE_INT orig_t = t;
2412 unsigned HOST_WIDE_INT q;
2413 int maxm, hash_index;
2414 bool cache_hit = false;
2415 enum alg_code cache_alg = alg_zero;
2416 bool speed = optimize_insn_for_speed_p ();
2417 machine_mode imode;
2418 struct alg_hash_entry *entry_ptr;
2420 /* Indicate that no algorithm is yet found. If no algorithm
2421 is found, this value will be returned and indicate failure. */
2422 alg_out->cost.cost = cost_limit->cost + 1;
2423 alg_out->cost.latency = cost_limit->latency + 1;
2425 if (cost_limit->cost < 0
2426 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2427 return;
2429 /* Be prepared for vector modes. */
2430 imode = GET_MODE_INNER (mode);
2431 if (imode == VOIDmode)
2432 imode = mode;
2434 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2436 /* Restrict the bits of "t" to the multiplication's mode. */
2437 t &= GET_MODE_MASK (imode);
2439 /* t == 1 can be done in zero cost. */
2440 if (t == 1)
2442 alg_out->ops = 1;
2443 alg_out->cost.cost = 0;
2444 alg_out->cost.latency = 0;
2445 alg_out->op[0] = alg_m;
2446 return;
2449 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2450 fail now. */
2451 if (t == 0)
2453 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2454 return;
2455 else
2457 alg_out->ops = 1;
2458 alg_out->cost.cost = zero_cost (speed);
2459 alg_out->cost.latency = zero_cost (speed);
2460 alg_out->op[0] = alg_zero;
2461 return;
2465 /* We'll be needing a couple extra algorithm structures now. */
2467 alg_in = XALLOCA (struct algorithm);
2468 best_alg = XALLOCA (struct algorithm);
2469 best_cost = *cost_limit;
2471 /* Compute the hash index. */
2472 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2474 /* See if we already know what to do for T. */
2475 entry_ptr = alg_hash_entry_ptr (hash_index);
2476 if (entry_ptr->t == t
2477 && entry_ptr->mode == mode
2479 && entry_ptr->speed == speed
2480 && entry_ptr->alg != alg_unknown)
2482 cache_alg = entry_ptr->alg;
2484 if (cache_alg == alg_impossible)
2486 /* The cache tells us that it's impossible to synthesize
2487 multiplication by T within entry_ptr->cost. */
2488 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2489 /* COST_LIMIT is at least as restrictive as the one
2490 recorded in the hash table, in which case we have no
2491 hope of synthesizing a multiplication. Just
2492 return. */
2493 return;
2495 /* If we get here, COST_LIMIT is less restrictive than the
2496 one recorded in the hash table, so we may be able to
2497 synthesize a multiplication. Proceed as if we didn't
2498 have the cache entry. */
2500 else
2502 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2503 /* The cached algorithm shows that this multiplication
2504 requires more cost than COST_LIMIT. Just return. This
2505 way, we don't clobber this cache entry with
2506 alg_impossible but retain useful information. */
2507 return;
2509 cache_hit = true;
2511 switch (cache_alg)
2513 case alg_shift:
2514 goto do_alg_shift;
2516 case alg_add_t_m2:
2517 case alg_sub_t_m2:
2518 goto do_alg_addsub_t_m2;
2520 case alg_add_factor:
2521 case alg_sub_factor:
2522 goto do_alg_addsub_factor;
2524 case alg_add_t2_m:
2525 goto do_alg_add_t2_m;
2527 case alg_sub_t2_m:
2528 goto do_alg_sub_t2_m;
2530 default:
2531 gcc_unreachable ();
2536 /* If we have a group of zero bits at the low-order part of T, try
2537 multiplying by the remaining bits and then doing a shift. */
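/* E.g. T == 40 == 5 << 3: synthesize x * 5 as (x << 2) + x and then shift
   the result left by 3, giving x * 40 with one shift-add plus one shift.  */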
2539 if ((t & 1) == 0)
2541 do_alg_shift:
2542 m = floor_log2 (t & -t); /* m = number of low zero bits */
2543 if (m < maxm)
2545 q = t >> m;
2546 /* The function expand_shift will choose between a shift and
2547 a sequence of additions, so the observed cost is given as
2548 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2549 op_cost = m * add_cost (speed, mode);
2550 if (shift_cost (speed, mode, m) < op_cost)
2551 op_cost = shift_cost (speed, mode, m);
2552 new_limit.cost = best_cost.cost - op_cost;
2553 new_limit.latency = best_cost.latency - op_cost;
2554 synth_mult (alg_in, q, &new_limit, mode);
2556 alg_in->cost.cost += op_cost;
2557 alg_in->cost.latency += op_cost;
2558 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2560 best_cost = alg_in->cost;
2561 std::swap (alg_in, best_alg);
2562 best_alg->log[best_alg->ops] = m;
2563 best_alg->op[best_alg->ops] = alg_shift;
2566 /* See if treating ORIG_T as a signed number yields a better
2567 sequence. Try this sequence only for a negative ORIG_T
2568 as it would be useless for a non-negative ORIG_T. */
2569 if ((HOST_WIDE_INT) orig_t < 0)
2571 /* Shift ORIG_T as follows because a right shift of a
2572 negative-valued signed type is implementation
2573 defined. */
2574 q = ~(~orig_t >> m);
2575 /* The function expand_shift will choose between a shift
2576 and a sequence of additions, so the observed cost is
2577 given as MIN (m * add_cost(speed, mode),
2578 shift_cost(speed, mode, m)). */
2579 op_cost = m * add_cost (speed, mode);
2580 if (shift_cost (speed, mode, m) < op_cost)
2581 op_cost = shift_cost (speed, mode, m);
2582 new_limit.cost = best_cost.cost - op_cost;
2583 new_limit.latency = best_cost.latency - op_cost;
2584 synth_mult (alg_in, q, &new_limit, mode);
2586 alg_in->cost.cost += op_cost;
2587 alg_in->cost.latency += op_cost;
2588 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2590 best_cost = alg_in->cost;
2591 std::swap (alg_in, best_alg);
2592 best_alg->log[best_alg->ops] = m;
2593 best_alg->op[best_alg->ops] = alg_shift;
2597 if (cache_hit)
2598 goto done;
2601 /* If we have an odd number, add or subtract one. */
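/* E.g. T == 7 ends in ...111, so compute x * 8 - x == (x << 3) - x, while
   T == 5 ends in ...01, so compute x * 4 + x == (x << 2) + x.  */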
2602 if ((t & 1) != 0)
2604 unsigned HOST_WIDE_INT w;
2606 do_alg_addsub_t_m2:
2607 for (w = 1; (w & t) != 0; w <<= 1)
2608 ;
2609 /* If T was -1, then W will be zero after the loop. This is another
2610 case where T ends with ...111. Handling it as (T + 1) followed by
2611 a subtraction produces slightly better code and makes algorithm
2612 selection much faster than treating it like the ...0111 case
2613 below. */
2614 if (w == 0
2615 || (w > 2
2616 /* Reject the case where t is 3.
2617 Thus we prefer addition in that case. */
2618 && t != 3))
2620 /* T ends with ...111. Multiply by (T + 1) and subtract the operand: x*T = x*(T + 1) - x. */
2622 op_cost = add_cost (speed, mode);
2623 new_limit.cost = best_cost.cost - op_cost;
2624 new_limit.latency = best_cost.latency - op_cost;
2625 synth_mult (alg_in, t + 1, &new_limit, mode);
2627 alg_in->cost.cost += op_cost;
2628 alg_in->cost.latency += op_cost;
2629 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2631 best_cost = alg_in->cost;
2632 std::swap (alg_in, best_alg);
2633 best_alg->log[best_alg->ops] = 0;
2634 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2637 else
2639 /* T ends with ...01 or ...011. Multiply by (T - 1) and add the operand: x*T = x*(T - 1) + x. */
2641 op_cost = add_cost (speed, mode);
2642 new_limit.cost = best_cost.cost - op_cost;
2643 new_limit.latency = best_cost.latency - op_cost;
2644 synth_mult (alg_in, t - 1, &new_limit, mode);
2646 alg_in->cost.cost += op_cost;
2647 alg_in->cost.latency += op_cost;
2648 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2650 best_cost = alg_in->cost;
2651 std::swap (alg_in, best_alg);
2652 best_alg->log[best_alg->ops] = 0;
2653 best_alg->op[best_alg->ops] = alg_add_t_m2;
2657 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2658 quickly with a - a * n for some appropriate constant n. */
2659 m = exact_log2 (-orig_t + 1);
2660 if (m >= 0 && m < maxm)
2662 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2663 /* If the target has a cheap shift-and-subtract insn use
2664 that in preference to a shift insn followed by a sub insn.
2665 Assume that the shift-and-sub is "atomic" with a latency
2666 equal to its cost; otherwise assume that on superscalar
2667 hardware the shift may be executed concurrently with the
2668 earlier steps in the algorithm. */
2669 if (shiftsub1_cost (speed, mode, m) <= op_cost)
2671 op_cost = shiftsub1_cost (speed, mode, m);
2672 op_latency = op_cost;
2674 else
2675 op_latency = add_cost (speed, mode);
2677 new_limit.cost = best_cost.cost - op_cost;
2678 new_limit.latency = best_cost.latency - op_latency;
2679 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2680 &new_limit, mode);
2682 alg_in->cost.cost += op_cost;
2683 alg_in->cost.latency += op_latency;
2684 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2686 best_cost = alg_in->cost;
2687 std::swap (alg_in, best_alg);
2688 best_alg->log[best_alg->ops] = m;
2689 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2693 if (cache_hit)
2694 goto done;
2697 /* Look for factors of t of the form
2698 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2699 If we find such a factor, we can multiply by t using an algorithm that
2700 multiplies by q, shift the result by m and add/subtract it to itself.
2702 We search for large factors first and loop down, even if large factors
2703 are less probable than small; if we find a large factor we will find a
2704 good sequence quickly, and therefore be able to prune (by decreasing
2705 COST_LIMIT) the search. */
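/* E.g. T == 45 has the factor 2**4 - 1 == 15: synthesize y = x * 3
   == (x << 1) + x, then x * 45 == (y << 4) - y.  */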
2707 do_alg_addsub_factor:
2708 for (m = floor_log2 (t - 1); m >= 2; m--)
2710 unsigned HOST_WIDE_INT d;
2712 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2713 if (t % d == 0 && t > d && m < maxm
2714 && (!cache_hit || cache_alg == alg_add_factor))
2716 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2717 if (shiftadd_cost (speed, mode, m) <= op_cost)
2718 op_cost = shiftadd_cost (speed, mode, m);
2720 op_latency = op_cost;
2723 new_limit.cost = best_cost.cost - op_cost;
2724 new_limit.latency = best_cost.latency - op_latency;
2725 synth_mult (alg_in, t / d, &new_limit, mode);
2727 alg_in->cost.cost += op_cost;
2728 alg_in->cost.latency += op_latency;
2729 if (alg_in->cost.latency < op_cost)
2730 alg_in->cost.latency = op_cost;
2731 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2733 best_cost = alg_in->cost;
2734 std::swap (alg_in, best_alg);
2735 best_alg->log[best_alg->ops] = m;
2736 best_alg->op[best_alg->ops] = alg_add_factor;
2738 /* Other factors will have been taken care of in the recursion. */
2739 break;
2742 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2743 if (t % d == 0 && t > d && m < maxm
2744 && (!cache_hit || cache_alg == alg_sub_factor))
2746 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2747 if (shiftsub0_cost (speed, mode, m) <= op_cost)
2748 op_cost = shiftsub0_cost (speed, mode, m);
2750 op_latency = op_cost;
2752 new_limit.cost = best_cost.cost - op_cost;
2753 new_limit.latency = best_cost.latency - op_latency;
2754 synth_mult (alg_in, t / d, &new_limit, mode);
2756 alg_in->cost.cost += op_cost;
2757 alg_in->cost.latency += op_latency;
2758 if (alg_in->cost.latency < op_cost)
2759 alg_in->cost.latency = op_cost;
2760 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2762 best_cost = alg_in->cost;
2763 std::swap (alg_in, best_alg);
2764 best_alg->log[best_alg->ops] = m;
2765 best_alg->op[best_alg->ops] = alg_sub_factor;
2767 break;
2770 if (cache_hit)
2771 goto done;
2773 /* Try shift-and-add (load effective address) instructions,
2774 i.e. do a*3, a*5, a*9. */
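/* These correspond to the scaled-index forms of x86 "lea" (base + index *
   {2,4,8}): e.g. T == 9 gives q == 8, m == 3, and x * 9 == (x << 3) + x
   in a single shift-and-add.  */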
2775 if ((t & 1) != 0)
2777 do_alg_add_t2_m:
2778 q = t - 1;
2779 q = q & -q;
2780 m = exact_log2 (q);
2781 if (m >= 0 && m < maxm)
2783 op_cost = shiftadd_cost (speed, mode, m);
2784 new_limit.cost = best_cost.cost - op_cost;
2785 new_limit.latency = best_cost.latency - op_cost;
2786 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2788 alg_in->cost.cost += op_cost;
2789 alg_in->cost.latency += op_cost;
2790 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2792 best_cost = alg_in->cost;
2793 std::swap (alg_in, best_alg);
2794 best_alg->log[best_alg->ops] = m;
2795 best_alg->op[best_alg->ops] = alg_add_t2_m;
2798 if (cache_hit)
2799 goto done;
2801 do_alg_sub_t2_m:
2802 q = t + 1;
2803 q = q & -q;
2804 m = exact_log2 (q);
2805 if (m >= 0 && m < maxm)
2807 op_cost = shiftsub0_cost (speed, mode, m);
2808 new_limit.cost = best_cost.cost - op_cost;
2809 new_limit.latency = best_cost.latency - op_cost;
2810 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2812 alg_in->cost.cost += op_cost;
2813 alg_in->cost.latency += op_cost;
2814 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2816 best_cost = alg_in->cost;
2817 std::swap (alg_in, best_alg);
2818 best_alg->log[best_alg->ops] = m;
2819 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2822 if (cache_hit)
2823 goto done;
2826 done:
2827 /* If best_cost has not decreased, we have not found any algorithm. */
2828 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2830 /* We failed to find an algorithm. Record alg_impossible for
2831 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2832 we are asked to find an algorithm for T within the same or
2833 lower COST_LIMIT, we can immediately return to the
2834 caller. */
2835 entry_ptr->t = t;
2836 entry_ptr->mode = mode;
2837 entry_ptr->speed = speed;
2838 entry_ptr->alg = alg_impossible;
2839 entry_ptr->cost = *cost_limit;
2840 return;
2843 /* Cache the result. */
2844 if (!cache_hit)
2846 entry_ptr->t = t;
2847 entry_ptr->mode = mode;
2848 entry_ptr->speed = speed;
2849 entry_ptr->alg = best_alg->op[best_alg->ops];
2850 entry_ptr->cost.cost = best_cost.cost;
2851 entry_ptr->cost.latency = best_cost.latency;
2854 /* If the sequence is too long for `struct algorithm'
2855 to record, make this search fail. */
2856 if (best_alg->ops == MAX_BITS_PER_WORD)
2857 return;
2859 /* Copy the algorithm from temporary space to the space at alg_out.
2860 We avoid using structure assignment because the majority of
2861 best_alg is normally undefined, and this is a critical function. */
2862 alg_out->ops = best_alg->ops + 1;
2863 alg_out->cost = best_cost;
2864 memcpy (alg_out->op, best_alg->op,
2865 alg_out->ops * sizeof *alg_out->op);
2866 memcpy (alg_out->log, best_alg->log,
2867 alg_out->ops * sizeof *alg_out->log);
2870 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2871 Try three variations:
2873 - a shift/add sequence based on VAL itself
2874 - a shift/add sequence based on -VAL, followed by a negation
2875 - a shift/add sequence based on VAL - 1, followed by an addition.
2877 Return true if the cheapest of these cost less than MULT_COST,
2878 describing the algorithm in *ALG and final fixup in *VARIANT. */
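/* For instance, with VAL == -3 the negate variant is typically cheapest:
   synthesize x * 3 == (x << 1) + x and negate the result, rather than
   building -3 directly out of shifts and subtractions.  Which variant wins
   depends on the target's add, shift and negate costs.  */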
2880 static bool
2881 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
2882 struct algorithm *alg, enum mult_variant *variant,
2883 int mult_cost)
2885 struct algorithm alg2;
2886 struct mult_cost limit;
2887 int op_cost;
2888 bool speed = optimize_insn_for_speed_p ();
2890 /* Fail quickly for impossible bounds. */
2891 if (mult_cost < 0)
2892 return false;
2894 /* Ensure that mult_cost provides a reasonable upper bound.
2895 Any constant multiplication can be performed with less
2896 than 2 * bits additions. */
2897 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
2898 if (mult_cost > op_cost)
2899 mult_cost = op_cost;
2901 *variant = basic_variant;
2902 limit.cost = mult_cost;
2903 limit.latency = mult_cost;
2904 synth_mult (alg, val, &limit, mode);
2906 /* This works only if the inverted value actually fits in an
2907 `unsigned int'. */
2908 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
2910 op_cost = neg_cost (speed, mode);
2911 if (MULT_COST_LESS (&alg->cost, mult_cost))
2913 limit.cost = alg->cost.cost - op_cost;
2914 limit.latency = alg->cost.latency - op_cost;
2916 else
2918 limit.cost = mult_cost - op_cost;
2919 limit.latency = mult_cost - op_cost;
2922 synth_mult (&alg2, -val, &limit, mode);
2923 alg2.cost.cost += op_cost;
2924 alg2.cost.latency += op_cost;
2925 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2926 *alg = alg2, *variant = negate_variant;
2929 /* This proves very useful for division-by-constant. */
2930 op_cost = add_cost (speed, mode);
2931 if (MULT_COST_LESS (&alg->cost, mult_cost))
2933 limit.cost = alg->cost.cost - op_cost;
2934 limit.latency = alg->cost.latency - op_cost;
2936 else
2938 limit.cost = mult_cost - op_cost;
2939 limit.latency = mult_cost - op_cost;
2942 synth_mult (&alg2, val - 1, &limit, mode);
2943 alg2.cost.cost += op_cost;
2944 alg2.cost.latency += op_cost;
2945 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2946 *alg = alg2, *variant = add_variant;
2948 return MULT_COST_LESS (&alg->cost, mult_cost);
2951 /* A subroutine of expand_mult, used for constant multiplications.
2952 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2953 convenient. Use the shift/add sequence described by ALG and apply
2954 the final fixup specified by VARIANT. */
2956 static rtx
2957 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
2958 rtx target, const struct algorithm *alg,
2959 enum mult_variant variant)
2961 HOST_WIDE_INT val_so_far;
2962 rtx_insn *insn;
2963 rtx accum, tem;
2964 int opno;
2965 machine_mode nmode;
2967 /* Avoid referencing memory over and over and invalid sharing
2968 on SUBREGs. */
2969 op0 = force_reg (mode, op0);
2971 /* ACCUM starts out either as OP0 or as a zero, depending on
2972 the first operation. */
2974 if (alg->op[0] == alg_zero)
2976 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
2977 val_so_far = 0;
2979 else if (alg->op[0] == alg_m)
2981 accum = copy_to_mode_reg (mode, op0);
2982 val_so_far = 1;
2984 else
2985 gcc_unreachable ();
2987 for (opno = 1; opno < alg->ops; opno++)
2989 int log = alg->log[opno];
2990 rtx shift_subtarget = optimize ? 0 : accum;
2991 rtx add_target
2992 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2993 && !optimize)
2994 ? target : 0;
2995 rtx accum_target = optimize ? 0 : accum;
2996 rtx accum_inner;
2998 switch (alg->op[opno])
3000 case alg_shift:
3001 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3002 /* REG_EQUAL note will be attached to the following insn. */
3003 emit_move_insn (accum, tem);
3004 val_so_far <<= log;
3005 break;
3007 case alg_add_t_m2:
3008 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3009 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3010 add_target ? add_target : accum_target);
3011 val_so_far += (HOST_WIDE_INT) 1 << log;
3012 break;
3014 case alg_sub_t_m2:
3015 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3016 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3017 add_target ? add_target : accum_target);
3018 val_so_far -= (HOST_WIDE_INT) 1 << log;
3019 break;
3021 case alg_add_t2_m:
3022 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3023 log, shift_subtarget, 0);
3024 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3025 add_target ? add_target : accum_target);
3026 val_so_far = (val_so_far << log) + 1;
3027 break;
3029 case alg_sub_t2_m:
3030 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3031 log, shift_subtarget, 0);
3032 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3033 add_target ? add_target : accum_target);
3034 val_so_far = (val_so_far << log) - 1;
3035 break;
3037 case alg_add_factor:
3038 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3039 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3040 add_target ? add_target : accum_target);
3041 val_so_far += val_so_far << log;
3042 break;
3044 case alg_sub_factor:
3045 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3046 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3047 (add_target
3048 ? add_target : (optimize ? 0 : tem)));
3049 val_so_far = (val_so_far << log) - val_so_far;
3050 break;
3052 default:
3053 gcc_unreachable ();
3056 if (SCALAR_INT_MODE_P (mode))
3058 /* Write a REG_EQUAL note on the last insn so that we can cse
3059 multiplication sequences. Note that if ACCUM is a SUBREG,
3060 we've set the inner register and must properly indicate that. */
3061 tem = op0, nmode = mode;
3062 accum_inner = accum;
3063 if (GET_CODE (accum) == SUBREG)
3065 accum_inner = SUBREG_REG (accum);
3066 nmode = GET_MODE (accum_inner);
3067 tem = gen_lowpart (nmode, op0);
3070 insn = get_last_insn ();
3071 set_dst_reg_note (insn, REG_EQUAL,
3072 gen_rtx_MULT (nmode, tem,
3073 gen_int_mode (val_so_far, nmode)),
3074 accum_inner);
3078 if (variant == negate_variant)
3080 val_so_far = -val_so_far;
3081 accum = expand_unop (mode, neg_optab, accum, target, 0);
3083 else if (variant == add_variant)
3085 val_so_far = val_so_far + 1;
3086 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3089 /* Compare only the bits of val and val_so_far that are significant
3090 in the result mode, to avoid sign-/zero-extension confusion. */
3091 nmode = GET_MODE_INNER (mode);
3092 if (nmode == VOIDmode)
3093 nmode = mode;
3094 val &= GET_MODE_MASK (nmode);
3095 val_so_far &= GET_MODE_MASK (nmode);
3096 gcc_assert (val == val_so_far);
3098 return accum;
3101 /* Perform a multiplication and return an rtx for the result.
3102 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3103 TARGET is a suggestion for where to store the result (an rtx).
3105 We check specially for a constant integer as OP1.
3106 If you want this check for OP0 as well, then before calling
3107 you should swap the two operands if OP0 would be constant. */
3109 rtx
3110 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3111 int unsignedp)
3113 enum mult_variant variant;
3114 struct algorithm algorithm;
3115 rtx scalar_op1;
3116 int max_cost;
3117 bool speed = optimize_insn_for_speed_p ();
3118 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3120 if (CONSTANT_P (op0))
3121 std::swap (op0, op1);
3123 /* For vectors, there are several simplifications that can be made if
3124 all elements of the vector constant are identical. */
3125 scalar_op1 = op1;
3126 if (GET_CODE (op1) == CONST_VECTOR)
3128 int i, n = CONST_VECTOR_NUNITS (op1);
3129 scalar_op1 = CONST_VECTOR_ELT (op1, 0);
3130 for (i = 1; i < n; ++i)
3131 if (!rtx_equal_p (scalar_op1, CONST_VECTOR_ELT (op1, i)))
3132 goto skip_scalar;
3135 if (INTEGRAL_MODE_P (mode))
3137 rtx fake_reg;
3138 HOST_WIDE_INT coeff;
3139 bool is_neg;
3140 int mode_bitsize;
3142 if (op1 == CONST0_RTX (mode))
3143 return op1;
3144 if (op1 == CONST1_RTX (mode))
3145 return op0;
3146 if (op1 == CONSTM1_RTX (mode))
3147 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3148 op0, target, 0);
3150 if (do_trapv)
3151 goto skip_synth;
3153 /* If mode is an integer vector mode, check whether the backend supports
3154 vector lshift (by scalar or vector) at all. If not, we can't use
3155 a synthesized multiply. */
3156 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3157 && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3158 && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3159 goto skip_synth;
3161 /* These are the operations that are potentially turned into
3162 a sequence of shifts and additions. */
3163 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3165 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3166 less than or equal in size to `unsigned int' this doesn't matter.
3167 If the mode is larger than `unsigned int', then synth_mult works
3168 only if the constant value exactly fits in an `unsigned int' without
3169 any truncation. This means that multiplying by negative values does
3170 not work; results are off by 2^32 on a 32 bit machine. */
3171 if (CONST_INT_P (scalar_op1))
3173 coeff = INTVAL (scalar_op1);
3174 is_neg = coeff < 0;
3176 #if TARGET_SUPPORTS_WIDE_INT
3177 else if (CONST_WIDE_INT_P (scalar_op1))
3178 #else
3179 else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3180 #endif
3182 int shift = wi::exact_log2 (std::make_pair (scalar_op1, mode));
3183 /* Perfect power of 2 (other than 1, which is handled above). */
3184 if (shift > 0)
3185 return expand_shift (LSHIFT_EXPR, mode, op0,
3186 shift, target, unsignedp);
3187 else
3188 goto skip_synth;
3190 else
3191 goto skip_synth;
3193 /* We used to test optimize here, on the grounds that it's better to
3194 produce a smaller program when -O is not used. But this causes
3195 such a terrible slowdown sometimes that it seems better to always
3196 use synth_mult. */
3198 /* Special case powers of two. */
3199 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3200 && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3201 return expand_shift (LSHIFT_EXPR, mode, op0,
3202 floor_log2 (coeff), target, unsignedp);
3204 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3206 /* Attempt to handle multiplication of DImode values by negative
3207 coefficients, by performing the multiplication by a positive
3208 multiplier and then inverting the result. */
3209 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3211 /* It's safe to use -coeff even for INT_MIN, as the
3212 result is interpreted as an unsigned coefficient.
3213 Exclude cost of op0 from max_cost to match the cost
3214 calculation of the synth_mult. */
3215 coeff = -(unsigned HOST_WIDE_INT) coeff;
3216 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed)
3217 - neg_cost (speed, mode));
3218 if (max_cost <= 0)
3219 goto skip_synth;
3221 /* Special case powers of two. */
3222 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3224 rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3225 floor_log2 (coeff), target, unsignedp);
3226 return expand_unop (mode, neg_optab, temp, target, 0);
3229 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3230 max_cost))
3232 rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3233 &algorithm, variant);
3234 return expand_unop (mode, neg_optab, temp, target, 0);
3236 goto skip_synth;
3239 /* Exclude cost of op0 from max_cost to match the cost
3240 calculation of the synth_mult. */
3241 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
3242 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3243 return expand_mult_const (mode, op0, coeff, target,
3244 &algorithm, variant);
3246 skip_synth:
3248 /* Expand x*2.0 as x+x. */
3249 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1))
3251 REAL_VALUE_TYPE d;
3252 REAL_VALUE_FROM_CONST_DOUBLE (d, scalar_op1);
3254 if (REAL_VALUES_EQUAL (d, dconst2))
3256 op0 = force_reg (GET_MODE (op0), op0);
3257 return expand_binop (mode, add_optab, op0, op0,
3258 target, unsignedp, OPTAB_LIB_WIDEN);
3261 skip_scalar:
3263 /* This used to use umul_optab if unsigned, but for non-widening multiply
3264 there is no difference between signed and unsigned. */
3265 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3266 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3267 gcc_assert (op0);
3268 return op0;
3271 /* Return a cost estimate for multiplying a register by the given
3272 COEFFicient in the given MODE and SPEED. */
3274 int
3275 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3277 int max_cost;
3278 struct algorithm algorithm;
3279 enum mult_variant variant;
3281 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3282 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg), speed);
3283 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3284 return algorithm.cost.cost;
3285 else
3286 return max_cost;
3289 /* Perform a widening multiplication and return an rtx for the result.
3290 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3291 TARGET is a suggestion for where to store the result (an rtx).
3292 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3293 or smul_widen_optab.
3295 We check specially for a constant integer as OP1, comparing the
3296 cost of a widening multiply against the cost of a sequence of shifts
3297 and adds. */
3299 rtx
3300 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3301 int unsignedp, optab this_optab)
3303 bool speed = optimize_insn_for_speed_p ();
3304 rtx cop1;
3306 if (CONST_INT_P (op1)
3307 && GET_MODE (op0) != VOIDmode
3308 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3309 this_optab == umul_widen_optab))
3310 && CONST_INT_P (cop1)
3311 && (INTVAL (cop1) >= 0
3312 || HWI_COMPUTABLE_MODE_P (mode)))
3314 HOST_WIDE_INT coeff = INTVAL (cop1);
3315 int max_cost;
3316 enum mult_variant variant;
3317 struct algorithm algorithm;
3319 if (coeff == 0)
3320 return CONST0_RTX (mode);
3322 /* Special case powers of two. */
3323 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3325 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3326 return expand_shift (LSHIFT_EXPR, mode, op0,
3327 floor_log2 (coeff), target, unsignedp);
3330 /* Exclude cost of op0 from max_cost to match the cost
3331 calculation of the synth_mult. */
3332 max_cost = mul_widen_cost (speed, mode);
3333 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3334 max_cost))
3336 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3337 return expand_mult_const (mode, op0, coeff, target,
3338 &algorithm, variant);
3341 return expand_binop (mode, this_optab, op0, op1, target,
3342 unsignedp, OPTAB_LIB_WIDEN);
3345 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3346 replace division by D, and put the least significant N bits of the result
3347 in *MULTIPLIER_PTR and return the most significant bit.
3349 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3350 needed precision is in PRECISION (should be <= N).
3352 PRECISION should be as small as possible so this function can choose
3353 multiplier more freely.
3355 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3356 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3358 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3359 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
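/* Worked example, with N == PRECISION == 32 and a 64-bit HOST_WIDE_INT:
   for D == 5 this yields the multiplier 0xCCCCCCCD, *POST_SHIFT_PTR == 2
   and a zero return value, so for any 32-bit unsigned x
       x / 5 == ((unsigned HOST_WIDE_INT) x * 0xCCCCCCCD) >> 34
   i.e. the high half of the 32x32->64 product shifted right by 2.  */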
3361 unsigned HOST_WIDE_INT
3362 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3363 unsigned HOST_WIDE_INT *multiplier_ptr,
3364 int *post_shift_ptr, int *lgup_ptr)
3366 int lgup, post_shift;
3367 int pow, pow2;
3369 /* lgup = ceil(log2(divisor)); */
3370 lgup = ceil_log2 (d);
3372 gcc_assert (lgup <= n);
3374 pow = n + lgup;
3375 pow2 = n + lgup - precision;
3377 /* mlow = 2^(N + lgup)/d */
3378 wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3379 wide_int mlow = wi::udiv_trunc (val, d);
3381 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3382 val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3383 wide_int mhigh = wi::udiv_trunc (val, d);
3385 /* If precision == N, then mlow, mhigh exceed 2^N
3386 (but they do not exceed 2^(N+1)). */
3388 /* Reduce to lowest terms. */
3389 for (post_shift = lgup; post_shift > 0; post_shift--)
3391 unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3392 HOST_BITS_PER_WIDE_INT);
3393 unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3394 HOST_BITS_PER_WIDE_INT);
3395 if (ml_lo >= mh_lo)
3396 break;
3398 mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3399 mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3402 *post_shift_ptr = post_shift;
3403 *lgup_ptr = lgup;
3404 if (n < HOST_BITS_PER_WIDE_INT)
3406 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3407 *multiplier_ptr = mhigh.to_uhwi () & mask;
3408 return mhigh.to_uhwi () >= mask;
3410 else
3412 *multiplier_ptr = mhigh.to_uhwi ();
3413 return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3417 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3418 congruent to 1 (mod 2**N). */
3420 static unsigned HOST_WIDE_INT
3421 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3423 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3425 /* The algorithm notes that the choice y = x satisfies
3426 x*y == 1 mod 2^3, since x is assumed odd.
3427 Each iteration doubles the number of bits of significance in y. */
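/* E.g. invert_mod2n (3, 8): y starts at 3 (3*3 == 9 == 1 mod 8); the first
   iteration gives y = 3*(2 - 9) & 0xff == 235 (3*235 == 705 == 1 mod 64);
   the second gives y = 235*(2 - 705) & 0xff == 171, and 3*171 == 513 == 1
   mod 256, the required inverse.  */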
3429 unsigned HOST_WIDE_INT mask;
3430 unsigned HOST_WIDE_INT y = x;
3431 int nbit = 3;
3433 mask = (n == HOST_BITS_PER_WIDE_INT
3434 ? ~(unsigned HOST_WIDE_INT) 0
3435 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3437 while (nbit < n)
3439 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3440 nbit *= 2;
3442 return y;
3445 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3446 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3447 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3448 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3449 become signed.
3451 The result is put in TARGET if that is convenient.
3453 MODE is the mode of operation. */
3455 rtx
3456 expand_mult_highpart_adjust (machine_mode mode, rtx adj_operand, rtx op0,
3457 rtx op1, rtx target, int unsignedp)
3459 rtx tem;
3460 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3462 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3463 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3464 tem = expand_and (mode, tem, op1, NULL_RTX);
3465 adj_operand
3466 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3467 adj_operand);
3469 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3470 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3471 tem = expand_and (mode, tem, op0, NULL_RTX);
3472 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3473 target);
3475 return target;
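/* The two conditional corrections in the function above implement, modulo
   2**N for N-bit operands, the identity
     highpart_signed (a, b) == highpart_unsigned (a, b)
                               - (a < 0 ? b : 0) - (b < 0 ? a : 0)
   (with the signs of the corrections flipped for the unsigned direction);
   each arithmetic right shift by N - 1 yields an all-ones mask exactly when
   the corresponding operand is negative.  */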
3478 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3480 static rtx
3481 extract_high_half (machine_mode mode, rtx op)
3483 machine_mode wider_mode;
3485 if (mode == word_mode)
3486 return gen_highpart (mode, op);
3488 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3490 wider_mode = GET_MODE_WIDER_MODE (mode);
3491 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3492 GET_MODE_BITSIZE (mode), 0, 1);
3493 return convert_modes (mode, wider_mode, op, 0);
3496 /* Like expmed_mult_highpart, but only consider using a multiplication
3497 optab. OP1 is an rtx for the constant operand. */
3499 static rtx
3500 expmed_mult_highpart_optab (machine_mode mode, rtx op0, rtx op1,
3501 rtx target, int unsignedp, int max_cost)
3503 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3504 machine_mode wider_mode;
3505 optab moptab;
3506 rtx tem;
3507 int size;
3508 bool speed = optimize_insn_for_speed_p ();
3510 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3512 wider_mode = GET_MODE_WIDER_MODE (mode);
3513 size = GET_MODE_BITSIZE (mode);
3515 /* Firstly, try using a multiplication insn that only generates the needed
3516 high part of the product, and in the sign flavor of unsignedp. */
3517 if (mul_highpart_cost (speed, mode) < max_cost)
3519 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3520 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3521 unsignedp, OPTAB_DIRECT);
3522 if (tem)
3523 return tem;
3526 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3527 Need to adjust the result after the multiplication. */
3528 if (size - 1 < BITS_PER_WORD
3529 && (mul_highpart_cost (speed, mode)
3530 + 2 * shift_cost (speed, mode, size-1)
3531 + 4 * add_cost (speed, mode) < max_cost))
3533 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3534 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3535 unsignedp, OPTAB_DIRECT);
3536 if (tem)
3537 /* We used the wrong signedness. Adjust the result. */
3538 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3539 tem, unsignedp);
3542 /* Try widening multiplication. */
3543 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3544 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3545 && mul_widen_cost (speed, wider_mode) < max_cost)
3547 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3548 unsignedp, OPTAB_WIDEN);
3549 if (tem)
3550 return extract_high_half (mode, tem);
3553 /* Try widening the mode and performing a non-widening multiplication. */
3554 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3555 && size - 1 < BITS_PER_WORD
3556 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3557 < max_cost))
3559 rtx_insn *insns;
3560 rtx wop0, wop1;
3562 /* We need to widen the operands, for example to ensure the
3563 constant multiplier is correctly sign or zero extended.
3564 Use a sequence to clean-up any instructions emitted by
3565 the conversions if things don't work out. */
3566 start_sequence ();
3567 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3568 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3569 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3570 unsignedp, OPTAB_WIDEN);
3571 insns = get_insns ();
3572 end_sequence ();
3574 if (tem)
3576 emit_insn (insns);
3577 return extract_high_half (mode, tem);
3581 /* Try widening multiplication of opposite signedness, and adjust. */
3582 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3583 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3584 && size - 1 < BITS_PER_WORD
3585 && (mul_widen_cost (speed, wider_mode)
3586 + 2 * shift_cost (speed, mode, size-1)
3587 + 4 * add_cost (speed, mode) < max_cost))
3589 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3590 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3591 if (tem != 0)
3593 tem = extract_high_half (mode, tem);
3594 /* We used the wrong signedness. Adjust the result. */
3595 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3596 target, unsignedp);
3600 return 0;
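/* When only a widening multiply is available, the high part is just the
   upper word of the double-width product, followed by the equivalent of
   extract_high_half.  A minimal sketch for 32-bit operands, assuming
   <stdint.h>; the name is illustrative only.  */
#if 0
static uint32_t
example_umul_highpart_via_widening (uint32_t a, uint32_t b)
{
  /* Widening multiply, then keep the high word.  */
  return (uint32_t) (((uint64_t) a * b) >> 32);
}
#endif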
3603 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3604 putting the high half of the result in TARGET if that is convenient,
3605 and returning where the result is. If the operation cannot be performed,
3606 0 is returned.
3608 MODE is the mode of operation and result.
3610 UNSIGNEDP nonzero means unsigned multiply.
3612 MAX_COST is the total allowed cost for the expanded RTL. */
3614 static rtx
3615 expmed_mult_highpart (machine_mode mode, rtx op0, rtx op1,
3616 rtx target, int unsignedp, int max_cost)
3618 machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3619 unsigned HOST_WIDE_INT cnst1;
3620 int extra_cost;
3621 bool sign_adjust = false;
3622 enum mult_variant variant;
3623 struct algorithm alg;
3624 rtx tem;
3625 bool speed = optimize_insn_for_speed_p ();
3627 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3628 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3629 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3631 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3633 /* We can't optimize modes wider than BITS_PER_WORD.
3634 ??? We might be able to perform double-word arithmetic if
3635 mode == word_mode, however all the cost calculations in
3636 synth_mult etc. assume single-word operations. */
3637 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3638 return expmed_mult_highpart_optab (mode, op0, op1, target,
3639 unsignedp, max_cost);
3641 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3643 /* Check whether we try to multiply by a negative constant. */
3644 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3646 sign_adjust = true;
3647 extra_cost += add_cost (speed, mode);
3650 /* See whether shift/add multiplication is cheap enough. */
3651 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3652 max_cost - extra_cost))
3654 /* See whether the specialized multiplication optabs are
3655 cheaper than the shift/add version. */
3656 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3657 alg.cost.cost + extra_cost);
3658 if (tem)
3659 return tem;
3661 tem = convert_to_mode (wider_mode, op0, unsignedp);
3662 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3663 tem = extract_high_half (mode, tem);
3665 /* Adjust result for signedness. */
3666 if (sign_adjust)
3667 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3669 return tem;
3671 return expmed_mult_highpart_optab (mode, op0, op1, target,
3672 unsignedp, max_cost);
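/* The sign_adjust step above relies on the fact that multiplying by the
   unsigned image of a negative constant overshoots the signed high half
   by exactly OP0.  A minimal sketch for a 32-bit negative constant,
   assuming <stdint.h> and arithmetic right shift of negative values; the
   name is illustrative only.  */
#if 0
static uint32_t
example_signed_highpart_by_negative_const (int32_t a, int32_t c)  /* c < 0 */
{
  uint32_t cnst1 = (uint32_t) c;                  /* c + 2^32, like CNST1 above */
  uint32_t hi = (uint32_t) (((int64_t) a * cnst1) >> 32);
  return hi - (uint32_t) a;                       /* the sign_adjust subtraction */
}
#endif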
3676 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3678 static rtx
3679 expand_smod_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3681 rtx result, temp, shift;
3682 rtx_code_label *label;
3683 int logd;
3684 int prec = GET_MODE_PRECISION (mode);
3686 logd = floor_log2 (d);
3687 result = gen_reg_rtx (mode);
3689 /* Avoid conditional branches when they're expensive. */
3690 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3691 && optimize_insn_for_speed_p ())
3693 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3694 mode, 0, -1);
3695 if (signmask)
3697 HOST_WIDE_INT masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3698 signmask = force_reg (mode, signmask);
3699 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3701 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3702 which instruction sequence to use. If logical right shifts
3703 are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3704 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
3706 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3707 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3708 || (set_src_cost (temp, optimize_insn_for_speed_p ())
3709 > COSTS_N_INSNS (2)))
3711 temp = expand_binop (mode, xor_optab, op0, signmask,
3712 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3713 temp = expand_binop (mode, sub_optab, temp, signmask,
3714 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3715 temp = expand_binop (mode, and_optab, temp,
3716 gen_int_mode (masklow, mode),
3717 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3718 temp = expand_binop (mode, xor_optab, temp, signmask,
3719 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3720 temp = expand_binop (mode, sub_optab, temp, signmask,
3721 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3723 else
3725 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3726 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3727 signmask = force_reg (mode, signmask);
3729 temp = expand_binop (mode, add_optab, op0, signmask,
3730 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3731 temp = expand_binop (mode, and_optab, temp,
3732 gen_int_mode (masklow, mode),
3733 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3734 temp = expand_binop (mode, sub_optab, temp, signmask,
3735 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3737 return temp;
3741 /* Mask contains the mode's signbit and the significant bits of the
3742 modulus. By including the signbit in the operation, many targets
3743 can avoid an explicit compare operation in the following comparison
3744 against zero. */
3745 wide_int mask = wi::mask (logd, false, prec);
3746 mask = wi::set_bit (mask, prec - 1);
3748 temp = expand_binop (mode, and_optab, op0,
3749 immed_wide_int_const (mask, mode),
3750 result, 1, OPTAB_LIB_WIDEN);
3751 if (temp != result)
3752 emit_move_insn (result, temp);
3754 label = gen_label_rtx ();
3755 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3757 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3758 0, OPTAB_LIB_WIDEN);
3760 mask = wi::mask (logd, true, prec);
3761 temp = expand_binop (mode, ior_optab, temp,
3762 immed_wide_int_const (mask, mode),
3763 result, 1, OPTAB_LIB_WIDEN);
3764 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3765 0, OPTAB_LIB_WIDEN);
3766 if (temp != result)
3767 emit_move_insn (result, temp);
3768 emit_label (label);
3769 return result;
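/* For the branch-free path above, the net effect when logical shifts are
   cheap is a bias-and-mask sequence.  A minimal sketch for int32_t and
   D = 2^LOGD with 1 <= LOGD <= 31, assuming <stdint.h> and arithmetic
   right shift of negative values; the name is illustrative only.  */
#if 0
static int32_t
example_smod_pow2 (int32_t x, int logd)
{
  int32_t masklow = ((int32_t) 1 << logd) - 1;
  int32_t signmask = x >> 31;                     /* 0 or all-ones */
  /* Reduce the mask to the low LOGD bits, i.e. d-1 when X is negative.  */
  signmask = (int32_t) ((uint32_t) signmask >> (32 - logd));
  return ((x + signmask) & masklow) - signmask;
  /* e.g. example_smod_pow2 (-5, 2) == -1, matching C's -5 % 4.  */
}
#endif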
3772 /* Expand signed division of OP0 by a power of two D in mode MODE.
3773 This routine is only called for positive values of D. */
3775 static rtx
3776 expand_sdiv_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3778 rtx temp;
3779 rtx_code_label *label;
3780 int logd;
3782 logd = floor_log2 (d);
3784 if (d == 2
3785 && BRANCH_COST (optimize_insn_for_speed_p (),
3786 false) >= 1)
3788 temp = gen_reg_rtx (mode);
3789 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3790 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3791 0, OPTAB_LIB_WIDEN);
3792 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3795 if (HAVE_conditional_move
3796 && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
3798 rtx temp2;
3800 start_sequence ();
3801 temp2 = copy_to_mode_reg (mode, op0);
3802 temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
3803 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3804 temp = force_reg (mode, temp);
3806 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3807 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3808 mode, temp, temp2, mode, 0);
3809 if (temp2)
3811 rtx_insn *seq = get_insns ();
3812 end_sequence ();
3813 emit_insn (seq);
3814 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3816 end_sequence ();
3819 if (BRANCH_COST (optimize_insn_for_speed_p (),
3820 false) >= 2)
3822 int ushift = GET_MODE_BITSIZE (mode) - logd;
3824 temp = gen_reg_rtx (mode);
3825 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3826 if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
3827 || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3828 > COSTS_N_INSNS (1))
3829 temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
3830 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3831 else
3832 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3833 ushift, NULL_RTX, 1);
3834 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3835 0, OPTAB_LIB_WIDEN);
3836 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3839 label = gen_label_rtx ();
3840 temp = copy_to_mode_reg (mode, op0);
3841 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3842 expand_inc (temp, gen_int_mode (d - 1, mode));
3843 emit_label (label);
3844 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
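/* All of the variants above compute the same thing: add D-1 to negative
   operands so that the final arithmetic shift truncates toward zero
   rather than toward minus infinity.  A minimal branch-free sketch for
   int32_t and D = 2^LOGD with 1 <= LOGD <= 31, assuming <stdint.h> and
   arithmetic right shift of negative values; the name is illustrative
   only.  */
#if 0
static int32_t
example_sdiv_pow2 (int32_t x, int logd)
{
  /* (x < 0 ? d - 1 : 0), computed without a branch.  */
  int32_t bias = (int32_t) ((uint32_t) (x >> 31) >> (32 - logd));
  return (x + bias) >> logd;
  /* e.g. example_sdiv_pow2 (-7, 1) == -3, matching C's -7 / 2.  */
}
#endif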
3847 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3848 if that is convenient, and returning where the result is.
3849 You may request either the quotient or the remainder as the result;
3850 specify REM_FLAG nonzero to get the remainder.
3852 CODE is the expression code for which kind of division this is;
3853 it controls how rounding is done. MODE is the machine mode to use.
3854 UNSIGNEDP nonzero means do unsigned division. */
3856 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3857 and then correct it by or'ing in missing high bits
3858 if result of ANDI is nonzero.
3859 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3860 This could optimize to a bfexts instruction.
3861 But C doesn't use these operations, so their optimizations are
3862 left for later. */
3863 /* ??? For modulo, we don't actually need the highpart of the first product,
3864 the low part will do nicely. And for small divisors, the second multiply
3865 can also be a low-part only multiply or even be completely left out.
3866 E.g. to calculate the remainder of a division by 3 with a 32 bit
3867 multiply, multiply with 0x55555556 and extract the upper two bits;
3868 the result is exact for inputs up to 0x1fffffff.
3869 The input range can be reduced by using cross-sum rules.
3870 For odd divisors >= 3, the following table gives right shift counts
3871 so that if a number is shifted by an integer multiple of the given
3872 amount, the remainder stays the same:
3873 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3874 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3875 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3876 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3877 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3879 Cross-sum rules for even numbers can be derived by leaving as many bits
3880 to the right alone as the divisor has zeros to the right.
3881 E.g. if x is an unsigned 32 bit number:
3882 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
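/* The divide-by-3 remark above can be checked directly: 0x55555556 * 3
   is 2^32 + 2, so the low word of x * 0x55555556 encodes the remainder
   in its top two bits.  A minimal sketch, assuming <stdint.h>; the name
   is illustrative only.  */
#if 0
static unsigned int
example_rem3 (uint32_t x)                /* exact for x <= 0x1fffffff */
{
  return (uint32_t) (x * 0x55555556u) >> 30;
}
#endif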
3886 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
3887 rtx op0, rtx op1, rtx target, int unsignedp)
3889 machine_mode compute_mode;
3890 rtx tquotient;
3891 rtx quotient = 0, remainder = 0;
3892 rtx_insn *last;
3893 int size;
3894 rtx_insn *insn;
3895 optab optab1, optab2;
3896 int op1_is_constant, op1_is_pow2 = 0;
3897 int max_cost, extra_cost;
3898 static HOST_WIDE_INT last_div_const = 0;
3899 bool speed = optimize_insn_for_speed_p ();
3901 op1_is_constant = CONST_INT_P (op1);
3902 if (op1_is_constant)
3904 unsigned HOST_WIDE_INT ext_op1 = UINTVAL (op1);
3905 if (unsignedp)
3906 ext_op1 &= GET_MODE_MASK (mode);
3907 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3908 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3912 This is the structure of expand_divmod:
3914 First comes code to fix up the operands so we can perform the operations
3915 correctly and efficiently.
3917 Second comes a switch statement with code specific for each rounding mode.
3918 For some special operands this code emits all RTL for the desired
3919 operation, for other cases, it generates only a quotient and stores it in
3920 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3921 to indicate that it has not done anything.
3923 Last comes code that finishes the operation. If QUOTIENT is set and
3924 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3925 QUOTIENT is not set, it is computed using trunc rounding.
3927 We try to generate special code for division and remainder when OP1 is a
3928 constant. If |OP1| = 2**n we can use shifts and some other fast
3929 operations. For other values of OP1, we compute a carefully selected
3930 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3931 by m.
3933 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3934 half of the product. Different strategies for generating the product are
3935 implemented in expmed_mult_highpart.
3937 If what we actually want is the remainder, we generate that by another
3938 by-constant multiplication and a subtraction. */
3940 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3941 code below will malfunction if we are, so check here and handle
3942 the special case if so. */
3943 if (op1 == const1_rtx)
3944 return rem_flag ? const0_rtx : op0;
3946 /* When dividing by -1, we could get an overflow.
3947 negv_optab can handle overflows. */
3948 if (! unsignedp && op1 == constm1_rtx)
3950 if (rem_flag)
3951 return const0_rtx;
3952 return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
3953 ? negv_optab : neg_optab, op0, target, 0);
3956 if (target
3957 /* Don't use the function value register as a target
3958 since we have to read it as well as write it,
3959 and function-inlining gets confused by this. */
3960 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3961 /* Don't clobber an operand while doing a multi-step calculation. */
3962 || ((rem_flag || op1_is_constant)
3963 && (reg_mentioned_p (target, op0)
3964 || (MEM_P (op0) && MEM_P (target))))
3965 || reg_mentioned_p (target, op1)
3966 || (MEM_P (op1) && MEM_P (target))))
3967 target = 0;
3969 /* Get the mode in which to perform this computation. Normally it will
3970 be MODE, but sometimes we can't do the desired operation in MODE.
3971 If so, pick a wider mode in which we can do the operation. Convert
3972 to that mode at the start to avoid repeated conversions.
3974 First see what operations we need. These depend on the expression
3975 we are evaluating. (We assume that divxx3 insns exist under the
3976 same conditions as modxx3 insns and that these insns don't normally
3977 fail. If these assumptions are not correct, we may generate less
3978 efficient code in some cases.)
3980 Then see if we find a mode in which we can open-code that operation
3981 (either a division, modulus, or shift). Finally, check for the smallest
3982 mode for which we can do the operation with a library call. */
3984 /* We might want to refine this now that we have division-by-constant
3985 optimization. Since expmed_mult_highpart tries so many variants, it is
3986 not straightforward to generalize this. Maybe we should make an array
3987 of possible modes in init_expmed? Save this for GCC 2.7. */
3989 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3990 ? (unsignedp ? lshr_optab : ashr_optab)
3991 : (unsignedp ? udiv_optab : sdiv_optab));
3992 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3993 ? optab1
3994 : (unsignedp ? udivmod_optab : sdivmod_optab));
3996 for (compute_mode = mode; compute_mode != VOIDmode;
3997 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3998 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
3999 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4000 break;
4002 if (compute_mode == VOIDmode)
4003 for (compute_mode = mode; compute_mode != VOIDmode;
4004 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4005 if (optab_libfunc (optab1, compute_mode)
4006 || optab_libfunc (optab2, compute_mode))
4007 break;
4009 /* If we still couldn't find a mode, use MODE, but expand_binop will
4010 probably die. */
4011 if (compute_mode == VOIDmode)
4012 compute_mode = mode;
4014 if (target && GET_MODE (target) == compute_mode)
4015 tquotient = target;
4016 else
4017 tquotient = gen_reg_rtx (compute_mode);
4019 size = GET_MODE_BITSIZE (compute_mode);
4020 #if 0
4021 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4022 (mode), and thereby get better code when OP1 is a constant. Do that
4023 later. It will require going over all usages of SIZE below. */
4024 size = GET_MODE_BITSIZE (mode);
4025 #endif
4027 /* Only deduct something for a REM if the last divide done was
4028 for a different constant. Then set the constant of the last
4029 divide. */
4030 max_cost = (unsignedp
4031 ? udiv_cost (speed, compute_mode)
4032 : sdiv_cost (speed, compute_mode));
4033 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4034 && INTVAL (op1) == last_div_const))
4035 max_cost -= (mul_cost (speed, compute_mode)
4036 + add_cost (speed, compute_mode));
4038 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4040 /* Now convert to the best mode to use. */
4041 if (compute_mode != mode)
4043 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4044 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4046 /* convert_modes may have placed op1 into a register, so we
4047 must recompute the following. */
4048 op1_is_constant = CONST_INT_P (op1);
4049 op1_is_pow2 = (op1_is_constant
4050 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4051 || (! unsignedp
4052 && EXACT_POWER_OF_2_OR_ZERO_P (-UINTVAL (op1))))));
4055 /* If one of the operands is a volatile MEM, copy it into a register. */
4057 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4058 op0 = force_reg (compute_mode, op0);
4059 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4060 op1 = force_reg (compute_mode, op1);
4062 /* If we need the remainder or if OP1 is constant, we need to
4063 put OP0 in a register in case it has any queued subexpressions. */
4064 if (rem_flag || op1_is_constant)
4065 op0 = force_reg (compute_mode, op0);
4067 last = get_last_insn ();
4069 /* Promote floor rounding to trunc rounding for unsigned operations. */
4070 if (unsignedp)
4072 if (code == FLOOR_DIV_EXPR)
4073 code = TRUNC_DIV_EXPR;
4074 if (code == FLOOR_MOD_EXPR)
4075 code = TRUNC_MOD_EXPR;
4076 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4077 code = TRUNC_DIV_EXPR;
4080 if (op1 != const0_rtx)
4081 switch (code)
4083 case TRUNC_MOD_EXPR:
4084 case TRUNC_DIV_EXPR:
4085 if (op1_is_constant)
4087 if (unsignedp)
4089 unsigned HOST_WIDE_INT mh, ml;
4090 int pre_shift, post_shift;
4091 int dummy;
4092 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4093 & GET_MODE_MASK (compute_mode));
4095 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4097 pre_shift = floor_log2 (d);
4098 if (rem_flag)
4100 unsigned HOST_WIDE_INT mask
4101 = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4102 remainder
4103 = expand_binop (compute_mode, and_optab, op0,
4104 gen_int_mode (mask, compute_mode),
4105 remainder, 1,
4106 OPTAB_LIB_WIDEN);
4107 if (remainder)
4108 return gen_lowpart (mode, remainder);
4110 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4111 pre_shift, tquotient, 1);
4113 else if (size <= HOST_BITS_PER_WIDE_INT)
4115 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4117 /* Most significant bit of divisor is set; emit an scc
4118 insn. */
4119 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4120 compute_mode, 1, 1);
4122 else
4124 /* Find a suitable multiplier and right shift count
4125 instead of multiplying with D. */
4127 mh = choose_multiplier (d, size, size,
4128 &ml, &post_shift, &dummy);
4130 /* If the suggested multiplier is more than SIZE bits,
4131 we can do better for even divisors, using an
4132 initial right shift. */
4133 if (mh != 0 && (d & 1) == 0)
4135 pre_shift = floor_log2 (d & -d);
4136 mh = choose_multiplier (d >> pre_shift, size,
4137 size - pre_shift,
4138 &ml, &post_shift, &dummy);
4139 gcc_assert (!mh);
4141 else
4142 pre_shift = 0;
4144 if (mh != 0)
4146 rtx t1, t2, t3, t4;
4148 if (post_shift - 1 >= BITS_PER_WORD)
4149 goto fail1;
4151 extra_cost
4152 = (shift_cost (speed, compute_mode, post_shift - 1)
4153 + shift_cost (speed, compute_mode, 1)
4154 + 2 * add_cost (speed, compute_mode));
4155 t1 = expmed_mult_highpart
4156 (compute_mode, op0,
4157 gen_int_mode (ml, compute_mode),
4158 NULL_RTX, 1, max_cost - extra_cost);
4159 if (t1 == 0)
4160 goto fail1;
4161 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4162 op0, t1),
4163 NULL_RTX);
4164 t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4165 t2, 1, NULL_RTX, 1);
4166 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4167 t1, t3),
4168 NULL_RTX);
4169 quotient = expand_shift
4170 (RSHIFT_EXPR, compute_mode, t4,
4171 post_shift - 1, tquotient, 1);
4173 else
4175 rtx t1, t2;
4177 if (pre_shift >= BITS_PER_WORD
4178 || post_shift >= BITS_PER_WORD)
4179 goto fail1;
4181 t1 = expand_shift
4182 (RSHIFT_EXPR, compute_mode, op0,
4183 pre_shift, NULL_RTX, 1);
4184 extra_cost
4185 = (shift_cost (speed, compute_mode, pre_shift)
4186 + shift_cost (speed, compute_mode, post_shift));
4187 t2 = expmed_mult_highpart
4188 (compute_mode, t1,
4189 gen_int_mode (ml, compute_mode),
4190 NULL_RTX, 1, max_cost - extra_cost);
4191 if (t2 == 0)
4192 goto fail1;
4193 quotient = expand_shift
4194 (RSHIFT_EXPR, compute_mode, t2,
4195 post_shift, tquotient, 1);
4199 else /* Too wide mode to use tricky code */
4200 break;
4202 insn = get_last_insn ();
4203 if (insn != last)
4204 set_dst_reg_note (insn, REG_EQUAL,
4205 gen_rtx_UDIV (compute_mode, op0, op1),
4206 quotient);
4208 else /* TRUNC_DIV, signed */
4210 unsigned HOST_WIDE_INT ml;
4211 int lgup, post_shift;
4212 rtx mlr;
4213 HOST_WIDE_INT d = INTVAL (op1);
4214 unsigned HOST_WIDE_INT abs_d;
4216 /* Since d might be INT_MIN, we have to cast to
4217 unsigned HOST_WIDE_INT before negating to avoid
4218 undefined signed overflow. */
4219 abs_d = (d >= 0
4220 ? (unsigned HOST_WIDE_INT) d
4221 : - (unsigned HOST_WIDE_INT) d);
4223 /* n rem d = n rem -d */
4224 if (rem_flag && d < 0)
4226 d = abs_d;
4227 op1 = gen_int_mode (abs_d, compute_mode);
4230 if (d == 1)
4231 quotient = op0;
4232 else if (d == -1)
4233 quotient = expand_unop (compute_mode, neg_optab, op0,
4234 tquotient, 0);
4235 else if (HOST_BITS_PER_WIDE_INT >= size
4236 && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4238 /* This case is not handled correctly below. */
4239 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4240 compute_mode, 1, 1);
4241 if (quotient == 0)
4242 goto fail1;
4244 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4245 && (rem_flag
4246 ? smod_pow2_cheap (speed, compute_mode)
4247 : sdiv_pow2_cheap (speed, compute_mode))
4248 /* We assume that cheap metric is true if the
4249 optab has an expander for this mode. */
4250 && ((optab_handler ((rem_flag ? smod_optab
4251 : sdiv_optab),
4252 compute_mode)
4253 != CODE_FOR_nothing)
4254 || (optab_handler (sdivmod_optab,
4255 compute_mode)
4256 != CODE_FOR_nothing)))
4258 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4260 if (rem_flag)
4262 remainder = expand_smod_pow2 (compute_mode, op0, d);
4263 if (remainder)
4264 return gen_lowpart (mode, remainder);
4267 if (sdiv_pow2_cheap (speed, compute_mode)
4268 && ((optab_handler (sdiv_optab, compute_mode)
4269 != CODE_FOR_nothing)
4270 || (optab_handler (sdivmod_optab, compute_mode)
4271 != CODE_FOR_nothing)))
4272 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4273 compute_mode, op0,
4274 gen_int_mode (abs_d,
4275 compute_mode),
4276 NULL_RTX, 0);
4277 else
4278 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4280 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4281 negate the quotient. */
4282 if (d < 0)
4284 insn = get_last_insn ();
4285 if (insn != last
4286 && abs_d < ((unsigned HOST_WIDE_INT) 1
4287 << (HOST_BITS_PER_WIDE_INT - 1)))
4288 set_dst_reg_note (insn, REG_EQUAL,
4289 gen_rtx_DIV (compute_mode, op0,
4290 gen_int_mode
4291 (abs_d,
4292 compute_mode)),
4293 quotient);
4295 quotient = expand_unop (compute_mode, neg_optab,
4296 quotient, quotient, 0);
4299 else if (size <= HOST_BITS_PER_WIDE_INT)
4301 choose_multiplier (abs_d, size, size - 1,
4302 &ml, &post_shift, &lgup);
4303 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4305 rtx t1, t2, t3;
4307 if (post_shift >= BITS_PER_WORD
4308 || size - 1 >= BITS_PER_WORD)
4309 goto fail1;
4311 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4312 + shift_cost (speed, compute_mode, size - 1)
4313 + add_cost (speed, compute_mode));
4314 t1 = expmed_mult_highpart
4315 (compute_mode, op0, gen_int_mode (ml, compute_mode),
4316 NULL_RTX, 0, max_cost - extra_cost);
4317 if (t1 == 0)
4318 goto fail1;
4319 t2 = expand_shift
4320 (RSHIFT_EXPR, compute_mode, t1,
4321 post_shift, NULL_RTX, 0);
4322 t3 = expand_shift
4323 (RSHIFT_EXPR, compute_mode, op0,
4324 size - 1, NULL_RTX, 0);
4325 if (d < 0)
4326 quotient
4327 = force_operand (gen_rtx_MINUS (compute_mode,
4328 t3, t2),
4329 tquotient);
4330 else
4331 quotient
4332 = force_operand (gen_rtx_MINUS (compute_mode,
4333 t2, t3),
4334 tquotient);
4336 else
4338 rtx t1, t2, t3, t4;
4340 if (post_shift >= BITS_PER_WORD
4341 || size - 1 >= BITS_PER_WORD)
4342 goto fail1;
4344 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4345 mlr = gen_int_mode (ml, compute_mode);
4346 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4347 + shift_cost (speed, compute_mode, size - 1)
4348 + 2 * add_cost (speed, compute_mode));
4349 t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4350 NULL_RTX, 0,
4351 max_cost - extra_cost);
4352 if (t1 == 0)
4353 goto fail1;
4354 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4355 t1, op0),
4356 NULL_RTX);
4357 t3 = expand_shift
4358 (RSHIFT_EXPR, compute_mode, t2,
4359 post_shift, NULL_RTX, 0);
4360 t4 = expand_shift
4361 (RSHIFT_EXPR, compute_mode, op0,
4362 size - 1, NULL_RTX, 0);
4363 if (d < 0)
4364 quotient
4365 = force_operand (gen_rtx_MINUS (compute_mode,
4366 t4, t3),
4367 tquotient);
4368 else
4369 quotient
4370 = force_operand (gen_rtx_MINUS (compute_mode,
4371 t3, t4),
4372 tquotient);
4375 else /* Too wide mode to use tricky code */
4376 break;
4378 insn = get_last_insn ();
4379 if (insn != last)
4380 set_dst_reg_note (insn, REG_EQUAL,
4381 gen_rtx_DIV (compute_mode, op0, op1),
4382 quotient);
4384 break;
4386 fail1:
4387 delete_insns_since (last);
4388 break;
4390 case FLOOR_DIV_EXPR:
4391 case FLOOR_MOD_EXPR:
4392 /* We will come here only for signed operations. */
4393 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4395 unsigned HOST_WIDE_INT mh, ml;
4396 int pre_shift, lgup, post_shift;
4397 HOST_WIDE_INT d = INTVAL (op1);
4399 if (d > 0)
4401 /* We could just as easily deal with negative constants here,
4402 but it does not seem worth the trouble for GCC 2.6. */
4403 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4405 pre_shift = floor_log2 (d);
4406 if (rem_flag)
4408 unsigned HOST_WIDE_INT mask
4409 = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4410 remainder = expand_binop
4411 (compute_mode, and_optab, op0,
4412 gen_int_mode (mask, compute_mode),
4413 remainder, 0, OPTAB_LIB_WIDEN);
4414 if (remainder)
4415 return gen_lowpart (mode, remainder);
4417 quotient = expand_shift
4418 (RSHIFT_EXPR, compute_mode, op0,
4419 pre_shift, tquotient, 0);
4421 else
4423 rtx t1, t2, t3, t4;
4425 mh = choose_multiplier (d, size, size - 1,
4426 &ml, &post_shift, &lgup);
4427 gcc_assert (!mh);
4429 if (post_shift < BITS_PER_WORD
4430 && size - 1 < BITS_PER_WORD)
4432 t1 = expand_shift
4433 (RSHIFT_EXPR, compute_mode, op0,
4434 size - 1, NULL_RTX, 0);
4435 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4436 NULL_RTX, 0, OPTAB_WIDEN);
4437 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4438 + shift_cost (speed, compute_mode, size - 1)
4439 + 2 * add_cost (speed, compute_mode));
4440 t3 = expmed_mult_highpart
4441 (compute_mode, t2, gen_int_mode (ml, compute_mode),
4442 NULL_RTX, 1, max_cost - extra_cost);
4443 if (t3 != 0)
4445 t4 = expand_shift
4446 (RSHIFT_EXPR, compute_mode, t3,
4447 post_shift, NULL_RTX, 1);
4448 quotient = expand_binop (compute_mode, xor_optab,
4449 t4, t1, tquotient, 0,
4450 OPTAB_WIDEN);
4455 else
4457 rtx nsign, t1, t2, t3, t4;
4458 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4459 op0, constm1_rtx), NULL_RTX);
4460 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4461 0, OPTAB_WIDEN);
4462 nsign = expand_shift
4463 (RSHIFT_EXPR, compute_mode, t2,
4464 size - 1, NULL_RTX, 0);
4465 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4466 NULL_RTX);
4467 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4468 NULL_RTX, 0);
4469 if (t4)
4471 rtx t5;
4472 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4473 NULL_RTX, 0);
4474 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4475 t4, t5),
4476 tquotient);
4481 if (quotient != 0)
4482 break;
4483 delete_insns_since (last);
4485 /* Try using an instruction that produces both the quotient and
4486 remainder, using truncation. We can easily compensate the quotient
4487 or remainder to get floor rounding, once we have the remainder.
4488 Notice that we compute also the final remainder value here,
4489 and return the result right away. */
4490 if (target == 0 || GET_MODE (target) != compute_mode)
4491 target = gen_reg_rtx (compute_mode);
4493 if (rem_flag)
4495 remainder
4496 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4497 quotient = gen_reg_rtx (compute_mode);
4499 else
4501 quotient
4502 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4503 remainder = gen_reg_rtx (compute_mode);
4506 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4507 quotient, remainder, 0))
4509 /* This could be computed with a branch-less sequence.
4510 Save that for later. */
4511 rtx tem;
4512 rtx_code_label *label = gen_label_rtx ();
4513 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4514 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4515 NULL_RTX, 0, OPTAB_WIDEN);
4516 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4517 expand_dec (quotient, const1_rtx);
4518 expand_inc (remainder, op1);
4519 emit_label (label);
4520 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4523 /* No luck with division elimination or divmod. Have to do it
4524 by conditionally adjusting op0 *and* the result. */
4526 rtx_code_label *label1, *label2, *label3, *label4, *label5;
4527 rtx adjusted_op0;
4528 rtx tem;
4530 quotient = gen_reg_rtx (compute_mode);
4531 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4532 label1 = gen_label_rtx ();
4533 label2 = gen_label_rtx ();
4534 label3 = gen_label_rtx ();
4535 label4 = gen_label_rtx ();
4536 label5 = gen_label_rtx ();
4537 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4538 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4539 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4540 quotient, 0, OPTAB_LIB_WIDEN);
4541 if (tem != quotient)
4542 emit_move_insn (quotient, tem);
4543 emit_jump_insn (gen_jump (label5));
4544 emit_barrier ();
4545 emit_label (label1);
4546 expand_inc (adjusted_op0, const1_rtx);
4547 emit_jump_insn (gen_jump (label4));
4548 emit_barrier ();
4549 emit_label (label2);
4550 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4551 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4552 quotient, 0, OPTAB_LIB_WIDEN);
4553 if (tem != quotient)
4554 emit_move_insn (quotient, tem);
4555 emit_jump_insn (gen_jump (label5));
4556 emit_barrier ();
4557 emit_label (label3);
4558 expand_dec (adjusted_op0, const1_rtx);
4559 emit_label (label4);
4560 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4561 quotient, 0, OPTAB_LIB_WIDEN);
4562 if (tem != quotient)
4563 emit_move_insn (quotient, tem);
4564 expand_dec (quotient, const1_rtx);
4565 emit_label (label5);
4567 break;
4569 case CEIL_DIV_EXPR:
4570 case CEIL_MOD_EXPR:
4571 if (unsignedp)
4573 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4575 rtx t1, t2, t3;
4576 unsigned HOST_WIDE_INT d = INTVAL (op1);
4577 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4578 floor_log2 (d), tquotient, 1);
4579 t2 = expand_binop (compute_mode, and_optab, op0,
4580 gen_int_mode (d - 1, compute_mode),
4581 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4582 t3 = gen_reg_rtx (compute_mode);
4583 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4584 compute_mode, 1, 1);
4585 if (t3 == 0)
4587 rtx_code_label *lab;
4588 lab = gen_label_rtx ();
4589 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4590 expand_inc (t1, const1_rtx);
4591 emit_label (lab);
4592 quotient = t1;
4594 else
4595 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4596 t1, t3),
4597 tquotient);
4598 break;
4601 /* Try using an instruction that produces both the quotient and
4602 remainder, using truncation. We can easily compensate the
4603 quotient or remainder to get ceiling rounding, once we have the
4604 remainder. Notice that we compute also the final remainder
4605 value here, and return the result right away. */
4606 if (target == 0 || GET_MODE (target) != compute_mode)
4607 target = gen_reg_rtx (compute_mode);
4609 if (rem_flag)
4611 remainder = (REG_P (target)
4612 ? target : gen_reg_rtx (compute_mode));
4613 quotient = gen_reg_rtx (compute_mode);
4615 else
4617 quotient = (REG_P (target)
4618 ? target : gen_reg_rtx (compute_mode));
4619 remainder = gen_reg_rtx (compute_mode);
4622 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4623 remainder, 1))
4625 /* This could be computed with a branch-less sequence.
4626 Save that for later. */
4627 rtx_code_label *label = gen_label_rtx ();
4628 do_cmp_and_jump (remainder, const0_rtx, EQ,
4629 compute_mode, label);
4630 expand_inc (quotient, const1_rtx);
4631 expand_dec (remainder, op1);
4632 emit_label (label);
4633 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4636 /* No luck with division elimination or divmod. Have to do it
4637 by conditionally adjusting op0 *and* the result. */
4639 rtx_code_label *label1, *label2;
4640 rtx adjusted_op0, tem;
4642 quotient = gen_reg_rtx (compute_mode);
4643 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4644 label1 = gen_label_rtx ();
4645 label2 = gen_label_rtx ();
4646 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4647 compute_mode, label1);
4648 emit_move_insn (quotient, const0_rtx);
4649 emit_jump_insn (gen_jump (label2));
4650 emit_barrier ();
4651 emit_label (label1);
4652 expand_dec (adjusted_op0, const1_rtx);
4653 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4654 quotient, 1, OPTAB_LIB_WIDEN);
4655 if (tem != quotient)
4656 emit_move_insn (quotient, tem);
4657 expand_inc (quotient, const1_rtx);
4658 emit_label (label2);
4661 else /* signed */
4663 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4664 && INTVAL (op1) >= 0)
4666 /* This is extremely similar to the code for the unsigned case
4667 above. For 2.7 we should merge these variants, but for
4668 2.6.1 I don't want to touch the code for unsigned since that
4669 gets used in C. The signed case will only be used by other
4670 languages (Ada). */
4672 rtx t1, t2, t3;
4673 unsigned HOST_WIDE_INT d = INTVAL (op1);
4674 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4675 floor_log2 (d), tquotient, 0);
4676 t2 = expand_binop (compute_mode, and_optab, op0,
4677 gen_int_mode (d - 1, compute_mode),
4678 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4679 t3 = gen_reg_rtx (compute_mode);
4680 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4681 compute_mode, 1, 1);
4682 if (t3 == 0)
4684 rtx_code_label *lab;
4685 lab = gen_label_rtx ();
4686 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4687 expand_inc (t1, const1_rtx);
4688 emit_label (lab);
4689 quotient = t1;
4691 else
4692 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4693 t1, t3),
4694 tquotient);
4695 break;
4698 /* Try using an instruction that produces both the quotient and
4699 remainder, using truncation. We can easily compensate the
4700 quotient or remainder to get ceiling rounding, once we have the
4701 remainder. Notice that we compute also the final remainder
4702 value here, and return the result right away. */
4703 if (target == 0 || GET_MODE (target) != compute_mode)
4704 target = gen_reg_rtx (compute_mode);
4705 if (rem_flag)
4707 remainder = (REG_P
4708 ? target : gen_reg_rtx (compute_mode));
4709 quotient = gen_reg_rtx (compute_mode);
4711 else
4713 quotient = (REG_P (target)
4714 ? target : gen_reg_rtx (compute_mode));
4715 remainder = gen_reg_rtx (compute_mode);
4718 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4719 remainder, 0))
4721 /* This could be computed with a branch-less sequence.
4722 Save that for later. */
4723 rtx tem;
4724 rtx_code_label *label = gen_label_rtx ();
4725 do_cmp_and_jump (remainder, const0_rtx, EQ,
4726 compute_mode, label);
4727 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4728 NULL_RTX, 0, OPTAB_WIDEN);
4729 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4730 expand_inc (quotient, const1_rtx);
4731 expand_dec (remainder, op1);
4732 emit_label (label);
4733 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4736 /* No luck with division elimination or divmod. Have to do it
4737 by conditionally adjusting op0 *and* the result. */
4739 rtx_code_label *label1, *label2, *label3, *label4, *label5;
4740 rtx adjusted_op0;
4741 rtx tem;
4743 quotient = gen_reg_rtx (compute_mode);
4744 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4745 label1 = gen_label_rtx ();
4746 label2 = gen_label_rtx ();
4747 label3 = gen_label_rtx ();
4748 label4 = gen_label_rtx ();
4749 label5 = gen_label_rtx ();
4750 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4751 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4752 compute_mode, label1);
4753 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4754 quotient, 0, OPTAB_LIB_WIDEN);
4755 if (tem != quotient)
4756 emit_move_insn (quotient, tem);
4757 emit_jump_insn (gen_jump (label5));
4758 emit_barrier ();
4759 emit_label (label1);
4760 expand_dec (adjusted_op0, const1_rtx);
4761 emit_jump_insn (gen_jump (label4));
4762 emit_barrier ();
4763 emit_label (label2);
4764 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4765 compute_mode, label3);
4766 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4767 quotient, 0, OPTAB_LIB_WIDEN);
4768 if (tem != quotient)
4769 emit_move_insn (quotient, tem);
4770 emit_jump_insn (gen_jump (label5));
4771 emit_barrier ();
4772 emit_label (label3);
4773 expand_inc (adjusted_op0, const1_rtx);
4774 emit_label (label4);
4775 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4776 quotient, 0, OPTAB_LIB_WIDEN);
4777 if (tem != quotient)
4778 emit_move_insn (quotient, tem);
4779 expand_inc (quotient, const1_rtx);
4780 emit_label (label5);
4783 break;
4785 case EXACT_DIV_EXPR:
4786 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4788 HOST_WIDE_INT d = INTVAL (op1);
4789 unsigned HOST_WIDE_INT ml;
4790 int pre_shift;
4791 rtx t1;
4793 pre_shift = floor_log2 (d & -d);
4794 ml = invert_mod2n (d >> pre_shift, size);
4795 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4796 pre_shift, NULL_RTX, unsignedp);
4797 quotient = expand_mult (compute_mode, t1,
4798 gen_int_mode (ml, compute_mode),
4799 NULL_RTX, 1);
4801 insn = get_last_insn ();
4802 set_dst_reg_note (insn, REG_EQUAL,
4803 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4804 compute_mode, op0, op1),
4805 quotient);
4807 break;
4809 case ROUND_DIV_EXPR:
4810 case ROUND_MOD_EXPR:
4811 if (unsignedp)
4813 rtx tem;
4814 rtx_code_label *label;
4815 label = gen_label_rtx ();
4816 quotient = gen_reg_rtx (compute_mode);
4817 remainder = gen_reg_rtx (compute_mode);
4818 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4820 rtx tem;
4821 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4822 quotient, 1, OPTAB_LIB_WIDEN);
4823 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4824 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4825 remainder, 1, OPTAB_LIB_WIDEN);
4827 tem = plus_constant (compute_mode, op1, -1);
4828 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4829 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4830 expand_inc (quotient, const1_rtx);
4831 expand_dec (remainder, op1);
4832 emit_label (label);
4834 else
4836 rtx abs_rem, abs_op1, tem, mask;
4837 rtx_code_label *label;
4838 label = gen_label_rtx ();
4839 quotient = gen_reg_rtx (compute_mode);
4840 remainder = gen_reg_rtx (compute_mode);
4841 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4843 rtx tem;
4844 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4845 quotient, 0, OPTAB_LIB_WIDEN);
4846 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4847 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4848 remainder, 0, OPTAB_LIB_WIDEN);
4850 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4851 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4852 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4853 1, NULL_RTX, 1);
4854 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4855 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4856 NULL_RTX, 0, OPTAB_WIDEN);
4857 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4858 size - 1, NULL_RTX, 0);
4859 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4860 NULL_RTX, 0, OPTAB_WIDEN);
4861 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4862 NULL_RTX, 0, OPTAB_WIDEN);
4863 expand_inc (quotient, tem);
4864 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4865 NULL_RTX, 0, OPTAB_WIDEN);
4866 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4867 NULL_RTX, 0, OPTAB_WIDEN);
4868 expand_dec (remainder, tem);
4869 emit_label (label);
4871 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4873 default:
4874 gcc_unreachable ();
4877 if (quotient == 0)
4879 if (target && GET_MODE (target) != compute_mode)
4880 target = 0;
4882 if (rem_flag)
4884 /* Try to produce the remainder without producing the quotient.
4885 If we seem to have a divmod pattern that does not require widening,
4886 don't try widening here. We should really have a WIDEN argument
4887 to expand_twoval_binop, since what we'd really like to do here is
4888 1) try a mod insn in compute_mode
4889 2) try a divmod insn in compute_mode
4890 3) try a div insn in compute_mode and multiply-subtract to get
4891 remainder
4892 4) try the same things with widening allowed. */
4893 remainder
4894 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4895 op0, op1, target,
4896 unsignedp,
4897 ((optab_handler (optab2, compute_mode)
4898 != CODE_FOR_nothing)
4899 ? OPTAB_DIRECT : OPTAB_WIDEN));
4900 if (remainder == 0)
4902 /* No luck there. Can we do remainder and divide at once
4903 without a library call? */
4904 remainder = gen_reg_rtx (compute_mode);
4905 if (! expand_twoval_binop ((unsignedp
4906 ? udivmod_optab
4907 : sdivmod_optab),
4908 op0, op1,
4909 NULL_RTX, remainder, unsignedp))
4910 remainder = 0;
4913 if (remainder)
4914 return gen_lowpart (mode, remainder);
4917 /* Produce the quotient. Try a quotient insn, but not a library call.
4918 If we have a divmod in this mode, use it in preference to widening
4919 the div (for this test we assume it will not fail). Note that optab2
4920 is set to the one of the two optabs that the call below will use. */
4921 quotient
4922 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4923 op0, op1, rem_flag ? NULL_RTX : target,
4924 unsignedp,
4925 ((optab_handler (optab2, compute_mode)
4926 != CODE_FOR_nothing)
4927 ? OPTAB_DIRECT : OPTAB_WIDEN));
4929 if (quotient == 0)
4931 /* No luck there. Try a quotient-and-remainder insn,
4932 keeping the quotient alone. */
4933 quotient = gen_reg_rtx (compute_mode);
4934 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4935 op0, op1,
4936 quotient, NULL_RTX, unsignedp))
4938 quotient = 0;
4939 if (! rem_flag)
4940 /* Still no luck. If we are not computing the remainder,
4941 use a library call for the quotient. */
4942 quotient = sign_expand_binop (compute_mode,
4943 udiv_optab, sdiv_optab,
4944 op0, op1, target,
4945 unsignedp, OPTAB_LIB_WIDEN);
4950 if (rem_flag)
4952 if (target && GET_MODE (target) != compute_mode)
4953 target = 0;
4955 if (quotient == 0)
4957 /* No divide instruction either. Use library for remainder. */
4958 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4959 op0, op1, target,
4960 unsignedp, OPTAB_LIB_WIDEN);
4961 /* No remainder function. Try a quotient-and-remainder
4962 function, keeping the remainder. */
4963 if (!remainder)
4965 remainder = gen_reg_rtx (compute_mode);
4966 if (!expand_twoval_binop_libfunc
4967 (unsignedp ? udivmod_optab : sdivmod_optab,
4968 op0, op1,
4969 NULL_RTX, remainder,
4970 unsignedp ? UMOD : MOD))
4971 remainder = NULL_RTX;
4974 else
4976 /* We divided. Now finish doing X - Y * (X / Y). */
4977 remainder = expand_mult (compute_mode, quotient, op1,
4978 NULL_RTX, unsignedp);
4979 remainder = expand_binop (compute_mode, sub_optab, op0,
4980 remainder, target, unsignedp,
4981 OPTAB_LIB_WIDEN);
4985 return gen_lowpart (mode, rem_flag ? remainder : quotient);
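/* The mh != 0 branch of the unsigned TRUNC_DIV_EXPR case above is the
   classic round-up-multiplier recipe.  A minimal sketch for division by
   7 in 32 bits, assuming <stdint.h>; the constant and shifts follow the
   standard derivation and should match what choose_multiplier returns
   for (7, 32, 32).  The name is illustrative only.  */
#if 0
static uint32_t
example_udiv7 (uint32_t x)
{
  /* High half of x times the low 32 bits of the 33-bit multiplier.  */
  uint32_t t1 = (uint32_t) (((uint64_t) x * 0x24924925u) >> 32);
  /* The t2/t3/t4 fix-up from the mh != 0 case, then post_shift - 1.  */
  return (t1 + ((x - t1) >> 1)) >> 2;             /* == x / 7 for all x */
}
#endif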
4988 /* Return a tree node with data type TYPE, describing the value of X.
4989 Usually this is a VAR_DECL, if there is no obvious better choice.
4990 X may be an expression, however we only support those expressions
4991 generated by loop.c. */
4993 tree
4994 make_tree (tree type, rtx x)
4996 tree t;
4998 switch (GET_CODE (x))
5000 case CONST_INT:
5001 case CONST_WIDE_INT:
5002 t = wide_int_to_tree (type, std::make_pair (x, TYPE_MODE (type)));
5003 return t;
5005 case CONST_DOUBLE:
5006 STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5007 if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5008 t = wide_int_to_tree (type,
5009 wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5010 HOST_BITS_PER_WIDE_INT * 2));
5011 else
5013 REAL_VALUE_TYPE d;
5015 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
5016 t = build_real (type, d);
5019 return t;
5021 case CONST_VECTOR:
5023 int units = CONST_VECTOR_NUNITS (x);
5024 tree itype = TREE_TYPE (type);
5025 tree *elts;
5026 int i;
5028 /* Build a tree with vector elements. */
5029 elts = XALLOCAVEC (tree, units);
5030 for (i = units - 1; i >= 0; --i)
5032 rtx elt = CONST_VECTOR_ELT (x, i);
5033 elts[i] = make_tree (itype, elt);
5036 return build_vector (type, elts);
5039 case PLUS:
5040 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5041 make_tree (type, XEXP (x, 1)));
5043 case MINUS:
5044 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5045 make_tree (type, XEXP (x, 1)));
5047 case NEG:
5048 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5050 case MULT:
5051 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5052 make_tree (type, XEXP (x, 1)));
5054 case ASHIFT:
5055 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5056 make_tree (type, XEXP (x, 1)));
5058 case LSHIFTRT:
5059 t = unsigned_type_for (type);
5060 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5061 make_tree (t, XEXP (x, 0)),
5062 make_tree (type, XEXP (x, 1))));
5064 case ASHIFTRT:
5065 t = signed_type_for (type);
5066 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5067 make_tree (t, XEXP (x, 0)),
5068 make_tree (type, XEXP (x, 1))));
5070 case DIV:
5071 if (TREE_CODE (type) != REAL_TYPE)
5072 t = signed_type_for (type);
5073 else
5074 t = type;
5076 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5077 make_tree (t, XEXP (x, 0)),
5078 make_tree (t, XEXP (x, 1))));
5079 case UDIV:
5080 t = unsigned_type_for (type);
5081 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5082 make_tree (t, XEXP (x, 0)),
5083 make_tree (t, XEXP (x, 1))));
5085 case SIGN_EXTEND:
5086 case ZERO_EXTEND:
5087 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5088 GET_CODE (x) == ZERO_EXTEND);
5089 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5091 case CONST:
5092 return make_tree (type, XEXP (x, 0));
5094 case SYMBOL_REF:
5095 t = SYMBOL_REF_DECL (x);
5096 if (t)
5097 return fold_convert (type, build_fold_addr_expr (t));
5098 /* else fall through. */
5100 default:
5101 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5103 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5104 address mode to pointer mode. */
5105 if (POINTER_TYPE_P (type))
5106 x = convert_memory_address_addr_space
5107 (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5109 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5110 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5111 t->decl_with_rtl.rtl = x;
5113 return t;
5117 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5118 and returning TARGET.
5120 If TARGET is 0, a pseudo-register or constant is returned. */
5123 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5125 rtx tem = 0;
5127 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5128 tem = simplify_binary_operation (AND, mode, op0, op1);
5129 if (tem == 0)
5130 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5132 if (target == 0)
5133 target = tem;
5134 else if (tem != target)
5135 emit_move_insn (target, tem);
5136 return target;
5139 /* Helper function for emit_store_flag. */
5141 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5142 machine_mode mode, machine_mode compare_mode,
5143 int unsignedp, rtx x, rtx y, int normalizep,
5144 machine_mode target_mode)
5146 struct expand_operand ops[4];
5147 rtx op0, comparison, subtarget;
5148 rtx_insn *last;
5149 machine_mode result_mode = targetm.cstore_mode (icode);
5151 last = get_last_insn ();
5152 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5153 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5154 if (!x || !y)
5156 delete_insns_since (last);
5157 return NULL_RTX;
5160 if (target_mode == VOIDmode)
5161 target_mode = result_mode;
5162 if (!target)
5163 target = gen_reg_rtx (target_mode);
5165 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5167 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5168 create_fixed_operand (&ops[1], comparison);
5169 create_fixed_operand (&ops[2], x);
5170 create_fixed_operand (&ops[3], y);
5171 if (!maybe_expand_insn (icode, 4, ops))
5173 delete_insns_since (last);
5174 return NULL_RTX;
5176 subtarget = ops[0].value;
5178 /* If we are converting to a wider mode, first convert to
5179 TARGET_MODE, then normalize. This produces better combining
5180 opportunities on machines that have a SIGN_EXTRACT when we are
5181 testing a single bit. This mostly benefits the 68k.
5183 If STORE_FLAG_VALUE does not have the sign bit set when
5184 interpreted in MODE, we can do this conversion as unsigned, which
5185 is usually more efficient. */
5186 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5188 convert_move (target, subtarget,
5189 val_signbit_known_clear_p (result_mode,
5190 STORE_FLAG_VALUE));
5191 op0 = target;
5192 result_mode = target_mode;
5194 else
5195 op0 = subtarget;
5197 /* If we want to keep subexpressions around, don't reuse our last
5198 target. */
5199 if (optimize)
5200 subtarget = 0;
5202 /* Now normalize to the proper value in MODE. Sometimes we don't
5203 have to do anything. */
5204 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5206 /* STORE_FLAG_VALUE might be the most negative number, so write
5207 the comparison this way to avoid a compile-time warning. */
5208 else if (- normalizep == STORE_FLAG_VALUE)
5209 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5211 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5212 it hard to use a value of just the sign bit due to ANSI integer
5213 constant typing rules. */
5214 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5215 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5216 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5217 normalizep == 1);
5218 else
5220 gcc_assert (STORE_FLAG_VALUE & 1);
5222 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5223 if (normalizep == -1)
5224 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5227 /* If we were converting to a smaller mode, do the conversion now. */
5228 if (target_mode != result_mode)
5230 convert_move (target, op0, 0);
5231 return target;
5233 else
5234 return op0;
5238 /* A subroutine of emit_store_flag only including "tricks" that do not
5239 need a recursive call. These are kept separate to avoid infinite
5240 loops. */
5242 static rtx
5243 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5244 machine_mode mode, int unsignedp, int normalizep,
5245 machine_mode target_mode)
5247 rtx subtarget;
5248 enum insn_code icode;
5249 machine_mode compare_mode;
5250 enum mode_class mclass;
5251 enum rtx_code scode;
5253 if (unsignedp)
5254 code = unsigned_condition (code);
5255 scode = swap_condition (code);
5257 /* If one operand is constant, make it the second one. Only do this
5258 if the other operand is not constant as well. */
5260 if (swap_commutative_operands_p (op0, op1))
5262 std::swap (op0, op1);
5263 code = swap_condition (code);
5266 if (mode == VOIDmode)
5267 mode = GET_MODE (op0);
5269 /* For some comparisons with 1 and -1, we can convert this to
5270 comparisons with zero. This will often produce more opportunities for
5271 store-flag insns. */
5273 switch (code)
5275 case LT:
5276 if (op1 == const1_rtx)
5277 op1 = const0_rtx, code = LE;
5278 break;
5279 case LE:
5280 if (op1 == constm1_rtx)
5281 op1 = const0_rtx, code = LT;
5282 break;
5283 case GE:
5284 if (op1 == const1_rtx)
5285 op1 = const0_rtx, code = GT;
5286 break;
5287 case GT:
5288 if (op1 == constm1_rtx)
5289 op1 = const0_rtx, code = GE;
5290 break;
5291 case GEU:
5292 if (op1 == const1_rtx)
5293 op1 = const0_rtx, code = NE;
5294 break;
5295 case LTU:
5296 if (op1 == const1_rtx)
5297 op1 = const0_rtx, code = EQ;
5298 break;
5299 default:
5300 break;
5303 /* If we are comparing a double-word integer with zero or -1, we can
5304 convert the comparison into one involving a single word. */
5305 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5306 && GET_MODE_CLASS (mode) == MODE_INT
5307 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5309 rtx tem;
5310 if ((code == EQ || code == NE)
5311 && (op1 == const0_rtx || op1 == constm1_rtx))
5313 rtx op00, op01;
5315 /* Do a logical OR or AND of the two words and compare the
5316 result. */
5317 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5318 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5319 tem = expand_binop (word_mode,
5320 op1 == const0_rtx ? ior_optab : and_optab,
5321 op00, op01, NULL_RTX, unsignedp,
5322 OPTAB_DIRECT);
5324 if (tem != 0)
5325 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5326 unsignedp, normalizep);
5328 else if ((code == LT || code == GE) && op1 == const0_rtx)
5330 rtx op0h;
5332 /* If testing the sign bit, can just test on high word. */
5333 op0h = simplify_gen_subreg (word_mode, op0, mode,
5334 subreg_highpart_offset (word_mode,
5335 mode));
5336 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5337 unsignedp, normalizep);
5339 else
5340 tem = NULL_RTX;
5342 if (tem)
5344 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5345 return tem;
5346 if (!target)
5347 target = gen_reg_rtx (target_mode);
5349 convert_move (target, tem,
5350 !val_signbit_known_set_p (word_mode,
5351 (normalizep ? normalizep
5352 : STORE_FLAG_VALUE)));
5353 return target;
5357 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5358 complement of A (for GE) and shifting the sign bit to the low bit. */
5359 if (op1 == const0_rtx && (code == LT || code == GE)
5360 && GET_MODE_CLASS (mode) == MODE_INT
5361 && (normalizep || STORE_FLAG_VALUE == 1
5362 || val_signbit_p (mode, STORE_FLAG_VALUE)))
5364 subtarget = target;
5366 if (!target)
5367 target_mode = mode;
5369 /* If the result is to be wider than OP0, it is best to convert it
5370 first. If it is to be narrower, it is *incorrect* to convert it
5371 first. */
5372 else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5374 op0 = convert_modes (target_mode, mode, op0, 0);
5375 mode = target_mode;
5378 if (target_mode != mode)
5379 subtarget = 0;
5381 if (code == GE)
5382 op0 = expand_unop (mode, one_cmpl_optab, op0,
5383 ((STORE_FLAG_VALUE == 1 || normalizep)
5384 ? 0 : subtarget), 0);
5386 if (STORE_FLAG_VALUE == 1 || normalizep)
5387 /* If we are supposed to produce a 0/1 value, we want to do
5388 a logical shift from the sign bit to the low-order bit; for
5389 a -1/0 value, we do an arithmetic shift. */
5390 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5391 GET_MODE_BITSIZE (mode) - 1,
5392 subtarget, normalizep != -1);
5394 if (mode != target_mode)
5395 op0 = convert_modes (target_mode, mode, op0, 0);
5397 return op0;
5400 mclass = GET_MODE_CLASS (mode);
5401 for (compare_mode = mode; compare_mode != VOIDmode;
5402 compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5404 machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5405 icode = optab_handler (cstore_optab, optab_mode);
5406 if (icode != CODE_FOR_nothing)
5408 do_pending_stack_adjust ();
5409 rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5410 unsignedp, op0, op1, normalizep, target_mode);
5411 if (tem)
5412 return tem;
5414 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5416 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5417 unsignedp, op1, op0, normalizep, target_mode);
5418 if (tem)
5419 return tem;
5421 break;
5425 return 0;
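/* A standalone sketch of the double-word reduction performed in
   emit_store_flag_1 above.  It is purely illustrative and not used in
   this file; splitting a 64-bit value into two assumed 32-bit words
   stands in for the word_mode subregs, and only the cases the code
   above handles (EQ/NE against 0 or -1, and the LT/GE sign test
   against 0) appear.  */

static inline int
sketch_doubleword_store_flag (unsigned long long x, enum rtx_code code,
			      long long op1)
{
  unsigned int lo = (unsigned int) x;
  unsigned int hi = (unsigned int) (x >> 32);

  switch (code)
    {
    case EQ:
    case NE:
      {
	/* Against zero, OR the two words: the result is zero iff both
	   are zero.  Against -1, AND them: the result is all-ones iff
	   both are all-ones.  */
	unsigned int tem = op1 == 0 ? (lo | hi) : (lo & hi);
	unsigned int want = op1 == 0 ? 0 : ~0U;
	int eq = tem == want;
	return code == EQ ? eq : !eq;
      }

    case LT:
      /* The sign of the whole value lives in the high word.  */
      return hi >> 31;
    case GE:
      return !(hi >> 31);

    default:
      gcc_unreachable ();
    }
}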
5428 /* Emit a store-flag instruction for comparison CODE on OP0 and OP1,
5429 storing the result in TARGET. Normally return TARGET.
5430 Return 0 if that cannot be done.
5432 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5433 it is VOIDmode, they cannot both be CONST_INT.
5435 UNSIGNEDP is for the case where we have to widen the operands
5436 to perform the operation. It says to use zero-extension.
5438 NORMALIZEP is 1 if we should convert the result to be either zero
5439 or one. NORMALIZEP is -1 if we should convert the result to be
5440 either zero or -1. If NORMALIZEP is zero, the result will be left
5441 "raw" out of the scc insn. */
5443 rtx
5444 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5445 machine_mode mode, int unsignedp, int normalizep)
5447 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5448 enum rtx_code rcode;
5449 rtx subtarget;
5450 rtx tem, trueval;
5451 rtx_insn *last;
5453 /* If we compare constants, we shouldn't use a store-flag operation,
5454 but a constant load. We can get there via the vanilla route that
5455 usually generates a compare-branch sequence, but will in this case
5456 fold the comparison to a constant, and thus elide the branch. */
5457 if (CONSTANT_P (op0) && CONSTANT_P (op1))
5458 return NULL_RTX;
5460 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5461 target_mode);
5462 if (tem)
5463 return tem;
5465 /* If we reached here, we can't do this with a scc insn; however, there
5466 are some comparisons that can be done in other ways. Don't do any
5467 of these cases if branches are very cheap. */
5468 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5469 return 0;
5471 /* See what we need to return. We can only return a 1, -1, or the
5472 sign bit. */
5474 if (normalizep == 0)
5476 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5477 normalizep = STORE_FLAG_VALUE;
5479 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5481 else
5482 return 0;
5485 last = get_last_insn ();
5487 /* If optimizing, use different pseudo registers for each insn, instead
5488 of reusing the same pseudo. This leads to better CSE, but slows
5489 down the compiler, since there are more pseudos. */
5490 subtarget = (!optimize
5491 && (target_mode == mode)) ? target : NULL_RTX;
5492 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5494 /* For floating-point comparisons, try the reverse comparison or try
5495 changing the "orderedness" of the comparison. */
5496 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5498 enum rtx_code first_code;
5499 bool and_them;
5501 rcode = reverse_condition_maybe_unordered (code);
5502 if (can_compare_p (rcode, mode, ccp_store_flag)
5503 && (code == ORDERED || code == UNORDERED
5504 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5505 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5507 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5508 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5510 /* For the reverse comparison, use either an addition or a XOR. */
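	  /* Illustrative note on why an addition or an XOR suffices: the
	     reverse comparison computed below yields 0 when the original
	     condition holds and a known nonzero value when it does not.
	     In the want_add case (STORE_FLAG_VALUE == 1 with NORMALIZEP
	     == -1, or vice versa), adding NORMALIZEP maps {0, 1} to
	     {-1, 0} (or {0, -1} to {1, 0}), i.e. to the normalized value
	     for the original condition; otherwise, XORing with TRUEVAL
	     maps {0, TRUEVAL} to {TRUEVAL, 0}.  */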
5511 if (want_add
5512 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5513 optimize_insn_for_speed_p ()) == 0)
5515 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5516 STORE_FLAG_VALUE, target_mode);
5517 if (tem)
5518 return expand_binop (target_mode, add_optab, tem,
5519 gen_int_mode (normalizep, target_mode),
5520 target, 0, OPTAB_WIDEN);
5522 else if (!want_add
5523 && rtx_cost (trueval, XOR, 1,
5524 optimize_insn_for_speed_p ()) == 0)
5526 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5527 normalizep, target_mode);
5528 if (tem)
5529 return expand_binop (target_mode, xor_optab, tem, trueval,
5530 target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5534 delete_insns_since (last);
5536 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5537 if (code == ORDERED || code == UNORDERED)
5538 return 0;
5540 and_them = split_comparison (code, mode, &first_code, &code);
5542 /* If there are no NaNs, the first comparison should always fall through.
5543 Effectively change the comparison to the other one. */
5544 if (!HONOR_NANS (mode))
5546 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5547 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5548 target_mode);
5551 if (!HAVE_conditional_move)
5552 return 0;
5554 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5555 conditional move. */
5556 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5557 normalizep, target_mode);
5558 if (tem == 0)
5559 return 0;
5561 if (and_them)
5562 tem = emit_conditional_move (target, code, op0, op1, mode,
5563 tem, const0_rtx, GET_MODE (tem), 0);
5564 else
5565 tem = emit_conditional_move (target, code, op0, op1, mode,
5566 trueval, tem, GET_MODE (tem), 0);
5568 if (tem == 0)
5569 delete_insns_since (last);
5570 return tem;
5573 /* The remaining tricks only apply to integer comparisons. */
5575 if (GET_MODE_CLASS (mode) != MODE_INT)
5576 return 0;
5578 /* If this is an equality comparison of integers, we can try to exclusive-or
5579 (or subtract) the two operands and use a recursive call to try the
5580 comparison with zero. Don't do any of these cases if branches are
5581 very cheap. */
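  /* That is, "a == b" is rewritten as "(a ^ b) == 0" (or, failing an
     XOR pattern, "(a - b) == 0"), so the comparison against zero can
     reuse the cheaper tricks handled elsewhere in this function
     (illustrative note).  */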
5583 if ((code == EQ || code == NE) && op1 != const0_rtx)
5585 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5586 OPTAB_WIDEN);
5588 if (tem == 0)
5589 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5590 OPTAB_WIDEN);
5591 if (tem != 0)
5592 tem = emit_store_flag (target, code, tem, const0_rtx,
5593 mode, unsignedp, normalizep);
5594 if (tem != 0)
5595 return tem;
5597 delete_insns_since (last);
5600 /* For integer comparisons, try the reverse comparison. However, for
5601 small X, if we'd have to extend anyway, implementing "X != 0"
5602 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5603 rcode = reverse_condition (code);
5604 if (can_compare_p (rcode, mode, ccp_store_flag)
5605 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5606 && code == NE
5607 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5608 && op1 == const0_rtx))
5610 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5611 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5613 /* Again, for the reverse comparison, use either an addition or a XOR. */
5614 if (want_add
5615 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5616 optimize_insn_for_speed_p ()) == 0)
5618 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5619 STORE_FLAG_VALUE, target_mode);
5620 if (tem != 0)
5621 tem = expand_binop (target_mode, add_optab, tem,
5622 gen_int_mode (normalizep, target_mode),
5623 target, 0, OPTAB_WIDEN);
5625 else if (!want_add
5626 && rtx_cost (trueval, XOR, 1,
5627 optimize_insn_for_speed_p ()) == 0)
5629 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5630 normalizep, target_mode);
5631 if (tem != 0)
5632 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5633 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5636 if (tem != 0)
5637 return tem;
5638 delete_insns_since (last);
5641 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5642 the constant zero. Reject all other comparisons at this point. Only
5643 do LE and GT if branches are expensive, since the sequences below for
5644 them are themselves expensive on 2-operand machines. */
5646 if (op1 != const0_rtx
5647 || (code != EQ && code != NE
5648 && (BRANCH_COST (optimize_insn_for_speed_p (),
5649 false) <= 1 || (code != LE && code != GT))))
5650 return 0;
5652 /* Try to put the result of the comparison in the sign bit; a standalone
5653 sketch of these sign-bit tricks follows this function. Assume we can't do the necessary operation below. */
5655 tem = 0;
5657 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5658 the sign bit set. */
5660 if (code == LE)
5662 /* This is destructive, so SUBTARGET can't be OP0. */
5663 if (rtx_equal_p (subtarget, op0))
5664 subtarget = 0;
5666 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5667 OPTAB_WIDEN);
5668 if (tem)
5669 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5670 OPTAB_WIDEN);
5673 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5674 number of bits in the mode of OP0, minus one. */
5676 if (code == GT)
5678 if (rtx_equal_p (subtarget, op0))
5679 subtarget = 0;
5681 tem = expand_shift (RSHIFT_EXPR, mode, op0,
5682 GET_MODE_BITSIZE (mode) - 1,
5683 subtarget, 0);
5684 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5685 OPTAB_WIDEN);
5688 if (code == EQ || code == NE)
5690 /* For EQ or NE, one way to do the comparison is to apply an operation
5691 that converts the operand into a positive number if it is nonzero
5692 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5693 for NE we negate. This puts the result in the sign bit. Then we
5694 normalize with a shift, if needed.
5696 Two operations that can do the above actions are ABS and FFS, so try
5697 them. If that doesn't work, and MODE is smaller than a full word,
5698 we can use zero-extension to the wider mode (an unsigned conversion)
5699 as the operation. */
5701 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5702 that is compensated by the subsequent overflow when subtracting
5703 one / negating. */
5705 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5706 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5707 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5708 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5709 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5711 tem = convert_modes (word_mode, mode, op0, 1);
5712 mode = word_mode;
5715 if (tem != 0)
5717 if (code == EQ)
5718 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5719 0, OPTAB_WIDEN);
5720 else
5721 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5724 /* If we couldn't do it that way, for NE we can "or" the two's complement
5725 of the value with itself. For EQ, we take the one's complement of
5726 that "or", which is an extra insn, so we only handle EQ if branches
5727 are expensive. */
5729 if (tem == 0
5730 && (code == NE
5731 || BRANCH_COST (optimize_insn_for_speed_p (),
5732 false) > 1))
5734 if (rtx_equal_p (subtarget, op0))
5735 subtarget = 0;
5737 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5738 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5739 OPTAB_WIDEN);
5741 if (tem && code == EQ)
5742 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5746 if (tem && normalizep)
5747 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5748 GET_MODE_BITSIZE (mode) - 1,
5749 subtarget, normalizep == 1);
5751 if (tem)
5753 if (!target)
5755 else if (GET_MODE (tem) != target_mode)
5757 convert_move (target, tem, 0);
5758 tem = target;
5760 else if (!subtarget)
5762 emit_move_insn (target, tem);
5763 tem = target;
5766 else
5767 delete_insns_since (last);
5769 return tem;
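/* A standalone sketch of the sign-bit tricks used by emit_store_flag
   above for comparisons against zero.  It is purely illustrative and
   not used in this file; the plain "int" operand, the unsigned
   arithmetic (standing in for the wrapping rtl operations), the
   assumption of an arithmetic ">>" on negative values, and the final
   logical shift that normalizes to 0/1 are all simplifications.  */

static inline int
sketch_store_flag_sign_bit (int a, enum rtx_code code)
{
  unsigned int ua = (unsigned int) a;
  unsigned int bits = sizeof (int) * CHAR_BIT;
  unsigned int tem;

  switch (code)
    {
    case LE:
      /* A <= 0 iff (A | (A - 1)) has the sign bit set.  */
      tem = ua | (ua - 1);
      break;
    case GT:
      /* A > 0 iff ((A >> (BITS - 1)) - A) has the sign bit set, where
	 the shift is an arithmetic one.  */
      tem = (unsigned int) (a >> (bits - 1)) - ua;
      break;
    case NE:
      /* A != 0 iff (-A | A) has the sign bit set.  */
      tem = (0U - ua) | ua;
      break;
    case EQ:
      /* A == 0 iff the complement of (-A | A) has the sign bit set.  */
      tem = ~((0U - ua) | ua);
      break;
    default:
      gcc_unreachable ();
    }

  /* Normalize: a logical shift of the sign bit down gives 0/1; an
     arithmetic shift would give 0/-1 instead.  */
  return tem >> (bits - 1);
}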
5772 /* Like emit_store_flag, but always succeeds. */
5774 rtx
5775 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5776 machine_mode mode, int unsignedp, int normalizep)
5778 rtx tem;
5779 rtx_code_label *label;
5780 rtx trueval, falseval;
5782 /* First see if emit_store_flag can do the job. */
5783 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5784 if (tem != 0)
5785 return tem;
5787 if (!target)
5788 target = gen_reg_rtx (word_mode);
5790 /* If this failed, we have to do this with set/compare/jump/set code;
5791 a sketch of that pattern follows this function. For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
5792 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5793 if (code == NE
5794 && GET_MODE_CLASS (mode) == MODE_INT
5795 && REG_P (target)
5796 && op0 == target
5797 && op1 == const0_rtx)
5799 label = gen_label_rtx ();
5800 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
5801 NULL_RTX, NULL, label, -1);
5802 emit_move_insn (target, trueval);
5803 emit_label (label);
5804 return target;
5807 if (!REG_P (target)
5808 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5809 target = gen_reg_rtx (GET_MODE (target));
5811 /* Jump in the right direction if the target cannot implement CODE
5812 but can jump on its reverse condition. */
5813 falseval = const0_rtx;
5814 if (! can_compare_p (code, mode, ccp_jump)
5815 && (! FLOAT_MODE_P (mode)
5816 || code == ORDERED || code == UNORDERED
5817 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5818 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5820 enum rtx_code rcode;
5821 if (FLOAT_MODE_P (mode))
5822 rcode = reverse_condition_maybe_unordered (code);
5823 else
5824 rcode = reverse_condition (code);
5826 /* Canonicalize to UNORDERED for the libcall. */
5827 if (can_compare_p (rcode, mode, ccp_jump)
5828 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5830 falseval = trueval;
5831 trueval = const0_rtx;
5832 code = rcode;
5836 emit_move_insn (target, trueval);
5837 label = gen_label_rtx ();
5838 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
5839 label, -1);
5841 emit_move_insn (target, falseval);
5842 emit_label (label);
5844 return target;
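/* A standalone sketch of the fallback sequence emitted by
   emit_store_flag_force above, written as ordinary C.  It is purely
   illustrative and not used in this file: "<" stands for the arbitrary
   comparison CODE, and the 1/0 constants stand for TRUEVAL and
   FALSEVAL (which the code above swaps, together with reversing the
   condition, when the target can only jump on the reverse condition).  */

static inline int
sketch_store_flag_force (int op0, int op1)
{
  int target;

  /* Load the "true" value, then branch around the "false" value when
     the comparison holds: move / compare+jump / move / label.  */
  target = 1;
  if (op0 < op1)
    goto done;
  target = 0;
 done:
  return target;
}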
5847 /* Perform possibly multi-word comparison and conditional jump to LABEL
5848 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
5849 now a thin wrapper around do_compare_rtx_and_jump. */
5851 static void
5852 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
5853 rtx_code_label *label)
5855 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5856 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
5857 NULL, label, -1);