1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987-2015 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "predict.h"
30 #include "df.h"
31 #include "tm_p.h"
32 #include "expmed.h"
33 #include "optabs.h"
34 #include "emit-rtl.h"
35 #include "recog.h"
36 #include "diagnostic-core.h"
37 #include "alias.h"
38 #include "fold-const.h"
39 #include "stor-layout.h"
40 #include "flags.h"
41 #include "dojump.h"
42 #include "explow.h"
43 #include "calls.h"
44 #include "varasm.h"
45 #include "stmt.h"
46 #include "expr.h"
47 #include "langhooks.h"
49 struct target_expmed default_target_expmed;
50 #if SWITCHABLE_TARGET
51 struct target_expmed *this_target_expmed = &default_target_expmed;
52 #endif
54 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
55 unsigned HOST_WIDE_INT,
56 unsigned HOST_WIDE_INT,
57 unsigned HOST_WIDE_INT,
58 rtx);
59 static void store_fixed_bit_field_1 (rtx, unsigned HOST_WIDE_INT,
60 unsigned HOST_WIDE_INT,
61 rtx);
62 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
63 unsigned HOST_WIDE_INT,
64 unsigned HOST_WIDE_INT,
65 unsigned HOST_WIDE_INT,
66 rtx);
67 static rtx extract_fixed_bit_field (machine_mode, rtx,
68 unsigned HOST_WIDE_INT,
69 unsigned HOST_WIDE_INT, rtx, int);
70 static rtx extract_fixed_bit_field_1 (machine_mode, rtx,
71 unsigned HOST_WIDE_INT,
72 unsigned HOST_WIDE_INT, rtx, int);
73 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
74 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
75 unsigned HOST_WIDE_INT, int);
76 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
77 static rtx expand_smod_pow2 (machine_mode, rtx, HOST_WIDE_INT);
78 static rtx expand_sdiv_pow2 (machine_mode, rtx, HOST_WIDE_INT);
/* Return a constant integer mask value of mode MODE with BITSIZE ones
   followed by BITPOS zeros, or the complement of that if COMPLEMENT.
   The mask is truncated if necessary to the width of mode MODE.  The
   mask is zero-extended if BITSIZE+BITPOS is too small for MODE.  */

static inline rtx
mask_rtx (machine_mode mode, int bitpos, int bitsize, bool complement)
{
  return immed_wide_int_const
    (wi::shifted_mask (bitpos, bitsize, complement,
		       GET_MODE_PRECISION (mode)), mode);
}
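/* As a rough illustration (values chosen here, not taken from the original
   sources): on a target whose SImode is 32 bits wide,
   mask_rtx (SImode, 4, 8, false) builds the constant 0x00000ff0, i.e. eight
   ones starting four bits above the lsb, and passing true for COMPLEMENT
   would yield 0xfffff00f instead.  */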
/* Test whether a value is zero or a power of two.  */
#define EXACT_POWER_OF_2_OR_ZERO_P(x) \
  (((x) & ((x) - (unsigned HOST_WIDE_INT) 1)) == 0)
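/* A small worked example of the bit trick above, with illustrative values:
   for x = 8, (8 & 7) == 0, so a power of two passes; for x = 6,
   (6 & 5) == 4 != 0, so it is rejected; and for x = 0, (0 & (0 - 1)) == 0,
   which is why zero is accepted as well.  */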
97 struct init_expmed_rtl
99 rtx reg;
100 rtx plus;
101 rtx neg;
102 rtx mult;
103 rtx sdiv;
104 rtx udiv;
105 rtx sdiv_32;
106 rtx smod_32;
107 rtx wide_mult;
108 rtx wide_lshr;
109 rtx wide_trunc;
110 rtx shift;
111 rtx shift_mult;
112 rtx shift_add;
113 rtx shift_sub0;
114 rtx shift_sub1;
115 rtx zext;
116 rtx trunc;
118 rtx pow2[MAX_BITS_PER_WORD];
119 rtx cint[MAX_BITS_PER_WORD];
122 static void
123 init_expmed_one_conv (struct init_expmed_rtl *all, machine_mode to_mode,
124 machine_mode from_mode, bool speed)
126 int to_size, from_size;
127 rtx which;
129 to_size = GET_MODE_PRECISION (to_mode);
130 from_size = GET_MODE_PRECISION (from_mode);
  /* Most partial integers have a precision less than the "full"
     integer they require for storage.  If one does not, reduce its
     bit size by one for the comparison done here.  */
136 if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
137 && exact_log2 (to_size) != -1)
138 to_size --;
139 if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
140 && exact_log2 (from_size) != -1)
141 from_size --;
143 /* Assume cost of zero-extend and sign-extend is the same. */
144 which = (to_size < from_size ? all->trunc : all->zext);
146 PUT_MODE (all->reg, from_mode);
147 set_convert_cost (to_mode, from_mode, speed,
148 set_src_cost (which, to_mode, speed));
151 static void
152 init_expmed_one_mode (struct init_expmed_rtl *all,
153 machine_mode mode, int speed)
155 int m, n, mode_bitsize;
156 machine_mode mode_from;
158 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
160 PUT_MODE (all->reg, mode);
161 PUT_MODE (all->plus, mode);
162 PUT_MODE (all->neg, mode);
163 PUT_MODE (all->mult, mode);
164 PUT_MODE (all->sdiv, mode);
165 PUT_MODE (all->udiv, mode);
166 PUT_MODE (all->sdiv_32, mode);
167 PUT_MODE (all->smod_32, mode);
168 PUT_MODE (all->wide_trunc, mode);
169 PUT_MODE (all->shift, mode);
170 PUT_MODE (all->shift_mult, mode);
171 PUT_MODE (all->shift_add, mode);
172 PUT_MODE (all->shift_sub0, mode);
173 PUT_MODE (all->shift_sub1, mode);
174 PUT_MODE (all->zext, mode);
175 PUT_MODE (all->trunc, mode);
177 set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
178 set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
179 set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
180 set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
181 set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
183 set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
184 <= 2 * add_cost (speed, mode)));
185 set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
186 <= 4 * add_cost (speed, mode)));
188 set_shift_cost (speed, mode, 0, 0);
190 int cost = add_cost (speed, mode);
191 set_shiftadd_cost (speed, mode, 0, cost);
192 set_shiftsub0_cost (speed, mode, 0, cost);
193 set_shiftsub1_cost (speed, mode, 0, cost);
196 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
197 for (m = 1; m < n; m++)
199 XEXP (all->shift, 1) = all->cint[m];
200 XEXP (all->shift_mult, 1) = all->pow2[m];
202 set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
203 set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
204 speed));
205 set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
206 speed));
207 set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
208 speed));
211 if (SCALAR_INT_MODE_P (mode))
213 for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
214 mode_from = (machine_mode)(mode_from + 1))
215 init_expmed_one_conv (all, mode, mode_from, speed);
217 if (GET_MODE_CLASS (mode) == MODE_INT)
219 machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
220 if (wider_mode != VOIDmode)
222 PUT_MODE (all->zext, wider_mode);
223 PUT_MODE (all->wide_mult, wider_mode);
224 PUT_MODE (all->wide_lshr, wider_mode);
225 XEXP (all->wide_lshr, 1) = GEN_INT (mode_bitsize);
227 set_mul_widen_cost (speed, wider_mode,
228 set_src_cost (all->wide_mult, wider_mode, speed));
229 set_mul_highpart_cost (speed, mode,
230 set_src_cost (all->wide_trunc, mode, speed));
235 void
236 init_expmed (void)
238 struct init_expmed_rtl all;
239 machine_mode mode = QImode;
240 int m, speed;
242 memset (&all, 0, sizeof all);
243 for (m = 1; m < MAX_BITS_PER_WORD; m++)
245 all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
246 all.cint[m] = GEN_INT (m);
249 /* Avoid using hard regs in ways which may be unsupported. */
250 all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
251 all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
252 all.neg = gen_rtx_NEG (mode, all.reg);
253 all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
254 all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
255 all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
256 all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
257 all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
258 all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
259 all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
260 all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
261 all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
262 all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
263 all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
264 all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
265 all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
266 all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
267 all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
269 for (speed = 0; speed < 2; speed++)
271 crtl->maybe_hot_insn_p = speed;
272 set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
274 for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
275 mode = (machine_mode)(mode + 1))
276 init_expmed_one_mode (&all, mode, speed);
278 if (MIN_MODE_PARTIAL_INT != VOIDmode)
279 for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
280 mode = (machine_mode)(mode + 1))
281 init_expmed_one_mode (&all, mode, speed);
283 if (MIN_MODE_VECTOR_INT != VOIDmode)
284 for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
285 mode = (machine_mode)(mode + 1))
286 init_expmed_one_mode (&all, mode, speed);
289 if (alg_hash_used_p ())
291 struct alg_hash_entry *p = alg_hash_entry_ptr (0);
292 memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
294 else
295 set_alg_hash_used_p (true);
296 default_rtl_profile ();
298 ggc_free (all.trunc);
299 ggc_free (all.shift_sub1);
300 ggc_free (all.shift_sub0);
301 ggc_free (all.shift_add);
302 ggc_free (all.shift_mult);
303 ggc_free (all.shift);
304 ggc_free (all.wide_trunc);
305 ggc_free (all.wide_lshr);
306 ggc_free (all.wide_mult);
307 ggc_free (all.zext);
308 ggc_free (all.smod_32);
309 ggc_free (all.sdiv_32);
310 ggc_free (all.udiv);
311 ggc_free (all.sdiv);
312 ggc_free (all.mult);
313 ggc_free (all.neg);
314 ggc_free (all.plus);
315 ggc_free (all.reg);
/* Return an rtx representing minus the value of X.
   MODE is the intended mode of the result,
   useful if X is a CONST_INT.  */

rtx
negate_rtx (machine_mode mode, rtx x)
{
  rtx result = simplify_unary_operation (NEG, mode, x, mode);

  if (result == 0)
    result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);

  return result;
}
333 /* Adjust bitfield memory MEM so that it points to the first unit of mode
334 MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
335 If MODE is BLKmode, return a reference to every byte in the bitfield.
336 Set *NEW_BITNUM to the bit position of the field within the new memory. */
338 static rtx
339 narrow_bit_field_mem (rtx mem, machine_mode mode,
340 unsigned HOST_WIDE_INT bitsize,
341 unsigned HOST_WIDE_INT bitnum,
342 unsigned HOST_WIDE_INT *new_bitnum)
344 if (mode == BLKmode)
346 *new_bitnum = bitnum % BITS_PER_UNIT;
347 HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
348 HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
349 / BITS_PER_UNIT);
350 return adjust_bitfield_address_size (mem, mode, offset, size);
352 else
354 unsigned int unit = GET_MODE_BITSIZE (mode);
355 *new_bitnum = bitnum % unit;
356 HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
357 return adjust_bitfield_address (mem, mode, offset);
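/* A hedged example of the arithmetic above (numbers invented for
   illustration): asking for a 9-bit field at bit position 37 of a BLKmode
   memory gives *NEW_BITNUM = 37 % 8 = 5, a byte offset of 37 / 8 = 4 and a
   size of (5 + 9 + 7) / 8 = 2 bytes; with SImode instead, the unit is 32
   bits, so *NEW_BITNUM = 37 % 32 = 5 and the reference is moved forward by
   (37 - 5) / 8 = 4 bytes.  */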
361 /* The caller wants to perform insertion or extraction PATTERN on a
362 bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
363 BITREGION_START and BITREGION_END are as for store_bit_field
364 and FIELDMODE is the natural mode of the field.
366 Search for a mode that is compatible with the memory access
367 restrictions and (where applicable) with a register insertion or
368 extraction. Return the new memory on success, storing the adjusted
369 bit position in *NEW_BITNUM. Return null otherwise. */
371 static rtx
372 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
373 rtx op0, HOST_WIDE_INT bitsize,
374 HOST_WIDE_INT bitnum,
375 unsigned HOST_WIDE_INT bitregion_start,
376 unsigned HOST_WIDE_INT bitregion_end,
377 machine_mode fieldmode,
378 unsigned HOST_WIDE_INT *new_bitnum)
380 bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
381 bitregion_end, MEM_ALIGN (op0),
382 MEM_VOLATILE_P (op0));
383 machine_mode best_mode;
384 if (iter.next_mode (&best_mode))
386 /* We can use a memory in BEST_MODE. See whether this is true for
387 any wider modes. All other things being equal, we prefer to
388 use the widest mode possible because it tends to expose more
389 CSE opportunities. */
390 if (!iter.prefer_smaller_modes ())
392 /* Limit the search to the mode required by the corresponding
393 register insertion or extraction instruction, if any. */
394 machine_mode limit_mode = word_mode;
395 extraction_insn insn;
396 if (get_best_reg_extraction_insn (&insn, pattern,
397 GET_MODE_BITSIZE (best_mode),
398 fieldmode))
399 limit_mode = insn.field_mode;
401 machine_mode wider_mode;
402 while (iter.next_mode (&wider_mode)
403 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
404 best_mode = wider_mode;
406 return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
407 new_bitnum);
409 return NULL_RTX;
412 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
413 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
414 offset is then BITNUM / BITS_PER_UNIT. */
416 static bool
417 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
418 unsigned HOST_WIDE_INT bitsize,
419 machine_mode struct_mode)
421 if (BYTES_BIG_ENDIAN)
422 return (bitnum % BITS_PER_UNIT == 0
423 && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
424 || (bitnum + bitsize) % BITS_PER_WORD == 0));
425 else
426 return bitnum % BITS_PER_WORD == 0;
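/* For instance, on an assumed little-endian configuration with 32-bit words
   (not a statement about any particular target): a 16-bit field starting at
   bit 32 of a DImode value satisfies bitnum % BITS_PER_WORD == 0 and is
   therefore a lowpart subreg at byte offset 32 / BITS_PER_UNIT = 4, whereas
   the same field starting at bit 8 is not.  */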
429 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
430 containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
431 Return false if the access would touch memory outside the range
432 BITREGION_START to BITREGION_END for conformance to the C++ memory
433 model. */
435 static bool
436 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
437 unsigned HOST_WIDE_INT bitnum,
438 machine_mode fieldmode,
439 unsigned HOST_WIDE_INT bitregion_start,
440 unsigned HOST_WIDE_INT bitregion_end)
442 unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
444 /* -fstrict-volatile-bitfields must be enabled and we must have a
445 volatile MEM. */
446 if (!MEM_P (op0)
447 || !MEM_VOLATILE_P (op0)
448 || flag_strict_volatile_bitfields <= 0)
449 return false;
451 /* Non-integral modes likely only happen with packed structures.
452 Punt. */
453 if (!SCALAR_INT_MODE_P (fieldmode))
454 return false;
456 /* The bit size must not be larger than the field mode, and
457 the field mode must not be larger than a word. */
458 if (bitsize > modesize || modesize > BITS_PER_WORD)
459 return false;
461 /* Check for cases of unaligned fields that must be split. */
462 if (bitnum % modesize + bitsize > modesize)
463 return false;
  /* The memory must be sufficiently aligned for a MODESIZE access.
     This condition guarantees that the memory access will not
     touch anything after the end of the structure.  */
468 if (MEM_ALIGN (op0) < modesize)
469 return false;
471 /* Check for cases where the C++ memory model applies. */
472 if (bitregion_end != 0
473 && (bitnum - bitnum % modesize < bitregion_start
474 || bitnum - bitnum % modesize + modesize - 1 > bitregion_end))
475 return false;
477 return true;
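/* A sketch of the kind of access this governs, using an invented type:

     struct S { volatile unsigned int f : 3; } *p;
     p->f = 1;

   With -fstrict-volatile-bitfields in effect and FIELDMODE SImode, the
   checks above accept the access provided the structure is at least 32-bit
   aligned, so the store is done as a single SImode read-modify-write
   rather than as a narrower QImode access.  */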
480 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
481 bit number BITNUM can be treated as a simple value of mode MODE. */
483 static bool
484 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
485 unsigned HOST_WIDE_INT bitnum, machine_mode mode)
487 return (MEM_P (op0)
488 && bitnum % BITS_PER_UNIT == 0
489 && bitsize == GET_MODE_BITSIZE (mode)
490 && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
491 || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
492 && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
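/* By way of example (assumed numbers, with a 32-bit SImode): a 32-bit field
   starting at bit 64 of a sufficiently aligned memory operand satisfies the
   checks above for SImode and can therefore be read or written with one
   ordinary SImode move, with no masking or shifting required.  */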
495 /* Try to use instruction INSV to store VALUE into a field of OP0.
496 BITSIZE and BITNUM are as for store_bit_field. */
498 static bool
499 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
500 unsigned HOST_WIDE_INT bitsize,
501 unsigned HOST_WIDE_INT bitnum,
502 rtx value)
504 struct expand_operand ops[4];
505 rtx value1;
506 rtx xop0 = op0;
507 rtx_insn *last = get_last_insn ();
508 bool copy_back = false;
510 machine_mode op_mode = insv->field_mode;
511 unsigned int unit = GET_MODE_BITSIZE (op_mode);
512 if (bitsize == 0 || bitsize > unit)
513 return false;
515 if (MEM_P (xop0))
516 /* Get a reference to the first byte of the field. */
517 xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
518 &bitnum);
519 else
521 /* Convert from counting within OP0 to counting in OP_MODE. */
522 if (BYTES_BIG_ENDIAN)
523 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
525 /* If xop0 is a register, we need it in OP_MODE
526 to make it acceptable to the format of insv. */
527 if (GET_CODE (xop0) == SUBREG)
528 /* We can't just change the mode, because this might clobber op0,
529 and we will need the original value of op0 if insv fails. */
530 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
531 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
532 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
535 /* If the destination is a paradoxical subreg such that we need a
536 truncate to the inner mode, perform the insertion on a temporary and
537 truncate the result to the original destination. Note that we can't
538 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
539 X) 0)) is (reg:N X). */
540 if (GET_CODE (xop0) == SUBREG
541 && REG_P (SUBREG_REG (xop0))
542 && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
543 op_mode))
545 rtx tem = gen_reg_rtx (op_mode);
546 emit_move_insn (tem, xop0);
547 xop0 = tem;
548 copy_back = true;
  /* There is a similar overflow check at the start of store_bit_field_1,
     but that only handles the case where the field lies completely
     outside the register.  Here the field may lie only partly inside
     the register, so adjust BITSIZE for that partial overlap.  Without
     this fix, pr48335-2.c is broken on big-endian targets that have a
     bit-insert instruction, such as arm and aarch64.  */
  if (bitsize + bitnum > unit && bitnum < unit)
    {
      warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
	       "destination object, data truncated into %wu-bit",
	       bitsize, unit - bitnum);
      bitsize = unit - bitnum;
    }
566 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
567 "backwards" from the size of the unit we are inserting into.
568 Otherwise, we count bits from the most significant on a
569 BYTES/BITS_BIG_ENDIAN machine. */
571 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
572 bitnum = unit - bitsize - bitnum;
574 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
575 value1 = value;
576 if (GET_MODE (value) != op_mode)
578 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
580 /* Optimization: Don't bother really extending VALUE
581 if it has all the bits we will actually use. However,
582 if we must narrow it, be sure we do it correctly. */
584 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
586 rtx tmp;
588 tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
589 if (! tmp)
590 tmp = simplify_gen_subreg (op_mode,
591 force_reg (GET_MODE (value),
592 value1),
593 GET_MODE (value), 0);
594 value1 = tmp;
596 else
597 value1 = gen_lowpart (op_mode, value1);
599 else if (CONST_INT_P (value))
600 value1 = gen_int_mode (INTVAL (value), op_mode);
601 else
602 /* Parse phase is supposed to make VALUE's data type
603 match that of the component reference, which is a type
604 at least as wide as the field; so VALUE should have
605 a mode that corresponds to that type. */
606 gcc_assert (CONSTANT_P (value));
609 create_fixed_operand (&ops[0], xop0);
610 create_integer_operand (&ops[1], bitsize);
611 create_integer_operand (&ops[2], bitnum);
612 create_input_operand (&ops[3], value1, op_mode);
613 if (maybe_expand_insn (insv->icode, 4, ops))
615 if (copy_back)
616 convert_move (op0, xop0, true);
617 return true;
619 delete_insns_since (last);
620 return false;
623 /* A subroutine of store_bit_field, with the same arguments. Return true
624 if the operation could be implemented.
626 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
627 no other way of implementing the operation. If FALLBACK_P is false,
628 return false instead. */
630 static bool
631 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
632 unsigned HOST_WIDE_INT bitnum,
633 unsigned HOST_WIDE_INT bitregion_start,
634 unsigned HOST_WIDE_INT bitregion_end,
635 machine_mode fieldmode,
636 rtx value, bool fallback_p)
638 rtx op0 = str_rtx;
639 rtx orig_value;
641 while (GET_CODE (op0) == SUBREG)
643 /* The following line once was done only if WORDS_BIG_ENDIAN,
644 but I think that is a mistake. WORDS_BIG_ENDIAN is
645 meaningful at a much higher level; when structures are copied
646 between memory and regs, the higher-numbered regs
647 always get higher addresses. */
648 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
649 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
650 int byte_offset = 0;
652 /* Paradoxical subregs need special handling on big endian machines. */
653 if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
655 int difference = inner_mode_size - outer_mode_size;
657 if (WORDS_BIG_ENDIAN)
658 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
659 if (BYTES_BIG_ENDIAN)
660 byte_offset += difference % UNITS_PER_WORD;
662 else
663 byte_offset = SUBREG_BYTE (op0);
665 bitnum += byte_offset * BITS_PER_UNIT;
666 op0 = SUBREG_REG (op0);
669 /* No action is needed if the target is a register and if the field
670 lies completely outside that register. This can occur if the source
671 code contains an out-of-bounds access to a small array. */
672 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
673 return true;
675 /* Use vec_set patterns for inserting parts of vectors whenever
676 available. */
677 if (VECTOR_MODE_P (GET_MODE (op0))
678 && !MEM_P (op0)
679 && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
680 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
681 && bitsize == GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
682 && !(bitnum % GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
684 struct expand_operand ops[3];
685 machine_mode outermode = GET_MODE (op0);
686 machine_mode innermode = GET_MODE_INNER (outermode);
687 enum insn_code icode = optab_handler (vec_set_optab, outermode);
688 int pos = bitnum / GET_MODE_BITSIZE (innermode);
690 create_fixed_operand (&ops[0], op0);
691 create_input_operand (&ops[1], value, innermode);
692 create_integer_operand (&ops[2], pos);
693 if (maybe_expand_insn (icode, 3, ops))
694 return true;
697 /* If the target is a register, overwriting the entire object, or storing
698 a full-word or multi-word field can be done with just a SUBREG. */
699 if (!MEM_P (op0)
700 && bitsize == GET_MODE_BITSIZE (fieldmode)
701 && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
702 || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
704 /* Use the subreg machinery either to narrow OP0 to the required
705 words or to cope with mode punning between equal-sized modes.
706 In the latter case, use subreg on the rhs side, not lhs. */
707 rtx sub;
709 if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
711 sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
712 if (sub)
714 emit_move_insn (op0, sub);
715 return true;
718 else
720 sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
721 bitnum / BITS_PER_UNIT);
722 if (sub)
724 emit_move_insn (sub, value);
725 return true;
730 /* If the target is memory, storing any naturally aligned field can be
731 done with a simple store. For targets that support fast unaligned
732 memory, any naturally sized, unit aligned field can be done directly. */
733 if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
735 op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
736 emit_move_insn (op0, value);
737 return true;
740 /* Make sure we are playing with integral modes. Pun with subregs
741 if we aren't. This must come after the entire register case above,
742 since that case is valid for any mode. The following cases are only
743 valid for integral modes. */
745 machine_mode imode = int_mode_for_mode (GET_MODE (op0));
746 if (imode != GET_MODE (op0))
748 if (MEM_P (op0))
749 op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
750 else
752 gcc_assert (imode != BLKmode);
753 op0 = gen_lowpart (imode, op0);
758 /* Storing an lsb-aligned field in a register
759 can be done with a movstrict instruction. */
761 if (!MEM_P (op0)
762 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
763 && bitsize == GET_MODE_BITSIZE (fieldmode)
764 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
766 struct expand_operand ops[2];
767 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
768 rtx arg0 = op0;
769 unsigned HOST_WIDE_INT subreg_off;
771 if (GET_CODE (arg0) == SUBREG)
773 /* Else we've got some float mode source being extracted into
774 a different float mode destination -- this combination of
775 subregs results in Severe Tire Damage. */
776 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
777 || GET_MODE_CLASS (fieldmode) == MODE_INT
778 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
779 arg0 = SUBREG_REG (arg0);
782 subreg_off = bitnum / BITS_PER_UNIT;
783 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
785 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
787 create_fixed_operand (&ops[0], arg0);
788 /* Shrink the source operand to FIELDMODE. */
789 create_convert_operand_to (&ops[1], value, fieldmode, false);
790 if (maybe_expand_insn (icode, 2, ops))
791 return true;
795 /* Handle fields bigger than a word. */
797 if (bitsize > BITS_PER_WORD)
799 /* Here we transfer the words of the field
800 in the order least significant first.
801 This is because the most significant word is the one which may
802 be less than full.
803 However, only do that if the value is not BLKmode. */
805 unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
806 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
807 unsigned int i;
808 rtx_insn *last;
810 /* This is the mode we must force value to, so that there will be enough
811 subwords to extract. Note that fieldmode will often (always?) be
812 VOIDmode, because that is what store_field uses to indicate that this
813 is a bit field, but passing VOIDmode to operand_subword_force
814 is not allowed. */
815 fieldmode = GET_MODE (value);
816 if (fieldmode == VOIDmode)
817 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
819 last = get_last_insn ();
820 for (i = 0; i < nwords; i++)
822 /* If I is 0, use the low-order word in both field and target;
823 if I is 1, use the next to lowest word; and so on. */
824 unsigned int wordnum = (backwards
825 ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
826 - i - 1
827 : i);
828 unsigned int bit_offset = (backwards
829 ? MAX ((int) bitsize - ((int) i + 1)
830 * BITS_PER_WORD,
832 : (int) i * BITS_PER_WORD);
833 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
834 unsigned HOST_WIDE_INT new_bitsize =
835 MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
	  /* If the remaining chunk doesn't fill a full word, make sure
	     that the higher-order bits are the ones used on big-endian
	     machines.  */
840 if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
841 value_word = simplify_expand_binop (word_mode, lshr_optab,
842 value_word,
843 GEN_INT (BITS_PER_WORD
844 - new_bitsize),
845 NULL_RTX, true,
846 OPTAB_LIB_WIDEN);
848 if (!store_bit_field_1 (op0, new_bitsize,
849 bitnum + bit_offset,
850 bitregion_start, bitregion_end,
851 word_mode,
852 value_word, fallback_p))
854 delete_insns_since (last);
855 return false;
858 return true;
861 /* If VALUE has a floating-point or complex mode, access it as an
862 integer of the corresponding size. This can occur on a machine
863 with 64 bit registers that uses SFmode for float. It can also
864 occur for unaligned float or complex fields. */
865 orig_value = value;
866 if (GET_MODE (value) != VOIDmode
867 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
868 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
870 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
871 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
874 /* If OP0 is a multi-word register, narrow it to the affected word.
875 If the region spans two words, defer to store_split_bit_field. */
876 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
878 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
879 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
880 gcc_assert (op0);
881 bitnum %= BITS_PER_WORD;
882 if (bitnum + bitsize > BITS_PER_WORD)
884 if (!fallback_p)
885 return false;
887 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
888 bitregion_end, value);
889 return true;
893 /* From here on we can assume that the field to be stored in fits
894 within a word. If the destination is a register, it too fits
895 in a word. */
897 extraction_insn insv;
898 if (!MEM_P (op0)
899 && get_best_reg_extraction_insn (&insv, EP_insv,
900 GET_MODE_BITSIZE (GET_MODE (op0)),
901 fieldmode)
902 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
903 return true;
905 /* If OP0 is a memory, try copying it to a register and seeing if a
906 cheap register alternative is available. */
907 if (MEM_P (op0))
909 if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
910 fieldmode)
911 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
912 return true;
914 rtx_insn *last = get_last_insn ();
916 /* Try loading part of OP0 into a register, inserting the bitfield
917 into that, and then copying the result back to OP0. */
918 unsigned HOST_WIDE_INT bitpos;
919 rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
920 bitregion_start, bitregion_end,
921 fieldmode, &bitpos);
922 if (xop0)
924 rtx tempreg = copy_to_reg (xop0);
925 if (store_bit_field_1 (tempreg, bitsize, bitpos,
926 bitregion_start, bitregion_end,
927 fieldmode, orig_value, false))
929 emit_move_insn (xop0, tempreg);
930 return true;
932 delete_insns_since (last);
936 if (!fallback_p)
937 return false;
939 store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
940 bitregion_end, value);
941 return true;
944 /* Generate code to store value from rtx VALUE
945 into a bit-field within structure STR_RTX
946 containing BITSIZE bits starting at bit BITNUM.
   BITREGION_START is bitpos of the first bitfield in this region.
   BITREGION_END is the bitpos of the ending bitfield in this region.
   These two fields are 0 if the C++ memory model does not apply,
   or if we are not interested in keeping track of bitfield regions.
953 FIELDMODE is the machine-mode of the FIELD_DECL node for this field. */
955 void
956 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
957 unsigned HOST_WIDE_INT bitnum,
958 unsigned HOST_WIDE_INT bitregion_start,
959 unsigned HOST_WIDE_INT bitregion_end,
960 machine_mode fieldmode,
961 rtx value)
963 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
964 if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode,
965 bitregion_start, bitregion_end))
967 /* Storing of a full word can be done with a simple store.
968 We know here that the field can be accessed with one single
969 instruction. For targets that support unaligned memory,
970 an unaligned access may be necessary. */
971 if (bitsize == GET_MODE_BITSIZE (fieldmode))
973 str_rtx = adjust_bitfield_address (str_rtx, fieldmode,
974 bitnum / BITS_PER_UNIT);
975 gcc_assert (bitnum % BITS_PER_UNIT == 0);
976 emit_move_insn (str_rtx, value);
978 else
980 rtx temp;
982 str_rtx = narrow_bit_field_mem (str_rtx, fieldmode, bitsize, bitnum,
983 &bitnum);
984 gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (fieldmode));
985 temp = copy_to_reg (str_rtx);
986 if (!store_bit_field_1 (temp, bitsize, bitnum, 0, 0,
987 fieldmode, value, true))
988 gcc_unreachable ();
990 emit_move_insn (str_rtx, temp);
993 return;
996 /* Under the C++0x memory model, we must not touch bits outside the
997 bit region. Adjust the address to start at the beginning of the
998 bit region. */
999 if (MEM_P (str_rtx) && bitregion_start > 0)
1001 machine_mode bestmode;
1002 HOST_WIDE_INT offset, size;
1004 gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
1006 offset = bitregion_start / BITS_PER_UNIT;
1007 bitnum -= bitregion_start;
1008 size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
1009 bitregion_end -= bitregion_start;
1010 bitregion_start = 0;
1011 bestmode = get_best_mode (bitsize, bitnum,
1012 bitregion_start, bitregion_end,
1013 MEM_ALIGN (str_rtx), VOIDmode,
1014 MEM_VOLATILE_P (str_rtx));
1015 str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
1018 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1019 bitregion_start, bitregion_end,
1020 fieldmode, value, true))
1021 gcc_unreachable ();
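/* A hypothetical call, roughly what the RTL expander might emit for an
   assignment "x.b = v" where b occupies bits 3..9 of a 32-bit structure
   (names, modes and numbers invented for illustration): bitsize 7,
   bitnum 3, a bit region covering bits 0..31 and the field's QImode, i.e.

     store_bit_field (mem, 7, 3, 0, 31, QImode, value);

   Depending on the operands this resolves to a vec_set pattern, a plain
   store through a subreg, a target insv instruction, or the shift-and-mask
   fallback in store_fixed_bit_field.  */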
1024 /* Use shifts and boolean operations to store VALUE into a bit field of
1025 width BITSIZE in OP0, starting at bit BITNUM. */
1027 static void
1028 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1029 unsigned HOST_WIDE_INT bitnum,
1030 unsigned HOST_WIDE_INT bitregion_start,
1031 unsigned HOST_WIDE_INT bitregion_end,
1032 rtx value)
1034 /* There is a case not handled here:
1035 a structure with a known alignment of just a halfword
1036 and a field split across two aligned halfwords within the structure.
1037 Or likewise a structure with a known alignment of just a byte
1038 and a field split across two bytes.
1039 Such cases are not supposed to be able to occur. */
1041 if (MEM_P (op0))
1043 machine_mode mode = GET_MODE (op0);
1044 if (GET_MODE_BITSIZE (mode) == 0
1045 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
1046 mode = word_mode;
1047 mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1048 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
1050 if (mode == VOIDmode)
1052 /* The only way this should occur is if the field spans word
1053 boundaries. */
1054 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
1055 bitregion_end, value);
1056 return;
1059 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1062 store_fixed_bit_field_1 (op0, bitsize, bitnum, value);
1065 /* Helper function for store_fixed_bit_field, stores
1066 the bit field always using the MODE of OP0. */
1068 static void
1069 store_fixed_bit_field_1 (rtx op0, unsigned HOST_WIDE_INT bitsize,
1070 unsigned HOST_WIDE_INT bitnum,
1071 rtx value)
1073 machine_mode mode;
1074 rtx temp;
1075 int all_zero = 0;
1076 int all_one = 0;
1078 mode = GET_MODE (op0);
1079 gcc_assert (SCALAR_INT_MODE_P (mode));
1081 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1082 for invalid input, such as f5 from gcc.dg/pr48335-2.c. */
1084 if (BYTES_BIG_ENDIAN)
1085 /* BITNUM is the distance between our msb
1086 and that of the containing datum.
1087 Convert it to the distance from the lsb. */
1088 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1090 /* Now BITNUM is always the distance between our lsb
1091 and that of OP0. */
1093 /* Shift VALUE left by BITNUM bits. If VALUE is not constant,
1094 we must first convert its mode to MODE. */
1096 if (CONST_INT_P (value))
1098 unsigned HOST_WIDE_INT v = UINTVAL (value);
1100 if (bitsize < HOST_BITS_PER_WIDE_INT)
1101 v &= ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1;
1103 if (v == 0)
1104 all_zero = 1;
1105 else if ((bitsize < HOST_BITS_PER_WIDE_INT
1106 && v == ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1)
1107 || (bitsize == HOST_BITS_PER_WIDE_INT
1108 && v == (unsigned HOST_WIDE_INT) -1))
1109 all_one = 1;
1111 value = lshift_value (mode, v, bitnum);
1113 else
1115 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1116 && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1118 if (GET_MODE (value) != mode)
1119 value = convert_to_mode (mode, value, 1);
1121 if (must_and)
1122 value = expand_binop (mode, and_optab, value,
1123 mask_rtx (mode, 0, bitsize, 0),
1124 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1125 if (bitnum > 0)
1126 value = expand_shift (LSHIFT_EXPR, mode, value,
1127 bitnum, NULL_RTX, 1);
1130 /* Now clear the chosen bits in OP0,
1131 except that if VALUE is -1 we need not bother. */
1132 /* We keep the intermediates in registers to allow CSE to combine
1133 consecutive bitfield assignments. */
1135 temp = force_reg (mode, op0);
1137 if (! all_one)
1139 temp = expand_binop (mode, and_optab, temp,
1140 mask_rtx (mode, bitnum, bitsize, 1),
1141 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1142 temp = force_reg (mode, temp);
1145 /* Now logical-or VALUE into OP0, unless it is zero. */
1147 if (! all_zero)
1149 temp = expand_binop (mode, ior_optab, temp, value,
1150 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1151 temp = force_reg (mode, temp);
1154 if (op0 != temp)
1156 op0 = copy_rtx (op0);
1157 emit_move_insn (op0, temp);
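/* A concrete trace of the shift-and-mask sequence above, with invented
   numbers and BYTES_BIG_ENDIAN assumed false: storing the constant 5 into a
   3-bit field at bit 4 of a QImode byte currently holding 0xff first ANDs
   the byte with mask_rtx (QImode, 4, 3, 1) == 0x8f, giving 0x8f, then ORs
   in 5 << 4 == 0x50, leaving 0xdf; bits 4..6 now hold 5 and every other bit
   is preserved.  */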
1161 /* Store a bit field that is split across multiple accessible memory objects.
1163 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1164 BITSIZE is the field width; BITPOS the position of its first bit
1165 (within the word).
1166 VALUE is the value to store.
1168 This does not yet handle fields wider than BITS_PER_WORD. */
1170 static void
1171 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1172 unsigned HOST_WIDE_INT bitpos,
1173 unsigned HOST_WIDE_INT bitregion_start,
1174 unsigned HOST_WIDE_INT bitregion_end,
1175 rtx value)
1177 unsigned int unit;
1178 unsigned int bitsdone = 0;
  /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle
     that much at a time.  */
1182 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1183 unit = BITS_PER_WORD;
1184 else
1185 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1187 /* If OP0 is a memory with a mode, then UNIT must not be larger than
1188 OP0's mode as well. Otherwise, store_fixed_bit_field will call us
1189 again, and we will mutually recurse forever. */
1190 if (MEM_P (op0) && GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1191 unit = MIN (unit, GET_MODE_BITSIZE (GET_MODE (op0)));
1193 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1194 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1195 that VALUE might be a floating-point constant. */
1196 if (CONSTANT_P (value) && !CONST_INT_P (value))
1198 rtx word = gen_lowpart_common (word_mode, value);
1200 if (word && (value != word))
1201 value = word;
1202 else
1203 value = gen_lowpart_common (word_mode,
1204 force_reg (GET_MODE (value) != VOIDmode
1205 ? GET_MODE (value)
1206 : word_mode, value));
1209 while (bitsdone < bitsize)
1211 unsigned HOST_WIDE_INT thissize;
1212 rtx part, word;
1213 unsigned HOST_WIDE_INT thispos;
1214 unsigned HOST_WIDE_INT offset;
1216 offset = (bitpos + bitsdone) / unit;
1217 thispos = (bitpos + bitsdone) % unit;
      /* When the region of bytes we can touch is restricted, decrease
	 UNIT close to the end of the region as needed.  If op0 is a REG
	 or SUBREG of REG, don't do this, as there can't be data races
	 on a register and we can expand shorter code in some cases.  */
1223 if (bitregion_end
1224 && unit > BITS_PER_UNIT
1225 && bitpos + bitsdone - thispos + unit > bitregion_end + 1
1226 && !REG_P (op0)
1227 && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1229 unit = unit / 2;
1230 continue;
1233 /* THISSIZE must not overrun a word boundary. Otherwise,
1234 store_fixed_bit_field will call us again, and we will mutually
1235 recurse forever. */
1236 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1237 thissize = MIN (thissize, unit - thispos);
1239 if (BYTES_BIG_ENDIAN)
1241 /* Fetch successively less significant portions. */
1242 if (CONST_INT_P (value))
1243 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1244 >> (bitsize - bitsdone - thissize))
1245 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1246 else
1248 int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1249 /* The args are chosen so that the last part includes the
1250 lsb. Give extract_bit_field the value it needs (with
1251 endianness compensation) to fetch the piece we want. */
1252 part = extract_fixed_bit_field (word_mode, value, thissize,
1253 total_bits - bitsize + bitsdone,
1254 NULL_RTX, 1);
1257 else
1259 /* Fetch successively more significant portions. */
1260 if (CONST_INT_P (value))
1261 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1262 >> bitsdone)
1263 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1264 else
1265 part = extract_fixed_bit_field (word_mode, value, thissize,
1266 bitsdone, NULL_RTX, 1);
1269 /* If OP0 is a register, then handle OFFSET here.
1271 When handling multiword bitfields, extract_bit_field may pass
1272 down a word_mode SUBREG of a larger REG for a bitfield that actually
1273 crosses a word boundary. Thus, for a SUBREG, we must find
1274 the current word starting from the base register. */
1275 if (GET_CODE (op0) == SUBREG)
1277 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD)
1278 + (offset * unit / BITS_PER_WORD);
1279 machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
1280 if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1281 word = word_offset ? const0_rtx : op0;
1282 else
1283 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1284 GET_MODE (SUBREG_REG (op0)));
1285 offset &= BITS_PER_WORD / unit - 1;
1287 else if (REG_P (op0))
1289 machine_mode op0_mode = GET_MODE (op0);
1290 if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1291 word = offset ? const0_rtx : op0;
1292 else
1293 word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
1294 GET_MODE (op0));
1295 offset &= BITS_PER_WORD / unit - 1;
1297 else
1298 word = op0;
1300 /* OFFSET is in UNITs, and UNIT is in bits. If WORD is const0_rtx,
1301 it is just an out-of-bounds access. Ignore it. */
1302 if (word != const0_rtx)
1303 store_fixed_bit_field (word, thissize, offset * unit + thispos,
1304 bitregion_start, bitregion_end, part);
1305 bitsdone += thissize;
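/* An illustrative split, assuming 32-bit words, a REG destination and
   BYTES_BIG_ENDIAN false: a 16-bit field at BITPOS 24 is handled in two
   rounds.  The first round stores thissize = 8 bits (limited by
   unit - thispos = 32 - 24) into bits 24..31 of word 0; the second stores
   the remaining 8 bits into bits 0..7 of word 1.  */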
1309 /* A subroutine of extract_bit_field_1 that converts return value X
1310 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1311 to extract_bit_field. */
1313 static rtx
1314 convert_extracted_bit_field (rtx x, machine_mode mode,
1315 machine_mode tmode, bool unsignedp)
1317 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1318 return x;
1320 /* If the x mode is not a scalar integral, first convert to the
1321 integer mode of that size and then access it as a floating-point
1322 value via a SUBREG. */
1323 if (!SCALAR_INT_MODE_P (tmode))
1325 machine_mode smode;
1327 smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1328 x = convert_to_mode (smode, x, unsignedp);
1329 x = force_reg (smode, x);
1330 return gen_lowpart (tmode, x);
1333 return convert_to_mode (tmode, x, unsignedp);
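/* For example, assuming a target where SFmode and SImode are both 32 bits:
   a request for an SFmode result is satisfied by first converting the
   extracted bits to SImode and then taking an SFmode lowpart of that
   register, since a float mode cannot be the direct result of the integer
   shifts and masks used elsewhere in this file.  */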
1336 /* Try to use an ext(z)v pattern to extract a field from OP0.
1337 Return the extracted value on success, otherwise return null.
1338 EXT_MODE is the mode of the extraction and the other arguments
1339 are as for extract_bit_field. */
1341 static rtx
1342 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1343 unsigned HOST_WIDE_INT bitsize,
1344 unsigned HOST_WIDE_INT bitnum,
1345 int unsignedp, rtx target,
1346 machine_mode mode, machine_mode tmode)
1348 struct expand_operand ops[4];
1349 rtx spec_target = target;
1350 rtx spec_target_subreg = 0;
1351 machine_mode ext_mode = extv->field_mode;
1352 unsigned unit = GET_MODE_BITSIZE (ext_mode);
1354 if (bitsize == 0 || unit < bitsize)
1355 return NULL_RTX;
1357 if (MEM_P (op0))
1358 /* Get a reference to the first byte of the field. */
1359 op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1360 &bitnum);
1361 else
1363 /* Convert from counting within OP0 to counting in EXT_MODE. */
1364 if (BYTES_BIG_ENDIAN)
1365 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1367 /* If op0 is a register, we need it in EXT_MODE to make it
1368 acceptable to the format of ext(z)v. */
1369 if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1370 return NULL_RTX;
1371 if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1372 op0 = gen_lowpart_SUBREG (ext_mode, op0);
1375 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1376 "backwards" from the size of the unit we are extracting from.
1377 Otherwise, we count bits from the most significant on a
1378 BYTES/BITS_BIG_ENDIAN machine. */
1380 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1381 bitnum = unit - bitsize - bitnum;
1383 if (target == 0)
1384 target = spec_target = gen_reg_rtx (tmode);
1386 if (GET_MODE (target) != ext_mode)
1388 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1389 between the mode of the extraction (word_mode) and the target
1390 mode. Instead, create a temporary and use convert_move to set
1391 the target. */
1392 if (REG_P (target)
1393 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1395 target = gen_lowpart (ext_mode, target);
1396 if (GET_MODE_PRECISION (ext_mode)
1397 > GET_MODE_PRECISION (GET_MODE (spec_target)))
1398 spec_target_subreg = target;
1400 else
1401 target = gen_reg_rtx (ext_mode);
1404 create_output_operand (&ops[0], target, ext_mode);
1405 create_fixed_operand (&ops[1], op0);
1406 create_integer_operand (&ops[2], bitsize);
1407 create_integer_operand (&ops[3], bitnum);
1408 if (maybe_expand_insn (extv->icode, 4, ops))
1410 target = ops[0].value;
1411 if (target == spec_target)
1412 return target;
1413 if (target == spec_target_subreg)
1414 return spec_target;
1415 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1417 return NULL_RTX;
1420 /* A subroutine of extract_bit_field, with the same arguments.
1421 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1422 if we can find no other means of implementing the operation.
1423 if FALLBACK_P is false, return NULL instead. */
1425 static rtx
1426 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1427 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1428 machine_mode mode, machine_mode tmode,
1429 bool fallback_p)
1431 rtx op0 = str_rtx;
1432 machine_mode int_mode;
1433 machine_mode mode1;
1435 if (tmode == VOIDmode)
1436 tmode = mode;
1438 while (GET_CODE (op0) == SUBREG)
1440 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1441 op0 = SUBREG_REG (op0);
1444 /* If we have an out-of-bounds access to a register, just return an
1445 uninitialized register of the required mode. This can occur if the
1446 source code contains an out-of-bounds access to a small array. */
1447 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1448 return gen_reg_rtx (tmode);
1450 if (REG_P (op0)
1451 && mode == GET_MODE (op0)
1452 && bitnum == 0
1453 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1455 /* We're trying to extract a full register from itself. */
1456 return op0;
1459 /* See if we can get a better vector mode before extracting. */
1460 if (VECTOR_MODE_P (GET_MODE (op0))
1461 && !MEM_P (op0)
1462 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1464 machine_mode new_mode;
1466 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1467 new_mode = MIN_MODE_VECTOR_FLOAT;
1468 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1469 new_mode = MIN_MODE_VECTOR_FRACT;
1470 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1471 new_mode = MIN_MODE_VECTOR_UFRACT;
1472 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1473 new_mode = MIN_MODE_VECTOR_ACCUM;
1474 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1475 new_mode = MIN_MODE_VECTOR_UACCUM;
1476 else
1477 new_mode = MIN_MODE_VECTOR_INT;
1479 for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1480 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1481 && targetm.vector_mode_supported_p (new_mode))
1482 break;
1483 if (new_mode != VOIDmode)
1484 op0 = gen_lowpart (new_mode, op0);
1487 /* Use vec_extract patterns for extracting parts of vectors whenever
1488 available. */
1489 if (VECTOR_MODE_P (GET_MODE (op0))
1490 && !MEM_P (op0)
1491 && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1492 && ((bitnum + bitsize - 1) / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
1493 == bitnum / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
1495 struct expand_operand ops[3];
1496 machine_mode outermode = GET_MODE (op0);
1497 machine_mode innermode = GET_MODE_INNER (outermode);
1498 enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1499 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1501 create_output_operand (&ops[0], target, innermode);
1502 create_input_operand (&ops[1], op0, outermode);
1503 create_integer_operand (&ops[2], pos);
1504 if (maybe_expand_insn (icode, 3, ops))
1506 target = ops[0].value;
1507 if (GET_MODE (target) != mode)
1508 return gen_lowpart (tmode, target);
1509 return target;
1513 /* Make sure we are playing with integral modes. Pun with subregs
1514 if we aren't. */
1516 machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1517 if (imode != GET_MODE (op0))
1519 if (MEM_P (op0))
1520 op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
1521 else if (imode != BLKmode)
1523 op0 = gen_lowpart (imode, op0);
1525 /* If we got a SUBREG, force it into a register since we
1526 aren't going to be able to do another SUBREG on it. */
1527 if (GET_CODE (op0) == SUBREG)
1528 op0 = force_reg (imode, op0);
1530 else if (REG_P (op0))
1532 rtx reg, subreg;
1533 imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1534 MODE_INT);
1535 reg = gen_reg_rtx (imode);
1536 subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1537 emit_move_insn (subreg, op0);
1538 op0 = reg;
1539 bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1541 else
1543 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1544 rtx mem = assign_stack_temp (GET_MODE (op0), size);
1545 emit_move_insn (mem, op0);
1546 op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1551 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1552 If that's wrong, the solution is to test for it and set TARGET to 0
1553 if needed. */
1555 /* Get the mode of the field to use for atomic access or subreg
1556 conversion. */
1557 mode1 = mode;
1558 if (SCALAR_INT_MODE_P (tmode))
1560 machine_mode try_mode = mode_for_size (bitsize,
1561 GET_MODE_CLASS (tmode), 0);
1562 if (try_mode != BLKmode)
1563 mode1 = try_mode;
1565 gcc_assert (mode1 != BLKmode);
1567 /* Extraction of a full MODE1 value can be done with a subreg as long
1568 as the least significant bit of the value is the least significant
1569 bit of either OP0 or a word of OP0. */
1570 if (!MEM_P (op0)
1571 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1572 && bitsize == GET_MODE_BITSIZE (mode1)
1573 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1575 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1576 bitnum / BITS_PER_UNIT);
1577 if (sub)
1578 return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1581 /* Extraction of a full MODE1 value can be done with a load as long as
1582 the field is on a byte boundary and is sufficiently aligned. */
1583 if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1585 op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1586 return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1589 /* Handle fields bigger than a word. */
1591 if (bitsize > BITS_PER_WORD)
1593 /* Here we transfer the words of the field
1594 in the order least significant first.
1595 This is because the most significant word is the one which may
1596 be less than full. */
1598 unsigned int backwards = WORDS_BIG_ENDIAN;
1599 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1600 unsigned int i;
1601 rtx_insn *last;
1603 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1604 target = gen_reg_rtx (mode);
1606 /* In case we're about to clobber a base register or something
1607 (see gcc.c-torture/execute/20040625-1.c). */
1608 if (reg_mentioned_p (target, str_rtx))
1609 target = gen_reg_rtx (mode);
1611 /* Indicate for flow that the entire target reg is being set. */
1612 emit_clobber (target);
1614 last = get_last_insn ();
1615 for (i = 0; i < nwords; i++)
1617 /* If I is 0, use the low-order word in both field and target;
1618 if I is 1, use the next to lowest word; and so on. */
1619 /* Word number in TARGET to use. */
1620 unsigned int wordnum
1621 = (backwards
1622 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1623 : i);
1624 /* Offset from start of field in OP0. */
1625 unsigned int bit_offset = (backwards
1626 ? MAX ((int) bitsize - ((int) i + 1)
1627 * BITS_PER_WORD,
1629 : (int) i * BITS_PER_WORD);
1630 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1631 rtx result_part
1632 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1633 bitsize - i * BITS_PER_WORD),
1634 bitnum + bit_offset, 1, target_part,
1635 mode, word_mode, fallback_p);
1637 gcc_assert (target_part);
1638 if (!result_part)
1640 delete_insns_since (last);
1641 return NULL;
1644 if (result_part != target_part)
1645 emit_move_insn (target_part, result_part);
1648 if (unsignedp)
1650 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1651 need to be zero'd out. */
1652 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1654 unsigned int i, total_words;
1656 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1657 for (i = nwords; i < total_words; i++)
1658 emit_move_insn
1659 (operand_subword (target,
1660 backwards ? total_words - i - 1 : i,
1661 1, VOIDmode),
1662 const0_rtx);
1664 return target;
1667 /* Signed bit field: sign-extend with two arithmetic shifts. */
1668 target = expand_shift (LSHIFT_EXPR, mode, target,
1669 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1670 return expand_shift (RSHIFT_EXPR, mode, target,
1671 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1674 /* If OP0 is a multi-word register, narrow it to the affected word.
1675 If the region spans two words, defer to extract_split_bit_field. */
1676 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1678 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1679 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1680 bitnum %= BITS_PER_WORD;
1681 if (bitnum + bitsize > BITS_PER_WORD)
1683 if (!fallback_p)
1684 return NULL_RTX;
1685 target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1686 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1690 /* From here on we know the desired field is smaller than a word.
1691 If OP0 is a register, it too fits within a word. */
1692 enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1693 extraction_insn extv;
1694 if (!MEM_P (op0)
1695 /* ??? We could limit the structure size to the part of OP0 that
1696 contains the field, with appropriate checks for endianness
1697 and TRULY_NOOP_TRUNCATION. */
1698 && get_best_reg_extraction_insn (&extv, pattern,
1699 GET_MODE_BITSIZE (GET_MODE (op0)),
1700 tmode))
1702 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1703 unsignedp, target, mode,
1704 tmode);
1705 if (result)
1706 return result;
1709 /* If OP0 is a memory, try copying it to a register and seeing if a
1710 cheap register alternative is available. */
1711 if (MEM_P (op0))
1713 if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1714 tmode))
1716 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1717 bitnum, unsignedp,
1718 target, mode,
1719 tmode);
1720 if (result)
1721 return result;
1724 rtx_insn *last = get_last_insn ();
1726 /* Try loading part of OP0 into a register and extracting the
1727 bitfield from that. */
1728 unsigned HOST_WIDE_INT bitpos;
1729 rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1730 0, 0, tmode, &bitpos);
1731 if (xop0)
1733 xop0 = copy_to_reg (xop0);
1734 rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1735 unsignedp, target,
1736 mode, tmode, false);
1737 if (result)
1738 return result;
1739 delete_insns_since (last);
1743 if (!fallback_p)
1744 return NULL;
1746 /* Find a correspondingly-sized integer field, so we can apply
1747 shifts and masks to it. */
1748 int_mode = int_mode_for_mode (tmode);
1749 if (int_mode == BLKmode)
1750 int_mode = int_mode_for_mode (mode);
1751 /* Should probably push op0 out to memory and then do a load. */
1752 gcc_assert (int_mode != BLKmode);
1754 target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum,
1755 target, unsignedp);
1756 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1759 /* Generate code to extract a bit-field from STR_RTX
1760 containing BITSIZE bits, starting at BITNUM,
1761 and put it in TARGET if possible (if TARGET is nonzero).
1762 Regardless of TARGET, we return the rtx for where the value is placed.
1764 STR_RTX is the structure containing the byte (a REG or MEM).
1765 UNSIGNEDP is nonzero if this is an unsigned bit field.
1766 MODE is the natural mode of the field value once extracted.
1767 TMODE is the mode the caller would like the value to have;
1768 but the value may be returned with type MODE instead.
1770 If a TARGET is specified and we can store in it at no extra cost,
1771 we do so, and return TARGET.
1772 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1773 if they are equally easy. */
1776 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1777 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1778 machine_mode mode, machine_mode tmode)
1780 machine_mode mode1;
1782 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1783 if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1784 mode1 = GET_MODE (str_rtx);
1785 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1786 mode1 = GET_MODE (target);
1787 else
1788 mode1 = tmode;
1790 if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, mode1, 0, 0))
1792 /* Extraction of a full MODE1 value can be done with a simple load.
1793 We know here that the field can be accessed with one single
1794 instruction. For targets that support unaligned memory,
1795 an unaligned access may be necessary. */
1796 if (bitsize == GET_MODE_BITSIZE (mode1))
1798 rtx result = adjust_bitfield_address (str_rtx, mode1,
1799 bitnum / BITS_PER_UNIT);
1800 gcc_assert (bitnum % BITS_PER_UNIT == 0);
1801 return convert_extracted_bit_field (result, mode, tmode, unsignedp);
1804 str_rtx = narrow_bit_field_mem (str_rtx, mode1, bitsize, bitnum,
1805 &bitnum);
1806 gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (mode1));
1807 str_rtx = copy_to_reg (str_rtx);
1810 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1811 target, mode, tmode, true);
1814 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1815 from bit BITNUM of OP0.
1817 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1818 If TARGET is nonzero, attempts to store the value there
1819 and return TARGET, but this is not guaranteed.
1820 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
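/* Example (added for illustration): for an unsigned 8-bit field starting
   at bit 4 of a 32-bit word, the code below amounts to
       (op0 >> 4) & 0xff
   i.e. a logical right shift by BITNUM followed by an AND with a mask of
   BITSIZE ones (see extract_fixed_bit_field_1).  */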
1822 static rtx
1823 extract_fixed_bit_field (machine_mode tmode, rtx op0,
1824 unsigned HOST_WIDE_INT bitsize,
1825 unsigned HOST_WIDE_INT bitnum, rtx target,
1826 int unsignedp)
1828 if (MEM_P (op0))
1830 machine_mode mode
1831 = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), word_mode,
1832 MEM_VOLATILE_P (op0));
1834 if (mode == VOIDmode)
1835 /* The only way this should occur is if the field spans word
1836 boundaries. */
1837 return extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1839 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1842 return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
1843 target, unsignedp);
1846 /* Helper function for extract_fixed_bit_field, extracts
1847 the bit field always using the MODE of OP0. */
1849 static rtx
1850 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0,
1851 unsigned HOST_WIDE_INT bitsize,
1852 unsigned HOST_WIDE_INT bitnum, rtx target,
1853 int unsignedp)
1855 machine_mode mode = GET_MODE (op0);
1856 gcc_assert (SCALAR_INT_MODE_P (mode));
1858 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1859 for invalid input, such as extract equivalent of f5 from
1860 gcc.dg/pr48335-2.c. */
1862 if (BYTES_BIG_ENDIAN)
1863 /* BITNUM is the distance between our msb and that of OP0.
1864 Convert it to the distance from the lsb. */
1865 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1867 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
1868 We have reduced the big-endian case to the little-endian case. */
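/* Example (added for illustration): with a 32-bit MODE, BITSIZE 5 and a
   big-endian BITNUM of 8 (the field's msb lies 8 bits below OP0's msb),
   the conversion above gives BITNUM = 32 - 5 - 8 = 19, the distance from
   the field's lsb to OP0's lsb.  */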
1870 if (unsignedp)
1872 if (bitnum)
1874 /* If the field does not already start at the lsb,
1875 shift it so it does. */
1876 /* Maybe propagate the target for the shift. */
1877 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1878 if (tmode != mode)
1879 subtarget = 0;
1880 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
1882 /* Convert the value to the desired mode. */
1883 if (mode != tmode)
1884 op0 = convert_to_mode (tmode, op0, 1);
1886 /* Unless the msb of the field used to be the msb when we shifted,
1887 mask out the upper bits. */
1889 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
1890 return expand_binop (GET_MODE (op0), and_optab, op0,
1891 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1892 target, 1, OPTAB_LIB_WIDEN);
1893 return op0;
1896 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1897 then arithmetic-shift its lsb to the lsb of the word. */
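/* Example (added for illustration): for a signed 3-bit field at bit 2 of
   a QImode value, the code below shifts left by 8 - (3 + 2) = 3 so the
   field's msb becomes the register's msb, then arithmetic-shifts right by
   8 - 3 = 5 to sign-extend the field.  */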
1898 op0 = force_reg (mode, op0);
1900 /* Find the narrowest integer mode that contains the field. */
1902 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1903 mode = GET_MODE_WIDER_MODE (mode))
1904 if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
1906 op0 = convert_to_mode (mode, op0, 0);
1907 break;
1910 if (mode != tmode)
1911 target = 0;
1913 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
1915 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
1916 /* Maybe propagate the target for the shift. */
1917 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1918 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1921 return expand_shift (RSHIFT_EXPR, mode, op0,
1922 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1925 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1926 VALUE << BITPOS. */
1928 static rtx
1929 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
1930 int bitpos)
1932 return immed_wide_int_const (wi::lshift (value, bitpos), mode);
1935 /* Extract a bit field that is split across two words
1936 and return an RTX for the result.
1938 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1939 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1940 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend. */
1942 static rtx
1943 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1944 unsigned HOST_WIDE_INT bitpos, int unsignedp)
1946 unsigned int unit;
1947 unsigned int bitsdone = 0;
1948 rtx result = NULL_RTX;
1949 int first = 1;
1951 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1952 much at a time. */
1953 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1954 unit = BITS_PER_WORD;
1955 else
1956 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1958 while (bitsdone < bitsize)
1960 unsigned HOST_WIDE_INT thissize;
1961 rtx part, word;
1962 unsigned HOST_WIDE_INT thispos;
1963 unsigned HOST_WIDE_INT offset;
1965 offset = (bitpos + bitsdone) / unit;
1966 thispos = (bitpos + bitsdone) % unit;
1968 /* THISSIZE must not overrun a word boundary. Otherwise,
1969 extract_fixed_bit_field will call us again, and we will mutually
1970 recurse forever. */
1971 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1972 thissize = MIN (thissize, unit - thispos);
1974 /* If OP0 is a register, then handle OFFSET here.
1976 When handling multiword bitfields, extract_bit_field may pass
1977 down a word_mode SUBREG of a larger REG for a bitfield that actually
1978 crosses a word boundary. Thus, for a SUBREG, we must find
1979 the current word starting from the base register. */
1980 if (GET_CODE (op0) == SUBREG)
1982 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1983 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1984 GET_MODE (SUBREG_REG (op0)));
1985 offset = 0;
1987 else if (REG_P (op0))
1989 word = operand_subword_force (op0, offset, GET_MODE (op0));
1990 offset = 0;
1992 else
1993 word = op0;
1995 /* Extract the parts in bit-counting order,
1996 whose placement in the result is determined by BYTES_BIG_ENDIAN.
1997 OFFSET is in UNITs, and UNIT is in bits. */
1998 part = extract_fixed_bit_field (word_mode, word, thissize,
1999 offset * unit + thispos, 0, 1);
2000 bitsdone += thissize;
2002 /* Shift this part into place for the result. */
2003 if (BYTES_BIG_ENDIAN)
2005 if (bitsize != bitsdone)
2006 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2007 bitsize - bitsdone, 0, 1);
2009 else
2011 if (bitsdone != thissize)
2012 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2013 bitsdone - thissize, 0, 1);
2016 if (first)
2017 result = part;
2018 else
2019 /* Combine the parts with bitwise or. This works
2020 because we extracted each part as an unsigned bit field. */
2021 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2022 OPTAB_LIB_WIDEN);
2024 first = 0;
2027 /* Unsigned bit field: we are done. */
2028 if (unsignedp)
2029 return result;
2030 /* Signed bit field: sign-extend with two arithmetic shifts. */
2031 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2032 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2033 return expand_shift (RSHIFT_EXPR, word_mode, result,
2034 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2037 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2038 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2039 MODE, fill the upper bits with zeros. Fail if the layout of either
2040 mode is unknown (as for CC modes) or if the extraction would involve
2041 unprofitable mode punning. Return the value on success, otherwise
2042 return null.
2044 This is different from gen_lowpart* in these respects:
2046 - the returned value must always be considered an rvalue
2048 - when MODE is wider than SRC_MODE, the extraction involves
2049 a zero extension
2051 - when MODE is smaller than SRC_MODE, the extraction involves
2052 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2054 In other words, this routine performs a computation, whereas the
2055 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2056 operations. */
2059 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2061 machine_mode int_mode, src_int_mode;
2063 if (mode == src_mode)
2064 return src;
2066 if (CONSTANT_P (src))
2068 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2069 fails, it will happily create (subreg (symbol_ref)) or similar
2070 invalid SUBREGs. */
2071 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2072 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2073 if (ret)
2074 return ret;
2076 if (GET_MODE (src) == VOIDmode
2077 || !validate_subreg (mode, src_mode, src, byte))
2078 return NULL_RTX;
2080 src = force_reg (GET_MODE (src), src);
2081 return gen_rtx_SUBREG (mode, src, byte);
2084 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2085 return NULL_RTX;
2087 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2088 && MODES_TIEABLE_P (mode, src_mode))
2090 rtx x = gen_lowpart_common (mode, src);
2091 if (x)
2092 return x;
2095 src_int_mode = int_mode_for_mode (src_mode);
2096 int_mode = int_mode_for_mode (mode);
2097 if (src_int_mode == BLKmode || int_mode == BLKmode)
2098 return NULL_RTX;
2100 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2101 return NULL_RTX;
2102 if (!MODES_TIEABLE_P (int_mode, mode))
2103 return NULL_RTX;
2105 src = gen_lowpart (src_int_mode, src);
2106 src = convert_modes (int_mode, src_int_mode, src, true);
2107 src = gen_lowpart (mode, src);
2108 return src;
2111 /* Add INC into TARGET. */
2113 void
2114 expand_inc (rtx target, rtx inc)
2116 rtx value = expand_binop (GET_MODE (target), add_optab,
2117 target, inc,
2118 target, 0, OPTAB_LIB_WIDEN);
2119 if (value != target)
2120 emit_move_insn (target, value);
2123 /* Subtract DEC from TARGET. */
2125 void
2126 expand_dec (rtx target, rtx dec)
2128 rtx value = expand_binop (GET_MODE (target), sub_optab,
2129 target, dec,
2130 target, 0, OPTAB_LIB_WIDEN);
2131 if (value != target)
2132 emit_move_insn (target, value);
2135 /* Output a shift instruction for expression code CODE,
2136 with SHIFTED being the rtx for the value to shift,
2137 and AMOUNT the rtx for the amount to shift by.
2138 Store the result in the rtx TARGET, if that is convenient.
2139 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2140 Return the rtx for where the value is. */
2142 static rtx
2143 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2144 rtx amount, rtx target, int unsignedp)
2146 rtx op1, temp = 0;
2147 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2148 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2149 optab lshift_optab = ashl_optab;
2150 optab rshift_arith_optab = ashr_optab;
2151 optab rshift_uns_optab = lshr_optab;
2152 optab lrotate_optab = rotl_optab;
2153 optab rrotate_optab = rotr_optab;
2154 machine_mode op1_mode;
2155 machine_mode scalar_mode = mode;
2156 int attempt;
2157 bool speed = optimize_insn_for_speed_p ();
2159 if (VECTOR_MODE_P (mode))
2160 scalar_mode = GET_MODE_INNER (mode);
2161 op1 = amount;
2162 op1_mode = GET_MODE (op1);
2164 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2165 shift amount is a vector, use the vector/vector shift patterns. */
2166 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2168 lshift_optab = vashl_optab;
2169 rshift_arith_optab = vashr_optab;
2170 rshift_uns_optab = vlshr_optab;
2171 lrotate_optab = vrotl_optab;
2172 rrotate_optab = vrotr_optab;
2175 /* Previously detected shift-counts computed by NEGATE_EXPR
2176 and shifted in the other direction; but that does not work
2177 on all machines. */
2179 if (SHIFT_COUNT_TRUNCATED)
2181 if (CONST_INT_P (op1)
2182 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2183 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2184 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2185 % GET_MODE_BITSIZE (scalar_mode));
2186 else if (GET_CODE (op1) == SUBREG
2187 && subreg_lowpart_p (op1)
2188 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2189 && SCALAR_INT_MODE_P (GET_MODE (op1)))
2190 op1 = SUBREG_REG (op1);
2193 /* Canonicalize rotates by constant amount. If op1 is bitsize / 2,
2194 prefer left rotation, if op1 is from bitsize / 2 + 1 to
2195 bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2196 amount instead. */
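/* Example (added for illustration): in a 32-bit mode a left rotate by 25
   lies in the range [17, 31], so it is rewritten as a right rotate by
   32 - 25 = 7.  */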
2197 if (rotate
2198 && CONST_INT_P (op1)
2199 && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2200 GET_MODE_BITSIZE (scalar_mode) - 1))
2202 op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2203 left = !left;
2204 code = left ? LROTATE_EXPR : RROTATE_EXPR;
2207 /* Rotation of 16bit values by 8 bits is effectively equivalent to a bswaphi.
2208 Note that this is not the case for bigger values. For instance a rotation
2209 of 0x01020304 by 16 bits gives 0x03040102 which is different from
2210 0x04030201 (bswapsi). */
2211 if (rotate
2212 && CONST_INT_P (op1)
2213 && INTVAL (op1) == BITS_PER_UNIT
2214 && GET_MODE_SIZE (scalar_mode) == 2
2215 && optab_handler (bswap_optab, HImode) != CODE_FOR_nothing)
2216 return expand_unop (HImode, bswap_optab, shifted, NULL_RTX,
2217 unsignedp);
2219 if (op1 == const0_rtx)
2220 return shifted;
2222 /* Check whether it's cheaper to implement a left shift by a constant
2223 bit count as a sequence of additions. */
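/* Example (added for illustration): a left shift by 2 can be expanded as
   two self-additions,
       t = x + x;  t = t + t;
   which the loop below emits whenever two additions are cheaper than the
   shift according to add_cost and shift_cost.  */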
2224 if (code == LSHIFT_EXPR
2225 && CONST_INT_P (op1)
2226 && INTVAL (op1) > 0
2227 && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2228 && INTVAL (op1) < MAX_BITS_PER_WORD
2229 && (shift_cost (speed, mode, INTVAL (op1))
2230 > INTVAL (op1) * add_cost (speed, mode))
2231 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2233 int i;
2234 for (i = 0; i < INTVAL (op1); i++)
2236 temp = force_reg (mode, shifted);
2237 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2238 unsignedp, OPTAB_LIB_WIDEN);
2240 return shifted;
2243 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2245 enum optab_methods methods;
2247 if (attempt == 0)
2248 methods = OPTAB_DIRECT;
2249 else if (attempt == 1)
2250 methods = OPTAB_WIDEN;
2251 else
2252 methods = OPTAB_LIB_WIDEN;
2254 if (rotate)
2256 /* Widening does not work for rotation. */
2257 if (methods == OPTAB_WIDEN)
2258 continue;
2259 else if (methods == OPTAB_LIB_WIDEN)
2261 /* If we have been unable to open-code this by a rotation,
2262 do it as the IOR of two shifts. I.e., to rotate A
2263 by N bits, compute
2264 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2265 where C is the bitsize of A.
2267 It is theoretically possible that the target machine might
2268 not be able to perform either shift and hence we would
2269 be making two libcalls rather than just the one for the
2270 shift (similarly if IOR could not be done). We will allow
2271 this extremely unlikely lossage to avoid complicating the
2272 code below. */
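/* Example (added for illustration): for a 32-bit A rotated left by 8 this
   computes (A << 8) | ((unsigned) A >> 24).  The (-N) & (C - 1) form of
   the second shift count matters when N is a variable: for N == 0 it
   yields a shift by 0 rather than an undefined shift by C.  */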
2274 rtx subtarget = target == shifted ? 0 : target;
2275 rtx new_amount, other_amount;
2276 rtx temp1;
2278 new_amount = op1;
2279 if (op1 == const0_rtx)
2280 return shifted;
2281 else if (CONST_INT_P (op1))
2282 other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode)
2283 - INTVAL (op1));
2284 else
2286 other_amount
2287 = simplify_gen_unary (NEG, GET_MODE (op1),
2288 op1, GET_MODE (op1));
2289 HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2290 other_amount
2291 = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2292 gen_int_mode (mask, GET_MODE (op1)));
2295 shifted = force_reg (mode, shifted);
2297 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2298 mode, shifted, new_amount, 0, 1);
2299 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2300 mode, shifted, other_amount,
2301 subtarget, 1);
2302 return expand_binop (mode, ior_optab, temp, temp1, target,
2303 unsignedp, methods);
2306 temp = expand_binop (mode,
2307 left ? lrotate_optab : rrotate_optab,
2308 shifted, op1, target, unsignedp, methods);
2310 else if (unsignedp)
2311 temp = expand_binop (mode,
2312 left ? lshift_optab : rshift_uns_optab,
2313 shifted, op1, target, unsignedp, methods);
2315 /* Do arithmetic shifts.
2316 Also, if we are going to widen the operand, we can just as well
2317 use an arithmetic right-shift instead of a logical one. */
2318 if (temp == 0 && ! rotate
2319 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2321 enum optab_methods methods1 = methods;
2323 /* If trying to widen a log shift to an arithmetic shift,
2324 don't accept an arithmetic shift of the same size. */
2325 if (unsignedp)
2326 methods1 = OPTAB_MUST_WIDEN;
2328 /* Arithmetic shift */
2330 temp = expand_binop (mode,
2331 left ? lshift_optab : rshift_arith_optab,
2332 shifted, op1, target, unsignedp, methods1);
2335 /* We used to try extzv here for logical right shifts, but that was
2336 only useful for one machine, the VAX, and caused poor code
2337 generation there for lshrdi3, so the code was deleted and a
2338 define_expand for lshrsi3 was added to vax.md. */
2341 gcc_assert (temp);
2342 return temp;
2345 /* Output a shift instruction for expression code CODE,
2346 with SHIFTED being the rtx for the value to shift,
2347 and AMOUNT the amount to shift by.
2348 Store the result in the rtx TARGET, if that is convenient.
2349 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2350 Return the rtx for where the value is. */
2353 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2354 int amount, rtx target, int unsignedp)
2356 return expand_shift_1 (code, mode,
2357 shifted, GEN_INT (amount), target, unsignedp);
2360 /* Output a shift instruction for expression code CODE,
2361 with SHIFTED being the rtx for the value to shift,
2362 and AMOUNT the tree for the amount to shift by.
2363 Store the result in the rtx TARGET, if that is convenient.
2364 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2365 Return the rtx for where the value is. */
2368 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2369 tree amount, rtx target, int unsignedp)
2371 return expand_shift_1 (code, mode,
2372 shifted, expand_normal (amount), target, unsignedp);
2376 /* Indicates the type of fixup needed after a constant multiplication.
2377 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2378 the result should be negated, and ADD_VARIANT means that the
2379 multiplicand should be added to the result. */
2380 enum mult_variant {basic_variant, negate_variant, add_variant};
2382 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2383 const struct mult_cost *, machine_mode mode);
2384 static bool choose_mult_variant (machine_mode, HOST_WIDE_INT,
2385 struct algorithm *, enum mult_variant *, int);
2386 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2387 const struct algorithm *, enum mult_variant);
2388 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2389 static rtx extract_high_half (machine_mode, rtx);
2390 static rtx expmed_mult_highpart (machine_mode, rtx, rtx, rtx, int, int);
2391 static rtx expmed_mult_highpart_optab (machine_mode, rtx, rtx, rtx,
2392 int, int);
2393 /* Compute and return the best algorithm for multiplying by T.
2394 The algorithm must cost less than cost_limit.
2395 If retval.cost >= COST_LIMIT, no algorithm was found and all
2396 other fields of the returned struct are undefined.
2397 MODE is the machine mode of the multiplication. */
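/* Example (added for illustration): for T = 10 a typical result is the
   three-operation sequence
       accum = x                    (alg_m)
       accum = (accum << 2) + x     (alg_add_t2_m, log 2: accum = 5*x)
       accum = accum << 1           (alg_shift,    log 1: accum = 10*x)
   assuming the add/shift costs below make it beat COST_LIMIT.  */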
2399 static void
2400 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2401 const struct mult_cost *cost_limit, machine_mode mode)
2403 int m;
2404 struct algorithm *alg_in, *best_alg;
2405 struct mult_cost best_cost;
2406 struct mult_cost new_limit;
2407 int op_cost, op_latency;
2408 unsigned HOST_WIDE_INT orig_t = t;
2409 unsigned HOST_WIDE_INT q;
2410 int maxm, hash_index;
2411 bool cache_hit = false;
2412 enum alg_code cache_alg = alg_zero;
2413 bool speed = optimize_insn_for_speed_p ();
2414 machine_mode imode;
2415 struct alg_hash_entry *entry_ptr;
2417 /* Indicate that no algorithm is yet found. If no algorithm
2418 is found, this value will be returned and indicate failure. */
2419 alg_out->cost.cost = cost_limit->cost + 1;
2420 alg_out->cost.latency = cost_limit->latency + 1;
2422 if (cost_limit->cost < 0
2423 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2424 return;
2426 /* Be prepared for vector modes. */
2427 imode = GET_MODE_INNER (mode);
2429 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2431 /* Restrict the bits of "t" to the multiplication's mode. */
2432 t &= GET_MODE_MASK (imode);
2434 /* t == 1 can be done in zero cost. */
2435 if (t == 1)
2437 alg_out->ops = 1;
2438 alg_out->cost.cost = 0;
2439 alg_out->cost.latency = 0;
2440 alg_out->op[0] = alg_m;
2441 return;
2444 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2445 fail now. */
2446 if (t == 0)
2448 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2449 return;
2450 else
2452 alg_out->ops = 1;
2453 alg_out->cost.cost = zero_cost (speed);
2454 alg_out->cost.latency = zero_cost (speed);
2455 alg_out->op[0] = alg_zero;
2456 return;
2460 /* We'll be needing a couple extra algorithm structures now. */
2462 alg_in = XALLOCA (struct algorithm);
2463 best_alg = XALLOCA (struct algorithm);
2464 best_cost = *cost_limit;
2466 /* Compute the hash index. */
2467 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2469 /* See if we already know what to do for T. */
2470 entry_ptr = alg_hash_entry_ptr (hash_index);
2471 if (entry_ptr->t == t
2472 && entry_ptr->mode == mode
2474 && entry_ptr->speed == speed
2475 && entry_ptr->alg != alg_unknown)
2477 cache_alg = entry_ptr->alg;
2479 if (cache_alg == alg_impossible)
2481 /* The cache tells us that it's impossible to synthesize
2482 multiplication by T within entry_ptr->cost. */
2483 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2484 /* COST_LIMIT is at least as restrictive as the one
2485 recorded in the hash table, in which case we have no
2486 hope of synthesizing a multiplication. Just
2487 return. */
2488 return;
2490 /* If we get here, COST_LIMIT is less restrictive than the
2491 one recorded in the hash table, so we may be able to
2492 synthesize a multiplication. Proceed as if we didn't
2493 have the cache entry. */
2495 else
2497 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2498 /* The cached algorithm shows that this multiplication
2499 requires more cost than COST_LIMIT. Just return. This
2500 way, we don't clobber this cache entry with
2501 alg_impossible but retain useful information. */
2502 return;
2504 cache_hit = true;
2506 switch (cache_alg)
2508 case alg_shift:
2509 goto do_alg_shift;
2511 case alg_add_t_m2:
2512 case alg_sub_t_m2:
2513 goto do_alg_addsub_t_m2;
2515 case alg_add_factor:
2516 case alg_sub_factor:
2517 goto do_alg_addsub_factor;
2519 case alg_add_t2_m:
2520 goto do_alg_add_t2_m;
2522 case alg_sub_t2_m:
2523 goto do_alg_sub_t2_m;
2525 default:
2526 gcc_unreachable ();
2531 /* If we have a group of zero bits at the low-order part of T, try
2532 multiplying by the remaining bits and then doing a shift. */
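/* Example (added for illustration): T = 20 ends in two zero bits, so the
   code below recurses on Q = 5 and appends an alg_shift by M = 2,
   i.e. x*20 = (x*5) << 2.  */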
2534 if ((t & 1) == 0)
2536 do_alg_shift:
2537 m = floor_log2 (t & -t); /* m = number of low zero bits */
2538 if (m < maxm)
2540 q = t >> m;
2541 /* The function expand_shift will choose between a shift and
2542 a sequence of additions, so the observed cost is given as
2543 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2544 op_cost = m * add_cost (speed, mode);
2545 if (shift_cost (speed, mode, m) < op_cost)
2546 op_cost = shift_cost (speed, mode, m);
2547 new_limit.cost = best_cost.cost - op_cost;
2548 new_limit.latency = best_cost.latency - op_cost;
2549 synth_mult (alg_in, q, &new_limit, mode);
2551 alg_in->cost.cost += op_cost;
2552 alg_in->cost.latency += op_cost;
2553 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2555 best_cost = alg_in->cost;
2556 std::swap (alg_in, best_alg);
2557 best_alg->log[best_alg->ops] = m;
2558 best_alg->op[best_alg->ops] = alg_shift;
2561 /* See if treating ORIG_T as a signed number yields a better
2562 sequence. Try this sequence only for a negative ORIG_T
2563 as it would be useless for a non-negative ORIG_T. */
2564 if ((HOST_WIDE_INT) orig_t < 0)
2566 /* Shift ORIG_T as follows because a right shift of a
2567 negative-valued signed type is implementation
2568 defined. */
2569 q = ~(~orig_t >> m);
2570 /* The function expand_shift will choose between a shift
2571 and a sequence of additions, so the observed cost is
2572 given as MIN (m * add_cost(speed, mode),
2573 shift_cost(speed, mode, m)). */
2574 op_cost = m * add_cost (speed, mode);
2575 if (shift_cost (speed, mode, m) < op_cost)
2576 op_cost = shift_cost (speed, mode, m);
2577 new_limit.cost = best_cost.cost - op_cost;
2578 new_limit.latency = best_cost.latency - op_cost;
2579 synth_mult (alg_in, q, &new_limit, mode);
2581 alg_in->cost.cost += op_cost;
2582 alg_in->cost.latency += op_cost;
2583 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2585 best_cost = alg_in->cost;
2586 std::swap (alg_in, best_alg);
2587 best_alg->log[best_alg->ops] = m;
2588 best_alg->op[best_alg->ops] = alg_shift;
2592 if (cache_hit)
2593 goto done;
2596 /* If we have an odd number, add or subtract one. */
2597 if ((t & 1) != 0)
2599 unsigned HOST_WIDE_INT w;
2601 do_alg_addsub_t_m2:
2602 for (w = 1; (w & t) != 0; w <<= 1)
2604 /* If T was -1, then W will be zero after the loop. This is another
2605 case where T ends with ...111. Handling this as (T + 1) followed
2606 by a subtraction produces slightly better code and makes algorithm
2607 selection much faster than treating it like the ...0111 case
2608 below. */
2609 if (w == 0
2610 || (w > 2
2611 /* Reject the case where t is 3.
2612 Thus we prefer addition in that case. */
2613 && t != 3))
2615 /* T ends with ...111. Multiply by (T + 1) and subtract T. */
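/* Example (added for illustration): T = 7 ends in ...111, so this branch
   synthesizes it as x*8 - x, i.e. (x << 3) - x.  */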
2617 op_cost = add_cost (speed, mode);
2618 new_limit.cost = best_cost.cost - op_cost;
2619 new_limit.latency = best_cost.latency - op_cost;
2620 synth_mult (alg_in, t + 1, &new_limit, mode);
2622 alg_in->cost.cost += op_cost;
2623 alg_in->cost.latency += op_cost;
2624 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2626 best_cost = alg_in->cost;
2627 std::swap (alg_in, best_alg);
2628 best_alg->log[best_alg->ops] = 0;
2629 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2632 else
2634 /* T ends with ...01 or ...011. Multiply by (T - 1) and add T. */
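/* Example (added for illustration): T = 5 ends in ...01, so this branch
   synthesizes it as x*4 + x, i.e. (x << 2) + x.  */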
2636 op_cost = add_cost (speed, mode);
2637 new_limit.cost = best_cost.cost - op_cost;
2638 new_limit.latency = best_cost.latency - op_cost;
2639 synth_mult (alg_in, t - 1, &new_limit, mode);
2641 alg_in->cost.cost += op_cost;
2642 alg_in->cost.latency += op_cost;
2643 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2645 best_cost = alg_in->cost;
2646 std::swap (alg_in, best_alg);
2647 best_alg->log[best_alg->ops] = 0;
2648 best_alg->op[best_alg->ops] = alg_add_t_m2;
2652 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2653 quickly with a - a * n for some appropriate constant n. */
2654 m = exact_log2 (-orig_t + 1);
2655 if (m >= 0 && m < maxm)
2657 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2658 /* If the target has a cheap shift-and-subtract insn use
2659 that in preference to a shift insn followed by a sub insn.
2660 Assume that the shift-and-sub is "atomic" with a latency
2661 equal to its cost, otherwise assume that on superscalar
2662 hardware the shift may be executed concurrently with the
2663 earlier steps in the algorithm. */
2664 if (shiftsub1_cost (speed, mode, m) <= op_cost)
2666 op_cost = shiftsub1_cost (speed, mode, m);
2667 op_latency = op_cost;
2669 else
2670 op_latency = add_cost (speed, mode);
2672 new_limit.cost = best_cost.cost - op_cost;
2673 new_limit.latency = best_cost.latency - op_latency;
2674 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2675 &new_limit, mode);
2677 alg_in->cost.cost += op_cost;
2678 alg_in->cost.latency += op_latency;
2679 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2681 best_cost = alg_in->cost;
2682 std::swap (alg_in, best_alg);
2683 best_alg->log[best_alg->ops] = m;
2684 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2688 if (cache_hit)
2689 goto done;
2692 /* Look for factors of t of the form
2693 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2694 If we find such a factor, we can multiply by t using an algorithm that
2695 multiplies by q, shift the result by m and add/subtract it to itself.
2697 We search for large factors first and loop down, even if large factors
2698 are less probable than small; if we find a large factor we will find a
2699 good sequence quickly, and therefore be able to prune (by decreasing
2700 COST_LIMIT) the search. */
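/* Example (added for illustration): T = 45 factors as 3 * (2**4 - 1), so
   the loop below can synthesize x*3 = (x << 1) + x and then apply
   alg_sub_factor with M = 4:  x*45 = ((x*3) << 4) - x*3.  */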
2702 do_alg_addsub_factor:
2703 for (m = floor_log2 (t - 1); m >= 2; m--)
2705 unsigned HOST_WIDE_INT d;
2707 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2708 if (t % d == 0 && t > d && m < maxm
2709 && (!cache_hit || cache_alg == alg_add_factor))
2711 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2712 if (shiftadd_cost (speed, mode, m) <= op_cost)
2713 op_cost = shiftadd_cost (speed, mode, m);
2715 op_latency = op_cost;
2718 new_limit.cost = best_cost.cost - op_cost;
2719 new_limit.latency = best_cost.latency - op_latency;
2720 synth_mult (alg_in, t / d, &new_limit, mode);
2722 alg_in->cost.cost += op_cost;
2723 alg_in->cost.latency += op_latency;
2724 if (alg_in->cost.latency < op_cost)
2725 alg_in->cost.latency = op_cost;
2726 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2728 best_cost = alg_in->cost;
2729 std::swap (alg_in, best_alg);
2730 best_alg->log[best_alg->ops] = m;
2731 best_alg->op[best_alg->ops] = alg_add_factor;
2733 /* Other factors will have been taken care of in the recursion. */
2734 break;
2737 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2738 if (t % d == 0 && t > d && m < maxm
2739 && (!cache_hit || cache_alg == alg_sub_factor))
2741 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2742 if (shiftsub0_cost (speed, mode, m) <= op_cost)
2743 op_cost = shiftsub0_cost (speed, mode, m);
2745 op_latency = op_cost;
2747 new_limit.cost = best_cost.cost - op_cost;
2748 new_limit.latency = best_cost.latency - op_latency;
2749 synth_mult (alg_in, t / d, &new_limit, mode);
2751 alg_in->cost.cost += op_cost;
2752 alg_in->cost.latency += op_latency;
2753 if (alg_in->cost.latency < op_cost)
2754 alg_in->cost.latency = op_cost;
2755 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2757 best_cost = alg_in->cost;
2758 std::swap (alg_in, best_alg);
2759 best_alg->log[best_alg->ops] = m;
2760 best_alg->op[best_alg->ops] = alg_sub_factor;
2762 break;
2765 if (cache_hit)
2766 goto done;
2768 /* Try shift-and-add (load effective address) instructions,
2769 i.e. do a*3, a*5, a*9. */
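/* Example (added for illustration): for T = 9, Q = T - 1 = 8 gives M = 3,
   so x*9 can become the single shift-and-add (x << 3) + x, matching an
   lea-style instruction where one is available.  */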
2770 if ((t & 1) != 0)
2772 do_alg_add_t2_m:
2773 q = t - 1;
2774 q = q & -q;
2775 m = exact_log2 (q);
2776 if (m >= 0 && m < maxm)
2778 op_cost = shiftadd_cost (speed, mode, m);
2779 new_limit.cost = best_cost.cost - op_cost;
2780 new_limit.latency = best_cost.latency - op_cost;
2781 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2783 alg_in->cost.cost += op_cost;
2784 alg_in->cost.latency += op_cost;
2785 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2787 best_cost = alg_in->cost;
2788 std::swap (alg_in, best_alg);
2789 best_alg->log[best_alg->ops] = m;
2790 best_alg->op[best_alg->ops] = alg_add_t2_m;
2793 if (cache_hit)
2794 goto done;
2796 do_alg_sub_t2_m:
2797 q = t + 1;
2798 q = q & -q;
2799 m = exact_log2 (q);
2800 if (m >= 0 && m < maxm)
2802 op_cost = shiftsub0_cost (speed, mode, m);
2803 new_limit.cost = best_cost.cost - op_cost;
2804 new_limit.latency = best_cost.latency - op_cost;
2805 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2807 alg_in->cost.cost += op_cost;
2808 alg_in->cost.latency += op_cost;
2809 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2811 best_cost = alg_in->cost;
2812 std::swap (alg_in, best_alg);
2813 best_alg->log[best_alg->ops] = m;
2814 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2817 if (cache_hit)
2818 goto done;
2821 done:
2822 /* If best_cost has not decreased, we have not found any algorithm. */
2823 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2825 /* We failed to find an algorithm. Record alg_impossible for
2826 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2827 we are asked to find an algorithm for T within the same or
2828 lower COST_LIMIT, we can immediately return to the
2829 caller. */
2830 entry_ptr->t = t;
2831 entry_ptr->mode = mode;
2832 entry_ptr->speed = speed;
2833 entry_ptr->alg = alg_impossible;
2834 entry_ptr->cost = *cost_limit;
2835 return;
2838 /* Cache the result. */
2839 if (!cache_hit)
2841 entry_ptr->t = t;
2842 entry_ptr->mode = mode;
2843 entry_ptr->speed = speed;
2844 entry_ptr->alg = best_alg->op[best_alg->ops];
2845 entry_ptr->cost.cost = best_cost.cost;
2846 entry_ptr->cost.latency = best_cost.latency;
2849 /* If we are getting too long a sequence for `struct algorithm'
2850 to record, make this search fail. */
2851 if (best_alg->ops == MAX_BITS_PER_WORD)
2852 return;
2854 /* Copy the algorithm from temporary space to the space at alg_out.
2855 We avoid using structure assignment because the majority of
2856 best_alg is normally undefined, and this is a critical function. */
2857 alg_out->ops = best_alg->ops + 1;
2858 alg_out->cost = best_cost;
2859 memcpy (alg_out->op, best_alg->op,
2860 alg_out->ops * sizeof *alg_out->op);
2861 memcpy (alg_out->log, best_alg->log,
2862 alg_out->ops * sizeof *alg_out->log);
2865 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2866 Try three variations:
2868 - a shift/add sequence based on VAL itself
2869 - a shift/add sequence based on -VAL, followed by a negation
2870 - a shift/add sequence based on VAL - 1, followed by an addition.
2872 Return true if the cheapest of these costs less than MULT_COST,
2873 describing the algorithm in *ALG and final fixup in *VARIANT. */
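/* Example (added for illustration): for VAL = -7 in a 32-bit mode the
   direct search must synthesize the bit pattern 0xfffffff9, whereas the
   -VAL search finds x*7 = (x << 3) - x; once neg_cost is added this
   commonly wins, giving negate_variant.  */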
2875 static bool
2876 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
2877 struct algorithm *alg, enum mult_variant *variant,
2878 int mult_cost)
2880 struct algorithm alg2;
2881 struct mult_cost limit;
2882 int op_cost;
2883 bool speed = optimize_insn_for_speed_p ();
2885 /* Fail quickly for impossible bounds. */
2886 if (mult_cost < 0)
2887 return false;
2889 /* Ensure that mult_cost provides a reasonable upper bound.
2890 Any constant multiplication can be performed with less
2891 than 2 * bits additions. */
2892 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
2893 if (mult_cost > op_cost)
2894 mult_cost = op_cost;
2896 *variant = basic_variant;
2897 limit.cost = mult_cost;
2898 limit.latency = mult_cost;
2899 synth_mult (alg, val, &limit, mode);
2901 /* This works only if the inverted value actually fits in an
2902 `unsigned int' */
2903 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
2905 op_cost = neg_cost (speed, mode);
2906 if (MULT_COST_LESS (&alg->cost, mult_cost))
2908 limit.cost = alg->cost.cost - op_cost;
2909 limit.latency = alg->cost.latency - op_cost;
2911 else
2913 limit.cost = mult_cost - op_cost;
2914 limit.latency = mult_cost - op_cost;
2917 synth_mult (&alg2, -val, &limit, mode);
2918 alg2.cost.cost += op_cost;
2919 alg2.cost.latency += op_cost;
2920 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2921 *alg = alg2, *variant = negate_variant;
2924 /* This proves very useful for division-by-constant. */
2925 op_cost = add_cost (speed, mode);
2926 if (MULT_COST_LESS (&alg->cost, mult_cost))
2928 limit.cost = alg->cost.cost - op_cost;
2929 limit.latency = alg->cost.latency - op_cost;
2931 else
2933 limit.cost = mult_cost - op_cost;
2934 limit.latency = mult_cost - op_cost;
2937 synth_mult (&alg2, val - 1, &limit, mode);
2938 alg2.cost.cost += op_cost;
2939 alg2.cost.latency += op_cost;
2940 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2941 *alg = alg2, *variant = add_variant;
2943 return MULT_COST_LESS (&alg->cost, mult_cost);
2946 /* A subroutine of expand_mult, used for constant multiplications.
2947 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2948 convenient. Use the shift/add sequence described by ALG and apply
2949 the final fixup specified by VARIANT. */
2951 static rtx
2952 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
2953 rtx target, const struct algorithm *alg,
2954 enum mult_variant variant)
2956 HOST_WIDE_INT val_so_far;
2957 rtx_insn *insn;
2958 rtx accum, tem;
2959 int opno;
2960 machine_mode nmode;
2962 /* Avoid referencing memory over and over and invalid sharing
2963 on SUBREGs. */
2964 op0 = force_reg (mode, op0);
2966 /* ACCUM starts out either as OP0 or as a zero, depending on
2967 the first operation. */
2969 if (alg->op[0] == alg_zero)
2971 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
2972 val_so_far = 0;
2974 else if (alg->op[0] == alg_m)
2976 accum = copy_to_mode_reg (mode, op0);
2977 val_so_far = 1;
2979 else
2980 gcc_unreachable ();
2982 for (opno = 1; opno < alg->ops; opno++)
2984 int log = alg->log[opno];
2985 rtx shift_subtarget = optimize ? 0 : accum;
2986 rtx add_target
2987 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2988 && !optimize)
2989 ? target : 0;
2990 rtx accum_target = optimize ? 0 : accum;
2991 rtx accum_inner;
2993 switch (alg->op[opno])
2995 case alg_shift:
2996 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2997 /* REG_EQUAL note will be attached to the following insn. */
2998 emit_move_insn (accum, tem);
2999 val_so_far <<= log;
3000 break;
3002 case alg_add_t_m2:
3003 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3004 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3005 add_target ? add_target : accum_target);
3006 val_so_far += (HOST_WIDE_INT) 1 << log;
3007 break;
3009 case alg_sub_t_m2:
3010 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3011 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3012 add_target ? add_target : accum_target);
3013 val_so_far -= (HOST_WIDE_INT) 1 << log;
3014 break;
3016 case alg_add_t2_m:
3017 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3018 log, shift_subtarget, 0);
3019 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3020 add_target ? add_target : accum_target);
3021 val_so_far = (val_so_far << log) + 1;
3022 break;
3024 case alg_sub_t2_m:
3025 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3026 log, shift_subtarget, 0);
3027 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3028 add_target ? add_target : accum_target);
3029 val_so_far = (val_so_far << log) - 1;
3030 break;
3032 case alg_add_factor:
3033 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3034 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3035 add_target ? add_target : accum_target);
3036 val_so_far += val_so_far << log;
3037 break;
3039 case alg_sub_factor:
3040 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3041 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3042 (add_target
3043 ? add_target : (optimize ? 0 : tem)));
3044 val_so_far = (val_so_far << log) - val_so_far;
3045 break;
3047 default:
3048 gcc_unreachable ();
3051 if (SCALAR_INT_MODE_P (mode))
3053 /* Write a REG_EQUAL note on the last insn so that we can cse
3054 multiplication sequences. Note that if ACCUM is a SUBREG,
3055 we've set the inner register and must properly indicate that. */
3056 tem = op0, nmode = mode;
3057 accum_inner = accum;
3058 if (GET_CODE (accum) == SUBREG)
3060 accum_inner = SUBREG_REG (accum);
3061 nmode = GET_MODE (accum_inner);
3062 tem = gen_lowpart (nmode, op0);
3065 insn = get_last_insn ();
3066 set_dst_reg_note (insn, REG_EQUAL,
3067 gen_rtx_MULT (nmode, tem,
3068 gen_int_mode (val_so_far, nmode)),
3069 accum_inner);
3073 if (variant == negate_variant)
3075 val_so_far = -val_so_far;
3076 accum = expand_unop (mode, neg_optab, accum, target, 0);
3078 else if (variant == add_variant)
3080 val_so_far = val_so_far + 1;
3081 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3084 /* Compare only the bits of val and val_so_far that are significant
3085 in the result mode, to avoid sign-/zero-extension confusion. */
3086 nmode = GET_MODE_INNER (mode);
3087 val &= GET_MODE_MASK (nmode);
3088 val_so_far &= GET_MODE_MASK (nmode);
3089 gcc_assert (val == val_so_far);
3091 return accum;
3094 /* Perform a multiplication and return an rtx for the result.
3095 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3096 TARGET is a suggestion for where to store the result (an rtx).
3098 We check specially for a constant integer as OP1.
3099 If you want this check for OP0 as well, then before calling
3100 you should swap the two operands if OP0 would be constant. */
3103 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3104 int unsignedp)
3106 enum mult_variant variant;
3107 struct algorithm algorithm;
3108 rtx scalar_op1;
3109 int max_cost;
3110 bool speed = optimize_insn_for_speed_p ();
3111 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3113 if (CONSTANT_P (op0))
3114 std::swap (op0, op1);
3116 /* For vectors, there are several simplifications that can be made if
3117 all elements of the vector constant are identical. */
3118 scalar_op1 = unwrap_const_vec_duplicate (op1);
3120 if (INTEGRAL_MODE_P (mode))
3122 rtx fake_reg;
3123 HOST_WIDE_INT coeff;
3124 bool is_neg;
3125 int mode_bitsize;
3127 if (op1 == CONST0_RTX (mode))
3128 return op1;
3129 if (op1 == CONST1_RTX (mode))
3130 return op0;
3131 if (op1 == CONSTM1_RTX (mode))
3132 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3133 op0, target, 0);
3135 if (do_trapv)
3136 goto skip_synth;
3138 /* If mode is integer vector mode, check if the backend supports
3139 vector lshift (by scalar or vector) at all. If not, we can't use
3140 synthesized multiply. */
3141 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3142 && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3143 && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3144 goto skip_synth;
3146 /* These are the operations that are potentially turned into
3147 a sequence of shifts and additions. */
3148 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3150 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3151 less than or equal in size to `unsigned int' this doesn't matter.
3152 If the mode is larger than `unsigned int', then synth_mult works
3153 only if the constant value exactly fits in an `unsigned int' without
3154 any truncation. This means that multiplying by negative values does
3155 not work; results are off by 2^32 on a 32 bit machine. */
3156 if (CONST_INT_P (scalar_op1))
3158 coeff = INTVAL (scalar_op1);
3159 is_neg = coeff < 0;
3161 #if TARGET_SUPPORTS_WIDE_INT
3162 else if (CONST_WIDE_INT_P (scalar_op1))
3163 #else
3164 else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3165 #endif
3167 int shift = wi::exact_log2 (std::make_pair (scalar_op1, mode));
3168 /* Perfect power of 2 (other than 1, which is handled above). */
3169 if (shift > 0)
3170 return expand_shift (LSHIFT_EXPR, mode, op0,
3171 shift, target, unsignedp);
3172 else
3173 goto skip_synth;
3175 else
3176 goto skip_synth;
3178 /* We used to test optimize here, on the grounds that it's better to
3179 produce a smaller program when -O is not used. But this causes
3180 such a terrible slowdown sometimes that it seems better to always
3181 use synth_mult. */
3183 /* Special case powers of two. */
3184 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3185 && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3186 return expand_shift (LSHIFT_EXPR, mode, op0,
3187 floor_log2 (coeff), target, unsignedp);
3189 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3191 /* Attempt to handle multiplication of DImode values by negative
3192 coefficients, by performing the multiplication by a positive
3193 multiplier and then inverting the result. */
3194 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3196 /* It's safe to use -coeff even for INT_MIN, as the
3197 result is interpreted as an unsigned coefficient.
3198 Exclude cost of op0 from max_cost to match the cost
3199 calculation of the synth_mult. */
3200 coeff = -(unsigned HOST_WIDE_INT) coeff;
3201 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3202 mode, speed)
3203 - neg_cost (speed, mode));
3204 if (max_cost <= 0)
3205 goto skip_synth;
3207 /* Special case powers of two. */
3208 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3210 rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3211 floor_log2 (coeff), target, unsignedp);
3212 return expand_unop (mode, neg_optab, temp, target, 0);
3215 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3216 max_cost))
3218 rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3219 &algorithm, variant);
3220 return expand_unop (mode, neg_optab, temp, target, 0);
3222 goto skip_synth;
3225 /* Exclude cost of op0 from max_cost to match the cost
3226 calculation of the synth_mult. */
3227 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3228 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3229 return expand_mult_const (mode, op0, coeff, target,
3230 &algorithm, variant);
3232 skip_synth:
3234 /* Expand x*2.0 as x+x. */
3235 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3236 && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
3238 op0 = force_reg (GET_MODE (op0), op0);
3239 return expand_binop (mode, add_optab, op0, op0,
3240 target, unsignedp, OPTAB_LIB_WIDEN);
3243 /* This used to use umul_optab if unsigned, but for non-widening multiply
3244 there is no difference between signed and unsigned. */
3245 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3246 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3247 gcc_assert (op0);
3248 return op0;
3251 /* Return a cost estimate for multiplying a register by the given
3252 COEFFicient in the given MODE and SPEED. */
3255 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3257 int max_cost;
3258 struct algorithm algorithm;
3259 enum mult_variant variant;
3261 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3262 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3263 mode, speed);
3264 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3265 return algorithm.cost.cost;
3266 else
3267 return max_cost;
3270 /* Perform a widening multiplication and return an rtx for the result.
3271 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3272 TARGET is a suggestion for where to store the result (an rtx).
3273 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3274 or smul_widen_optab.
3276 We check specially for a constant integer as OP1, comparing the
3277 cost of a widening multiply against the cost of a sequence of shifts
3278 and adds. */
3281 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3282 int unsignedp, optab this_optab)
3284 bool speed = optimize_insn_for_speed_p ();
3285 rtx cop1;
3287 if (CONST_INT_P (op1)
3288 && GET_MODE (op0) != VOIDmode
3289 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3290 this_optab == umul_widen_optab))
3291 && CONST_INT_P (cop1)
3292 && (INTVAL (cop1) >= 0
3293 || HWI_COMPUTABLE_MODE_P (mode)))
3295 HOST_WIDE_INT coeff = INTVAL (cop1);
3296 int max_cost;
3297 enum mult_variant variant;
3298 struct algorithm algorithm;
3300 if (coeff == 0)
3301 return CONST0_RTX (mode);
3303 /* Special case powers of two. */
3304 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3306 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3307 return expand_shift (LSHIFT_EXPR, mode, op0,
3308 floor_log2 (coeff), target, unsignedp);
3311 /* Exclude cost of op0 from max_cost to match the cost
3312 calculation of the synth_mult. */
3313 max_cost = mul_widen_cost (speed, mode);
3314 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3315 max_cost))
3317 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3318 return expand_mult_const (mode, op0, coeff, target,
3319 &algorithm, variant);
3322 return expand_binop (mode, this_optab, op0, op1, target,
3323 unsignedp, OPTAB_LIB_WIDEN);
3326 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3327 replace division by D, and put the least significant N bits of the result
3328 in *MULTIPLIER_PTR and return the most significant bit.
3330 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3331 needed precision is in PRECISION (should be <= N).
3333 PRECISION should be as small as possible so this function can choose the
3334 multiplier more freely.
3336 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3337 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3339 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3340 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
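/* Example (added for illustration): for D = 5 with N = PRECISION = 32
   this yields *MULTIPLIER_PTR = 0xCCCCCCCD, *POST_SHIFT_PTR = 2,
   *LGUP_PTR = 3 and returns 0 (the multiplier fits in N bits), so for
   any 32-bit unsigned x,  x / 5 == (x * 0xCCCCCCCD) >> 34  when the
   product is computed in 64 bits.  */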
3342 unsigned HOST_WIDE_INT
3343 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3344 unsigned HOST_WIDE_INT *multiplier_ptr,
3345 int *post_shift_ptr, int *lgup_ptr)
3347 int lgup, post_shift;
3348 int pow, pow2;
3350 /* lgup = ceil(log2(divisor)); */
3351 lgup = ceil_log2 (d);
3353 gcc_assert (lgup <= n);
3355 pow = n + lgup;
3356 pow2 = n + lgup - precision;
3358 /* mlow = 2^(N + lgup)/d */
3359 wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3360 wide_int mlow = wi::udiv_trunc (val, d);
3362 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3363 val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3364 wide_int mhigh = wi::udiv_trunc (val, d);
3366 /* If precision == N, then mlow, mhigh exceed 2^N
3367 (but they do not exceed 2^(N+1)). */
3369 /* Reduce to lowest terms. */
3370 for (post_shift = lgup; post_shift > 0; post_shift--)
3372 unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3373 HOST_BITS_PER_WIDE_INT);
3374 unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3375 HOST_BITS_PER_WIDE_INT);
3376 if (ml_lo >= mh_lo)
3377 break;
3379 mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3380 mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3383 *post_shift_ptr = post_shift;
3384 *lgup_ptr = lgup;
3385 if (n < HOST_BITS_PER_WIDE_INT)
3387 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3388 *multiplier_ptr = mhigh.to_uhwi () & mask;
3389 return mhigh.to_uhwi () >= mask;
3391 else
3393 *multiplier_ptr = mhigh.to_uhwi ();
3394 return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3398 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3399 congruent to 1 (mod 2**N). */
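/* Example (added for illustration): for X = 3 and N = 8 the iteration
   below starts with Y = 3 (3*3 == 9 == 1 mod 2**3) and converges to
   Y = 171; indeed 3 * 171 == 513 == 2*256 + 1 == 1 (mod 2**8).  */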
3401 static unsigned HOST_WIDE_INT
3402 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3404 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3406 /* The algorithm notes that the choice y = x satisfies
3407 x*y == 1 mod 2^3, since x is assumed odd.
3408 Each iteration doubles the number of bits of significance in y. */
3410 unsigned HOST_WIDE_INT mask;
3411 unsigned HOST_WIDE_INT y = x;
3412 int nbit = 3;
3414 mask = (n == HOST_BITS_PER_WIDE_INT
3415 ? ~(unsigned HOST_WIDE_INT) 0
3416 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3418 while (nbit < n)
3420 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3421 nbit *= 2;
3423 return y;
3426 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3427 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3428 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3429 to become unsigned; if UNSIGNEDP is zero, adjust the unsigned product to
3430 become signed.
3432 The result is put in TARGET if that is convenient.
3434 MODE is the mode of operation. */
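/* Identity used here (noted for illustration): with N-bit operands,
       high_unsigned (a, b) == high_signed (a, b)
                               + (a < 0 ? b : 0) + (b < 0 ? a : 0)
   modulo 2**N, so each correction term below is formed as
   (x >> (N - 1)) & y with an arithmetic shift, then added (UNSIGNEDP
   nonzero) or subtracted (UNSIGNEDP zero).  */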
3437 expand_mult_highpart_adjust (machine_mode mode, rtx adj_operand, rtx op0,
3438 rtx op1, rtx target, int unsignedp)
3440 rtx tem;
3441 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3443 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3444 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3445 tem = expand_and (mode, tem, op1, NULL_RTX);
3446 adj_operand
3447 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3448 adj_operand);
3450 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3451 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3452 tem = expand_and (mode, tem, op0, NULL_RTX);
3453 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3454 target);
3456 return target;
3459 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3461 static rtx
3462 extract_high_half (machine_mode mode, rtx op)
3464 machine_mode wider_mode;
3466 if (mode == word_mode)
3467 return gen_highpart (mode, op);
3469 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3471 wider_mode = GET_MODE_WIDER_MODE (mode);
3472 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3473 GET_MODE_BITSIZE (mode), 0, 1);
3474 return convert_modes (mode, wider_mode, op, 0);
3477 /* Like expmed_mult_highpart, but only consider using a multiplication
3478 optab. OP1 is an rtx for the constant operand. */
3480 static rtx
3481 expmed_mult_highpart_optab (machine_mode mode, rtx op0, rtx op1,
3482 rtx target, int unsignedp, int max_cost)
3484 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3485 machine_mode wider_mode;
3486 optab moptab;
3487 rtx tem;
3488 int size;
3489 bool speed = optimize_insn_for_speed_p ();
3491 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3493 wider_mode = GET_MODE_WIDER_MODE (mode);
3494 size = GET_MODE_BITSIZE (mode);
3496 /* Firstly, try using a multiplication insn that only generates the needed
3497 high part of the product, and in the sign flavor of unsignedp. */
3498 if (mul_highpart_cost (speed, mode) < max_cost)
3500 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3501 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3502 unsignedp, OPTAB_DIRECT);
3503 if (tem)
3504 return tem;
3507 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3508 Need to adjust the result after the multiplication. */
3509 if (size - 1 < BITS_PER_WORD
3510 && (mul_highpart_cost (speed, mode)
3511 + 2 * shift_cost (speed, mode, size-1)
3512 + 4 * add_cost (speed, mode) < max_cost))
3514 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3515 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3516 unsignedp, OPTAB_DIRECT);
3517 if (tem)
3518 /* We used the wrong signedness. Adjust the result. */
3519 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3520 tem, unsignedp);
3523 /* Try widening multiplication. */
3524 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3525 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3526 && mul_widen_cost (speed, wider_mode) < max_cost)
3528 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3529 unsignedp, OPTAB_WIDEN);
3530 if (tem)
3531 return extract_high_half (mode, tem);
3534 /* Try widening the mode and performing a non-widening multiplication. */
3535 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3536 && size - 1 < BITS_PER_WORD
3537 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3538 < max_cost))
3540 rtx_insn *insns;
3541 rtx wop0, wop1;
3543 /* We need to widen the operands, for example to ensure the
3544 constant multiplier is correctly sign or zero extended.
3545 Use a sequence to clean up any instructions emitted by
3546 the conversions if things don't work out. */
3547 start_sequence ();
3548 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3549 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3550 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3551 unsignedp, OPTAB_WIDEN);
3552 insns = get_insns ();
3553 end_sequence ();
3555 if (tem)
3557 emit_insn (insns);
3558 return extract_high_half (mode, tem);
3562 /* Try widening multiplication of opposite signedness, and adjust. */
3563 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3564 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3565 && size - 1 < BITS_PER_WORD
3566 && (mul_widen_cost (speed, wider_mode)
3567 + 2 * shift_cost (speed, mode, size-1)
3568 + 4 * add_cost (speed, mode) < max_cost))
3570 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3571 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3572 if (tem != 0)
3574 tem = extract_high_half (mode, tem);
3575 /* We used the wrong signedness. Adjust the result. */
3576 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3577 target, unsignedp);
3581 return 0;
3584 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3585 putting the high half of the result in TARGET if that is convenient,
3586 and return where the result is. If the operation cannot be performed,
3587 0 is returned.
3589 MODE is the mode of operation and result.
3591 UNSIGNEDP nonzero means unsigned multiply.
3593 MAX_COST is the total allowed cost for the expanded RTL. */
3595 static rtx
3596 expmed_mult_highpart (machine_mode mode, rtx op0, rtx op1,
3597 rtx target, int unsignedp, int max_cost)
3599 machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3600 unsigned HOST_WIDE_INT cnst1;
3601 int extra_cost;
3602 bool sign_adjust = false;
3603 enum mult_variant variant;
3604 struct algorithm alg;
3605 rtx tem;
3606 bool speed = optimize_insn_for_speed_p ();
3608 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3609 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3610 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3612 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3614 /* We can't optimize modes wider than BITS_PER_WORD.
3615 ??? We might be able to perform double-word arithmetic if
3616 mode == word_mode, however all the cost calculations in
3617 synth_mult etc. assume single-word operations. */
3618 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3619 return expmed_mult_highpart_optab (mode, op0, op1, target,
3620 unsignedp, max_cost);
3622 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3624 /* Check whether we try to multiply by a negative constant. */
3625 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3627 sign_adjust = true;
3628 extra_cost += add_cost (speed, mode);
3631 /* See whether shift/add multiplication is cheap enough. */
3632 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3633 max_cost - extra_cost))
3635 /* See whether the specialized multiplication optabs are
3636 cheaper than the shift/add version. */
3637 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3638 alg.cost.cost + extra_cost);
3639 if (tem)
3640 return tem;
3642 tem = convert_to_mode (wider_mode, op0, unsignedp);
3643 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3644 tem = extract_high_half (mode, tem);
3646 /* Adjust result for signedness. */
3647 if (sign_adjust)
3648 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3650 return tem;
3652 return expmed_mult_highpart_optab (mode, op0, op1, target,
3653 unsignedp, max_cost);
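/* Rationale for the SIGN_ADJUST step above (a sketch): for a signed multiply
   by a negative constant C, the synthesized multiplication actually uses the
   unsigned residue CNST1 = C + 2^N (N = GET_MODE_BITSIZE (mode)).  Since
   op0 * C == op0 * CNST1 - op0 * 2^N, the high part of the true product is
   the high part of op0 * CNST1 minus op0, which is why a single extra
   subtraction (accounted for in EXTRA_COST) is emitted at the end.  */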
3657 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3659 static rtx
3660 expand_smod_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3662 rtx result, temp, shift;
3663 rtx_code_label *label;
3664 int logd;
3665 int prec = GET_MODE_PRECISION (mode);
3667 logd = floor_log2 (d);
3668 result = gen_reg_rtx (mode);
3670 /* Avoid conditional branches when they're expensive. */
3671 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3672 && optimize_insn_for_speed_p ())
3674 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3675 mode, 0, -1);
3676 if (signmask)
3678 HOST_WIDE_INT masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3679 signmask = force_reg (mode, signmask);
3680 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3682 /* Use the rtx_cost of an LSHIFTRT instruction to determine
3683 which instruction sequence to use. If logical right shifts
3684 are expensive then use 2 XORs, 2 SUBs and an AND; otherwise
3685 use an LSHIFTRT, 1 ADD, 1 SUB and an AND. */
3687 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3688 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3689 || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
3690 > COSTS_N_INSNS (2)))
3692 temp = expand_binop (mode, xor_optab, op0, signmask,
3693 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3694 temp = expand_binop (mode, sub_optab, temp, signmask,
3695 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3696 temp = expand_binop (mode, and_optab, temp,
3697 gen_int_mode (masklow, mode),
3698 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3699 temp = expand_binop (mode, xor_optab, temp, signmask,
3700 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3701 temp = expand_binop (mode, sub_optab, temp, signmask,
3702 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3704 else
3706 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3707 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3708 signmask = force_reg (mode, signmask);
3710 temp = expand_binop (mode, add_optab, op0, signmask,
3711 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3712 temp = expand_binop (mode, and_optab, temp,
3713 gen_int_mode (masklow, mode),
3714 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3715 temp = expand_binop (mode, sub_optab, temp, signmask,
3716 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3718 return temp;
3722 /* Mask contains the mode's signbit and the significant bits of the
3723 modulus. By including the signbit in the operation, many targets
3724 can avoid an explicit compare operation in the following comparison
3725 against zero. */
3726 wide_int mask = wi::mask (logd, false, prec);
3727 mask = wi::set_bit (mask, prec - 1);
3729 temp = expand_binop (mode, and_optab, op0,
3730 immed_wide_int_const (mask, mode),
3731 result, 1, OPTAB_LIB_WIDEN);
3732 if (temp != result)
3733 emit_move_insn (result, temp);
3735 label = gen_label_rtx ();
3736 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3738 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3739 0, OPTAB_LIB_WIDEN);
3741 mask = wi::mask (logd, true, prec);
3742 temp = expand_binop (mode, ior_optab, temp,
3743 immed_wide_int_const (mask, mode),
3744 result, 1, OPTAB_LIB_WIDEN);
3745 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3746 0, OPTAB_LIB_WIDEN);
3747 if (temp != result)
3748 emit_move_insn (result, temp);
3749 emit_label (label);
3750 return result;
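/* Worked example of the branch-free path above (illustrative, assuming a
   32-bit mode and D = 8, so LOGD = 3, and the C-style truncating remainder
   is wanted):

     signmask = op0 < 0 ? -1 : 0;

   cheap-logical-shift variant:
     r = ((op0 + ((unsigned) signmask >> (32 - 3))) & 7)
         - ((unsigned) signmask >> (32 - 3));
   e.g. op0 = -5: ((-5 + 7) & 7) - 7 = 2 - 7 = -5, matching -5 % 8 in C.

   xor/sub variant (used when the logical shift is expensive):
     r = ((((op0 ^ signmask) - signmask) & 7) ^ signmask) - signmask;
   e.g. op0 = -5: (-5 ^ -1) - (-1) = 5; 5 & 7 = 5; (5 ^ -1) - (-1) = -5.  */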
3753 /* Expand signed division of OP0 by a power of two D in mode MODE.
3754 This routine is only called for positive values of D. */
3756 static rtx
3757 expand_sdiv_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3759 rtx temp;
3760 rtx_code_label *label;
3761 int logd;
3763 logd = floor_log2 (d);
3765 if (d == 2
3766 && BRANCH_COST (optimize_insn_for_speed_p (),
3767 false) >= 1)
3769 temp = gen_reg_rtx (mode);
3770 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3771 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3772 0, OPTAB_LIB_WIDEN);
3773 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3776 if (HAVE_conditional_move
3777 && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
3779 rtx temp2;
3781 start_sequence ();
3782 temp2 = copy_to_mode_reg (mode, op0);
3783 temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
3784 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3785 temp = force_reg (mode, temp);
3787 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3788 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3789 mode, temp, temp2, mode, 0);
3790 if (temp2)
3792 rtx_insn *seq = get_insns ();
3793 end_sequence ();
3794 emit_insn (seq);
3795 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3797 end_sequence ();
3800 if (BRANCH_COST (optimize_insn_for_speed_p (),
3801 false) >= 2)
3803 int ushift = GET_MODE_BITSIZE (mode) - logd;
3805 temp = gen_reg_rtx (mode);
3806 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3807 if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
3808 || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3809 > COSTS_N_INSNS (1))
3810 temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
3811 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3812 else
3813 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3814 ushift, NULL_RTX, 1);
3815 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3816 0, OPTAB_LIB_WIDEN);
3817 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3820 label = gen_label_rtx ();
3821 temp = copy_to_mode_reg (mode, op0);
3822 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3823 expand_inc (temp, gen_int_mode (d - 1, mode));
3824 emit_label (label);
3825 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
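/* All of the variants above implement the same identity (a sketch): for
   D = 2^LOGD > 0, truncating signed division is

     op0 / D == (op0 + (op0 < 0 ? D - 1 : 0)) >> LOGD   (arithmetic shift)

   e.g. -5 / 4: (-5 + 3) >> 2 == -2 >> 2 == -1, while 5 / 4: 5 >> 2 == 1.
   The variants differ only in how the conditional bias D - 1 is produced:
   a store-flag plus add for D == 2, a conditional move, a sign mask that is
   either ANDed with D - 1 or logically shifted down to D - 1, or an
   explicit compare-and-branch as a last resort.  */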
3828 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3829 if that is convenient, and returning where the result is.
3830 You may request either the quotient or the remainder as the result;
3831 specify REM_FLAG nonzero to get the remainder.
3833 CODE is the expression code for which kind of division this is;
3834 it controls how rounding is done. MODE is the machine mode to use.
3835 UNSIGNEDP nonzero means do unsigned division. */
3837 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3838 and then correct it by or'ing in missing high bits
3839 if result of ANDI is nonzero.
3840 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3841 This could optimize to a bfexts instruction.
3842 But C doesn't use these operations, so their optimizations are
3843 left for later. */
3844 /* ??? For modulo, we don't actually need the highpart of the first product;
3845 the low part will do nicely. And for small divisors, the second multiply
3846 can also be a low-part only multiply or even be completely left out.
3847 E.g. to calculate the remainder of a division by 3 with a 32 bit
3848 multiply, multiply with 0x55555556 and extract the upper two bits;
3849 the result is exact for inputs up to 0x1fffffff.
3850 The input range can be reduced by using cross-sum rules.
3851 For odd divisors >= 3, the following table gives right shift counts
3852 so that if a number is shifted by an integer multiple of the given
3853 amount, the remainder stays the same:
3854 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3855 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3856 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3857 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3858 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3860 Cross-sum rules for even numbers can be derived by leaving as many bits
3861 to the right alone as the divisor has zeros to the right.
3862 E.g. if x is an unsigned 32 bit number:
3863 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3867 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
3868 rtx op0, rtx op1, rtx target, int unsignedp)
3870 machine_mode compute_mode;
3871 rtx tquotient;
3872 rtx quotient = 0, remainder = 0;
3873 rtx_insn *last;
3874 int size;
3875 rtx_insn *insn;
3876 optab optab1, optab2;
3877 int op1_is_constant, op1_is_pow2 = 0;
3878 int max_cost, extra_cost;
3879 static HOST_WIDE_INT last_div_const = 0;
3880 bool speed = optimize_insn_for_speed_p ();
3882 op1_is_constant = CONST_INT_P (op1);
3883 if (op1_is_constant)
3885 unsigned HOST_WIDE_INT ext_op1 = UINTVAL (op1);
3886 if (unsignedp)
3887 ext_op1 &= GET_MODE_MASK (mode);
3888 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3889 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3893 This is the structure of expand_divmod:
3895 First comes code to fix up the operands so we can perform the operations
3896 correctly and efficiently.
3898 Second comes a switch statement with code specific for each rounding mode.
3899 For some special operands this code emits all RTL for the desired
3900 operation, for other cases, it generates only a quotient and stores it in
3901 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3902 to indicate that it has not done anything.
3904 Last comes code that finishes the operation. If QUOTIENT is set and
3905 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3906 QUOTIENT is not set, it is computed using trunc rounding.
3908 We try to generate special code for division and remainder when OP1 is a
3909 constant. If |OP1| = 2**n we can use shifts and some other fast
3910 operations. For other values of OP1, we compute a carefully selected
3911 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3912 by m.
3914 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3915 half of the product. Different strategies for generating the product are
3916 implemented in expmed_mult_highpart.
3918 If what we actually want is the remainder, we generate that by another
3919 by-constant multiplication and a subtraction. */
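/* A sketch of the by-constant strategy described above (in the style of
   Granlund & Montgomery; the concrete constants below are illustrative,
   not taken from this file): for unsigned N-bit division by D, choose a
   multiplier M close to 2^(N+S)/D such that

     x / D == (x * M) >> (N + S)   for every N-bit x.

   For N = 32 and D = 10 the usual choice is M = 0xCCCCCCCD with S = 3,
   i.e. q = (uint32_t) (((uint64_t) x * 0xCCCCCCCD) >> 35); for x = 12345
   this yields q = 1234.  A remainder, when requested, is then formed as
   x - q * D by one more (low-part) multiplication and a subtraction.  */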
3921 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3922 code below will malfunction if we are, so check here and handle
3923 the special case if so. */
3924 if (op1 == const1_rtx)
3925 return rem_flag ? const0_rtx : op0;
3927 /* When dividing by -1, we could get an overflow.
3928 negv_optab can handle overflows. */
3929 if (! unsignedp && op1 == constm1_rtx)
3931 if (rem_flag)
3932 return const0_rtx;
3933 return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
3934 ? negv_optab : neg_optab, op0, target, 0);
3937 if (target
3938 /* Don't use the function value register as a target
3939 since we have to read it as well as write it,
3940 and function-inlining gets confused by this. */
3941 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3942 /* Don't clobber an operand while doing a multi-step calculation. */
3943 || ((rem_flag || op1_is_constant)
3944 && (reg_mentioned_p (target, op0)
3945 || (MEM_P (op0) && MEM_P (target))))
3946 || reg_mentioned_p (target, op1)
3947 || (MEM_P (op1) && MEM_P (target))))
3948 target = 0;
3950 /* Get the mode in which to perform this computation. Normally it will
3951 be MODE, but sometimes we can't do the desired operation in MODE.
3952 If so, pick a wider mode in which we can do the operation. Convert
3953 to that mode at the start to avoid repeated conversions.
3955 First see what operations we need. These depend on the expression
3956 we are evaluating. (We assume that divxx3 insns exist under the
3957 same conditions as modxx3 insns and that these insns don't normally
3958 fail. If these assumptions are not correct, we may generate less
3959 efficient code in some cases.)
3961 Then see if we find a mode in which we can open-code that operation
3962 (either a division, modulus, or shift). Finally, check for the smallest
3963 mode for which we can do the operation with a library call. */
3965 /* We might want to refine this now that we have division-by-constant
3966 optimization. Since expmed_mult_highpart tries so many variants, it is
3967 not straightforward to generalize this. Maybe we should make an array
3968 of possible modes in init_expmed? Save this for GCC 2.7. */
3970 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3971 ? (unsignedp ? lshr_optab : ashr_optab)
3972 : (unsignedp ? udiv_optab : sdiv_optab));
3973 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3974 ? optab1
3975 : (unsignedp ? udivmod_optab : sdivmod_optab));
3977 for (compute_mode = mode; compute_mode != VOIDmode;
3978 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3979 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
3980 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
3981 break;
3983 if (compute_mode == VOIDmode)
3984 for (compute_mode = mode; compute_mode != VOIDmode;
3985 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3986 if (optab_libfunc (optab1, compute_mode)
3987 || optab_libfunc (optab2, compute_mode))
3988 break;
3990 /* If we still couldn't find a mode, use MODE, but expand_binop will
3991 probably die. */
3992 if (compute_mode == VOIDmode)
3993 compute_mode = mode;
3995 if (target && GET_MODE (target) == compute_mode)
3996 tquotient = target;
3997 else
3998 tquotient = gen_reg_rtx (compute_mode);
4000 size = GET_MODE_BITSIZE (compute_mode);
4001 #if 0
4002 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4003 (mode), and thereby get better code when OP1 is a constant. Do that
4004 later. It will require going over all usages of SIZE below. */
4005 size = GET_MODE_BITSIZE (mode);
4006 #endif
4008 /* Only deduct something for a REM if the last divide done was
4009 for a different constant. Then set the constant of the last
4010 divide. */
4011 max_cost = (unsignedp
4012 ? udiv_cost (speed, compute_mode)
4013 : sdiv_cost (speed, compute_mode));
4014 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4015 && INTVAL (op1) == last_div_const))
4016 max_cost -= (mul_cost (speed, compute_mode)
4017 + add_cost (speed, compute_mode));
4019 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4021 /* Now convert to the best mode to use. */
4022 if (compute_mode != mode)
4024 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4025 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4027 /* convert_modes may have placed op1 into a register, so we
4028 must recompute the following. */
4029 op1_is_constant = CONST_INT_P (op1);
4030 op1_is_pow2 = (op1_is_constant
4031 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4032 || (! unsignedp
4033 && EXACT_POWER_OF_2_OR_ZERO_P (-UINTVAL (op1))))));
4036 /* If one of the operands is a volatile MEM, copy it into a register. */
4038 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4039 op0 = force_reg (compute_mode, op0);
4040 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4041 op1 = force_reg (compute_mode, op1);
4043 /* If we need the remainder or if OP1 is constant, we need to
4044 put OP0 in a register in case it has any queued subexpressions. */
4045 if (rem_flag || op1_is_constant)
4046 op0 = force_reg (compute_mode, op0);
4048 last = get_last_insn ();
4050 /* Promote floor rounding to trunc rounding for unsigned operations. */
4051 if (unsignedp)
4053 if (code == FLOOR_DIV_EXPR)
4054 code = TRUNC_DIV_EXPR;
4055 if (code == FLOOR_MOD_EXPR)
4056 code = TRUNC_MOD_EXPR;
4057 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4058 code = TRUNC_DIV_EXPR;
4061 if (op1 != const0_rtx)
4062 switch (code)
4064 case TRUNC_MOD_EXPR:
4065 case TRUNC_DIV_EXPR:
4066 if (op1_is_constant)
4068 if (unsignedp)
4070 unsigned HOST_WIDE_INT mh, ml;
4071 int pre_shift, post_shift;
4072 int dummy;
4073 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4074 & GET_MODE_MASK (compute_mode));
4076 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4078 pre_shift = floor_log2 (d);
4079 if (rem_flag)
4081 unsigned HOST_WIDE_INT mask
4082 = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4083 remainder
4084 = expand_binop (compute_mode, and_optab, op0,
4085 gen_int_mode (mask, compute_mode),
4086 remainder, 1,
4087 OPTAB_LIB_WIDEN);
4088 if (remainder)
4089 return gen_lowpart (mode, remainder);
4091 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4092 pre_shift, tquotient, 1);
4094 else if (size <= HOST_BITS_PER_WIDE_INT)
4096 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4098 /* Most significant bit of divisor is set; emit an scc
4099 insn. */
4100 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4101 compute_mode, 1, 1);
4103 else
4105 /* Find a suitable multiplier and right shift count
4106 instead of multiplying with D. */
4108 mh = choose_multiplier (d, size, size,
4109 &ml, &post_shift, &dummy);
4111 /* If the suggested multiplier is more than SIZE bits,
4112 we can do better for even divisors, using an
4113 initial right shift. */
4114 if (mh != 0 && (d & 1) == 0)
4116 pre_shift = floor_log2 (d & -d);
4117 mh = choose_multiplier (d >> pre_shift, size,
4118 size - pre_shift,
4119 &ml, &post_shift, &dummy);
4120 gcc_assert (!mh);
4122 else
4123 pre_shift = 0;
4125 if (mh != 0)
4127 rtx t1, t2, t3, t4;
4129 if (post_shift - 1 >= BITS_PER_WORD)
4130 goto fail1;
4132 extra_cost
4133 = (shift_cost (speed, compute_mode, post_shift - 1)
4134 + shift_cost (speed, compute_mode, 1)
4135 + 2 * add_cost (speed, compute_mode));
4136 t1 = expmed_mult_highpart
4137 (compute_mode, op0,
4138 gen_int_mode (ml, compute_mode),
4139 NULL_RTX, 1, max_cost - extra_cost);
4140 if (t1 == 0)
4141 goto fail1;
4142 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4143 op0, t1),
4144 NULL_RTX);
4145 t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4146 t2, 1, NULL_RTX, 1);
4147 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4148 t1, t3),
4149 NULL_RTX);
4150 quotient = expand_shift
4151 (RSHIFT_EXPR, compute_mode, t4,
4152 post_shift - 1, tquotient, 1);
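/* Sketch of the sequence built just above, for the case where the chosen
   multiplier needs N + 1 bits (illustrative constants; D = 7, N = 32 is
   the classic example, with ML = 0x24924925 and POST_SHIFT = 3):

     t1 = high32 (x * 0x24924925);
     q  = (t1 + ((x - t1) >> 1)) >> 2;

   e.g. x = 100: t1 = 14, (100 - 14) >> 1 = 43, (14 + 43) >> 2 = 14,
   which is indeed 100 / 7.  The sum x + t1 could need 33 bits, so it is
   formed as t1 + ((x - t1) >> 1) and the final shift is reduced by one.  */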
4154 else
4156 rtx t1, t2;
4158 if (pre_shift >= BITS_PER_WORD
4159 || post_shift >= BITS_PER_WORD)
4160 goto fail1;
4162 t1 = expand_shift
4163 (RSHIFT_EXPR, compute_mode, op0,
4164 pre_shift, NULL_RTX, 1);
4165 extra_cost
4166 = (shift_cost (speed, compute_mode, pre_shift)
4167 + shift_cost (speed, compute_mode, post_shift));
4168 t2 = expmed_mult_highpart
4169 (compute_mode, t1,
4170 gen_int_mode (ml, compute_mode),
4171 NULL_RTX, 1, max_cost - extra_cost);
4172 if (t2 == 0)
4173 goto fail1;
4174 quotient = expand_shift
4175 (RSHIFT_EXPR, compute_mode, t2,
4176 post_shift, tquotient, 1);
4180 else /* Too wide mode to use tricky code */
4181 break;
4183 insn = get_last_insn ();
4184 if (insn != last)
4185 set_dst_reg_note (insn, REG_EQUAL,
4186 gen_rtx_UDIV (compute_mode, op0, op1),
4187 quotient);
4189 else /* TRUNC_DIV, signed */
4191 unsigned HOST_WIDE_INT ml;
4192 int lgup, post_shift;
4193 rtx mlr;
4194 HOST_WIDE_INT d = INTVAL (op1);
4195 unsigned HOST_WIDE_INT abs_d;
4197 /* Since d might be INT_MIN, we have to cast to
4198 unsigned HOST_WIDE_INT before negating to avoid
4199 undefined signed overflow. */
4200 abs_d = (d >= 0
4201 ? (unsigned HOST_WIDE_INT) d
4202 : - (unsigned HOST_WIDE_INT) d);
4204 /* n rem d = n rem -d */
4205 if (rem_flag && d < 0)
4207 d = abs_d;
4208 op1 = gen_int_mode (abs_d, compute_mode);
4211 if (d == 1)
4212 quotient = op0;
4213 else if (d == -1)
4214 quotient = expand_unop (compute_mode, neg_optab, op0,
4215 tquotient, 0);
4216 else if (HOST_BITS_PER_WIDE_INT >= size
4217 && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4219 /* This case is not handled correctly below. */
4220 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4221 compute_mode, 1, 1);
4222 if (quotient == 0)
4223 goto fail1;
4225 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4226 && (rem_flag
4227 ? smod_pow2_cheap (speed, compute_mode)
4228 : sdiv_pow2_cheap (speed, compute_mode))
4229 /* We assume that the cheap metric is true if the
4230 optab has an expander for this mode. */
4231 && ((optab_handler ((rem_flag ? smod_optab
4232 : sdiv_optab),
4233 compute_mode)
4234 != CODE_FOR_nothing)
4235 || (optab_handler (sdivmod_optab,
4236 compute_mode)
4237 != CODE_FOR_nothing)))
4239 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4241 if (rem_flag)
4243 remainder = expand_smod_pow2 (compute_mode, op0, d);
4244 if (remainder)
4245 return gen_lowpart (mode, remainder);
4248 if (sdiv_pow2_cheap (speed, compute_mode)
4249 && ((optab_handler (sdiv_optab, compute_mode)
4250 != CODE_FOR_nothing)
4251 || (optab_handler (sdivmod_optab, compute_mode)
4252 != CODE_FOR_nothing)))
4253 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4254 compute_mode, op0,
4255 gen_int_mode (abs_d,
4256 compute_mode),
4257 NULL_RTX, 0);
4258 else
4259 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4261 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4262 negate the quotient. */
4263 if (d < 0)
4265 insn = get_last_insn ();
4266 if (insn != last
4267 && abs_d < ((unsigned HOST_WIDE_INT) 1
4268 << (HOST_BITS_PER_WIDE_INT - 1)))
4269 set_dst_reg_note (insn, REG_EQUAL,
4270 gen_rtx_DIV (compute_mode, op0,
4271 gen_int_mode
4272 (abs_d,
4273 compute_mode)),
4274 quotient);
4276 quotient = expand_unop (compute_mode, neg_optab,
4277 quotient, quotient, 0);
4280 else if (size <= HOST_BITS_PER_WIDE_INT)
4282 choose_multiplier (abs_d, size, size - 1,
4283 &ml, &post_shift, &lgup);
4284 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4286 rtx t1, t2, t3;
4288 if (post_shift >= BITS_PER_WORD
4289 || size - 1 >= BITS_PER_WORD)
4290 goto fail1;
4292 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4293 + shift_cost (speed, compute_mode, size - 1)
4294 + add_cost (speed, compute_mode));
4295 t1 = expmed_mult_highpart
4296 (compute_mode, op0, gen_int_mode (ml, compute_mode),
4297 NULL_RTX, 0, max_cost - extra_cost);
4298 if (t1 == 0)
4299 goto fail1;
4300 t2 = expand_shift
4301 (RSHIFT_EXPR, compute_mode, t1,
4302 post_shift, NULL_RTX, 0);
4303 t3 = expand_shift
4304 (RSHIFT_EXPR, compute_mode, op0,
4305 size - 1, NULL_RTX, 0);
4306 if (d < 0)
4307 quotient
4308 = force_operand (gen_rtx_MINUS (compute_mode,
4309 t3, t2),
4310 tquotient);
4311 else
4312 quotient
4313 = force_operand (gen_rtx_MINUS (compute_mode,
4314 t2, t3),
4315 tquotient);
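/* Sketch of the signed sequence built just above (illustrative constants;
   D = 5, N = 32 is the textbook case, with ML = 0x66666667 and
   POST_SHIFT = 1):

     t1 = high32_signed (x * 0x66666667);
     q  = (t1 >> 1) - (x >> 31);

   Subtracting the sign word (x >> 31, i.e. 0 or -1) rounds the result
   toward zero, as TRUNC_DIV_EXPR requires: for x = -7 this gives
   (-3 >> 1) - (-1) = -2 + 1 = -1, which is -7 / 5 in C.  */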
4317 else
4319 rtx t1, t2, t3, t4;
4321 if (post_shift >= BITS_PER_WORD
4322 || size - 1 >= BITS_PER_WORD)
4323 goto fail1;
4325 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4326 mlr = gen_int_mode (ml, compute_mode);
4327 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4328 + shift_cost (speed, compute_mode, size - 1)
4329 + 2 * add_cost (speed, compute_mode));
4330 t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4331 NULL_RTX, 0,
4332 max_cost - extra_cost);
4333 if (t1 == 0)
4334 goto fail1;
4335 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4336 t1, op0),
4337 NULL_RTX);
4338 t3 = expand_shift
4339 (RSHIFT_EXPR, compute_mode, t2,
4340 post_shift, NULL_RTX, 0);
4341 t4 = expand_shift
4342 (RSHIFT_EXPR, compute_mode, op0,
4343 size - 1, NULL_RTX, 0);
4344 if (d < 0)
4345 quotient
4346 = force_operand (gen_rtx_MINUS (compute_mode,
4347 t4, t3),
4348 tquotient);
4349 else
4350 quotient
4351 = force_operand (gen_rtx_MINUS (compute_mode,
4352 t3, t4),
4353 tquotient);
4356 else /* Too wide mode to use tricky code */
4357 break;
4359 insn = get_last_insn ();
4360 if (insn != last)
4361 set_dst_reg_note (insn, REG_EQUAL,
4362 gen_rtx_DIV (compute_mode, op0, op1),
4363 quotient);
4365 break;
4367 fail1:
4368 delete_insns_since (last);
4369 break;
4371 case FLOOR_DIV_EXPR:
4372 case FLOOR_MOD_EXPR:
4373 /* We will come here only for signed operations. */
4374 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4376 unsigned HOST_WIDE_INT mh, ml;
4377 int pre_shift, lgup, post_shift;
4378 HOST_WIDE_INT d = INTVAL (op1);
4380 if (d > 0)
4382 /* We could just as easily deal with negative constants here,
4383 but it does not seem worth the trouble for GCC 2.6. */
4384 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4386 pre_shift = floor_log2 (d);
4387 if (rem_flag)
4389 unsigned HOST_WIDE_INT mask
4390 = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4391 remainder = expand_binop
4392 (compute_mode, and_optab, op0,
4393 gen_int_mode (mask, compute_mode),
4394 remainder, 0, OPTAB_LIB_WIDEN);
4395 if (remainder)
4396 return gen_lowpart (mode, remainder);
4398 quotient = expand_shift
4399 (RSHIFT_EXPR, compute_mode, op0,
4400 pre_shift, tquotient, 0);
4402 else
4404 rtx t1, t2, t3, t4;
4406 mh = choose_multiplier (d, size, size - 1,
4407 &ml, &post_shift, &lgup);
4408 gcc_assert (!mh);
4410 if (post_shift < BITS_PER_WORD
4411 && size - 1 < BITS_PER_WORD)
4413 t1 = expand_shift
4414 (RSHIFT_EXPR, compute_mode, op0,
4415 size - 1, NULL_RTX, 0);
4416 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4417 NULL_RTX, 0, OPTAB_WIDEN);
4418 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4419 + shift_cost (speed, compute_mode, size - 1)
4420 + 2 * add_cost (speed, compute_mode));
4421 t3 = expmed_mult_highpart
4422 (compute_mode, t2, gen_int_mode (ml, compute_mode),
4423 NULL_RTX, 1, max_cost - extra_cost);
4424 if (t3 != 0)
4426 t4 = expand_shift
4427 (RSHIFT_EXPR, compute_mode, t3,
4428 post_shift, NULL_RTX, 1);
4429 quotient = expand_binop (compute_mode, xor_optab,
4430 t4, t1, tquotient, 0,
4431 OPTAB_WIDEN);
4436 else
4438 rtx nsign, t1, t2, t3, t4;
4439 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4440 op0, constm1_rtx), NULL_RTX);
4441 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4442 0, OPTAB_WIDEN);
4443 nsign = expand_shift
4444 (RSHIFT_EXPR, compute_mode, t2,
4445 size - 1, NULL_RTX, 0);
4446 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4447 NULL_RTX);
4448 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4449 NULL_RTX, 0);
4450 if (t4)
4452 rtx t5;
4453 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4454 NULL_RTX, 0);
4455 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4456 t4, t5),
4457 tquotient);
4462 if (quotient != 0)
4463 break;
4464 delete_insns_since (last);
4466 /* Try using an instruction that produces both the quotient and
4467 remainder, using truncation. We can easily compensate the quotient
4468 or remainder to get floor rounding, once we have the remainder.
4469 Notice that we compute also the final remainder value here,
4470 and return the result right away. */
4471 if (target == 0 || GET_MODE (target) != compute_mode)
4472 target = gen_reg_rtx (compute_mode);
4474 if (rem_flag)
4476 remainder
4477 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4478 quotient = gen_reg_rtx (compute_mode);
4480 else
4482 quotient
4483 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4484 remainder = gen_reg_rtx (compute_mode);
4487 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4488 quotient, remainder, 0))
4490 /* This could be computed with a branch-less sequence.
4491 Save that for later. */
4492 rtx tem;
4493 rtx_code_label *label = gen_label_rtx ();
4494 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4495 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4496 NULL_RTX, 0, OPTAB_WIDEN);
4497 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4498 expand_dec (quotient, const1_rtx);
4499 expand_inc (remainder, op1);
4500 emit_label (label);
4501 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4504 /* No luck with division elimination or divmod. Have to do it
4505 by conditionally adjusting op0 *and* the result. */
4507 rtx_code_label *label1, *label2, *label3, *label4, *label5;
4508 rtx adjusted_op0;
4509 rtx tem;
4511 quotient = gen_reg_rtx (compute_mode);
4512 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4513 label1 = gen_label_rtx ();
4514 label2 = gen_label_rtx ();
4515 label3 = gen_label_rtx ();
4516 label4 = gen_label_rtx ();
4517 label5 = gen_label_rtx ();
4518 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4519 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4520 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4521 quotient, 0, OPTAB_LIB_WIDEN);
4522 if (tem != quotient)
4523 emit_move_insn (quotient, tem);
4524 emit_jump_insn (targetm.gen_jump (label5));
4525 emit_barrier ();
4526 emit_label (label1);
4527 expand_inc (adjusted_op0, const1_rtx);
4528 emit_jump_insn (targetm.gen_jump (label4));
4529 emit_barrier ();
4530 emit_label (label2);
4531 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4532 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4533 quotient, 0, OPTAB_LIB_WIDEN);
4534 if (tem != quotient)
4535 emit_move_insn (quotient, tem);
4536 emit_jump_insn (targetm.gen_jump (label5));
4537 emit_barrier ();
4538 emit_label (label3);
4539 expand_dec (adjusted_op0, const1_rtx);
4540 emit_label (label4);
4541 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4542 quotient, 0, OPTAB_LIB_WIDEN);
4543 if (tem != quotient)
4544 emit_move_insn (quotient, tem);
4545 expand_dec (quotient, const1_rtx);
4546 emit_label (label5);
4548 break;
4550 case CEIL_DIV_EXPR:
4551 case CEIL_MOD_EXPR:
4552 if (unsignedp)
4554 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4556 rtx t1, t2, t3;
4557 unsigned HOST_WIDE_INT d = INTVAL (op1);
4558 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4559 floor_log2 (d), tquotient, 1);
4560 t2 = expand_binop (compute_mode, and_optab, op0,
4561 gen_int_mode (d - 1, compute_mode),
4562 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4563 t3 = gen_reg_rtx (compute_mode);
4564 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4565 compute_mode, 1, 1);
4566 if (t3 == 0)
4568 rtx_code_label *lab;
4569 lab = gen_label_rtx ();
4570 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4571 expand_inc (t1, const1_rtx);
4572 emit_label (lab);
4573 quotient = t1;
4575 else
4576 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4577 t1, t3),
4578 tquotient);
4579 break;
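/* The power-of-two case above is just the identity (a sketch, for
   D = 2^K): ceil (x / D) == (x >> K) + ((x & (D - 1)) != 0), e.g.
   ceil (9 / 4) == (9 >> 2) + 1 == 3; the store-flag (or, failing that,
   the compare-and-increment) supplies the "remainder was nonzero" bit.  */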
4582 /* Try using an instruction that produces both the quotient and
4583 remainder, using truncation. We can easily compensate the
4584 quotient or remainder to get ceiling rounding, once we have the
4585 remainder. Notice that we compute also the final remainder
4586 value here, and return the result right away. */
4587 if (target == 0 || GET_MODE (target) != compute_mode)
4588 target = gen_reg_rtx (compute_mode);
4590 if (rem_flag)
4592 remainder = (REG_P (target)
4593 ? target : gen_reg_rtx (compute_mode));
4594 quotient = gen_reg_rtx (compute_mode);
4596 else
4598 quotient = (REG_P (target)
4599 ? target : gen_reg_rtx (compute_mode));
4600 remainder = gen_reg_rtx (compute_mode);
4603 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4604 remainder, 1))
4606 /* This could be computed with a branch-less sequence.
4607 Save that for later. */
4608 rtx_code_label *label = gen_label_rtx ();
4609 do_cmp_and_jump (remainder, const0_rtx, EQ,
4610 compute_mode, label);
4611 expand_inc (quotient, const1_rtx);
4612 expand_dec (remainder, op1);
4613 emit_label (label);
4614 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4617 /* No luck with division elimination or divmod. Have to do it
4618 by conditionally adjusting op0 *and* the result. */
4620 rtx_code_label *label1, *label2;
4621 rtx adjusted_op0, tem;
4623 quotient = gen_reg_rtx (compute_mode);
4624 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4625 label1 = gen_label_rtx ();
4626 label2 = gen_label_rtx ();
4627 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4628 compute_mode, label1);
4629 emit_move_insn (quotient, const0_rtx);
4630 emit_jump_insn (targetm.gen_jump (label2));
4631 emit_barrier ();
4632 emit_label (label1);
4633 expand_dec (adjusted_op0, const1_rtx);
4634 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4635 quotient, 1, OPTAB_LIB_WIDEN);
4636 if (tem != quotient)
4637 emit_move_insn (quotient, tem);
4638 expand_inc (quotient, const1_rtx);
4639 emit_label (label2);
4642 else /* signed */
4644 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4645 && INTVAL (op1) >= 0)
4647 /* This is extremely similar to the code for the unsigned case
4648 above. For 2.7 we should merge these variants, but for
4649 2.6.1 I don't want to touch the code for unsigned since that
4650 gets used in C. The signed case will only be used by other
4651 languages (Ada). */
4653 rtx t1, t2, t3;
4654 unsigned HOST_WIDE_INT d = INTVAL (op1);
4655 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4656 floor_log2 (d), tquotient, 0);
4657 t2 = expand_binop (compute_mode, and_optab, op0,
4658 gen_int_mode (d - 1, compute_mode),
4659 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4660 t3 = gen_reg_rtx (compute_mode);
4661 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4662 compute_mode, 1, 1);
4663 if (t3 == 0)
4665 rtx_code_label *lab;
4666 lab = gen_label_rtx ();
4667 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4668 expand_inc (t1, const1_rtx);
4669 emit_label (lab);
4670 quotient = t1;
4672 else
4673 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4674 t1, t3),
4675 tquotient);
4676 break;
4679 /* Try using an instruction that produces both the quotient and
4680 remainder, using truncation. We can easily compensate the
4681 quotient or remainder to get ceiling rounding, once we have the
4682 remainder. Notice that we compute also the final remainder
4683 value here, and return the result right away. */
4684 if (target == 0 || GET_MODE (target) != compute_mode)
4685 target = gen_reg_rtx (compute_mode);
4686 if (rem_flag)
4688 remainder = (REG_P (target)
4689 ? target : gen_reg_rtx (compute_mode));
4690 quotient = gen_reg_rtx (compute_mode);
4692 else
4694 quotient = (REG_P (target)
4695 ? target : gen_reg_rtx (compute_mode));
4696 remainder = gen_reg_rtx (compute_mode);
4699 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4700 remainder, 0))
4702 /* This could be computed with a branch-less sequence.
4703 Save that for later. */
4704 rtx tem;
4705 rtx_code_label *label = gen_label_rtx ();
4706 do_cmp_and_jump (remainder, const0_rtx, EQ,
4707 compute_mode, label);
4708 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4709 NULL_RTX, 0, OPTAB_WIDEN);
4710 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4711 expand_inc (quotient, const1_rtx);
4712 expand_dec (remainder, op1);
4713 emit_label (label);
4714 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4717 /* No luck with division elimination or divmod. Have to do it
4718 by conditionally adjusting op0 *and* the result. */
4720 rtx_code_label *label1, *label2, *label3, *label4, *label5;
4721 rtx adjusted_op0;
4722 rtx tem;
4724 quotient = gen_reg_rtx (compute_mode);
4725 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4726 label1 = gen_label_rtx ();
4727 label2 = gen_label_rtx ();
4728 label3 = gen_label_rtx ();
4729 label4 = gen_label_rtx ();
4730 label5 = gen_label_rtx ();
4731 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4732 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4733 compute_mode, label1);
4734 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4735 quotient, 0, OPTAB_LIB_WIDEN);
4736 if (tem != quotient)
4737 emit_move_insn (quotient, tem);
4738 emit_jump_insn (targetm.gen_jump (label5));
4739 emit_barrier ();
4740 emit_label (label1);
4741 expand_dec (adjusted_op0, const1_rtx);
4742 emit_jump_insn (targetm.gen_jump (label4));
4743 emit_barrier ();
4744 emit_label (label2);
4745 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4746 compute_mode, label3);
4747 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4748 quotient, 0, OPTAB_LIB_WIDEN);
4749 if (tem != quotient)
4750 emit_move_insn (quotient, tem);
4751 emit_jump_insn (targetm.gen_jump (label5));
4752 emit_barrier ();
4753 emit_label (label3);
4754 expand_inc (adjusted_op0, const1_rtx);
4755 emit_label (label4);
4756 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4757 quotient, 0, OPTAB_LIB_WIDEN);
4758 if (tem != quotient)
4759 emit_move_insn (quotient, tem);
4760 expand_inc (quotient, const1_rtx);
4761 emit_label (label5);
4764 break;
4766 case EXACT_DIV_EXPR:
4767 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4769 HOST_WIDE_INT d = INTVAL (op1);
4770 unsigned HOST_WIDE_INT ml;
4771 int pre_shift;
4772 rtx t1;
4774 pre_shift = floor_log2 (d & -d);
4775 ml = invert_mod2n (d >> pre_shift, size);
4776 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4777 pre_shift, NULL_RTX, unsignedp);
4778 quotient = expand_mult (compute_mode, t1,
4779 gen_int_mode (ml, compute_mode),
4780 NULL_RTX, 1);
4782 insn = get_last_insn ();
4783 set_dst_reg_note (insn, REG_EQUAL,
4784 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4785 compute_mode, op0, op1),
4786 quotient);
4788 break;
4790 case ROUND_DIV_EXPR:
4791 case ROUND_MOD_EXPR:
4792 if (unsignedp)
4794 rtx tem;
4795 rtx_code_label *label;
4796 label = gen_label_rtx ();
4797 quotient = gen_reg_rtx (compute_mode);
4798 remainder = gen_reg_rtx (compute_mode);
4799 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4801 rtx tem;
4802 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4803 quotient, 1, OPTAB_LIB_WIDEN);
4804 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4805 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4806 remainder, 1, OPTAB_LIB_WIDEN);
4808 tem = plus_constant (compute_mode, op1, -1);
4809 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4810 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4811 expand_inc (quotient, const1_rtx);
4812 expand_dec (remainder, op1);
4813 emit_label (label);
4815 else
4817 rtx abs_rem, abs_op1, tem, mask;
4818 rtx_code_label *label;
4819 label = gen_label_rtx ();
4820 quotient = gen_reg_rtx (compute_mode);
4821 remainder = gen_reg_rtx (compute_mode);
4822 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4824 rtx tem;
4825 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4826 quotient, 0, OPTAB_LIB_WIDEN);
4827 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4828 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4829 remainder, 0, OPTAB_LIB_WIDEN);
4831 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4832 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4833 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4834 1, NULL_RTX, 1);
4835 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4836 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4837 NULL_RTX, 0, OPTAB_WIDEN);
4838 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4839 size - 1, NULL_RTX, 0);
4840 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4841 NULL_RTX, 0, OPTAB_WIDEN);
4842 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4843 NULL_RTX, 0, OPTAB_WIDEN);
4844 expand_inc (quotient, tem);
4845 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4846 NULL_RTX, 0, OPTAB_WIDEN);
4847 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4848 NULL_RTX, 0, OPTAB_WIDEN);
4849 expand_dec (remainder, tem);
4850 emit_label (label);
4852 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4854 default:
4855 gcc_unreachable ();
4858 if (quotient == 0)
4860 if (target && GET_MODE (target) != compute_mode)
4861 target = 0;
4863 if (rem_flag)
4865 /* Try to produce the remainder without producing the quotient.
4866 If we seem to have a divmod pattern that does not require widening,
4867 don't try widening here. We should really have a WIDEN argument
4868 to expand_twoval_binop, since what we'd really like to do here is
4869 1) try a mod insn in compute_mode
4870 2) try a divmod insn in compute_mode
4871 3) try a div insn in compute_mode and multiply-subtract to get
4872 remainder
4873 4) try the same things with widening allowed. */
4874 remainder
4875 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4876 op0, op1, target,
4877 unsignedp,
4878 ((optab_handler (optab2, compute_mode)
4879 != CODE_FOR_nothing)
4880 ? OPTAB_DIRECT : OPTAB_WIDEN));
4881 if (remainder == 0)
4883 /* No luck there. Can we do remainder and divide at once
4884 without a library call? */
4885 remainder = gen_reg_rtx (compute_mode);
4886 if (! expand_twoval_binop ((unsignedp
4887 ? udivmod_optab
4888 : sdivmod_optab),
4889 op0, op1,
4890 NULL_RTX, remainder, unsignedp))
4891 remainder = 0;
4894 if (remainder)
4895 return gen_lowpart (mode, remainder);
4898 /* Produce the quotient. Try a quotient insn, but not a library call.
4899 If we have a divmod in this mode, use it in preference to widening
4900 the div (for this test we assume it will not fail). Note that optab2
4901 is set to the one of the two optabs that the call below will use. */
4902 quotient
4903 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4904 op0, op1, rem_flag ? NULL_RTX : target,
4905 unsignedp,
4906 ((optab_handler (optab2, compute_mode)
4907 != CODE_FOR_nothing)
4908 ? OPTAB_DIRECT : OPTAB_WIDEN));
4910 if (quotient == 0)
4912 /* No luck there. Try a quotient-and-remainder insn,
4913 keeping the quotient alone. */
4914 quotient = gen_reg_rtx (compute_mode);
4915 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4916 op0, op1,
4917 quotient, NULL_RTX, unsignedp))
4919 quotient = 0;
4920 if (! rem_flag)
4921 /* Still no luck. If we are not computing the remainder,
4922 use a library call for the quotient. */
4923 quotient = sign_expand_binop (compute_mode,
4924 udiv_optab, sdiv_optab,
4925 op0, op1, target,
4926 unsignedp, OPTAB_LIB_WIDEN);
4931 if (rem_flag)
4933 if (target && GET_MODE (target) != compute_mode)
4934 target = 0;
4936 if (quotient == 0)
4938 /* No divide instruction either. Use library for remainder. */
4939 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4940 op0, op1, target,
4941 unsignedp, OPTAB_LIB_WIDEN);
4942 /* No remainder function. Try a quotient-and-remainder
4943 function, keeping the remainder. */
4944 if (!remainder)
4946 remainder = gen_reg_rtx (compute_mode);
4947 if (!expand_twoval_binop_libfunc
4948 (unsignedp ? udivmod_optab : sdivmod_optab,
4949 op0, op1,
4950 NULL_RTX, remainder,
4951 unsignedp ? UMOD : MOD))
4952 remainder = NULL_RTX;
4955 else
4957 /* We divided. Now finish doing X - Y * (X / Y). */
4958 remainder = expand_mult (compute_mode, quotient, op1,
4959 NULL_RTX, unsignedp);
4960 remainder = expand_binop (compute_mode, sub_optab, op0,
4961 remainder, target, unsignedp,
4962 OPTAB_LIB_WIDEN);
4966 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4969 /* Return a tree node with data type TYPE, describing the value of X.
4970 Usually this is a VAR_DECL, if there is no obvious better choice.
4971 X may be an expression; however, we only support those expressions
4972 generated by loop.c. */
4974 tree
4975 make_tree (tree type, rtx x)
4977 tree t;
4979 switch (GET_CODE (x))
4981 case CONST_INT:
4982 case CONST_WIDE_INT:
4983 t = wide_int_to_tree (type, std::make_pair (x, TYPE_MODE (type)));
4984 return t;
4986 case CONST_DOUBLE:
4987 STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
4988 if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
4989 t = wide_int_to_tree (type,
4990 wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
4991 HOST_BITS_PER_WIDE_INT * 2));
4992 else
4993 t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
4995 return t;
4997 case CONST_VECTOR:
4999 int units = CONST_VECTOR_NUNITS (x);
5000 tree itype = TREE_TYPE (type);
5001 tree *elts;
5002 int i;
5004 /* Build a tree with vector elements. */
5005 elts = XALLOCAVEC (tree, units);
5006 for (i = units - 1; i >= 0; --i)
5008 rtx elt = CONST_VECTOR_ELT (x, i);
5009 elts[i] = make_tree (itype, elt);
5012 return build_vector (type, elts);
5015 case PLUS:
5016 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5017 make_tree (type, XEXP (x, 1)));
5019 case MINUS:
5020 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5021 make_tree (type, XEXP (x, 1)));
5023 case NEG:
5024 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5026 case MULT:
5027 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5028 make_tree (type, XEXP (x, 1)));
5030 case ASHIFT:
5031 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5032 make_tree (type, XEXP (x, 1)));
5034 case LSHIFTRT:
5035 t = unsigned_type_for (type);
5036 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5037 make_tree (t, XEXP (x, 0)),
5038 make_tree (type, XEXP (x, 1))));
5040 case ASHIFTRT:
5041 t = signed_type_for (type);
5042 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5043 make_tree (t, XEXP (x, 0)),
5044 make_tree (type, XEXP (x, 1))));
5046 case DIV:
5047 if (TREE_CODE (type) != REAL_TYPE)
5048 t = signed_type_for (type);
5049 else
5050 t = type;
5052 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5053 make_tree (t, XEXP (x, 0)),
5054 make_tree (t, XEXP (x, 1))));
5055 case UDIV:
5056 t = unsigned_type_for (type);
5057 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5058 make_tree (t, XEXP (x, 0)),
5059 make_tree (t, XEXP (x, 1))));
5061 case SIGN_EXTEND:
5062 case ZERO_EXTEND:
5063 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5064 GET_CODE (x) == ZERO_EXTEND);
5065 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5067 case CONST:
5068 return make_tree (type, XEXP (x, 0));
5070 case SYMBOL_REF:
5071 t = SYMBOL_REF_DECL (x);
5072 if (t)
5073 return fold_convert (type, build_fold_addr_expr (t));
5074 /* else fall through. */
5076 default:
5077 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5079 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5080 address mode to pointer mode. */
5081 if (POINTER_TYPE_P (type))
5082 x = convert_memory_address_addr_space
5083 (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5085 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5086 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5087 t->decl_with_rtl.rtl = x;
5089 return t;
5093 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5094 and returning TARGET.
5096 If TARGET is 0, a pseudo-register or constant is returned. */
5099 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5101 rtx tem = 0;
5103 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5104 tem = simplify_binary_operation (AND, mode, op0, op1);
5105 if (tem == 0)
5106 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5108 if (target == 0)
5109 target = tem;
5110 else if (tem != target)
5111 emit_move_insn (target, tem);
5112 return target;
5115 /* Helper function for emit_store_flag. */
5117 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5118 machine_mode mode, machine_mode compare_mode,
5119 int unsignedp, rtx x, rtx y, int normalizep,
5120 machine_mode target_mode)
5122 struct expand_operand ops[4];
5123 rtx op0, comparison, subtarget;
5124 rtx_insn *last;
5125 machine_mode result_mode = targetm.cstore_mode (icode);
5127 last = get_last_insn ();
5128 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5129 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5130 if (!x || !y)
5132 delete_insns_since (last);
5133 return NULL_RTX;
5136 if (target_mode == VOIDmode)
5137 target_mode = result_mode;
5138 if (!target)
5139 target = gen_reg_rtx (target_mode);
5141 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5143 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5144 create_fixed_operand (&ops[1], comparison);
5145 create_fixed_operand (&ops[2], x);
5146 create_fixed_operand (&ops[3], y);
5147 if (!maybe_expand_insn (icode, 4, ops))
5149 delete_insns_since (last);
5150 return NULL_RTX;
5152 subtarget = ops[0].value;
5154 /* If we are converting to a wider mode, first convert to
5155 TARGET_MODE, then normalize. This produces better combining
5156 opportunities on machines that have a SIGN_EXTRACT when we are
5157 testing a single bit. This mostly benefits the 68k.
5159 If STORE_FLAG_VALUE does not have the sign bit set when
5160 interpreted in MODE, we can do this conversion as unsigned, which
5161 is usually more efficient. */
5162 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5164 convert_move (target, subtarget,
5165 val_signbit_known_clear_p (result_mode,
5166 STORE_FLAG_VALUE));
5167 op0 = target;
5168 result_mode = target_mode;
5170 else
5171 op0 = subtarget;
5173 /* If we want to keep subexpressions around, don't reuse our last
5174 target. */
5175 if (optimize)
5176 subtarget = 0;
5178 /* Now normalize to the proper value in MODE. Sometimes we don't
5179 have to do anything. */
5180 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5182 /* STORE_FLAG_VALUE might be the most negative number, so write
5183 the comparison this way to avoid a compile-time warning. */
5184 else if (- normalizep == STORE_FLAG_VALUE)
5185 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5187 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5188 it hard to use a value of just the sign bit due to ANSI integer
5189 constant typing rules. */
5190 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5191 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5192 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5193 normalizep == 1);
5194 else
5196 gcc_assert (STORE_FLAG_VALUE & 1);
5198 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5199 if (normalizep == -1)
5200 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5203 /* If we were converting to a smaller mode, do the conversion now. */
5204 if (target_mode != result_mode)
5206 convert_move (target, op0, 0);
5207 return target;
5209 else
5210 return op0;
5214 /* A subroutine of emit_store_flag only including "tricks" that do not
5215 need a recursive call. These are kept separate to avoid infinite
5216 loops. */
5218 static rtx
5219 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5220 machine_mode mode, int unsignedp, int normalizep,
5221 machine_mode target_mode)
5223 rtx subtarget;
5224 enum insn_code icode;
5225 machine_mode compare_mode;
5226 enum mode_class mclass;
5227 enum rtx_code scode;
5229 if (unsignedp)
5230 code = unsigned_condition (code);
5231 scode = swap_condition (code);
5233 /* If one operand is constant, make it the second one. Only do this
5234 if the other operand is not constant as well. */
5236 if (swap_commutative_operands_p (op0, op1))
5238 std::swap (op0, op1);
5239 code = swap_condition (code);
5242 if (mode == VOIDmode)
5243 mode = GET_MODE (op0);
5245 /* For some comparisons with 1 and -1, we can convert this to
5246 comparisons with zero. This will often produce more opportunities for
5247 store-flag insns. */
5249 switch (code)
5251 case LT:
5252 if (op1 == const1_rtx)
5253 op1 = const0_rtx, code = LE;
5254 break;
5255 case LE:
5256 if (op1 == constm1_rtx)
5257 op1 = const0_rtx, code = LT;
5258 break;
5259 case GE:
5260 if (op1 == const1_rtx)
5261 op1 = const0_rtx, code = GT;
5262 break;
5263 case GT:
5264 if (op1 == constm1_rtx)
5265 op1 = const0_rtx, code = GE;
5266 break;
5267 case GEU:
5268 if (op1 == const1_rtx)
5269 op1 = const0_rtx, code = NE;
5270 break;
5271 case LTU:
5272 if (op1 == const1_rtx)
5273 op1 = const0_rtx, code = EQ;
5274 break;
5275 default:
5276 break;
5279 /* If we are comparing a double-word integer with zero or -1, we can
5280 convert the comparison into one involving a single word. */
5281 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5282 && GET_MODE_CLASS (mode) == MODE_INT
5283 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5285 rtx tem;
5286 if ((code == EQ || code == NE)
5287 && (op1 == const0_rtx || op1 == constm1_rtx))
5289 rtx op00, op01;
5291 /* Do a logical OR or AND of the two words and compare the
5292 result. */
5293 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5294 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5295 tem = expand_binop (word_mode,
5296 op1 == const0_rtx ? ior_optab : and_optab,
5297 op00, op01, NULL_RTX, unsignedp,
5298 OPTAB_DIRECT);
5300 if (tem != 0)
5301 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5302 unsignedp, normalizep);
5303 }
5304 else if ((code == LT || code == GE) && op1 == const0_rtx)
5305 {
5306 rtx op0h;
5308 /* If testing the sign bit, can just test on high word. */
5309 op0h = simplify_gen_subreg (word_mode, op0, mode,
5310 subreg_highpart_offset (word_mode,
5311 mode));
5312 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5313 unsignedp, normalizep);
5314 }
5315 else
5316 tem = NULL_RTX;
5318 if (tem)
5319 {
5320 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5321 return tem;
5322 if (!target)
5323 target = gen_reg_rtx (target_mode);
5325 convert_move (target, tem,
5326 !val_signbit_known_set_p (word_mode,
5327 (normalizep ? normalizep
5328 : STORE_FLAG_VALUE)));
5329 return target;
5330 }
5331 }
5333 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5334 complement of A (for GE) and shifting the sign bit to the low bit. */
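/* For instance, in a 32-bit mode a 0/1 result for "A < 0" is just the
   logical shift "(unsigned) A >> 31", and "A >= 0" is
   "(unsigned) ~A >> 31"; for a 0/-1 result the shift is arithmetic
   instead.  */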
5335 if (op1 == const0_rtx && (code == LT || code == GE)
5336 && GET_MODE_CLASS (mode) == MODE_INT
5337 && (normalizep || STORE_FLAG_VALUE == 1
5338 || val_signbit_p (mode, STORE_FLAG_VALUE)))
5339 {
5340 subtarget = target;
5342 if (!target)
5343 target_mode = mode;
5345 /* If the result is to be wider than OP0, it is best to convert it
5346 first. If it is to be narrower, it is *incorrect* to convert it
5347 first. */
5348 else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5349 {
5350 op0 = convert_modes (target_mode, mode, op0, 0);
5351 mode = target_mode;
5352 }
5354 if (target_mode != mode)
5355 subtarget = 0;
5357 if (code == GE)
5358 op0 = expand_unop (mode, one_cmpl_optab, op0,
5359 ((STORE_FLAG_VALUE == 1 || normalizep)
5360 ? 0 : subtarget), 0);
5362 if (STORE_FLAG_VALUE == 1 || normalizep)
5363 /* If we are supposed to produce a 0/1 value, we want to do
5364 a logical shift from the sign bit to the low-order bit; for
5365 a -1/0 value, we do an arithmetic shift. */
5366 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5367 GET_MODE_BITSIZE (mode) - 1,
5368 subtarget, normalizep != -1);
5370 if (mode != target_mode)
5371 op0 = convert_modes (target_mode, mode, op0, 0);
5373 return op0;
5374 }
5376 mclass = GET_MODE_CLASS (mode);
5377 for (compare_mode = mode; compare_mode != VOIDmode;
5378 compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5379 {
5380 machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5381 icode = optab_handler (cstore_optab, optab_mode);
5382 if (icode != CODE_FOR_nothing)
5383 {
5384 do_pending_stack_adjust ();
5385 rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5386 unsignedp, op0, op1, normalizep, target_mode);
5387 if (tem)
5388 return tem;
5390 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5391 {
5392 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5393 unsignedp, op1, op0, normalizep, target_mode);
5394 if (tem)
5395 return tem;
5396 }
5397 break;
5398 }
5399 }
5401 return 0;
5402 }
5404 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5405 and storing in TARGET. Normally return TARGET.
5406 Return 0 if that cannot be done.
5408 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5409 it is VOIDmode, they cannot both be CONST_INT.
5411 UNSIGNEDP is for the case where we have to widen the operands
5412 to perform the operation. It says to use zero-extension.
5414 NORMALIZEP is 1 if we should convert the result to be either zero
5415 or one. NORMALIZEP is -1 if we should convert the result to be
5416 either zero or -1. If NORMALIZEP is zero, the result will be left
5417 "raw" out of the scc insn. */
5419 rtx
5420 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5421 machine_mode mode, int unsignedp, int normalizep)
5422 {
5423 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5424 enum rtx_code rcode;
5425 rtx subtarget;
5426 rtx tem, trueval;
5427 rtx_insn *last;
5429 /* If we compare constants, we shouldn't use a store-flag operation,
5430 but a constant load. We can get there via the vanilla route that
5431 usually generates a compare-branch sequence, but will in this case
5432 fold the comparison to a constant, and thus elide the branch. */
5433 if (CONSTANT_P (op0) && CONSTANT_P (op1))
5434 return NULL_RTX;
5436 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5437 target_mode);
5438 if (tem)
5439 return tem;
5441 /* If we reached here, we can't do this with a scc insn, however there
5442 are some comparisons that can be done in other ways. Don't do any
5443 of these cases if branches are very cheap. */
5444 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5445 return 0;
5447 /* See what we need to return. We can only return a 1, -1, or the
5448 sign bit. */
5450 if (normalizep == 0)
5451 {
5452 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5453 normalizep = STORE_FLAG_VALUE;
5455 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5456 ;
5457 else
5458 return 0;
5459 }
5461 last = get_last_insn ();
5463 /* If optimizing, use different pseudo registers for each insn, instead
5464 of reusing the same pseudo. This leads to better CSE, but slows
5465 down the compiler, since there are more pseudos. */
5466 subtarget = (!optimize
5467 && (target_mode == mode)) ? target : NULL_RTX;
5468 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5470 /* For floating-point comparisons, try the reverse comparison or try
5471 changing the "orderedness" of the comparison. */
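/* For example, "UNORDERED (a, b)" can be computed as "ORDERED (a, b)"
   followed by inverting the flag, either with an XOR against the "true"
   value or with an addition of NORMALIZEP when the raw and requested
   flag values are 1 and -1; the code below picks whichever is free
   according to rtx_cost.  */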
5472 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5473 {
5474 enum rtx_code first_code;
5475 bool and_them;
5477 rcode = reverse_condition_maybe_unordered (code);
5478 if (can_compare_p (rcode, mode, ccp_store_flag)
5479 && (code == ORDERED || code == UNORDERED
5480 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5481 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5482 {
5483 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5484 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5486 /* For the reverse comparison, use either an addition or a XOR. */
5487 if (want_add
5488 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5489 optimize_insn_for_speed_p ()) == 0)
5490 {
5491 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5492 STORE_FLAG_VALUE, target_mode);
5493 if (tem)
5494 return expand_binop (target_mode, add_optab, tem,
5495 gen_int_mode (normalizep, target_mode),
5496 target, 0, OPTAB_WIDEN);
5497 }
5498 else if (!want_add
5499 && rtx_cost (trueval, mode, XOR, 1,
5500 optimize_insn_for_speed_p ()) == 0)
5501 {
5502 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5503 normalizep, target_mode);
5504 if (tem)
5505 return expand_binop (target_mode, xor_optab, tem, trueval,
5506 target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5507 }
5508 }
5510 delete_insns_since (last);
5512 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5513 if (code == ORDERED || code == UNORDERED)
5514 return 0;
5516 and_them = split_comparison (code, mode, &first_code, &code);
5518 /* If there are no NaNs, the first comparison should always fall through.
5519 Effectively change the comparison to the other one. */
5520 if (!HONOR_NANS (mode))
5521 {
5522 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5523 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5524 target_mode);
5525 }
5527 if (!HAVE_conditional_move)
5528 return 0;
5530 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5531 conditional move. */
5532 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5533 normalizep, target_mode);
5534 if (tem == 0)
5535 return 0;
5537 if (and_them)
5538 tem = emit_conditional_move (target, code, op0, op1, mode,
5539 tem, const0_rtx, GET_MODE (tem), 0);
5540 else
5541 tem = emit_conditional_move (target, code, op0, op1, mode,
5542 trueval, tem, GET_MODE (tem), 0);
5544 if (tem == 0)
5545 delete_insns_since (last);
5546 return tem;
5547 }
5549 /* The remaining tricks only apply to integer comparisons. */
5551 if (GET_MODE_CLASS (mode) != MODE_INT)
5552 return 0;
5554 /* If this is an equality comparison of integers, we can try to exclusive-or
5555 (or subtract) the two operands and use a recursive call to try the
5556 comparison with zero. Don't do any of these cases if branches are
5557 very cheap. */
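/* E.g. "a == b" is retried as "(a ^ b) == 0" (or "(a - b) == 0" when no
   usable XOR exists), which the zero-comparison tricks further down can
   often handle.  */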
5559 if ((code == EQ || code == NE) && op1 != const0_rtx)
5560 {
5561 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5562 OPTAB_WIDEN);
5564 if (tem == 0)
5565 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5566 OPTAB_WIDEN);
5567 if (tem != 0)
5568 tem = emit_store_flag (target, code, tem, const0_rtx,
5569 mode, unsignedp, normalizep);
5570 if (tem != 0)
5571 return tem;
5573 delete_insns_since (last);
5574 }
5576 /* For integer comparisons, try the reverse comparison. However, for
5577 small X, if we would have to extend it anyway, implementing "X != 0"
5578 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5579 rcode = reverse_condition (code);
5580 if (can_compare_p (rcode, mode, ccp_store_flag)
5581 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5582 && code == NE
5583 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5584 && op1 == const0_rtx))
5585 {
5586 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5587 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5589 /* Again, for the reverse comparison, use either an addition or a XOR. */
5590 if (want_add
5591 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5592 optimize_insn_for_speed_p ()) == 0)
5593 {
5594 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5595 STORE_FLAG_VALUE, target_mode);
5596 if (tem != 0)
5597 tem = expand_binop (target_mode, add_optab, tem,
5598 gen_int_mode (normalizep, target_mode),
5599 target, 0, OPTAB_WIDEN);
5600 }
5601 else if (!want_add
5602 && rtx_cost (trueval, mode, XOR, 1,
5603 optimize_insn_for_speed_p ()) == 0)
5604 {
5605 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5606 normalizep, target_mode);
5607 if (tem != 0)
5608 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5609 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5610 }
5612 if (tem != 0)
5613 return tem;
5614 delete_insns_since (last);
5615 }
5617 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5618 the constant zero. Reject all other comparisons at this point. Only
5619 do LE and GT if branches are expensive since they are expensive on
5620 2-operand machines. */
5622 if (op1 != const0_rtx
5623 || (code != EQ && code != NE
5624 && (BRANCH_COST (optimize_insn_for_speed_p (),
5625 false) <= 1 || (code != LE && code != GT))))
5626 return 0;
5628 /* Try to put the result of the comparison in the sign bit. Assume we can't
5629 do the necessary operation below. */
5631 tem = 0;
5633 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5634 the sign bit set. */
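/* A worked QImode example: A = 3 gives 3 | 2 = 0x03 (sign clear, so
   false); A = 0 gives 0 | 0xff = 0xff (sign set, true); A = -2 gives
   0xfe | 0xfd = 0xff (sign set, true).  */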
5636 if (code == LE)
5637 {
5638 /* This is destructive, so SUBTARGET can't be OP0. */
5639 if (rtx_equal_p (subtarget, op0))
5640 subtarget = 0;
5642 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5643 OPTAB_WIDEN);
5644 if (tem)
5645 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5646 OPTAB_WIDEN);
5647 }
5649 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5650 number of bits in the mode of OP0, minus one. */
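/* A worked QImode example (BITS == 7): A = 5 gives (5 >> 7) - 5 = -5
   (sign set, true); A = 0 gives 0 - 0 = 0 (sign clear, false); A = -3
   gives -1 - (-3) = 2 (sign clear, false).  */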
5652 if (code == GT)
5653 {
5654 if (rtx_equal_p (subtarget, op0))
5655 subtarget = 0;
5657 tem = expand_shift (RSHIFT_EXPR, mode, op0,
5658 GET_MODE_BITSIZE (mode) - 1,
5659 subtarget, 0);
5660 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5661 OPTAB_WIDEN);
5662 }
5664 if (code == EQ || code == NE)
5665 {
5666 /* For EQ or NE, one way to do the comparison is to apply an operation
5667 that converts the operand into a positive number if it is nonzero
5668 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5669 for NE we negate. This puts the result in the sign bit. Then we
5670 normalize with a shift, if needed.
5672 Two operations that can do the above actions are ABS and FFS, so try
5673 them. If that doesn't work, and MODE is smaller than a full word,
5674 we can use zero-extension to the wider mode (an unsigned conversion)
5675 as the operation. */
5677 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5678 that is compensated by the subsequent overflow when subtracting
5679 one / negating. */
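/* E.g. using FFS in SImode: A = 7 gives ffs (A) = 1, so EQ computes
   1 - 1 = 0 (sign clear, false) and NE computes -1 (sign set, true);
   A = 0 gives ffs (A) = 0, so EQ computes -1 (true) and NE computes 0
   (false).  The normalizing shift further down (when NORMALIZEP is set)
   then extracts the sign bit.  */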
5681 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5682 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5683 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5684 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5685 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5686 {
5687 tem = convert_modes (word_mode, mode, op0, 1);
5688 mode = word_mode;
5689 }
5691 if (tem != 0)
5692 {
5693 if (code == EQ)
5694 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5695 0, OPTAB_WIDEN);
5696 else
5697 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5698 }
5700 /* If we couldn't do it that way, for NE we can "or" the two's complement
5701 of the value with itself. For EQ, we take the one's complement of
5702 that "or", which is an extra insn, so we only handle EQ if branches
5703 are expensive. */
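/* E.g. in QImode, A = 4 gives 0x04 | 0xfc = 0xfc (sign set, so "A != 0"
   is true), while A = 0 gives 0 (sign clear, false); for EQ the result
   is additionally complemented.  */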
5705 if (tem == 0
5706 && (code == NE
5707 || BRANCH_COST (optimize_insn_for_speed_p (),
5708 false) > 1))
5709 {
5710 if (rtx_equal_p (subtarget, op0))
5711 subtarget = 0;
5713 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5714 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5715 OPTAB_WIDEN);
5716 }
5717 if (tem && code == EQ)
5718 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5719 }
5722 if (tem && normalizep)
5723 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5724 GET_MODE_BITSIZE (mode) - 1,
5725 subtarget, normalizep == 1);
5727 if (tem)
5728 {
5729 if (!target)
5730 ;
5731 else if (GET_MODE (tem) != target_mode)
5732 {
5733 convert_move (target, tem, 0);
5734 tem = target;
5735 }
5736 else if (!subtarget)
5737 {
5738 emit_move_insn (target, tem);
5739 tem = target;
5740 }
5741 }
5742 else
5743 delete_insns_since (last);
5745 return tem;
5746 }
5748 /* Like emit_store_flag, but always succeeds. */
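/* A hypothetical caller that needs an unconditional 0/1 flag might use

     rtx r = emit_store_flag_force (NULL_RTX, LTU, a, b, SImode, 1, 1);

   When emit_store_flag cannot produce the value directly, the fallback
   below materializes it with a move of the "true" value, a conditional
   jump around a move of the "false" value, and a label.  */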
5750 rtx
5751 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5752 machine_mode mode, int unsignedp, int normalizep)
5753 {
5754 rtx tem;
5755 rtx_code_label *label;
5756 rtx trueval, falseval;
5758 /* First see if emit_store_flag can do the job. */
5759 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5760 if (tem != 0)
5761 return tem;
5763 if (!target)
5764 target = gen_reg_rtx (word_mode);
5766 /* If this failed, we have to do this with set/compare/jump/set code.
5767 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
5768 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5769 if (code == NE
5770 && GET_MODE_CLASS (mode) == MODE_INT
5771 && REG_P (target)
5772 && op0 == target
5773 && op1 == const0_rtx)
5774 {
5775 label = gen_label_rtx ();
5776 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
5777 NULL_RTX, NULL, label, -1);
5778 emit_move_insn (target, trueval);
5779 emit_label (label);
5780 return target;
5781 }
5783 if (!REG_P (target)
5784 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5785 target = gen_reg_rtx (GET_MODE (target));
5787 /* Jump in the right direction if the target cannot implement CODE
5788 but can jump on its reverse condition. */
5789 falseval = const0_rtx;
5790 if (! can_compare_p (code, mode, ccp_jump)
5791 && (! FLOAT_MODE_P (mode)
5792 || code == ORDERED || code == UNORDERED
5793 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5794 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5795 {
5796 enum rtx_code rcode;
5797 if (FLOAT_MODE_P (mode))
5798 rcode = reverse_condition_maybe_unordered (code);
5799 else
5800 rcode = reverse_condition (code);
5802 /* Canonicalize to UNORDERED for the libcall. */
5803 if (can_compare_p (rcode, mode, ccp_jump)
5804 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5805 {
5806 falseval = trueval;
5807 trueval = const0_rtx;
5808 code = rcode;
5809 }
5810 }
5812 emit_move_insn (target, trueval);
5813 label = gen_label_rtx ();
5814 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
5815 label, -1);
5817 emit_move_insn (target, falseval);
5818 emit_label (label);
5820 return target;
5821 }
5823 /* Perform possibly multi-word comparison and conditional jump to LABEL
5824 if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE. This is
5825 now a thin wrapper around do_compare_rtx_and_jump. */
5827 static void
5828 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
5829 rtx_code_label *label)
5830 {
5831 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5832 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
5833 NULL, label, -1);