[official-gcc.git] / gcc / expmed.c
1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987-2014 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "diagnostic-core.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "stor-layout.h"
30 #include "tm_p.h"
31 #include "flags.h"
32 #include "insn-config.h"
33 #include "expr.h"
34 #include "optabs.h"
35 #include "recog.h"
36 #include "langhooks.h"
37 #include "df.h"
38 #include "target.h"
39 #include "expmed.h"
41 struct target_expmed default_target_expmed;
42 #if SWITCHABLE_TARGET
43 struct target_expmed *this_target_expmed = &default_target_expmed;
44 #endif
46 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
47 unsigned HOST_WIDE_INT,
48 unsigned HOST_WIDE_INT,
49 unsigned HOST_WIDE_INT,
50 rtx);
51 static void store_fixed_bit_field_1 (rtx, unsigned HOST_WIDE_INT,
52 unsigned HOST_WIDE_INT,
53 rtx);
54 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
55 unsigned HOST_WIDE_INT,
56 unsigned HOST_WIDE_INT,
57 unsigned HOST_WIDE_INT,
58 rtx);
59 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
60 unsigned HOST_WIDE_INT,
61 unsigned HOST_WIDE_INT, rtx, int);
62 static rtx extract_fixed_bit_field_1 (enum machine_mode, rtx,
63 unsigned HOST_WIDE_INT,
64 unsigned HOST_WIDE_INT, rtx, int);
65 static rtx lshift_value (enum machine_mode, unsigned HOST_WIDE_INT, int);
66 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
67 unsigned HOST_WIDE_INT, int);
68 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
69 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
70 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
72 /* Return a constant integer mask value of mode MODE with BITSIZE ones
73 followed by BITPOS zeros, or the complement of that if COMPLEMENT.
74 The mask is truncated if necessary to the width of mode MODE. The
75 mask is zero-extended if BITSIZE+BITPOS is too small for MODE. */
77 static inline rtx
78 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, bool complement)
80 return immed_wide_int_const
81 (wi::shifted_mask (bitpos, bitsize, complement,
82 GET_MODE_PRECISION (mode)), mode);
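/* Worked example (illustrative, assuming a 32-bit SImode): mask_rtx
   (SImode, 3, 4, false) yields a constant whose low 32 bits are
   0x00000078 (four ones starting above three low zeros), while
   mask_rtx (SImode, 3, 4, true) yields its complement, 0xffffff87.  */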
85 /* Test whether a value is zero or a power of two. */
86 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
87 (((x) & ((x) - (unsigned HOST_WIDE_INT) 1)) == 0)
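/* Worked example (illustrative): for x = 8, 8 & 7 == 0, so the macro is
   true; it is also true for x = 0; for x = 12, 12 & 11 == 8 != 0, so it
   is false.  */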
89 struct init_expmed_rtl
91 struct rtx_def reg;
92 struct rtx_def plus;
93 struct rtx_def neg;
94 struct rtx_def mult;
95 struct rtx_def sdiv;
96 struct rtx_def udiv;
97 struct rtx_def sdiv_32;
98 struct rtx_def smod_32;
99 struct rtx_def wide_mult;
100 struct rtx_def wide_lshr;
101 struct rtx_def wide_trunc;
102 struct rtx_def shift;
103 struct rtx_def shift_mult;
104 struct rtx_def shift_add;
105 struct rtx_def shift_sub0;
106 struct rtx_def shift_sub1;
107 struct rtx_def zext;
108 struct rtx_def trunc;
110 rtx pow2[MAX_BITS_PER_WORD];
111 rtx cint[MAX_BITS_PER_WORD];
114 static void
115 init_expmed_one_conv (struct init_expmed_rtl *all, enum machine_mode to_mode,
116 enum machine_mode from_mode, bool speed)
118 int to_size, from_size;
119 rtx which;
121 /* We're given no information about the true size of a partial integer,
122 only the size of the "full" integer it requires for storage. For
123 comparison purposes here, reduce the bit size by one in that case. */
124 to_size = (GET_MODE_BITSIZE (to_mode)
125 - (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT));
126 from_size = (GET_MODE_BITSIZE (from_mode)
127 - (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT));
129 /* Assume cost of zero-extend and sign-extend is the same. */
130 which = (to_size < from_size ? &all->trunc : &all->zext);
132 PUT_MODE (&all->reg, from_mode);
133 set_convert_cost (to_mode, from_mode, speed, set_src_cost (which, speed));
136 static void
137 init_expmed_one_mode (struct init_expmed_rtl *all,
138 enum machine_mode mode, int speed)
140 int m, n, mode_bitsize;
141 enum machine_mode mode_from;
143 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
145 PUT_MODE (&all->reg, mode);
146 PUT_MODE (&all->plus, mode);
147 PUT_MODE (&all->neg, mode);
148 PUT_MODE (&all->mult, mode);
149 PUT_MODE (&all->sdiv, mode);
150 PUT_MODE (&all->udiv, mode);
151 PUT_MODE (&all->sdiv_32, mode);
152 PUT_MODE (&all->smod_32, mode);
153 PUT_MODE (&all->wide_trunc, mode);
154 PUT_MODE (&all->shift, mode);
155 PUT_MODE (&all->shift_mult, mode);
156 PUT_MODE (&all->shift_add, mode);
157 PUT_MODE (&all->shift_sub0, mode);
158 PUT_MODE (&all->shift_sub1, mode);
159 PUT_MODE (&all->zext, mode);
160 PUT_MODE (&all->trunc, mode);
162 set_add_cost (speed, mode, set_src_cost (&all->plus, speed));
163 set_neg_cost (speed, mode, set_src_cost (&all->neg, speed));
164 set_mul_cost (speed, mode, set_src_cost (&all->mult, speed));
165 set_sdiv_cost (speed, mode, set_src_cost (&all->sdiv, speed));
166 set_udiv_cost (speed, mode, set_src_cost (&all->udiv, speed));
168 set_sdiv_pow2_cheap (speed, mode, (set_src_cost (&all->sdiv_32, speed)
169 <= 2 * add_cost (speed, mode)));
170 set_smod_pow2_cheap (speed, mode, (set_src_cost (&all->smod_32, speed)
171 <= 4 * add_cost (speed, mode)));
173 set_shift_cost (speed, mode, 0, 0);
175 int cost = add_cost (speed, mode);
176 set_shiftadd_cost (speed, mode, 0, cost);
177 set_shiftsub0_cost (speed, mode, 0, cost);
178 set_shiftsub1_cost (speed, mode, 0, cost);
181 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
182 for (m = 1; m < n; m++)
184 XEXP (&all->shift, 1) = all->cint[m];
185 XEXP (&all->shift_mult, 1) = all->pow2[m];
187 set_shift_cost (speed, mode, m, set_src_cost (&all->shift, speed));
188 set_shiftadd_cost (speed, mode, m, set_src_cost (&all->shift_add, speed));
189 set_shiftsub0_cost (speed, mode, m, set_src_cost (&all->shift_sub0, speed));
190 set_shiftsub1_cost (speed, mode, m, set_src_cost (&all->shift_sub1, speed));
193 if (SCALAR_INT_MODE_P (mode))
195 for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
196 mode_from = (enum machine_mode)(mode_from + 1))
197 init_expmed_one_conv (all, mode, mode_from, speed);
199 if (GET_MODE_CLASS (mode) == MODE_INT)
201 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
202 if (wider_mode != VOIDmode)
204 PUT_MODE (&all->zext, wider_mode);
205 PUT_MODE (&all->wide_mult, wider_mode);
206 PUT_MODE (&all->wide_lshr, wider_mode);
207 XEXP (&all->wide_lshr, 1) = GEN_INT (mode_bitsize);
209 set_mul_widen_cost (speed, wider_mode,
210 set_src_cost (&all->wide_mult, speed));
211 set_mul_highpart_cost (speed, mode,
212 set_src_cost (&all->wide_trunc, speed));
217 void
218 init_expmed (void)
220 struct init_expmed_rtl all;
221 enum machine_mode mode;
222 int m, speed;
224 memset (&all, 0, sizeof all);
225 for (m = 1; m < MAX_BITS_PER_WORD; m++)
227 all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
228 all.cint[m] = GEN_INT (m);
231 PUT_CODE (&all.reg, REG);
232 /* Avoid using hard regs in ways which may be unsupported. */
233 SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
235 PUT_CODE (&all.plus, PLUS);
236 XEXP (&all.plus, 0) = &all.reg;
237 XEXP (&all.plus, 1) = &all.reg;
239 PUT_CODE (&all.neg, NEG);
240 XEXP (&all.neg, 0) = &all.reg;
242 PUT_CODE (&all.mult, MULT);
243 XEXP (&all.mult, 0) = &all.reg;
244 XEXP (&all.mult, 1) = &all.reg;
246 PUT_CODE (&all.sdiv, DIV);
247 XEXP (&all.sdiv, 0) = &all.reg;
248 XEXP (&all.sdiv, 1) = &all.reg;
250 PUT_CODE (&all.udiv, UDIV);
251 XEXP (&all.udiv, 0) = &all.reg;
252 XEXP (&all.udiv, 1) = &all.reg;
254 PUT_CODE (&all.sdiv_32, DIV);
255 XEXP (&all.sdiv_32, 0) = &all.reg;
256 XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? all.cint[32] : GEN_INT (32);
258 PUT_CODE (&all.smod_32, MOD);
259 XEXP (&all.smod_32, 0) = &all.reg;
260 XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
262 PUT_CODE (&all.zext, ZERO_EXTEND);
263 XEXP (&all.zext, 0) = &all.reg;
265 PUT_CODE (&all.wide_mult, MULT);
266 XEXP (&all.wide_mult, 0) = &all.zext;
267 XEXP (&all.wide_mult, 1) = &all.zext;
269 PUT_CODE (&all.wide_lshr, LSHIFTRT);
270 XEXP (&all.wide_lshr, 0) = &all.wide_mult;
272 PUT_CODE (&all.wide_trunc, TRUNCATE);
273 XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
275 PUT_CODE (&all.shift, ASHIFT);
276 XEXP (&all.shift, 0) = &all.reg;
278 PUT_CODE (&all.shift_mult, MULT);
279 XEXP (&all.shift_mult, 0) = &all.reg;
281 PUT_CODE (&all.shift_add, PLUS);
282 XEXP (&all.shift_add, 0) = &all.shift_mult;
283 XEXP (&all.shift_add, 1) = &all.reg;
285 PUT_CODE (&all.shift_sub0, MINUS);
286 XEXP (&all.shift_sub0, 0) = &all.shift_mult;
287 XEXP (&all.shift_sub0, 1) = &all.reg;
289 PUT_CODE (&all.shift_sub1, MINUS);
290 XEXP (&all.shift_sub1, 0) = &all.reg;
291 XEXP (&all.shift_sub1, 1) = &all.shift_mult;
293 PUT_CODE (&all.trunc, TRUNCATE);
294 XEXP (&all.trunc, 0) = &all.reg;
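/* Illustrative sketch of how the templates above compose (the concrete
   modes are only assigned later, in init_expmed_one_mode).  For a 32-bit
   mode whose wider mode is 64 bits, wide_trunc stands for

     (truncate:SI (lshiftrt:DI (mult:DI (zero_extend:DI (reg:SI))
                                        (zero_extend:DI (reg:SI)))
                               (const_int 32)))

   i.e. the highpart-multiply pattern whose cost is recorded by
   set_mul_highpart_cost in init_expmed_one_mode.  */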
296 for (speed = 0; speed < 2; speed++)
298 crtl->maybe_hot_insn_p = speed;
299 set_zero_cost (speed, set_src_cost (const0_rtx, speed));
301 for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
302 mode = (enum machine_mode)(mode + 1))
303 init_expmed_one_mode (&all, mode, speed);
305 if (MIN_MODE_PARTIAL_INT != VOIDmode)
306 for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
307 mode = (enum machine_mode)(mode + 1))
308 init_expmed_one_mode (&all, mode, speed);
310 if (MIN_MODE_VECTOR_INT != VOIDmode)
311 for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
312 mode = (enum machine_mode)(mode + 1))
313 init_expmed_one_mode (&all, mode, speed);
316 if (alg_hash_used_p ())
318 struct alg_hash_entry *p = alg_hash_entry_ptr (0);
319 memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
321 else
322 set_alg_hash_used_p (true);
323 default_rtl_profile ();
326 /* Return an rtx representing minus the value of X.
327 MODE is the intended mode of the result,
328 useful if X is a CONST_INT. */
331 negate_rtx (enum machine_mode mode, rtx x)
333 rtx result = simplify_unary_operation (NEG, mode, x, mode);
335 if (result == 0)
336 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
338 return result;
341 /* Adjust bitfield memory MEM so that it points to the first unit of mode
342 MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
343 If MODE is BLKmode, return a reference to every byte in the bitfield.
344 Set *NEW_BITNUM to the bit position of the field within the new memory. */
346 static rtx
347 narrow_bit_field_mem (rtx mem, enum machine_mode mode,
348 unsigned HOST_WIDE_INT bitsize,
349 unsigned HOST_WIDE_INT bitnum,
350 unsigned HOST_WIDE_INT *new_bitnum)
352 if (mode == BLKmode)
354 *new_bitnum = bitnum % BITS_PER_UNIT;
355 HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
356 HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
357 / BITS_PER_UNIT);
358 return adjust_bitfield_address_size (mem, mode, offset, size);
360 else
362 unsigned int unit = GET_MODE_BITSIZE (mode);
363 *new_bitnum = bitnum % unit;
364 HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
365 return adjust_bitfield_address (mem, mode, offset);
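/* Worked example (illustrative, 8-bit units): narrowing to HImode
   (16 bits) with BITNUM = 37 gives *NEW_BITNUM = 37 % 16 = 5 and a byte
   offset of (37 - 5) / 8 = 4; narrowing to BLKmode with BITNUM = 37 and
   BITSIZE = 10 gives *NEW_BITNUM = 5, offset 4, and a reference of
   (5 + 10 + 7) / 8 = 2 bytes.  */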
369 /* The caller wants to perform insertion or extraction PATTERN on a
370 bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
371 BITREGION_START and BITREGION_END are as for store_bit_field
372 and FIELDMODE is the natural mode of the field.
374 Search for a mode that is compatible with the memory access
375 restrictions and (where applicable) with a register insertion or
376 extraction. Return the new memory on success, storing the adjusted
377 bit position in *NEW_BITNUM. Return null otherwise. */
379 static rtx
380 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
381 rtx op0, HOST_WIDE_INT bitsize,
382 HOST_WIDE_INT bitnum,
383 unsigned HOST_WIDE_INT bitregion_start,
384 unsigned HOST_WIDE_INT bitregion_end,
385 enum machine_mode fieldmode,
386 unsigned HOST_WIDE_INT *new_bitnum)
388 bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
389 bitregion_end, MEM_ALIGN (op0),
390 MEM_VOLATILE_P (op0));
391 enum machine_mode best_mode;
392 if (iter.next_mode (&best_mode))
394 /* We can use a memory in BEST_MODE. See whether this is true for
395 any wider modes. All other things being equal, we prefer to
396 use the widest mode possible because it tends to expose more
397 CSE opportunities. */
398 if (!iter.prefer_smaller_modes ())
400 /* Limit the search to the mode required by the corresponding
401 register insertion or extraction instruction, if any. */
402 enum machine_mode limit_mode = word_mode;
403 extraction_insn insn;
404 if (get_best_reg_extraction_insn (&insn, pattern,
405 GET_MODE_BITSIZE (best_mode),
406 fieldmode))
407 limit_mode = insn.field_mode;
409 enum machine_mode wider_mode;
410 while (iter.next_mode (&wider_mode)
411 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
412 best_mode = wider_mode;
414 return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
415 new_bitnum);
417 return NULL_RTX;
420 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
421 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
422 offset is then BITNUM / BITS_PER_UNIT. */
424 static bool
425 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
426 unsigned HOST_WIDE_INT bitsize,
427 enum machine_mode struct_mode)
429 if (BYTES_BIG_ENDIAN)
430 return (bitnum % BITS_PER_UNIT == 0
431 && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
432 || (bitnum + bitsize) % BITS_PER_WORD == 0));
433 else
434 return bitnum % BITS_PER_WORD == 0;
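/* Worked example (illustrative, 32-bit words, 8-bit units): on a
   little-endian target the field is a lowpart when BITNUM is a multiple
   of 32; on a big-endian target a 16-bit field at BITNUM = 48 within a
   64-bit STRUCT_MODE qualifies, because 48 is byte-aligned and
   48 + 16 reaches the end of the mode.  */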
437 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
438 containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
439 Return false if the access would touch memory outside the range
440 BITREGION_START to BITREGION_END for conformance to the C++ memory
441 model. */
443 static bool
444 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
445 unsigned HOST_WIDE_INT bitnum,
446 enum machine_mode fieldmode,
447 unsigned HOST_WIDE_INT bitregion_start,
448 unsigned HOST_WIDE_INT bitregion_end)
450 unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
452 /* -fstrict-volatile-bitfields must be enabled and we must have a
453 volatile MEM. */
454 if (!MEM_P (op0)
455 || !MEM_VOLATILE_P (op0)
456 || flag_strict_volatile_bitfields <= 0)
457 return false;
459 /* Non-integral modes likely only happen with packed structures.
460 Punt. */
461 if (!SCALAR_INT_MODE_P (fieldmode))
462 return false;
464 /* The bit size must not be larger than the field mode, and
465 the field mode must not be larger than a word. */
466 if (bitsize > modesize || modesize > BITS_PER_WORD)
467 return false;
469 /* Check for cases of unaligned fields that must be split. */
470 if (bitnum % BITS_PER_UNIT + bitsize > modesize
471 || (STRICT_ALIGNMENT
472 && bitnum % GET_MODE_ALIGNMENT (fieldmode) + bitsize > modesize))
473 return false;
475 /* Check for cases where the C++ memory model applies. */
476 if (bitregion_end != 0
477 && (bitnum - bitnum % modesize < bitregion_start
478 || bitnum - bitnum % modesize + modesize > bitregion_end))
479 return false;
481 return true;
484 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
485 bit number BITNUM can be treated as a simple value of mode MODE. */
487 static bool
488 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
489 unsigned HOST_WIDE_INT bitnum, enum machine_mode mode)
491 return (MEM_P (op0)
492 && bitnum % BITS_PER_UNIT == 0
493 && bitsize == GET_MODE_BITSIZE (mode)
494 && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
495 || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
496 && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
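/* For instance (illustrative), a 32-bit field at BITNUM = 64 in a MEM
   aligned to at least 32 bits can be treated as a plain SImode value:
   it starts on a byte boundary, fills the mode exactly, and satisfies
   the alignment test even on targets where unaligned accesses are
   slow.  */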
499 /* Try to use instruction INSV to store VALUE into a field of OP0.
500 BITSIZE and BITNUM are as for store_bit_field. */
502 static bool
503 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
504 unsigned HOST_WIDE_INT bitsize,
505 unsigned HOST_WIDE_INT bitnum,
506 rtx value)
508 struct expand_operand ops[4];
509 rtx value1;
510 rtx xop0 = op0;
511 rtx last = get_last_insn ();
512 bool copy_back = false;
514 enum machine_mode op_mode = insv->field_mode;
515 unsigned int unit = GET_MODE_BITSIZE (op_mode);
516 if (bitsize == 0 || bitsize > unit)
517 return false;
519 if (MEM_P (xop0))
520 /* Get a reference to the first byte of the field. */
521 xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
522 &bitnum);
523 else
525 /* Convert from counting within OP0 to counting in OP_MODE. */
526 if (BYTES_BIG_ENDIAN)
527 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
529 /* If xop0 is a register, we need it in OP_MODE
530 to make it acceptable to the format of insv. */
531 if (GET_CODE (xop0) == SUBREG)
532 /* We can't just change the mode, because this might clobber op0,
533 and we will need the original value of op0 if insv fails. */
534 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
535 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
536 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
539 /* If the destination is a paradoxical subreg such that we need a
540 truncate to the inner mode, perform the insertion on a temporary and
541 truncate the result to the original destination. Note that we can't
542 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
543 X) 0)) is (reg:N X). */
544 if (GET_CODE (xop0) == SUBREG
545 && REG_P (SUBREG_REG (xop0))
546 && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
547 op_mode))
549 rtx tem = gen_reg_rtx (op_mode);
550 emit_move_insn (tem, xop0);
551 xop0 = tem;
552 copy_back = true;
555 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
556 "backwards" from the size of the unit we are inserting into.
557 Otherwise, we count bits from the most significant on a
558 BYTES/BITS_BIG_ENDIAN machine. */
560 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
561 bitnum = unit - bitsize - bitnum;
563 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
564 value1 = value;
565 if (GET_MODE (value) != op_mode)
567 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
569 /* Optimization: Don't bother really extending VALUE
570 if it has all the bits we will actually use. However,
571 if we must narrow it, be sure we do it correctly. */
573 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
575 rtx tmp;
577 tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
578 if (! tmp)
579 tmp = simplify_gen_subreg (op_mode,
580 force_reg (GET_MODE (value),
581 value1),
582 GET_MODE (value), 0);
583 value1 = tmp;
585 else
586 value1 = gen_lowpart (op_mode, value1);
588 else if (CONST_INT_P (value))
589 value1 = gen_int_mode (INTVAL (value), op_mode);
590 else
591 /* Parse phase is supposed to make VALUE's data type
592 match that of the component reference, which is a type
593 at least as wide as the field; so VALUE should have
594 a mode that corresponds to that type. */
595 gcc_assert (CONSTANT_P (value));
598 create_fixed_operand (&ops[0], xop0);
599 create_integer_operand (&ops[1], bitsize);
600 create_integer_operand (&ops[2], bitnum);
601 create_input_operand (&ops[3], value1, op_mode);
602 if (maybe_expand_insn (insv->icode, 4, ops))
604 if (copy_back)
605 convert_move (op0, xop0, true);
606 return true;
608 delete_insns_since (last);
609 return false;
612 /* A subroutine of store_bit_field, with the same arguments. Return true
613 if the operation could be implemented.
615 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
616 no other way of implementing the operation. If FALLBACK_P is false,
617 return false instead. */
619 static bool
620 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
621 unsigned HOST_WIDE_INT bitnum,
622 unsigned HOST_WIDE_INT bitregion_start,
623 unsigned HOST_WIDE_INT bitregion_end,
624 enum machine_mode fieldmode,
625 rtx value, bool fallback_p)
627 rtx op0 = str_rtx;
628 rtx orig_value;
630 while (GET_CODE (op0) == SUBREG)
632 /* The following line once was done only if WORDS_BIG_ENDIAN,
633 but I think that is a mistake. WORDS_BIG_ENDIAN is
634 meaningful at a much higher level; when structures are copied
635 between memory and regs, the higher-numbered regs
636 always get higher addresses. */
637 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
638 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
639 int byte_offset = 0;
641 /* Paradoxical subregs need special handling on big endian machines. */
642 if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
644 int difference = inner_mode_size - outer_mode_size;
646 if (WORDS_BIG_ENDIAN)
647 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
648 if (BYTES_BIG_ENDIAN)
649 byte_offset += difference % UNITS_PER_WORD;
651 else
652 byte_offset = SUBREG_BYTE (op0);
654 bitnum += byte_offset * BITS_PER_UNIT;
655 op0 = SUBREG_REG (op0);
658 /* No action is needed if the target is a register and if the field
659 lies completely outside that register. This can occur if the source
660 code contains an out-of-bounds access to a small array. */
661 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
662 return true;
664 /* Use vec_set patterns for inserting parts of vectors whenever
665 available. */
666 if (VECTOR_MODE_P (GET_MODE (op0))
667 && !MEM_P (op0)
668 && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
669 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
670 && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
671 && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
673 struct expand_operand ops[3];
674 enum machine_mode outermode = GET_MODE (op0);
675 enum machine_mode innermode = GET_MODE_INNER (outermode);
676 enum insn_code icode = optab_handler (vec_set_optab, outermode);
677 int pos = bitnum / GET_MODE_BITSIZE (innermode);
679 create_fixed_operand (&ops[0], op0);
680 create_input_operand (&ops[1], value, innermode);
681 create_integer_operand (&ops[2], pos);
682 if (maybe_expand_insn (icode, 3, ops))
683 return true;
686 /* If the target is a register, overwriting the entire object, or storing
687 a full-word or multi-word field can be done with just a SUBREG. */
688 if (!MEM_P (op0)
689 && bitsize == GET_MODE_BITSIZE (fieldmode)
690 && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
691 || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
693 /* Use the subreg machinery either to narrow OP0 to the required
694 words or to cope with mode punning between equal-sized modes.
695 In the latter case, use subreg on the rhs side, not lhs. */
696 rtx sub;
698 if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
700 sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
701 if (sub)
703 emit_move_insn (op0, sub);
704 return true;
707 else
709 sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
710 bitnum / BITS_PER_UNIT);
711 if (sub)
713 emit_move_insn (sub, value);
714 return true;
719 /* If the target is memory, storing any naturally aligned field can be
720 done with a simple store. For targets that support fast unaligned
721 memory, any naturally sized, unit aligned field can be done directly. */
722 if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
724 op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
725 emit_move_insn (op0, value);
726 return true;
729 /* Make sure we are playing with integral modes. Pun with subregs
730 if we aren't. This must come after the entire register case above,
731 since that case is valid for any mode. The following cases are only
732 valid for integral modes. */
734 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
735 if (imode != GET_MODE (op0))
737 if (MEM_P (op0))
738 op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
739 else
741 gcc_assert (imode != BLKmode);
742 op0 = gen_lowpart (imode, op0);
747 /* Storing an lsb-aligned field in a register
748 can be done with a movstrict instruction. */
750 if (!MEM_P (op0)
751 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
752 && bitsize == GET_MODE_BITSIZE (fieldmode)
753 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
755 struct expand_operand ops[2];
756 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
757 rtx arg0 = op0;
758 unsigned HOST_WIDE_INT subreg_off;
760 if (GET_CODE (arg0) == SUBREG)
762 /* Else we've got some float mode source being extracted into
763 a different float mode destination -- this combination of
764 subregs results in Severe Tire Damage. */
765 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
766 || GET_MODE_CLASS (fieldmode) == MODE_INT
767 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
768 arg0 = SUBREG_REG (arg0);
771 subreg_off = bitnum / BITS_PER_UNIT;
772 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
774 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
776 create_fixed_operand (&ops[0], arg0);
777 /* Shrink the source operand to FIELDMODE. */
778 create_convert_operand_to (&ops[1], value, fieldmode, false);
779 if (maybe_expand_insn (icode, 2, ops))
780 return true;
784 /* Handle fields bigger than a word. */
786 if (bitsize > BITS_PER_WORD)
788 /* Here we transfer the words of the field
789 in the order least significant first.
790 This is because the most significant word is the one which may
791 be less than full.
792 However, only do that if the value is not BLKmode. */
794 unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
795 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
796 unsigned int i;
797 rtx last;
799 /* This is the mode we must force value to, so that there will be enough
800 subwords to extract. Note that fieldmode will often (always?) be
801 VOIDmode, because that is what store_field uses to indicate that this
802 is a bit field, but passing VOIDmode to operand_subword_force
803 is not allowed. */
804 fieldmode = GET_MODE (value);
805 if (fieldmode == VOIDmode)
806 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
808 last = get_last_insn ();
809 for (i = 0; i < nwords; i++)
811 /* If I is 0, use the low-order word in both field and target;
812 if I is 1, use the next to lowest word; and so on. */
813 unsigned int wordnum = (backwards
814 ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
815 - i - 1
816 : i);
817 unsigned int bit_offset = (backwards
818 ? MAX ((int) bitsize - ((int) i + 1)
819 * BITS_PER_WORD, 0)
821 : (int) i * BITS_PER_WORD);
822 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
823 unsigned HOST_WIDE_INT new_bitsize =
824 MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
826 /* If the remaining chunk doesn't have full wordsize we have
827 to make sure that for big endian machines the higher order
828 bits are used. */
829 if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
830 value_word = simplify_expand_binop (word_mode, lshr_optab,
831 value_word,
832 GEN_INT (BITS_PER_WORD
833 - new_bitsize),
834 NULL_RTX, true,
835 OPTAB_LIB_WIDEN);
837 if (!store_bit_field_1 (op0, new_bitsize,
838 bitnum + bit_offset,
839 bitregion_start, bitregion_end,
840 word_mode,
841 value_word, fallback_p))
843 delete_insns_since (last);
844 return false;
847 return true;
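/* Worked example (illustrative, 32-bit words, neither WORDS_BIG_ENDIAN
   nor BYTES_BIG_ENDIAN): storing a 96-bit field takes nwords = 3
   iterations; iteration I copies word I of VALUE into bit offset I * 32
   of the field, least significant word first.  */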
850 /* If VALUE has a floating-point or complex mode, access it as an
851 integer of the corresponding size. This can occur on a machine
852 with 64 bit registers that uses SFmode for float. It can also
853 occur for unaligned float or complex fields. */
854 orig_value = value;
855 if (GET_MODE (value) != VOIDmode
856 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
857 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
859 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
860 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
863 /* If OP0 is a multi-word register, narrow it to the affected word.
864 If the region spans two words, defer to store_split_bit_field. */
865 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
867 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
868 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
869 gcc_assert (op0);
870 bitnum %= BITS_PER_WORD;
871 if (bitnum + bitsize > BITS_PER_WORD)
873 if (!fallback_p)
874 return false;
876 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
877 bitregion_end, value);
878 return true;
882 /* From here on we can assume that the field to be stored in fits
883 within a word. If the destination is a register, it too fits
884 in a word. */
886 extraction_insn insv;
887 if (!MEM_P (op0)
888 && get_best_reg_extraction_insn (&insv, EP_insv,
889 GET_MODE_BITSIZE (GET_MODE (op0)),
890 fieldmode)
891 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
892 return true;
894 /* If OP0 is a memory, try copying it to a register and seeing if a
895 cheap register alternative is available. */
896 if (MEM_P (op0))
898 if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
899 fieldmode)
900 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
901 return true;
903 rtx last = get_last_insn ();
905 /* Try loading part of OP0 into a register, inserting the bitfield
906 into that, and then copying the result back to OP0. */
907 unsigned HOST_WIDE_INT bitpos;
908 rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
909 bitregion_start, bitregion_end,
910 fieldmode, &bitpos);
911 if (xop0)
913 rtx tempreg = copy_to_reg (xop0);
914 if (store_bit_field_1 (tempreg, bitsize, bitpos,
915 bitregion_start, bitregion_end,
916 fieldmode, orig_value, false))
918 emit_move_insn (xop0, tempreg);
919 return true;
921 delete_insns_since (last);
925 if (!fallback_p)
926 return false;
928 store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
929 bitregion_end, value);
930 return true;
933 /* Generate code to store value from rtx VALUE
934 into a bit-field within structure STR_RTX
935 containing BITSIZE bits starting at bit BITNUM.
937 BITREGION_START is bitpos of the first bitfield in this region.
938 BITREGION_END is the bitpos of the ending bitfield in this region.
939 These two fields are 0 if the C++ memory model does not apply,
940 or if we are not interested in keeping track of bitfield regions.
942 FIELDMODE is the machine-mode of the FIELD_DECL node for this field. */
944 void
945 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
946 unsigned HOST_WIDE_INT bitnum,
947 unsigned HOST_WIDE_INT bitregion_start,
948 unsigned HOST_WIDE_INT bitregion_end,
949 enum machine_mode fieldmode,
950 rtx value)
952 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
953 if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode,
954 bitregion_start, bitregion_end))
956 /* Storing any naturally aligned field can be done with a simple
957 store. For targets that support fast unaligned memory, any
958 naturally sized, unit aligned field can be done directly. */
959 if (simple_mem_bitfield_p (str_rtx, bitsize, bitnum, fieldmode))
961 str_rtx = adjust_bitfield_address (str_rtx, fieldmode,
962 bitnum / BITS_PER_UNIT);
963 emit_move_insn (str_rtx, value);
965 else
967 str_rtx = narrow_bit_field_mem (str_rtx, fieldmode, bitsize, bitnum,
968 &bitnum);
969 /* Explicitly override the C/C++ memory model; ignore the
970 bit range so that we can do the access in the mode mandated
971 by -fstrict-volatile-bitfields instead. */
972 store_fixed_bit_field_1 (str_rtx, bitsize, bitnum, value);
975 return;
978 /* Under the C++0x memory model, we must not touch bits outside the
979 bit region. Adjust the address to start at the beginning of the
980 bit region. */
981 if (MEM_P (str_rtx) && bitregion_start > 0)
983 enum machine_mode bestmode;
984 HOST_WIDE_INT offset, size;
986 gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
988 offset = bitregion_start / BITS_PER_UNIT;
989 bitnum -= bitregion_start;
990 size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
991 bitregion_end -= bitregion_start;
992 bitregion_start = 0;
993 bestmode = get_best_mode (bitsize, bitnum,
994 bitregion_start, bitregion_end,
995 MEM_ALIGN (str_rtx), VOIDmode,
996 MEM_VOLATILE_P (str_rtx));
997 str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
1000 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1001 bitregion_start, bitregion_end,
1002 fieldmode, value, true))
1003 gcc_unreachable ();
1006 /* Use shifts and boolean operations to store VALUE into a bit field of
1007 width BITSIZE in OP0, starting at bit BITNUM. */
1009 static void
1010 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1011 unsigned HOST_WIDE_INT bitnum,
1012 unsigned HOST_WIDE_INT bitregion_start,
1013 unsigned HOST_WIDE_INT bitregion_end,
1014 rtx value)
1016 /* There is a case not handled here:
1017 a structure with a known alignment of just a halfword
1018 and a field split across two aligned halfwords within the structure.
1019 Or likewise a structure with a known alignment of just a byte
1020 and a field split across two bytes.
1021 Such cases are not supposed to be able to occur. */
1023 if (MEM_P (op0))
1025 enum machine_mode mode = GET_MODE (op0);
1026 if (GET_MODE_BITSIZE (mode) == 0
1027 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
1028 mode = word_mode;
1029 mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1030 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
1032 if (mode == VOIDmode)
1034 /* The only way this should occur is if the field spans word
1035 boundaries. */
1036 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
1037 bitregion_end, value);
1038 return;
1041 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1044 store_fixed_bit_field_1 (op0, bitsize, bitnum, value);
1047 /* Helper function for store_fixed_bit_field, stores
1048 the bit field always using the MODE of OP0. */
1050 static void
1051 store_fixed_bit_field_1 (rtx op0, unsigned HOST_WIDE_INT bitsize,
1052 unsigned HOST_WIDE_INT bitnum,
1053 rtx value)
1055 enum machine_mode mode;
1056 rtx temp;
1057 int all_zero = 0;
1058 int all_one = 0;
1060 mode = GET_MODE (op0);
1061 gcc_assert (SCALAR_INT_MODE_P (mode));
1063 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1064 for invalid input, such as f5 from gcc.dg/pr48335-2.c. */
1066 if (BYTES_BIG_ENDIAN)
1067 /* BITNUM is the distance between our msb
1068 and that of the containing datum.
1069 Convert it to the distance from the lsb. */
1070 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1072 /* Now BITNUM is always the distance between our lsb
1073 and that of OP0. */
1075 /* Shift VALUE left by BITNUM bits. If VALUE is not constant,
1076 we must first convert its mode to MODE. */
1078 if (CONST_INT_P (value))
1080 HOST_WIDE_INT v = INTVAL (value);
1082 if (bitsize < HOST_BITS_PER_WIDE_INT)
1083 v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
1085 if (v == 0)
1086 all_zero = 1;
1087 else if ((bitsize < HOST_BITS_PER_WIDE_INT
1088 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
1089 || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
1090 all_one = 1;
1092 value = lshift_value (mode, v, bitnum);
1094 else
1096 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1097 && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1099 if (GET_MODE (value) != mode)
1100 value = convert_to_mode (mode, value, 1);
1102 if (must_and)
1103 value = expand_binop (mode, and_optab, value,
1104 mask_rtx (mode, 0, bitsize, 0),
1105 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1106 if (bitnum > 0)
1107 value = expand_shift (LSHIFT_EXPR, mode, value,
1108 bitnum, NULL_RTX, 1);
1111 /* Now clear the chosen bits in OP0,
1112 except that if VALUE is -1 we need not bother. */
1113 /* We keep the intermediates in registers to allow CSE to combine
1114 consecutive bitfield assignments. */
1116 temp = force_reg (mode, op0);
1118 if (! all_one)
1120 temp = expand_binop (mode, and_optab, temp,
1121 mask_rtx (mode, bitnum, bitsize, 1),
1122 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1123 temp = force_reg (mode, temp);
1126 /* Now logical-or VALUE into OP0, unless it is zero. */
1128 if (! all_zero)
1130 temp = expand_binop (mode, ior_optab, temp, value,
1131 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1132 temp = force_reg (mode, temp);
1135 if (op0 != temp)
1137 op0 = copy_rtx (op0);
1138 emit_move_insn (op0, temp);
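/* Worked example (illustrative, QImode, little-endian): storing the
   constant 5 into a 3-bit field at BITNUM 4 of a byte currently holding
   0xff computes the mask mask_rtx (QImode, 4, 3, 1) = 0x8f and then
   (0xff & 0x8f) | (5 << 4) = 0xdf.  */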
1142 /* Store a bit field that is split across multiple accessible memory objects.
1144 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1145 BITSIZE is the field width; BITPOS the position of its first bit
1146 (within the word).
1147 VALUE is the value to store.
1149 This does not yet handle fields wider than BITS_PER_WORD. */
1151 static void
1152 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1153 unsigned HOST_WIDE_INT bitpos,
1154 unsigned HOST_WIDE_INT bitregion_start,
1155 unsigned HOST_WIDE_INT bitregion_end,
1156 rtx value)
1158 unsigned int unit;
1159 unsigned int bitsdone = 0;
1161 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1162 much at a time. */
1163 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1164 unit = BITS_PER_WORD;
1165 else
1166 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1168 /* If OP0 is a memory with a mode, then UNIT must not be larger than
1169 OP0's mode as well. Otherwise, store_fixed_bit_field will call us
1170 again, and we will mutually recurse forever. */
1171 if (MEM_P (op0) && GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1172 unit = MIN (unit, GET_MODE_BITSIZE (GET_MODE (op0)));
1174 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1175 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1176 that VALUE might be a floating-point constant. */
1177 if (CONSTANT_P (value) && !CONST_INT_P (value))
1179 rtx word = gen_lowpart_common (word_mode, value);
1181 if (word && (value != word))
1182 value = word;
1183 else
1184 value = gen_lowpart_common (word_mode,
1185 force_reg (GET_MODE (value) != VOIDmode
1186 ? GET_MODE (value)
1187 : word_mode, value));
1190 while (bitsdone < bitsize)
1192 unsigned HOST_WIDE_INT thissize;
1193 rtx part, word;
1194 unsigned HOST_WIDE_INT thispos;
1195 unsigned HOST_WIDE_INT offset;
1197 offset = (bitpos + bitsdone) / unit;
1198 thispos = (bitpos + bitsdone) % unit;
1200 /* When the region of bytes we can touch is restricted, decrease
1201 UNIT close to the end of the region as needed. If op0 is a REG
1202 or SUBREG of REG, don't do this, as there can't be data races
1203 on a register and we can expand shorter code in some cases. */
1204 if (bitregion_end
1205 && unit > BITS_PER_UNIT
1206 && bitpos + bitsdone - thispos + unit > bitregion_end + 1
1207 && !REG_P (op0)
1208 && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1210 unit = unit / 2;
1211 continue;
1214 /* THISSIZE must not overrun a word boundary. Otherwise,
1215 store_fixed_bit_field will call us again, and we will mutually
1216 recurse forever. */
1217 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1218 thissize = MIN (thissize, unit - thispos);
1220 if (BYTES_BIG_ENDIAN)
1222 /* Fetch successively less significant portions. */
1223 if (CONST_INT_P (value))
1224 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1225 >> (bitsize - bitsdone - thissize))
1226 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1227 else
1229 int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1230 /* The args are chosen so that the last part includes the
1231 lsb. Give extract_bit_field the value it needs (with
1232 endianness compensation) to fetch the piece we want. */
1233 part = extract_fixed_bit_field (word_mode, value, thissize,
1234 total_bits - bitsize + bitsdone,
1235 NULL_RTX, 1);
1238 else
1240 /* Fetch successively more significant portions. */
1241 if (CONST_INT_P (value))
1242 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1243 >> bitsdone)
1244 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1245 else
1246 part = extract_fixed_bit_field (word_mode, value, thissize,
1247 bitsdone, NULL_RTX, 1);
1250 /* If OP0 is a register, then handle OFFSET here.
1252 When handling multiword bitfields, extract_bit_field may pass
1253 down a word_mode SUBREG of a larger REG for a bitfield that actually
1254 crosses a word boundary. Thus, for a SUBREG, we must find
1255 the current word starting from the base register. */
1256 if (GET_CODE (op0) == SUBREG)
1258 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD)
1259 + (offset * unit / BITS_PER_WORD);
1260 enum machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
1261 if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1262 word = word_offset ? const0_rtx : op0;
1263 else
1264 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1265 GET_MODE (SUBREG_REG (op0)));
1266 offset &= BITS_PER_WORD / unit - 1;
1268 else if (REG_P (op0))
1270 enum machine_mode op0_mode = GET_MODE (op0);
1271 if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1272 word = offset ? const0_rtx : op0;
1273 else
1274 word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
1275 GET_MODE (op0));
1276 offset &= BITS_PER_WORD / unit - 1;
1278 else
1279 word = op0;
1281 /* OFFSET is in UNITs, and UNIT is in bits. If WORD is const0_rtx,
1282 it is just an out-of-bounds access. Ignore it. */
1283 if (word != const0_rtx)
1284 store_fixed_bit_field (word, thissize, offset * unit + thispos,
1285 bitregion_start, bitregion_end, part);
1286 bitsdone += thissize;
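/* Worked example (illustrative, little-endian, UNIT = 32): a 12-bit
   field at BITPOS 28 is split into a first part of MIN (12, 32 - 28)
   = 4 bits stored at offset 0, position 28, followed by the remaining
   8 bits stored at offset 1, position 0.  */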
1290 /* A subroutine of extract_bit_field_1 that converts return value X
1291 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1292 to extract_bit_field. */
1294 static rtx
1295 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1296 enum machine_mode tmode, bool unsignedp)
1298 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1299 return x;
1301 /* If the x mode is not a scalar integral, first convert to the
1302 integer mode of that size and then access it as a floating-point
1303 value via a SUBREG. */
1304 if (!SCALAR_INT_MODE_P (tmode))
1306 enum machine_mode smode;
1308 smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1309 x = convert_to_mode (smode, x, unsignedp);
1310 x = force_reg (smode, x);
1311 return gen_lowpart (tmode, x);
1314 return convert_to_mode (tmode, x, unsignedp);
1317 /* Try to use an ext(z)v pattern to extract a field from OP0.
1318 Return the extracted value on success, otherwise return null.
1319 EXT_MODE is the mode of the extraction and the other arguments
1320 are as for extract_bit_field. */
1322 static rtx
1323 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1324 unsigned HOST_WIDE_INT bitsize,
1325 unsigned HOST_WIDE_INT bitnum,
1326 int unsignedp, rtx target,
1327 enum machine_mode mode, enum machine_mode tmode)
1329 struct expand_operand ops[4];
1330 rtx spec_target = target;
1331 rtx spec_target_subreg = 0;
1332 enum machine_mode ext_mode = extv->field_mode;
1333 unsigned unit = GET_MODE_BITSIZE (ext_mode);
1335 if (bitsize == 0 || unit < bitsize)
1336 return NULL_RTX;
1338 if (MEM_P (op0))
1339 /* Get a reference to the first byte of the field. */
1340 op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1341 &bitnum);
1342 else
1344 /* Convert from counting within OP0 to counting in EXT_MODE. */
1345 if (BYTES_BIG_ENDIAN)
1346 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1348 /* If op0 is a register, we need it in EXT_MODE to make it
1349 acceptable to the format of ext(z)v. */
1350 if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1351 return NULL_RTX;
1352 if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1353 op0 = gen_lowpart_SUBREG (ext_mode, op0);
1356 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1357 "backwards" from the size of the unit we are extracting from.
1358 Otherwise, we count bits from the most significant on a
1359 BYTES/BITS_BIG_ENDIAN machine. */
1361 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1362 bitnum = unit - bitsize - bitnum;
1364 if (target == 0)
1365 target = spec_target = gen_reg_rtx (tmode);
1367 if (GET_MODE (target) != ext_mode)
1369 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1370 between the mode of the extraction (word_mode) and the target
1371 mode. Instead, create a temporary and use convert_move to set
1372 the target. */
1373 if (REG_P (target)
1374 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1376 target = gen_lowpart (ext_mode, target);
1377 if (GET_MODE_PRECISION (ext_mode)
1378 > GET_MODE_PRECISION (GET_MODE (spec_target)))
1379 spec_target_subreg = target;
1381 else
1382 target = gen_reg_rtx (ext_mode);
1385 create_output_operand (&ops[0], target, ext_mode);
1386 create_fixed_operand (&ops[1], op0);
1387 create_integer_operand (&ops[2], bitsize);
1388 create_integer_operand (&ops[3], bitnum);
1389 if (maybe_expand_insn (extv->icode, 4, ops))
1391 target = ops[0].value;
1392 if (target == spec_target)
1393 return target;
1394 if (target == spec_target_subreg)
1395 return spec_target;
1396 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1398 return NULL_RTX;
1401 /* A subroutine of extract_bit_field, with the same arguments.
1402 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1403 if we can find no other means of implementing the operation.
1404 If FALLBACK_P is false, return NULL instead. */
1406 static rtx
1407 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1408 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1409 enum machine_mode mode, enum machine_mode tmode,
1410 bool fallback_p)
1412 rtx op0 = str_rtx;
1413 enum machine_mode int_mode;
1414 enum machine_mode mode1;
1416 if (tmode == VOIDmode)
1417 tmode = mode;
1419 while (GET_CODE (op0) == SUBREG)
1421 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1422 op0 = SUBREG_REG (op0);
1425 /* If we have an out-of-bounds access to a register, just return an
1426 uninitialized register of the required mode. This can occur if the
1427 source code contains an out-of-bounds access to a small array. */
1428 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1429 return gen_reg_rtx (tmode);
1431 if (REG_P (op0)
1432 && mode == GET_MODE (op0)
1433 && bitnum == 0
1434 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1436 /* We're trying to extract a full register from itself. */
1437 return op0;
1440 /* See if we can get a better vector mode before extracting. */
1441 if (VECTOR_MODE_P (GET_MODE (op0))
1442 && !MEM_P (op0)
1443 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1445 enum machine_mode new_mode;
1447 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1448 new_mode = MIN_MODE_VECTOR_FLOAT;
1449 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1450 new_mode = MIN_MODE_VECTOR_FRACT;
1451 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1452 new_mode = MIN_MODE_VECTOR_UFRACT;
1453 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1454 new_mode = MIN_MODE_VECTOR_ACCUM;
1455 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1456 new_mode = MIN_MODE_VECTOR_UACCUM;
1457 else
1458 new_mode = MIN_MODE_VECTOR_INT;
1460 for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1461 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1462 && targetm.vector_mode_supported_p (new_mode))
1463 break;
1464 if (new_mode != VOIDmode)
1465 op0 = gen_lowpart (new_mode, op0);
1468 /* Use vec_extract patterns for extracting parts of vectors whenever
1469 available. */
1470 if (VECTOR_MODE_P (GET_MODE (op0))
1471 && !MEM_P (op0)
1472 && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1473 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1474 == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1476 struct expand_operand ops[3];
1477 enum machine_mode outermode = GET_MODE (op0);
1478 enum machine_mode innermode = GET_MODE_INNER (outermode);
1479 enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1480 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1482 create_output_operand (&ops[0], target, innermode);
1483 create_input_operand (&ops[1], op0, outermode);
1484 create_integer_operand (&ops[2], pos);
1485 if (maybe_expand_insn (icode, 3, ops))
1487 target = ops[0].value;
1488 if (GET_MODE (target) != mode)
1489 return gen_lowpart (tmode, target);
1490 return target;
1494 /* Make sure we are playing with integral modes. Pun with subregs
1495 if we aren't. */
1497 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1498 if (imode != GET_MODE (op0))
1500 if (MEM_P (op0))
1501 op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
1502 else if (imode != BLKmode)
1504 op0 = gen_lowpart (imode, op0);
1506 /* If we got a SUBREG, force it into a register since we
1507 aren't going to be able to do another SUBREG on it. */
1508 if (GET_CODE (op0) == SUBREG)
1509 op0 = force_reg (imode, op0);
1511 else if (REG_P (op0))
1513 rtx reg, subreg;
1514 imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1515 MODE_INT);
1516 reg = gen_reg_rtx (imode);
1517 subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1518 emit_move_insn (subreg, op0);
1519 op0 = reg;
1520 bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1522 else
1524 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1525 rtx mem = assign_stack_temp (GET_MODE (op0), size);
1526 emit_move_insn (mem, op0);
1527 op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1532 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1533 If that's wrong, the solution is to test for it and set TARGET to 0
1534 if needed. */
1536 /* Get the mode of the field to use for atomic access or subreg
1537 conversion. */
1538 mode1 = mode;
1539 if (SCALAR_INT_MODE_P (tmode))
1541 enum machine_mode try_mode = mode_for_size (bitsize,
1542 GET_MODE_CLASS (tmode), 0);
1543 if (try_mode != BLKmode)
1544 mode1 = try_mode;
1546 gcc_assert (mode1 != BLKmode);
1548 /* Extraction of a full MODE1 value can be done with a subreg as long
1549 as the least significant bit of the value is the least significant
1550 bit of either OP0 or a word of OP0. */
1551 if (!MEM_P (op0)
1552 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1553 && bitsize == GET_MODE_BITSIZE (mode1)
1554 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1556 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1557 bitnum / BITS_PER_UNIT);
1558 if (sub)
1559 return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1562 /* Extraction of a full MODE1 value can be done with a load as long as
1563 the field is on a byte boundary and is sufficiently aligned. */
1564 if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1566 op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1567 return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1570 /* Handle fields bigger than a word. */
1572 if (bitsize > BITS_PER_WORD)
1574 /* Here we transfer the words of the field
1575 in the order least significant first.
1576 This is because the most significant word is the one which may
1577 be less than full. */
1579 unsigned int backwards = WORDS_BIG_ENDIAN;
1580 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1581 unsigned int i;
1582 rtx last;
1584 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1585 target = gen_reg_rtx (mode);
1587 /* Indicate for flow that the entire target reg is being set. */
1588 emit_clobber (target);
1590 last = get_last_insn ();
1591 for (i = 0; i < nwords; i++)
1593 /* If I is 0, use the low-order word in both field and target;
1594 if I is 1, use the next to lowest word; and so on. */
1595 /* Word number in TARGET to use. */
1596 unsigned int wordnum
1597 = (backwards
1598 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1599 : i);
1600 /* Offset from start of field in OP0. */
1601 unsigned int bit_offset = (backwards
1602 ? MAX ((int) bitsize - ((int) i + 1)
1603 * BITS_PER_WORD, 0)
1605 : (int) i * BITS_PER_WORD);
1606 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1607 rtx result_part
1608 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1609 bitsize - i * BITS_PER_WORD),
1610 bitnum + bit_offset, 1, target_part,
1611 mode, word_mode, fallback_p);
1613 gcc_assert (target_part);
1614 if (!result_part)
1616 delete_insns_since (last);
1617 return NULL;
1620 if (result_part != target_part)
1621 emit_move_insn (target_part, result_part);
1624 if (unsignedp)
1626 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1627 need to be zero'd out. */
1628 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1630 unsigned int i, total_words;
1632 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1633 for (i = nwords; i < total_words; i++)
1634 emit_move_insn
1635 (operand_subword (target,
1636 backwards ? total_words - i - 1 : i,
1637 1, VOIDmode),
1638 const0_rtx);
1640 return target;
1643 /* Signed bit field: sign-extend with two arithmetic shifts. */
1644 target = expand_shift (LSHIFT_EXPR, mode, target,
1645 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1646 return expand_shift (RSHIFT_EXPR, mode, target,
1647 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
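/* Worked example (illustrative, 32-bit words, 64-bit MODE): a signed
   48-bit field assembled into TARGET is sign-extended by the two shifts
   above with a count of 64 - 48 = 16: the left shift moves the field's
   sign bit into bit 63 and the arithmetic right shift smears it back
   down.  */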
1650 /* If OP0 is a multi-word register, narrow it to the affected word.
1651 If the region spans two words, defer to extract_split_bit_field. */
1652 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1654 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1655 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1656 bitnum %= BITS_PER_WORD;
1657 if (bitnum + bitsize > BITS_PER_WORD)
1659 if (!fallback_p)
1660 return NULL_RTX;
1661 target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1662 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1666 /* From here on we know the desired field is smaller than a word.
1667 If OP0 is a register, it too fits within a word. */
1668 enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1669 extraction_insn extv;
1670 if (!MEM_P (op0)
1671 /* ??? We could limit the structure size to the part of OP0 that
1672 contains the field, with appropriate checks for endianness
1673 and TRULY_NOOP_TRUNCATION. */
1674 && get_best_reg_extraction_insn (&extv, pattern,
1675 GET_MODE_BITSIZE (GET_MODE (op0)),
1676 tmode))
1678 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1679 unsignedp, target, mode,
1680 tmode);
1681 if (result)
1682 return result;
1685 /* If OP0 is a memory, try copying it to a register and seeing if a
1686 cheap register alternative is available. */
1687 if (MEM_P (op0))
1689 if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1690 tmode))
1692 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1693 bitnum, unsignedp,
1694 target, mode,
1695 tmode);
1696 if (result)
1697 return result;
1700 rtx last = get_last_insn ();
1702 /* Try loading part of OP0 into a register and extracting the
1703 bitfield from that. */
1704 unsigned HOST_WIDE_INT bitpos;
1705 rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1706 0, 0, tmode, &bitpos);
1707 if (xop0)
1709 xop0 = copy_to_reg (xop0);
1710 rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1711 unsignedp, target,
1712 mode, tmode, false);
1713 if (result)
1714 return result;
1715 delete_insns_since (last);
1719 if (!fallback_p)
1720 return NULL;
1722 /* Find a correspondingly-sized integer field, so we can apply
1723 shifts and masks to it. */
1724 int_mode = int_mode_for_mode (tmode);
1725 if (int_mode == BLKmode)
1726 int_mode = int_mode_for_mode (mode);
1727 /* Should probably push op0 out to memory and then do a load. */
1728 gcc_assert (int_mode != BLKmode);
1730 target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum,
1731 target, unsignedp);
1732 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1735 /* Generate code to extract a byte-field from STR_RTX
1736 containing BITSIZE bits, starting at BITNUM,
1737 and put it in TARGET if possible (if TARGET is nonzero).
1738 Regardless of TARGET, we return the rtx for where the value is placed.
1740 STR_RTX is the structure containing the byte (a REG or MEM).
1741 UNSIGNEDP is nonzero if this is an unsigned bit field.
1742 MODE is the natural mode of the field value once extracted.
1743 TMODE is the mode the caller would like the value to have;
1744 but the value may be returned with type MODE instead.
1746 If a TARGET is specified and we can store in it at no extra cost,
1747 we do so, and return TARGET.
1748 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1749 if they are equally easy. */
1752 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1753 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1754 enum machine_mode mode, enum machine_mode tmode)
1756 enum machine_mode mode1;
1758 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1759 if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1760 mode1 = GET_MODE (str_rtx);
1761 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1762 mode1 = GET_MODE (target);
1763 else
1764 mode1 = tmode;
1766 if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, mode1, 0, 0))
1768 rtx result;
1770 /* Extraction of a full MODE1 value can be done with a load as long as
1771 the field is on a byte boundary and is sufficiently aligned. */
1772 if (simple_mem_bitfield_p (str_rtx, bitsize, bitnum, mode1))
1773 result = adjust_bitfield_address (str_rtx, mode1,
1774 bitnum / BITS_PER_UNIT);
1775 else
1777 str_rtx = narrow_bit_field_mem (str_rtx, mode1, bitsize, bitnum,
1778 &bitnum);
1779 result = extract_fixed_bit_field_1 (mode, str_rtx, bitsize, bitnum,
1780 target, unsignedp);
1783 return convert_extracted_bit_field (result, mode, tmode, unsignedp);
1786 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1787 target, mode, tmode, true);
1790 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1791 from bit BITNUM of OP0.
1793 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1794 If TARGET is nonzero, attempts to store the value there
1795 and return TARGET, but this is not guaranteed.
1796 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
1798 static rtx
1799 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1800 unsigned HOST_WIDE_INT bitsize,
1801 unsigned HOST_WIDE_INT bitnum, rtx target,
1802 int unsignedp)
1804 if (MEM_P (op0))
1806 enum machine_mode mode
1807 = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), word_mode,
1808 MEM_VOLATILE_P (op0));
1810 if (mode == VOIDmode)
1811 /* The only way this should occur is if the field spans word
1812 boundaries. */
1813 return extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1815 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1818 return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
1819 target, unsignedp);
1822 /* Helper function for extract_fixed_bit_field, extracts
1823 the bit field always using the MODE of OP0. */
1825 static rtx
1826 extract_fixed_bit_field_1 (enum machine_mode tmode, rtx op0,
1827 unsigned HOST_WIDE_INT bitsize,
1828 unsigned HOST_WIDE_INT bitnum, rtx target,
1829 int unsignedp)
1831 enum machine_mode mode = GET_MODE (op0);
1832 gcc_assert (SCALAR_INT_MODE_P (mode));
1834 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1835 for invalid input, such as the extraction of f5 from
1836 gcc.dg/pr48335-2.c. */
1838 if (BYTES_BIG_ENDIAN)
1839 /* BITNUM is the distance between our msb and that of OP0.
1840 Convert it to the distance from the lsb. */
1841 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1843 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
1844 We have reduced the big-endian case to the little-endian case. */
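   /* For example, on a little-endian target an unsigned 5-bit field at
      bit 3 of a 32-bit word comes out below as (x >> 3) & 0x1f.  */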
1846 if (unsignedp)
1848 if (bitnum)
1850 /* If the field does not already start at the lsb,
1851 shift it so it does. */
1852 /* Maybe propagate the target for the shift. */
1853 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1854 if (tmode != mode)
1855 subtarget = 0;
1856 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
1858 /* Convert the value to the desired mode. */
1859 if (mode != tmode)
1860 op0 = convert_to_mode (tmode, op0, 1);
1862 /* Unless the msb of the field used to be the msb when we shifted,
1863 mask out the upper bits. */
1865 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
1866 return expand_binop (GET_MODE (op0), and_optab, op0,
1867 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1868 target, 1, OPTAB_LIB_WIDEN);
1869 return op0;
1872 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1873 then arithmetic-shift its lsb to the lsb of the word. */
1874 op0 = force_reg (mode, op0);
1876 /* Find the narrowest integer mode that contains the field. */
1878 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1879 mode = GET_MODE_WIDER_MODE (mode))
1880 if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
1882 op0 = convert_to_mode (mode, op0, 0);
1883 break;
1886 if (mode != tmode)
1887 target = 0;
1889 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
1891 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
1892 /* Maybe propagate the target for the shift. */
1893 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1894 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1897 return expand_shift (RSHIFT_EXPR, mode, op0,
1898 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1901 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1902 VALUE << BITPOS. */
1904 static rtx
1905 lshift_value (enum machine_mode mode, unsigned HOST_WIDE_INT value,
1906 int bitpos)
1908 return immed_wide_int_const (wi::lshift (value, bitpos), mode);
1911 /* Extract a bit field that is split across two words
1912 and return an RTX for the result.
1914 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1915 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1916 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend. */
1918 static rtx
1919 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1920 unsigned HOST_WIDE_INT bitpos, int unsignedp)
1922 unsigned int unit;
1923 unsigned int bitsdone = 0;
1924 rtx result = NULL_RTX;
1925 int first = 1;
1927 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1928 much at a time. */
1929 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1930 unit = BITS_PER_WORD;
1931 else
1932 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1934 while (bitsdone < bitsize)
1936 unsigned HOST_WIDE_INT thissize;
1937 rtx part, word;
1938 unsigned HOST_WIDE_INT thispos;
1939 unsigned HOST_WIDE_INT offset;
1941 offset = (bitpos + bitsdone) / unit;
1942 thispos = (bitpos + bitsdone) % unit;
1944 /* THISSIZE must not overrun a word boundary. Otherwise,
1945 extract_fixed_bit_field will call us again, and we will mutually
1946 recurse forever. */
1947 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1948 thissize = MIN (thissize, unit - thispos);
1950 /* If OP0 is a register, then handle OFFSET here.
1952 When handling multiword bitfields, extract_bit_field may pass
1953 down a word_mode SUBREG of a larger REG for a bitfield that actually
1954 crosses a word boundary. Thus, for a SUBREG, we must find
1955 the current word starting from the base register. */
1956 if (GET_CODE (op0) == SUBREG)
1958 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1959 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1960 GET_MODE (SUBREG_REG (op0)));
1961 offset = 0;
1963 else if (REG_P (op0))
1965 word = operand_subword_force (op0, offset, GET_MODE (op0));
1966 offset = 0;
1968 else
1969 word = op0;
1971 /* Extract the parts in bit-counting order,
1972 whose meaning is determined by BYTES_PER_UNIT.
1973 OFFSET is in UNITs, and UNIT is in bits. */
1974 part = extract_fixed_bit_field (word_mode, word, thissize,
1975 offset * unit + thispos, 0, 1);
1976 bitsdone += thissize;
1978 /* Shift this part into place for the result. */
1979 if (BYTES_BIG_ENDIAN)
1981 if (bitsize != bitsdone)
1982 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1983 bitsize - bitsdone, 0, 1);
1985 else
1987 if (bitsdone != thissize)
1988 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1989 bitsdone - thissize, 0, 1);
1992 if (first)
1993 result = part;
1994 else
1995 /* Combine the parts with bitwise or. This works
1996 because we extracted each part as an unsigned bit field. */
1997 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1998 OPTAB_LIB_WIDEN);
2000 first = 0;
2003 /* Unsigned bit field: we are done. */
2004 if (unsignedp)
2005 return result;
2006 /* Signed bit field: sign-extend with two arithmetic shifts. */
2007 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2008 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2009 return expand_shift (RSHIFT_EXPR, word_mode, result,
2010 BITS_PER_WORD - bitsize, NULL_RTX, 0);
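/* For instance, with 32-bit words and a little-endian layout, an unsigned
   10-bit field starting at bit 28 is assembled roughly as

     lo = 4 bits extracted at bit 28 of word 0;
     hi = 6 bits extracted at bit  0 of word 1;
     result = lo | (hi << 4);

   each part being extracted unsigned so the final IOR is correct.  */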
2013 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2014 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2015 MODE, fill the upper bits with zeros. Fail if the layout of either
2016 mode is unknown (as for CC modes) or if the extraction would involve
2017 unprofitable mode punning. Return the value on success, otherwise
2018 return null.
2020 This is different from gen_lowpart* in these respects:
2022 - the returned value must always be considered an rvalue
2024 - when MODE is wider than SRC_MODE, the extraction involves
2025 a zero extension
2027 - when MODE is smaller than SRC_MODE, the extraction involves
2028 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2030 In other words, this routine performs a computation, whereas the
2031 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2032 operations. */
2035 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
2037 enum machine_mode int_mode, src_int_mode;
2039 if (mode == src_mode)
2040 return src;
2042 if (CONSTANT_P (src))
2044 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2045 fails, it will happily create (subreg (symbol_ref)) or similar
2046 invalid SUBREGs. */
2047 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2048 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2049 if (ret)
2050 return ret;
2052 if (GET_MODE (src) == VOIDmode
2053 || !validate_subreg (mode, src_mode, src, byte))
2054 return NULL_RTX;
2056 src = force_reg (GET_MODE (src), src);
2057 return gen_rtx_SUBREG (mode, src, byte);
2060 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2061 return NULL_RTX;
2063 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2064 && MODES_TIEABLE_P (mode, src_mode))
2066 rtx x = gen_lowpart_common (mode, src);
2067 if (x)
2068 return x;
2071 src_int_mode = int_mode_for_mode (src_mode);
2072 int_mode = int_mode_for_mode (mode);
2073 if (src_int_mode == BLKmode || int_mode == BLKmode)
2074 return NULL_RTX;
2076 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2077 return NULL_RTX;
2078 if (!MODES_TIEABLE_P (int_mode, mode))
2079 return NULL_RTX;
2081 src = gen_lowpart (src_int_mode, src);
2082 src = convert_modes (int_mode, src_int_mode, src, true);
2083 src = gen_lowpart (mode, src);
2084 return src;
2087 /* Add INC into TARGET. */
2089 void
2090 expand_inc (rtx target, rtx inc)
2092 rtx value = expand_binop (GET_MODE (target), add_optab,
2093 target, inc,
2094 target, 0, OPTAB_LIB_WIDEN);
2095 if (value != target)
2096 emit_move_insn (target, value);
2099 /* Subtract DEC from TARGET. */
2101 void
2102 expand_dec (rtx target, rtx dec)
2104 rtx value = expand_binop (GET_MODE (target), sub_optab,
2105 target, dec,
2106 target, 0, OPTAB_LIB_WIDEN);
2107 if (value != target)
2108 emit_move_insn (target, value);
2111 /* Output a shift instruction for expression code CODE,
2112 with SHIFTED being the rtx for the value to shift,
2113 and AMOUNT the rtx for the amount to shift by.
2114 Store the result in the rtx TARGET, if that is convenient.
2115 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2116 Return the rtx for where the value is. */
2118 static rtx
2119 expand_shift_1 (enum tree_code code, enum machine_mode mode, rtx shifted,
2120 rtx amount, rtx target, int unsignedp)
2122 rtx op1, temp = 0;
2123 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2124 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2125 optab lshift_optab = ashl_optab;
2126 optab rshift_arith_optab = ashr_optab;
2127 optab rshift_uns_optab = lshr_optab;
2128 optab lrotate_optab = rotl_optab;
2129 optab rrotate_optab = rotr_optab;
2130 enum machine_mode op1_mode;
2131 int attempt;
2132 bool speed = optimize_insn_for_speed_p ();
2134 op1 = amount;
2135 op1_mode = GET_MODE (op1);
2137 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2138 shift amount is a vector, use the vector/vector shift patterns. */
2139 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2141 lshift_optab = vashl_optab;
2142 rshift_arith_optab = vashr_optab;
2143 rshift_uns_optab = vlshr_optab;
2144 lrotate_optab = vrotl_optab;
2145 rrotate_optab = vrotr_optab;
2148 /* We used to detect shift counts computed by NEGATE_EXPR
2149 and shift in the other direction, but that does not work
2150 on all machines. */
2152 if (SHIFT_COUNT_TRUNCATED)
2154 if (CONST_INT_P (op1)
2155 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2156 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2157 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2158 % GET_MODE_BITSIZE (mode));
2159 else if (GET_CODE (op1) == SUBREG
2160 && subreg_lowpart_p (op1)
2161 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2162 && SCALAR_INT_MODE_P (GET_MODE (op1)))
2163 op1 = SUBREG_REG (op1);
2166 /* Canonicalize rotates by constant amount. If op1 is bitsize / 2,
2167 prefer left rotation, if op1 is from bitsize / 2 + 1 to
2168 bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2169 amount instead. */
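  /* For instance, a 32-bit rotate left by 30 becomes a rotate right by 2.  */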
2170 if (rotate
2171 && CONST_INT_P (op1)
2172 && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (mode) / 2 + left,
2173 GET_MODE_BITSIZE (mode) - 1))
2175 op1 = GEN_INT (GET_MODE_BITSIZE (mode) - INTVAL (op1));
2176 left = !left;
2177 code = left ? LROTATE_EXPR : RROTATE_EXPR;
2180 if (op1 == const0_rtx)
2181 return shifted;
2183 /* Check whether it's cheaper to implement a left shift by a constant
2184 bit count by a sequence of additions. */
2185 if (code == LSHIFT_EXPR
2186 && CONST_INT_P (op1)
2187 && INTVAL (op1) > 0
2188 && INTVAL (op1) < GET_MODE_PRECISION (mode)
2189 && INTVAL (op1) < MAX_BITS_PER_WORD
2190 && (shift_cost (speed, mode, INTVAL (op1))
2191 > INTVAL (op1) * add_cost (speed, mode))
2192 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2194 int i;
2195 for (i = 0; i < INTVAL (op1); i++)
2197 temp = force_reg (mode, shifted);
2198 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2199 unsignedp, OPTAB_LIB_WIDEN);
2201 return shifted;
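  /* E.g. when two additions are cheaper than a shift by 2, x << 2 is
     emitted above as t = x + x; t = t + t.  */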
2204 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2206 enum optab_methods methods;
2208 if (attempt == 0)
2209 methods = OPTAB_DIRECT;
2210 else if (attempt == 1)
2211 methods = OPTAB_WIDEN;
2212 else
2213 methods = OPTAB_LIB_WIDEN;
2215 if (rotate)
2217 /* Widening does not work for rotation. */
2218 if (methods == OPTAB_WIDEN)
2219 continue;
2220 else if (methods == OPTAB_LIB_WIDEN)
2222 /* If we have been unable to open-code this by a rotation,
2223 do it as the IOR of two shifts. I.e., to rotate A
2224 by N bits, compute
2225 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2226 where C is the bitsize of A.
2228 It is theoretically possible that the target machine might
2229 not be able to perform either shift and hence we would
2230 be making two libcalls rather than just the one for the
2231 shift (similarly if IOR could not be done). We will allow
2232 this extremely unlikely lossage to avoid complicating the
2233 code below. */
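	  /* E.g. with C == 32 this computes
	     (A << N) | ((unsigned) A >> ((-N) & 31)); the mask keeps the
	     second shift count in the range [0, 31], so the expression is
	     well defined even when N is 0 at run time.  */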
2235 rtx subtarget = target == shifted ? 0 : target;
2236 rtx new_amount, other_amount;
2237 rtx temp1;
2239 new_amount = op1;
2240 if (op1 == const0_rtx)
2241 return shifted;
2242 else if (CONST_INT_P (op1))
2243 other_amount = GEN_INT (GET_MODE_BITSIZE (mode)
2244 - INTVAL (op1));
2245 else
2247 other_amount
2248 = simplify_gen_unary (NEG, GET_MODE (op1),
2249 op1, GET_MODE (op1));
2250 HOST_WIDE_INT mask = GET_MODE_PRECISION (mode) - 1;
2251 other_amount
2252 = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2253 gen_int_mode (mask, GET_MODE (op1)));
2256 shifted = force_reg (mode, shifted);
2258 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2259 mode, shifted, new_amount, 0, 1);
2260 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2261 mode, shifted, other_amount,
2262 subtarget, 1);
2263 return expand_binop (mode, ior_optab, temp, temp1, target,
2264 unsignedp, methods);
2267 temp = expand_binop (mode,
2268 left ? lrotate_optab : rrotate_optab,
2269 shifted, op1, target, unsignedp, methods);
2271 else if (unsignedp)
2272 temp = expand_binop (mode,
2273 left ? lshift_optab : rshift_uns_optab,
2274 shifted, op1, target, unsignedp, methods);
2276 /* Do arithmetic shifts.
2277 Also, if we are going to widen the operand, we can just as well
2278 use an arithmetic right-shift instead of a logical one. */
2279 if (temp == 0 && ! rotate
2280 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2282 enum optab_methods methods1 = methods;
2284 /* If trying to widen a log shift to an arithmetic shift,
2285 don't accept an arithmetic shift of the same size. */
2286 if (unsignedp)
2287 methods1 = OPTAB_MUST_WIDEN;
2289 /* Arithmetic shift */
2291 temp = expand_binop (mode,
2292 left ? lshift_optab : rshift_arith_optab,
2293 shifted, op1, target, unsignedp, methods1);
2296 /* We used to try extzv here for logical right shifts, but that was
2297 only useful for one machine, the VAX, and caused poor code
2298 generation there for lshrdi3, so the code was deleted and a
2299 define_expand for lshrsi3 was added to vax.md. */
2302 gcc_assert (temp);
2303 return temp;
2306 /* Output a shift instruction for expression code CODE,
2307 with SHIFTED being the rtx for the value to shift,
2308 and AMOUNT the amount to shift by.
2309 Store the result in the rtx TARGET, if that is convenient.
2310 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2311 Return the rtx for where the value is. */
2314 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2315 int amount, rtx target, int unsignedp)
2317 return expand_shift_1 (code, mode,
2318 shifted, GEN_INT (amount), target, unsignedp);
2321 /* Output a shift instruction for expression code CODE,
2322 with SHIFTED being the rtx for the value to shift,
2323 and AMOUNT the tree for the amount to shift by.
2324 Store the result in the rtx TARGET, if that is convenient.
2325 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2326 Return the rtx for where the value is. */
2329 expand_variable_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2330 tree amount, rtx target, int unsignedp)
2332 return expand_shift_1 (code, mode,
2333 shifted, expand_normal (amount), target, unsignedp);
2337 /* Indicates the type of fixup needed after a constant multiplication.
2338 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2339 the result should be negated, and ADD_VARIANT means that the
2340 multiplicand should be added to the result. */
2341 enum mult_variant {basic_variant, negate_variant, add_variant};
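/* For instance, multiplying by 6 can use the basic_variant sequence
   t = (x << 1) + x; t <<= 1; multiplying by -6 can reuse that sequence
   followed by a negation (negate_variant); and multiplying by 7 can use
   the sequence for 6 followed by one more addition of x (add_variant).  */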
2343 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2344 const struct mult_cost *, enum machine_mode mode);
2345 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2346 struct algorithm *, enum mult_variant *, int);
2347 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2348 const struct algorithm *, enum mult_variant);
2349 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2350 static rtx extract_high_half (enum machine_mode, rtx);
2351 static rtx expmed_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2352 static rtx expmed_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2353 int, int);
2354 /* Compute and return the best algorithm for multiplying by T.
2355 The algorithm must cost less than COST_LIMIT.
2356 If retval.cost >= COST_LIMIT, no algorithm was found and all
2357 other fields of the returned struct are undefined.
2358 MODE is the machine mode of the multiplication. */
2360 static void
2361 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2362 const struct mult_cost *cost_limit, enum machine_mode mode)
2364 int m;
2365 struct algorithm *alg_in, *best_alg;
2366 struct mult_cost best_cost;
2367 struct mult_cost new_limit;
2368 int op_cost, op_latency;
2369 unsigned HOST_WIDE_INT orig_t = t;
2370 unsigned HOST_WIDE_INT q;
2371 int maxm, hash_index;
2372 bool cache_hit = false;
2373 enum alg_code cache_alg = alg_zero;
2374 bool speed = optimize_insn_for_speed_p ();
2375 enum machine_mode imode;
2376 struct alg_hash_entry *entry_ptr;
2378 /* Indicate that no algorithm is yet found. If no algorithm
2379 is found, this value will be returned and indicate failure. */
2380 alg_out->cost.cost = cost_limit->cost + 1;
2381 alg_out->cost.latency = cost_limit->latency + 1;
2383 if (cost_limit->cost < 0
2384 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2385 return;
2387 /* Be prepared for vector modes. */
2388 imode = GET_MODE_INNER (mode);
2389 if (imode == VOIDmode)
2390 imode = mode;
2392 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2394 /* Restrict the bits of "t" to the multiplication's mode. */
2395 t &= GET_MODE_MASK (imode);
2397 /* t == 1 can be done in zero cost. */
2398 if (t == 1)
2400 alg_out->ops = 1;
2401 alg_out->cost.cost = 0;
2402 alg_out->cost.latency = 0;
2403 alg_out->op[0] = alg_m;
2404 return;
2407 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2408 fail now. */
2409 if (t == 0)
2411 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2412 return;
2413 else
2415 alg_out->ops = 1;
2416 alg_out->cost.cost = zero_cost (speed);
2417 alg_out->cost.latency = zero_cost (speed);
2418 alg_out->op[0] = alg_zero;
2419 return;
2423 /* We'll be needing a couple extra algorithm structures now. */
2425 alg_in = XALLOCA (struct algorithm);
2426 best_alg = XALLOCA (struct algorithm);
2427 best_cost = *cost_limit;
2429 /* Compute the hash index. */
2430 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2432 /* See if we already know what to do for T. */
2433 entry_ptr = alg_hash_entry_ptr (hash_index);
2434 if (entry_ptr->t == t
2435 && entry_ptr->mode == mode
2437 && entry_ptr->speed == speed
2438 && entry_ptr->alg != alg_unknown)
2440 cache_alg = entry_ptr->alg;
2442 if (cache_alg == alg_impossible)
2444 /* The cache tells us that it's impossible to synthesize
2445 multiplication by T within entry_ptr->cost. */
2446 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2447 /* COST_LIMIT is at least as restrictive as the one
2448 recorded in the hash table, in which case we have no
2449 hope of synthesizing a multiplication. Just
2450 return. */
2451 return;
2453 /* If we get here, COST_LIMIT is less restrictive than the
2454 one recorded in the hash table, so we may be able to
2455 synthesize a multiplication. Proceed as if we didn't
2456 have the cache entry. */
2458 else
2460 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2461 /* The cached algorithm shows that this multiplication
2462 requires more cost than COST_LIMIT. Just return. This
2463 way, we don't clobber this cache entry with
2464 alg_impossible but retain useful information. */
2465 return;
2467 cache_hit = true;
2469 switch (cache_alg)
2471 case alg_shift:
2472 goto do_alg_shift;
2474 case alg_add_t_m2:
2475 case alg_sub_t_m2:
2476 goto do_alg_addsub_t_m2;
2478 case alg_add_factor:
2479 case alg_sub_factor:
2480 goto do_alg_addsub_factor;
2482 case alg_add_t2_m:
2483 goto do_alg_add_t2_m;
2485 case alg_sub_t2_m:
2486 goto do_alg_sub_t2_m;
2488 default:
2489 gcc_unreachable ();
2494 /* If we have a group of zero bits at the low-order part of T, try
2495 multiplying by the remaining bits and then doing a shift. */
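  /* E.g. for T == 20 (binary 10100) this synthesizes the multiplication
     by 5 and then shifts the result left by 2.  */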
2497 if ((t & 1) == 0)
2499 do_alg_shift:
2500 m = floor_log2 (t & -t); /* m = number of low zero bits */
2501 if (m < maxm)
2503 q = t >> m;
2504 /* The function expand_shift will choose between a shift and
2505 a sequence of additions, so the observed cost is given as
2506 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2507 op_cost = m * add_cost (speed, mode);
2508 if (shift_cost (speed, mode, m) < op_cost)
2509 op_cost = shift_cost (speed, mode, m);
2510 new_limit.cost = best_cost.cost - op_cost;
2511 new_limit.latency = best_cost.latency - op_cost;
2512 synth_mult (alg_in, q, &new_limit, mode);
2514 alg_in->cost.cost += op_cost;
2515 alg_in->cost.latency += op_cost;
2516 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2518 struct algorithm *x;
2519 best_cost = alg_in->cost;
2520 x = alg_in, alg_in = best_alg, best_alg = x;
2521 best_alg->log[best_alg->ops] = m;
2522 best_alg->op[best_alg->ops] = alg_shift;
2525 /* See if treating ORIG_T as a signed number yields a better
2526 sequence. Try this sequence only for a negative ORIG_T
2527 as it would be useless for a non-negative ORIG_T. */
2528 if ((HOST_WIDE_INT) orig_t < 0)
2530 /* Shift ORIG_T as follows because a right shift of a
2531 negative-valued signed type is implementation
2532 defined. */
2533 q = ~(~orig_t >> m);
2534 /* The function expand_shift will choose between a shift
2535 and a sequence of additions, so the observed cost is
2536 given as MIN (m * add_cost(speed, mode),
2537 shift_cost(speed, mode, m)). */
2538 op_cost = m * add_cost (speed, mode);
2539 if (shift_cost (speed, mode, m) < op_cost)
2540 op_cost = shift_cost (speed, mode, m);
2541 new_limit.cost = best_cost.cost - op_cost;
2542 new_limit.latency = best_cost.latency - op_cost;
2543 synth_mult (alg_in, q, &new_limit, mode);
2545 alg_in->cost.cost += op_cost;
2546 alg_in->cost.latency += op_cost;
2547 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2549 struct algorithm *x;
2550 best_cost = alg_in->cost;
2551 x = alg_in, alg_in = best_alg, best_alg = x;
2552 best_alg->log[best_alg->ops] = m;
2553 best_alg->op[best_alg->ops] = alg_shift;
2557 if (cache_hit)
2558 goto done;
2561 /* If we have an odd number, add or subtract one. */
2562 if ((t & 1) != 0)
2564 unsigned HOST_WIDE_INT w;
2566 do_alg_addsub_t_m2:
2567 for (w = 1; (w & t) != 0; w <<= 1)
2569 /* If T was -1, then W will be zero after the loop. This is another
2570 case where T ends with ...111. Handling this by multiplying by (T + 1)
2571 and subtracting 1 produces slightly better code and results in much
2572 faster algorithm selection than treating it like the ...0111 case
2573 below. */
2574 if (w == 0
2575 || (w > 2
2576 /* Reject the case where t is 3.
2577 Thus we prefer addition in that case. */
2578 && t != 3))
2580 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
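	  /* E.g. x * 15 becomes (x << 4) - x.  */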
2582 op_cost = add_cost (speed, mode);
2583 new_limit.cost = best_cost.cost - op_cost;
2584 new_limit.latency = best_cost.latency - op_cost;
2585 synth_mult (alg_in, t + 1, &new_limit, mode);
2587 alg_in->cost.cost += op_cost;
2588 alg_in->cost.latency += op_cost;
2589 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2591 struct algorithm *x;
2592 best_cost = alg_in->cost;
2593 x = alg_in, alg_in = best_alg, best_alg = x;
2594 best_alg->log[best_alg->ops] = 0;
2595 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2598 else
2600 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2602 op_cost = add_cost (speed, mode);
2603 new_limit.cost = best_cost.cost - op_cost;
2604 new_limit.latency = best_cost.latency - op_cost;
2605 synth_mult (alg_in, t - 1, &new_limit, mode);
2607 alg_in->cost.cost += op_cost;
2608 alg_in->cost.latency += op_cost;
2609 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2611 struct algorithm *x;
2612 best_cost = alg_in->cost;
2613 x = alg_in, alg_in = best_alg, best_alg = x;
2614 best_alg->log[best_alg->ops] = 0;
2615 best_alg->op[best_alg->ops] = alg_add_t_m2;
2619 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2620 quickly with a - a * n for some appropriate constant n. */
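  /* E.g. a * -7 can be done as a - (a << 3) with a shift-and-subtract.  */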
2621 m = exact_log2 (-orig_t + 1);
2622 if (m >= 0 && m < maxm)
2624 op_cost = shiftsub1_cost (speed, mode, m);
2625 new_limit.cost = best_cost.cost - op_cost;
2626 new_limit.latency = best_cost.latency - op_cost;
2627 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2628 &new_limit, mode);
2630 alg_in->cost.cost += op_cost;
2631 alg_in->cost.latency += op_cost;
2632 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2634 struct algorithm *x;
2635 best_cost = alg_in->cost;
2636 x = alg_in, alg_in = best_alg, best_alg = x;
2637 best_alg->log[best_alg->ops] = m;
2638 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2642 if (cache_hit)
2643 goto done;
2646 /* Look for factors of t of the form
2647 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2648 If we find such a factor, we can multiply by t using an algorithm that
2649 multiplies by q, shift the result by m and add/subtract it to itself.
2651 We search for large factors first and loop down, even if large factors
2652 are less probable than small; if we find a large factor we will find a
2653 good sequence quickly, and therefore be able to prune (by decreasing
2654 COST_LIMIT) the search. */
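  /* E.g. T == 45 factors as 3 * (2**4 - 1): synthesize the multiplication
     by 3, giving y, and then form (y << 4) - y.  */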
2656 do_alg_addsub_factor:
2657 for (m = floor_log2 (t - 1); m >= 2; m--)
2659 unsigned HOST_WIDE_INT d;
2661 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2662 if (t % d == 0 && t > d && m < maxm
2663 && (!cache_hit || cache_alg == alg_add_factor))
2665 /* If the target has a cheap shift-and-add instruction use
2666 that in preference to a shift insn followed by an add insn.
2667 Assume that the shift-and-add is "atomic" with a latency
2668 equal to its cost, otherwise assume that on superscalar
2669 hardware the shift may be executed concurrently with the
2670 earlier steps in the algorithm. */
2671 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2672 if (shiftadd_cost (speed, mode, m) < op_cost)
2674 op_cost = shiftadd_cost (speed, mode, m);
2675 op_latency = op_cost;
2677 else
2678 op_latency = add_cost (speed, mode);
2680 new_limit.cost = best_cost.cost - op_cost;
2681 new_limit.latency = best_cost.latency - op_latency;
2682 synth_mult (alg_in, t / d, &new_limit, mode);
2684 alg_in->cost.cost += op_cost;
2685 alg_in->cost.latency += op_latency;
2686 if (alg_in->cost.latency < op_cost)
2687 alg_in->cost.latency = op_cost;
2688 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2690 struct algorithm *x;
2691 best_cost = alg_in->cost;
2692 x = alg_in, alg_in = best_alg, best_alg = x;
2693 best_alg->log[best_alg->ops] = m;
2694 best_alg->op[best_alg->ops] = alg_add_factor;
2696 /* Other factors will have been taken care of in the recursion. */
2697 break;
2700 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2701 if (t % d == 0 && t > d && m < maxm
2702 && (!cache_hit || cache_alg == alg_sub_factor))
2704 /* If the target has a cheap shift-and-subtract insn use
2705 that in preference to a shift insn followed by a sub insn.
2706 Assume that the shift-and-sub is "atomic" with a latency
2707 equal to its cost, otherwise assume that on superscalar
2708 hardware the shift may be executed concurrently with the
2709 earlier steps in the algorithm. */
2710 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2711 if (shiftsub0_cost (speed, mode, m) < op_cost)
2713 op_cost = shiftsub0_cost (speed, mode, m);
2714 op_latency = op_cost;
2716 else
2717 op_latency = add_cost (speed, mode);
2719 new_limit.cost = best_cost.cost - op_cost;
2720 new_limit.latency = best_cost.latency - op_latency;
2721 synth_mult (alg_in, t / d, &new_limit, mode);
2723 alg_in->cost.cost += op_cost;
2724 alg_in->cost.latency += op_latency;
2725 if (alg_in->cost.latency < op_cost)
2726 alg_in->cost.latency = op_cost;
2727 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2729 struct algorithm *x;
2730 best_cost = alg_in->cost;
2731 x = alg_in, alg_in = best_alg, best_alg = x;
2732 best_alg->log[best_alg->ops] = m;
2733 best_alg->op[best_alg->ops] = alg_sub_factor;
2735 break;
2738 if (cache_hit)
2739 goto done;
2741 /* Try shift-and-add (load effective address) instructions,
2742 i.e. do a*3, a*5, a*9. */
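  /* That is, a*3 = a + (a << 1), a*5 = a + (a << 2), a*9 = a + (a << 3).  */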
2743 if ((t & 1) != 0)
2745 do_alg_add_t2_m:
2746 q = t - 1;
2747 q = q & -q;
2748 m = exact_log2 (q);
2749 if (m >= 0 && m < maxm)
2751 op_cost = shiftadd_cost (speed, mode, m);
2752 new_limit.cost = best_cost.cost - op_cost;
2753 new_limit.latency = best_cost.latency - op_cost;
2754 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2756 alg_in->cost.cost += op_cost;
2757 alg_in->cost.latency += op_cost;
2758 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2760 struct algorithm *x;
2761 best_cost = alg_in->cost;
2762 x = alg_in, alg_in = best_alg, best_alg = x;
2763 best_alg->log[best_alg->ops] = m;
2764 best_alg->op[best_alg->ops] = alg_add_t2_m;
2767 if (cache_hit)
2768 goto done;
2770 do_alg_sub_t2_m:
2771 q = t + 1;
2772 q = q & -q;
2773 m = exact_log2 (q);
2774 if (m >= 0 && m < maxm)
2776 op_cost = shiftsub0_cost (speed, mode, m);
2777 new_limit.cost = best_cost.cost - op_cost;
2778 new_limit.latency = best_cost.latency - op_cost;
2779 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2781 alg_in->cost.cost += op_cost;
2782 alg_in->cost.latency += op_cost;
2783 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2785 struct algorithm *x;
2786 best_cost = alg_in->cost;
2787 x = alg_in, alg_in = best_alg, best_alg = x;
2788 best_alg->log[best_alg->ops] = m;
2789 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2792 if (cache_hit)
2793 goto done;
2796 done:
2797 /* If best_cost has not decreased, we have not found any algorithm. */
2798 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2800 /* We failed to find an algorithm. Record alg_impossible for
2801 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2802 we are asked to find an algorithm for T within the same or
2803 lower COST_LIMIT, we can immediately return to the
2804 caller. */
2805 entry_ptr->t = t;
2806 entry_ptr->mode = mode;
2807 entry_ptr->speed = speed;
2808 entry_ptr->alg = alg_impossible;
2809 entry_ptr->cost = *cost_limit;
2810 return;
2813 /* Cache the result. */
2814 if (!cache_hit)
2816 entry_ptr->t = t;
2817 entry_ptr->mode = mode;
2818 entry_ptr->speed = speed;
2819 entry_ptr->alg = best_alg->op[best_alg->ops];
2820 entry_ptr->cost.cost = best_cost.cost;
2821 entry_ptr->cost.latency = best_cost.latency;
2824 /* If we are getting a sequence too long for `struct algorithm'
2825 to record, make this search fail. */
2826 if (best_alg->ops == MAX_BITS_PER_WORD)
2827 return;
2829 /* Copy the algorithm from temporary space to the space at alg_out.
2830 We avoid using structure assignment because the majority of
2831 best_alg is normally undefined, and this is a critical function. */
2832 alg_out->ops = best_alg->ops + 1;
2833 alg_out->cost = best_cost;
2834 memcpy (alg_out->op, best_alg->op,
2835 alg_out->ops * sizeof *alg_out->op);
2836 memcpy (alg_out->log, best_alg->log,
2837 alg_out->ops * sizeof *alg_out->log);
2840 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2841 Try three variations:
2843 - a shift/add sequence based on VAL itself
2844 - a shift/add sequence based on -VAL, followed by a negation
2845 - a shift/add sequence based on VAL - 1, followed by an addition.
2847 Return true if the cheapest of these cost less than MULT_COST,
2848 describing the algorithm in *ALG and final fixup in *VARIANT. */
2850 static bool
2851 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2852 struct algorithm *alg, enum mult_variant *variant,
2853 int mult_cost)
2855 struct algorithm alg2;
2856 struct mult_cost limit;
2857 int op_cost;
2858 bool speed = optimize_insn_for_speed_p ();
2860 /* Fail quickly for impossible bounds. */
2861 if (mult_cost < 0)
2862 return false;
2864 /* Ensure that mult_cost provides a reasonable upper bound.
2865 Any constant multiplication can be performed with less
2866 than 2 * bits additions. */
2867 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
2868 if (mult_cost > op_cost)
2869 mult_cost = op_cost;
2871 *variant = basic_variant;
2872 limit.cost = mult_cost;
2873 limit.latency = mult_cost;
2874 synth_mult (alg, val, &limit, mode);
2876 /* This works only if the inverted value actually fits in an
2877 `unsigned int' */
2878 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
2880 op_cost = neg_cost (speed, mode);
2881 if (MULT_COST_LESS (&alg->cost, mult_cost))
2883 limit.cost = alg->cost.cost - op_cost;
2884 limit.latency = alg->cost.latency - op_cost;
2886 else
2888 limit.cost = mult_cost - op_cost;
2889 limit.latency = mult_cost - op_cost;
2892 synth_mult (&alg2, -val, &limit, mode);
2893 alg2.cost.cost += op_cost;
2894 alg2.cost.latency += op_cost;
2895 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2896 *alg = alg2, *variant = negate_variant;
2899 /* This proves very useful for division-by-constant. */
2900 op_cost = add_cost (speed, mode);
2901 if (MULT_COST_LESS (&alg->cost, mult_cost))
2903 limit.cost = alg->cost.cost - op_cost;
2904 limit.latency = alg->cost.latency - op_cost;
2906 else
2908 limit.cost = mult_cost - op_cost;
2909 limit.latency = mult_cost - op_cost;
2912 synth_mult (&alg2, val - 1, &limit, mode);
2913 alg2.cost.cost += op_cost;
2914 alg2.cost.latency += op_cost;
2915 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2916 *alg = alg2, *variant = add_variant;
2918 return MULT_COST_LESS (&alg->cost, mult_cost);
2921 /* A subroutine of expand_mult, used for constant multiplications.
2922 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2923 convenient. Use the shift/add sequence described by ALG and apply
2924 the final fixup specified by VARIANT. */
2926 static rtx
2927 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2928 rtx target, const struct algorithm *alg,
2929 enum mult_variant variant)
2931 HOST_WIDE_INT val_so_far;
2932 rtx insn, accum, tem;
2933 int opno;
2934 enum machine_mode nmode;
2936 /* Avoid referencing memory over and over and invalid sharing
2937 on SUBREGs. */
2938 op0 = force_reg (mode, op0);
2940 /* ACCUM starts out either as OP0 or as a zero, depending on
2941 the first operation. */
2943 if (alg->op[0] == alg_zero)
2945 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
2946 val_so_far = 0;
2948 else if (alg->op[0] == alg_m)
2950 accum = copy_to_mode_reg (mode, op0);
2951 val_so_far = 1;
2953 else
2954 gcc_unreachable ();
2956 for (opno = 1; opno < alg->ops; opno++)
2958 int log = alg->log[opno];
2959 rtx shift_subtarget = optimize ? 0 : accum;
2960 rtx add_target
2961 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2962 && !optimize)
2963 ? target : 0;
2964 rtx accum_target = optimize ? 0 : accum;
2965 rtx accum_inner;
2967 switch (alg->op[opno])
2969 case alg_shift:
2970 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2971 /* REG_EQUAL note will be attached to the following insn. */
2972 emit_move_insn (accum, tem);
2973 val_so_far <<= log;
2974 break;
2976 case alg_add_t_m2:
2977 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2978 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2979 add_target ? add_target : accum_target);
2980 val_so_far += (HOST_WIDE_INT) 1 << log;
2981 break;
2983 case alg_sub_t_m2:
2984 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2985 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2986 add_target ? add_target : accum_target);
2987 val_so_far -= (HOST_WIDE_INT) 1 << log;
2988 break;
2990 case alg_add_t2_m:
2991 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2992 log, shift_subtarget, 0);
2993 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2994 add_target ? add_target : accum_target);
2995 val_so_far = (val_so_far << log) + 1;
2996 break;
2998 case alg_sub_t2_m:
2999 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3000 log, shift_subtarget, 0);
3001 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3002 add_target ? add_target : accum_target);
3003 val_so_far = (val_so_far << log) - 1;
3004 break;
3006 case alg_add_factor:
3007 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3008 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3009 add_target ? add_target : accum_target);
3010 val_so_far += val_so_far << log;
3011 break;
3013 case alg_sub_factor:
3014 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3015 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3016 (add_target
3017 ? add_target : (optimize ? 0 : tem)));
3018 val_so_far = (val_so_far << log) - val_so_far;
3019 break;
3021 default:
3022 gcc_unreachable ();
3025 if (SCALAR_INT_MODE_P (mode))
3027 /* Write a REG_EQUAL note on the last insn so that we can cse
3028 multiplication sequences. Note that if ACCUM is a SUBREG,
3029 we've set the inner register and must properly indicate that. */
3030 tem = op0, nmode = mode;
3031 accum_inner = accum;
3032 if (GET_CODE (accum) == SUBREG)
3034 accum_inner = SUBREG_REG (accum);
3035 nmode = GET_MODE (accum_inner);
3036 tem = gen_lowpart (nmode, op0);
3039 insn = get_last_insn ();
3040 set_dst_reg_note (insn, REG_EQUAL,
3041 gen_rtx_MULT (nmode, tem,
3042 gen_int_mode (val_so_far, nmode)),
3043 accum_inner);
3047 if (variant == negate_variant)
3049 val_so_far = -val_so_far;
3050 accum = expand_unop (mode, neg_optab, accum, target, 0);
3052 else if (variant == add_variant)
3054 val_so_far = val_so_far + 1;
3055 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3058 /* Compare only the bits of val and val_so_far that are significant
3059 in the result mode, to avoid sign-/zero-extension confusion. */
3060 nmode = GET_MODE_INNER (mode);
3061 if (nmode == VOIDmode)
3062 nmode = mode;
3063 val &= GET_MODE_MASK (nmode);
3064 val_so_far &= GET_MODE_MASK (nmode);
3065 gcc_assert (val == val_so_far);
3067 return accum;
3070 /* Perform a multiplication and return an rtx for the result.
3071 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3072 TARGET is a suggestion for where to store the result (an rtx).
3074 We check specially for a constant integer as OP1.
3075 If you want this check for OP0 as well, then before calling
3076 you should swap the two operands if OP0 would be constant. */
3079 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3080 int unsignedp)
3082 enum mult_variant variant;
3083 struct algorithm algorithm;
3084 rtx scalar_op1;
3085 int max_cost;
3086 bool speed = optimize_insn_for_speed_p ();
3087 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3089 if (CONSTANT_P (op0))
3091 rtx temp = op0;
3092 op0 = op1;
3093 op1 = temp;
3096 /* For vectors, there are several simplifications that can be made if
3097 all elements of the vector constant are identical. */
3098 scalar_op1 = op1;
3099 if (GET_CODE (op1) == CONST_VECTOR)
3101 int i, n = CONST_VECTOR_NUNITS (op1);
3102 scalar_op1 = CONST_VECTOR_ELT (op1, 0);
3103 for (i = 1; i < n; ++i)
3104 if (!rtx_equal_p (scalar_op1, CONST_VECTOR_ELT (op1, i)))
3105 goto skip_scalar;
3108 if (INTEGRAL_MODE_P (mode))
3110 rtx fake_reg;
3111 HOST_WIDE_INT coeff;
3112 bool is_neg;
3113 int mode_bitsize;
3115 if (op1 == CONST0_RTX (mode))
3116 return op1;
3117 if (op1 == CONST1_RTX (mode))
3118 return op0;
3119 if (op1 == CONSTM1_RTX (mode))
3120 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3121 op0, target, 0);
3123 if (do_trapv)
3124 goto skip_synth;
3126 /* If mode is an integer vector mode, check whether the backend supports
3127 vector lshift (by scalar or vector) at all. If not, we can't use
3128 a synthesized multiply. */
3129 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3130 && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3131 && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3132 goto skip_synth;
3134 /* These are the operations that are potentially turned into
3135 a sequence of shifts and additions. */
3136 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3138 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3139 less than or equal in size to `unsigned int' this doesn't matter.
3140 If the mode is larger than `unsigned int', then synth_mult works
3141 only if the constant value exactly fits in an `unsigned int' without
3142 any truncation. This means that multiplying by negative values does
3143 not work; results are off by 2^32 on a 32-bit machine.
3144 if (CONST_INT_P (scalar_op1))
3146 coeff = INTVAL (scalar_op1);
3147 is_neg = coeff < 0;
3149 #if TARGET_SUPPORTS_WIDE_INT
3150 else if (CONST_WIDE_INT_P (scalar_op1))
3151 #else
3152 else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3153 #endif
3155 int shift = wi::exact_log2 (std::make_pair (scalar_op1, mode));
3156 /* Perfect power of 2 (other than 1, which is handled above). */
3157 if (shift > 0)
3158 return expand_shift (LSHIFT_EXPR, mode, op0,
3159 shift, target, unsignedp);
3160 else
3161 goto skip_synth;
3163 else
3164 goto skip_synth;
3166 /* We used to test optimize here, on the grounds that it's better to
3167 produce a smaller program when -O is not used. But this causes
3168 such a terrible slowdown sometimes that it seems better to always
3169 use synth_mult. */
3171 /* Special case powers of two. */
3172 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3173 && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3174 return expand_shift (LSHIFT_EXPR, mode, op0,
3175 floor_log2 (coeff), target, unsignedp);
3177 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3179 /* Attempt to handle multiplication of DImode values by negative
3180 coefficients, by performing the multiplication by a positive
3181 multiplier and then inverting the result. */
3182 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3184 /* It's safe to use -coeff even for INT_MIN, as the
3185 result is interpreted as an unsigned coefficient.
3186 Exclude cost of op0 from max_cost to match the cost
3187 calculation of the synth_mult. */
3188 coeff = -(unsigned HOST_WIDE_INT) coeff;
3189 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed)
3190 - neg_cost (speed, mode));
3191 if (max_cost <= 0)
3192 goto skip_synth;
3194 /* Special case powers of two. */
3195 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3197 rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3198 floor_log2 (coeff), target, unsignedp);
3199 return expand_unop (mode, neg_optab, temp, target, 0);
3202 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3203 max_cost))
3205 rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3206 &algorithm, variant);
3207 return expand_unop (mode, neg_optab, temp, target, 0);
3209 goto skip_synth;
3212 /* Exclude cost of op0 from max_cost to match the cost
3213 calculation of the synth_mult. */
3214 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
3215 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3216 return expand_mult_const (mode, op0, coeff, target,
3217 &algorithm, variant);
3219 skip_synth:
3221 /* Expand x*2.0 as x+x. */
3222 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1))
3224 REAL_VALUE_TYPE d;
3225 REAL_VALUE_FROM_CONST_DOUBLE (d, scalar_op1);
3227 if (REAL_VALUES_EQUAL (d, dconst2))
3229 op0 = force_reg (GET_MODE (op0), op0);
3230 return expand_binop (mode, add_optab, op0, op0,
3231 target, unsignedp, OPTAB_LIB_WIDEN);
3234 skip_scalar:
3236 /* This used to use umul_optab if unsigned, but for non-widening multiply
3237 there is no difference between signed and unsigned. */
3238 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3239 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3240 gcc_assert (op0);
3241 return op0;
3244 /* Return a cost estimate for multiplying a register by the given
3245 COEFFicient in the given MODE and SPEED. */
3248 mult_by_coeff_cost (HOST_WIDE_INT coeff, enum machine_mode mode, bool speed)
3250 int max_cost;
3251 struct algorithm algorithm;
3252 enum mult_variant variant;
3254 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3255 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg), speed);
3256 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3257 return algorithm.cost.cost;
3258 else
3259 return max_cost;
3262 /* Perform a widening multiplication and return an rtx for the result.
3263 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3264 TARGET is a suggestion for where to store the result (an rtx).
3265 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3266 or smul_widen_optab.
3268 We check specially for a constant integer as OP1, comparing the
3269 cost of a widening multiply against the cost of a sequence of shifts
3270 and adds. */
3273 expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3274 int unsignedp, optab this_optab)
3276 bool speed = optimize_insn_for_speed_p ();
3277 rtx cop1;
3279 if (CONST_INT_P (op1)
3280 && GET_MODE (op0) != VOIDmode
3281 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3282 this_optab == umul_widen_optab))
3283 && CONST_INT_P (cop1)
3284 && (INTVAL (cop1) >= 0
3285 || HWI_COMPUTABLE_MODE_P (mode)))
3287 HOST_WIDE_INT coeff = INTVAL (cop1);
3288 int max_cost;
3289 enum mult_variant variant;
3290 struct algorithm algorithm;
3292 /* Special case powers of two. */
3293 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3295 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3296 return expand_shift (LSHIFT_EXPR, mode, op0,
3297 floor_log2 (coeff), target, unsignedp);
3300 /* Exclude cost of op0 from max_cost to match the cost
3301 calculation of the synth_mult. */
3302 max_cost = mul_widen_cost (speed, mode);
3303 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3304 max_cost))
3306 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3307 return expand_mult_const (mode, op0, coeff, target,
3308 &algorithm, variant);
3311 return expand_binop (mode, this_optab, op0, op1, target,
3312 unsignedp, OPTAB_LIB_WIDEN);
3315 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3316 replace division by D, and put the least significant N bits of the result
3317 in *MULTIPLIER_PTR and return the most significant bit.
3319 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3320 needed precision is in PRECISION (should be <= N).
3322 PRECISION should be as small as possible so this function can choose
3323 multiplier more freely.
3325 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3326 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3328 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3329 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
3331 unsigned HOST_WIDE_INT
3332 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3333 unsigned HOST_WIDE_INT *multiplier_ptr,
3334 int *post_shift_ptr, int *lgup_ptr)
3336 int lgup, post_shift;
3337 int pow, pow2;
3339 /* lgup = ceil(log2(divisor)); */
3340 lgup = ceil_log2 (d);
3342 gcc_assert (lgup <= n);
3344 pow = n + lgup;
3345 pow2 = n + lgup - precision;
3347 /* mlow = 2^(N + lgup)/d */
3348 wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3349 wide_int mlow = wi::udiv_trunc (val, d);
3351 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3352 val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3353 wide_int mhigh = wi::udiv_trunc (val, d);
3355 /* If precision == N, then mlow, mhigh exceed 2^N
3356 (but they do not exceed 2^(N+1)). */
3358 /* Reduce to lowest terms. */
3359 for (post_shift = lgup; post_shift > 0; post_shift--)
3361 unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3362 HOST_BITS_PER_WIDE_INT);
3363 unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3364 HOST_BITS_PER_WIDE_INT);
3365 if (ml_lo >= mh_lo)
3366 break;
3368 mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3369 mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3372 *post_shift_ptr = post_shift;
3373 *lgup_ptr = lgup;
3374 if (n < HOST_BITS_PER_WIDE_INT)
3376 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3377 *multiplier_ptr = mhigh.to_uhwi () & mask;
3378 return mhigh.to_uhwi () >= mask;
3380 else
3382 *multiplier_ptr = mhigh.to_uhwi ();
3383 return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
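/* For illustration (worked by hand): with D == 3 and N == PRECISION == 32
   this yields the multiplier 0xAAAAAAAB, *POST_SHIFT_PTR == 1 and a zero
   return value, so x / 3 for unsigned 32-bit x can be computed as the high
   32 bits of x * 0xAAAAAAAB, shifted right by 1.  */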
3387 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3388 congruent to 1 (mod 2**N). */
3390 static unsigned HOST_WIDE_INT
3391 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3393 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3395 /* The algorithm notes that the choice y = x satisfies
3396 x*y == 1 mod 2^3, since x is assumed odd.
3397 Each iteration doubles the number of bits of significance in y. */
3399 unsigned HOST_WIDE_INT mask;
3400 unsigned HOST_WIDE_INT y = x;
3401 int nbit = 3;
3403 mask = (n == HOST_BITS_PER_WIDE_INT
3404 ? ~(unsigned HOST_WIDE_INT) 0
3405 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3407 while (nbit < n)
3409 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3410 nbit *= 2;
3412 return y;
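/* For illustration (worked by hand): invert_mod2n (3, 8) converges to 171,
   and indeed 3 * 171 == 513 == 2 * 256 + 1, i.e. 1 (mod 2**8).  */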
3415 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3416 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3417 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3418 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3419 become signed.
3421 The result is put in TARGET if that is convenient.
3423 MODE is the mode of operation. */
3426 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3427 rtx op1, rtx target, int unsignedp)
3429 rtx tem;
3430 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3432 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3433 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3434 tem = expand_and (mode, tem, op1, NULL_RTX);
3435 adj_operand
3436 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3437 adj_operand);
3439 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3440 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3441 tem = expand_and (mode, tem, op0, NULL_RTX);
3442 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3443 target);
3445 return target;
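/* In effect expand_mult_highpart_adjust applies the identity (sketched here
   for the unsigned case; the signed case subtracts the two terms instead)

     high_u (a, b) == high_s (a, b) + (a < 0 ? b : 0) + (b < 0 ? a : 0)

   computed in MODE, where each (x < 0 ? y : 0) term is formed above by
   ANDing y with the arithmetic right shift of x by MODE's width minus 1.  */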
3448 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3450 static rtx
3451 extract_high_half (enum machine_mode mode, rtx op)
3453 enum machine_mode wider_mode;
3455 if (mode == word_mode)
3456 return gen_highpart (mode, op);
3458 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3460 wider_mode = GET_MODE_WIDER_MODE (mode);
3461 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3462 GET_MODE_BITSIZE (mode), 0, 1);
3463 return convert_modes (mode, wider_mode, op, 0);
3466 /* Like expmed_mult_highpart, but only consider using a multiplication
3467 optab. OP1 is an rtx for the constant operand. */
3469 static rtx
3470 expmed_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3471 rtx target, int unsignedp, int max_cost)
3473 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3474 enum machine_mode wider_mode;
3475 optab moptab;
3476 rtx tem;
3477 int size;
3478 bool speed = optimize_insn_for_speed_p ();
3480 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3482 wider_mode = GET_MODE_WIDER_MODE (mode);
3483 size = GET_MODE_BITSIZE (mode);
3485 /* Firstly, try using a multiplication insn that only generates the needed
3486 high part of the product, and in the sign flavor of unsignedp. */
3487 if (mul_highpart_cost (speed, mode) < max_cost)
3489 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3490 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3491 unsignedp, OPTAB_DIRECT);
3492 if (tem)
3493 return tem;
3496 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3497 Need to adjust the result after the multiplication. */
3498 if (size - 1 < BITS_PER_WORD
3499 && (mul_highpart_cost (speed, mode)
3500 + 2 * shift_cost (speed, mode, size-1)
3501 + 4 * add_cost (speed, mode) < max_cost))
3503 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3504 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3505 unsignedp, OPTAB_DIRECT);
3506 if (tem)
3507 /* We used the wrong signedness. Adjust the result. */
3508 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3509 tem, unsignedp);
3512 /* Try widening multiplication. */
3513 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3514 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3515 && mul_widen_cost (speed, wider_mode) < max_cost)
3517 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3518 unsignedp, OPTAB_WIDEN);
3519 if (tem)
3520 return extract_high_half (mode, tem);
3523 /* Try widening the mode and performing a non-widening multiplication. */
3524 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3525 && size - 1 < BITS_PER_WORD
3526 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3527 < max_cost))
3529 rtx insns, wop0, wop1;
3531 /* We need to widen the operands, for example to ensure the
3532 constant multiplier is correctly sign or zero extended.
3533 Use a sequence to clean up any instructions emitted by
3534 the conversions if things don't work out. */
3535 start_sequence ();
3536 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3537 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3538 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3539 unsignedp, OPTAB_WIDEN);
3540 insns = get_insns ();
3541 end_sequence ();
3543 if (tem)
3545 emit_insn (insns);
3546 return extract_high_half (mode, tem);
3550 /* Try widening multiplication of opposite signedness, and adjust. */
3551 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3552 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3553 && size - 1 < BITS_PER_WORD
3554 && (mul_widen_cost (speed, wider_mode)
3555 + 2 * shift_cost (speed, mode, size-1)
3556 + 4 * add_cost (speed, mode) < max_cost))
3558 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3559 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3560 if (tem != 0)
3562 tem = extract_high_half (mode, tem);
3563 /* We used the wrong signedness. Adjust the result. */
3564 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3565 target, unsignedp);
3569 return 0;
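/* Illustrative sketch (not part of this file): on a host where the
   wider mode is available as a native type, the "widening multiply,
   then extract_high_half" route above amounts to the following,
   assuming 32-bit int and 64-bit long long (names invented).  */

static unsigned int
umulhi32_sketch (unsigned int a, unsigned int b)
{
  return (unsigned int) (((unsigned long long) a * b) >> 32);
}

static int
smulhi32_sketch (int a, int b)
{
  return (int) (((long long) a * (long long) b) >> 32);
}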
3572 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3573 putting the high half of the result in TARGET if that is convenient,
3574 and return where the result is. If the operation cannot be performed,
3575 0 is returned.
3577 MODE is the mode of operation and result.
3579 UNSIGNEDP nonzero means unsigned multiply.
3581 MAX_COST is the total allowed cost for the expanded RTL. */
3583 static rtx
3584 expmed_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3585 rtx target, int unsignedp, int max_cost)
3587 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3588 unsigned HOST_WIDE_INT cnst1;
3589 int extra_cost;
3590 bool sign_adjust = false;
3591 enum mult_variant variant;
3592 struct algorithm alg;
3593 rtx tem;
3594 bool speed = optimize_insn_for_speed_p ();
3596 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3597 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3598 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3600 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3602 /* We can't optimize modes wider than BITS_PER_WORD.
3603 ??? We might be able to perform double-word arithmetic if
3604 mode == word_mode, however all the cost calculations in
3605 synth_mult etc. assume single-word operations. */
3606 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3607 return expmed_mult_highpart_optab (mode, op0, op1, target,
3608 unsignedp, max_cost);
3610 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3612 /* Check whether we try to multiply by a negative constant. */
3613 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3615 sign_adjust = true;
3616 extra_cost += add_cost (speed, mode);
3619 /* See whether shift/add multiplication is cheap enough. */
3620 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3621 max_cost - extra_cost))
3623 /* See whether the specialized multiplication optabs are
3624 cheaper than the shift/add version. */
3625 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3626 alg.cost.cost + extra_cost);
3627 if (tem)
3628 return tem;
3630 tem = convert_to_mode (wider_mode, op0, unsignedp);
3631 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3632 tem = extract_high_half (mode, tem);
3634 /* Adjust result for signedness. */
3635 if (sign_adjust)
3636 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3638 return tem;
3640 return expmed_mult_highpart_optab (mode, op0, op1, target,
3641 unsignedp, max_cost);
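/* Illustrative sketch (not part of this file): the sign_adjust
   subtraction above relies on the fact that a negative constant C is
   handled through the nonnegative value CNST1 = C + 2^N, so

     high (X * C) == high (X * CNST1) - X.

   A 16-bit instance with invented names, assuming 32-bit int and
   arithmetic right shift, for a negative constant c:  */

static short
smulhi16_negconst_sketch (short x, short c)
{
  unsigned short cnst1 = (unsigned short) c;	/* c + 2^16, c < 0 assumed */
  int hi = ((int) x * cnst1) >> 16;		/* high half of x * cnst1 */
  return (short) (hi - x);			/* == ((int) x * c) >> 16 */
}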
3645 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3647 static rtx
3648 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3650 rtx result, temp, shift, label;
3651 int logd;
3652 int prec = GET_MODE_PRECISION (mode);
3654 logd = floor_log2 (d);
3655 result = gen_reg_rtx (mode);
3657 /* Avoid conditional branches when they're expensive. */
3658 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3659 && optimize_insn_for_speed_p ())
3661 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3662 mode, 0, -1);
3663 if (signmask)
3665 HOST_WIDE_INT masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3666 signmask = force_reg (mode, signmask);
3667 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3669 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3670 which instruction sequence to use. If logical right shifts
3671 are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3672 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
3674 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3675 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3676 || (set_src_cost (temp, optimize_insn_for_speed_p ())
3677 > COSTS_N_INSNS (2)))
3679 temp = expand_binop (mode, xor_optab, op0, signmask,
3680 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3681 temp = expand_binop (mode, sub_optab, temp, signmask,
3682 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3683 temp = expand_binop (mode, and_optab, temp,
3684 gen_int_mode (masklow, mode),
3685 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3686 temp = expand_binop (mode, xor_optab, temp, signmask,
3687 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3688 temp = expand_binop (mode, sub_optab, temp, signmask,
3689 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3691 else
3693 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3694 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3695 signmask = force_reg (mode, signmask);
3697 temp = expand_binop (mode, add_optab, op0, signmask,
3698 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3699 temp = expand_binop (mode, and_optab, temp,
3700 gen_int_mode (masklow, mode),
3701 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3702 temp = expand_binop (mode, sub_optab, temp, signmask,
3703 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3705 return temp;
3709 /* Mask contains the mode's signbit and the significant bits of the
3710 modulus. By including the signbit in the operation, many targets
3711 can avoid an explicit compare operation in the following comparison
3712 against zero. */
3713 wide_int mask = wi::mask (logd, false, prec);
3714 mask = wi::set_bit (mask, prec - 1);
3716 temp = expand_binop (mode, and_optab, op0,
3717 immed_wide_int_const (mask, mode),
3718 result, 1, OPTAB_LIB_WIDEN);
3719 if (temp != result)
3720 emit_move_insn (result, temp);
3722 label = gen_label_rtx ();
3723 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3725 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3726 0, OPTAB_LIB_WIDEN);
3728 mask = wi::mask (logd, true, prec);
3729 temp = expand_binop (mode, ior_optab, temp,
3730 immed_wide_int_const (mask, mode),
3731 result, 1, OPTAB_LIB_WIDEN);
3732 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3733 0, OPTAB_LIB_WIDEN);
3734 if (temp != result)
3735 emit_move_insn (result, temp);
3736 emit_label (label);
3737 return result;
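/* Illustrative sketch (not part of this file): the branch-free
   "signmask" sequence above, for truncating (C-style) remainders by a
   power of two.  Invented names; 32-bit int and arithmetic right shift
   of negative values assumed.  */

static int
smod_pow2_sketch (int x, int logd)
{
  int mask = (1 << logd) - 1;		/* d - 1 */
  int s = (x >> 31) & mask;		/* d - 1 if x < 0, else 0 */

  return ((x + s) & mask) - s;		/* equals x % (1 << logd) */
}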
3740 /* Expand signed division of OP0 by a power of two D in mode MODE.
3741 This routine is only called for positive values of D. */
3743 static rtx
3744 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3746 rtx temp, label;
3747 int logd;
3749 logd = floor_log2 (d);
3751 if (d == 2
3752 && BRANCH_COST (optimize_insn_for_speed_p (),
3753 false) >= 1)
3755 temp = gen_reg_rtx (mode);
3756 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3757 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3758 0, OPTAB_LIB_WIDEN);
3759 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3762 #ifdef HAVE_conditional_move
3763 if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3764 >= 2)
3766 rtx temp2;
3768 start_sequence ();
3769 temp2 = copy_to_mode_reg (mode, op0);
3770 temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
3771 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3772 temp = force_reg (mode, temp);
3774 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3775 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3776 mode, temp, temp2, mode, 0);
3777 if (temp2)
3779 rtx seq = get_insns ();
3780 end_sequence ();
3781 emit_insn (seq);
3782 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3784 end_sequence ();
3786 #endif
3788 if (BRANCH_COST (optimize_insn_for_speed_p (),
3789 false) >= 2)
3791 int ushift = GET_MODE_BITSIZE (mode) - logd;
3793 temp = gen_reg_rtx (mode);
3794 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3795 if (shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3796 > COSTS_N_INSNS (1))
3797 temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
3798 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3799 else
3800 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3801 ushift, NULL_RTX, 1);
3802 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3803 0, OPTAB_LIB_WIDEN);
3804 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3807 label = gen_label_rtx ();
3808 temp = copy_to_mode_reg (mode, op0);
3809 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3810 expand_inc (temp, gen_int_mode (d - 1, mode));
3811 emit_label (label);
3812 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
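/* Illustrative sketch (not part of this file): the branch-free variant
   above, which biases a negative dividend by d - 1 before the
   arithmetic shift so the quotient rounds toward zero like C division.
   Invented name; 32-bit int and arithmetic right shift assumed.  */

static int
sdiv_pow2_sketch (int x, int logd)
{
  int s = (x >> 31) & ((1 << logd) - 1);	/* d - 1 if x < 0, else 0 */

  return (x + s) >> logd;			/* equals x / (1 << logd) */
}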
3815 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3816 if that is convenient, and returning where the result is.
3817 You may request either the quotient or the remainder as the result;
3818 specify REM_FLAG nonzero to get the remainder.
3820 CODE is the expression code for which kind of division this is;
3821 it controls how rounding is done. MODE is the machine mode to use.
3822 UNSIGNEDP nonzero means do unsigned division. */
3824 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3825 and then correct it by or'ing in missing high bits
3826 if result of ANDI is nonzero.
3827 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3828 This could optimize to a bfexts instruction.
3829 But C doesn't use these operations, so their optimizations are
3830 left for later. */
3831 /* ??? For modulo, we don't actually need the highpart of the first product,
3832 the low part will do nicely. And for small divisors, the second multiply
3833 can also be a low-part only multiply or even be completely left out.
3834 E.g. to calculate the remainder of a division by 3 with a 32 bit
3835 multiply, multiply with 0x55555556 and extract the upper two bits;
3836 the result is exact for inputs up to 0x1fffffff.
3837 The input range can be reduced by using cross-sum rules.
3838 For odd divisors >= 3, the following table gives right shift counts
3839 so that if a number is shifted by an integer multiple of the given
3840 amount, the remainder stays the same:
3841 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3842 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3843 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3844 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3845 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3847 Cross-sum rules for even numbers can be derived by leaving as many bits
3848 to the right alone as the divisor has zeros to the right.
3849 E.g. if x is an unsigned 32 bit number:
3850 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
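/* Illustrative sketch (not part of this file) of the remainder-by-3
   trick mentioned above: a single 32-bit low-part multiply by
   0x55555556 leaves the remainder in the top two bits of the product.
   Invented name; 32-bit unsigned int assumed.  Exact for
   x <= 0x1fffffff, as noted above.  */

static unsigned int
umod3_sketch (unsigned int x)
{
  return (x * 0x55555556u) >> 30;	/* equals x % 3 in that range */
}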
3854 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3855 rtx op0, rtx op1, rtx target, int unsignedp)
3857 enum machine_mode compute_mode;
3858 rtx tquotient;
3859 rtx quotient = 0, remainder = 0;
3860 rtx last;
3861 int size;
3862 rtx insn;
3863 optab optab1, optab2;
3864 int op1_is_constant, op1_is_pow2 = 0;
3865 int max_cost, extra_cost;
3866 static HOST_WIDE_INT last_div_const = 0;
3867 bool speed = optimize_insn_for_speed_p ();
3869 op1_is_constant = CONST_INT_P (op1);
3870 if (op1_is_constant)
3872 unsigned HOST_WIDE_INT ext_op1 = UINTVAL (op1);
3873 if (unsignedp)
3874 ext_op1 &= GET_MODE_MASK (mode);
3875 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3876 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3880 This is the structure of expand_divmod:
3882 First comes code to fix up the operands so we can perform the operations
3883 correctly and efficiently.
3885 Second comes a switch statement with code specific for each rounding mode.
3886 For some special operands this code emits all RTL for the desired
3887 operation; in other cases, it generates only a quotient and stores it in
3888 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3889 to indicate that it has not done anything.
3891 Last comes code that finishes the operation. If QUOTIENT is set and
3892 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3893 QUOTIENT is not set, it is computed using trunc rounding.
3895 We try to generate special code for division and remainder when OP1 is a
3896 constant. If |OP1| = 2**n we can use shifts and some other fast
3897 operations. For other values of OP1, we compute a carefully selected
3898 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3899 by m.
3901 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3902 half of the product. Different strategies for generating the product are
3903 implemented in expmed_mult_highpart.
3905 If what we actually want is the remainder, we generate that by another
3906 by-constant multiplication and a subtraction. */
3908 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3909 code below will malfunction if we are, so check here and handle
3910 the special case if so. */
3911 if (op1 == const1_rtx)
3912 return rem_flag ? const0_rtx : op0;
3914 /* When dividing by -1, we could get an overflow.
3915 negv_optab can handle overflows. */
3916 if (! unsignedp && op1 == constm1_rtx)
3918 if (rem_flag)
3919 return const0_rtx;
3920 return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
3921 ? negv_optab : neg_optab, op0, target, 0);
3924 if (target
3925 /* Don't use the function value register as a target
3926 since we have to read it as well as write it,
3927 and function-inlining gets confused by this. */
3928 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3929 /* Don't clobber an operand while doing a multi-step calculation. */
3930 || ((rem_flag || op1_is_constant)
3931 && (reg_mentioned_p (target, op0)
3932 || (MEM_P (op0) && MEM_P (target))))
3933 || reg_mentioned_p (target, op1)
3934 || (MEM_P (op1) && MEM_P (target))))
3935 target = 0;
3937 /* Get the mode in which to perform this computation. Normally it will
3938 be MODE, but sometimes we can't do the desired operation in MODE.
3939 If so, pick a wider mode in which we can do the operation. Convert
3940 to that mode at the start to avoid repeated conversions.
3942 First see what operations we need. These depend on the expression
3943 we are evaluating. (We assume that divxx3 insns exist under the
3944 same conditions that modxx3 insns and that these insns don't normally
3945 fail. If these assumptions are not correct, we may generate less
3946 efficient code in some cases.)
3948 Then see if we find a mode in which we can open-code that operation
3949 (either a division, modulus, or shift). Finally, check for the smallest
3950 mode for which we can do the operation with a library call. */
3952 /* We might want to refine this now that we have division-by-constant
3953 optimization. Since expmed_mult_highpart tries so many variants, it is
3954 not straightforward to generalize this. Maybe we should make an array
3955 of possible modes in init_expmed? Save this for GCC 2.7. */
3957 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3958 ? (unsignedp ? lshr_optab : ashr_optab)
3959 : (unsignedp ? udiv_optab : sdiv_optab));
3960 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3961 ? optab1
3962 : (unsignedp ? udivmod_optab : sdivmod_optab));
3964 for (compute_mode = mode; compute_mode != VOIDmode;
3965 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3966 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
3967 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
3968 break;
3970 if (compute_mode == VOIDmode)
3971 for (compute_mode = mode; compute_mode != VOIDmode;
3972 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3973 if (optab_libfunc (optab1, compute_mode)
3974 || optab_libfunc (optab2, compute_mode))
3975 break;
3977 /* If we still couldn't find a mode, use MODE, but expand_binop will
3978 probably die. */
3979 if (compute_mode == VOIDmode)
3980 compute_mode = mode;
3982 if (target && GET_MODE (target) == compute_mode)
3983 tquotient = target;
3984 else
3985 tquotient = gen_reg_rtx (compute_mode);
3987 size = GET_MODE_BITSIZE (compute_mode);
3988 #if 0
3989 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3990 (mode), and thereby get better code when OP1 is a constant. Do that
3991 later. It will require going over all usages of SIZE below. */
3992 size = GET_MODE_BITSIZE (mode);
3993 #endif
3995 /* Only deduct something for a REM if the last divide done was
3996 for a different constant. Then set the constant of the last
3997 divide. */
3998 max_cost = (unsignedp
3999 ? udiv_cost (speed, compute_mode)
4000 : sdiv_cost (speed, compute_mode));
4001 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4002 && INTVAL (op1) == last_div_const))
4003 max_cost -= (mul_cost (speed, compute_mode)
4004 + add_cost (speed, compute_mode));
4006 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4008 /* Now convert to the best mode to use. */
4009 if (compute_mode != mode)
4011 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4012 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4014 /* convert_modes may have placed op1 into a register, so we
4015 must recompute the following. */
4016 op1_is_constant = CONST_INT_P (op1);
4017 op1_is_pow2 = (op1_is_constant
4018 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4019 || (! unsignedp
4020 && EXACT_POWER_OF_2_OR_ZERO_P (-UINTVAL (op1))))));
4023 /* If one of the operands is a volatile MEM, copy it into a register. */
4025 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4026 op0 = force_reg (compute_mode, op0);
4027 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4028 op1 = force_reg (compute_mode, op1);
4030 /* If we need the remainder or if OP1 is constant, we need to
4031 put OP0 in a register in case it has any queued subexpressions. */
4032 if (rem_flag || op1_is_constant)
4033 op0 = force_reg (compute_mode, op0);
4035 last = get_last_insn ();
4037 /* Promote floor rounding to trunc rounding for unsigned operations. */
4038 if (unsignedp)
4040 if (code == FLOOR_DIV_EXPR)
4041 code = TRUNC_DIV_EXPR;
4042 if (code == FLOOR_MOD_EXPR)
4043 code = TRUNC_MOD_EXPR;
4044 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4045 code = TRUNC_DIV_EXPR;
4048 if (op1 != const0_rtx)
4049 switch (code)
4051 case TRUNC_MOD_EXPR:
4052 case TRUNC_DIV_EXPR:
4053 if (op1_is_constant)
4055 if (unsignedp)
4057 unsigned HOST_WIDE_INT mh, ml;
4058 int pre_shift, post_shift;
4059 int dummy;
4060 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4061 & GET_MODE_MASK (compute_mode));
4063 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4065 pre_shift = floor_log2 (d);
4066 if (rem_flag)
4068 unsigned HOST_WIDE_INT mask
4069 = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4070 remainder
4071 = expand_binop (compute_mode, and_optab, op0,
4072 gen_int_mode (mask, compute_mode),
4073 remainder, 1,
4074 OPTAB_LIB_WIDEN);
4075 if (remainder)
4076 return gen_lowpart (mode, remainder);
4078 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4079 pre_shift, tquotient, 1);
4081 else if (size <= HOST_BITS_PER_WIDE_INT)
4083 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4085 /* Most significant bit of divisor is set; emit an scc
4086 insn. */
4087 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4088 compute_mode, 1, 1);
4090 else
4092 /* Find a suitable multiplier and right shift count
4093 instead of multiplying with D. */
4095 mh = choose_multiplier (d, size, size,
4096 &ml, &post_shift, &dummy);
4098 /* If the suggested multiplier is more than SIZE bits,
4099 we can do better for even divisors, using an
4100 initial right shift. */
4101 if (mh != 0 && (d & 1) == 0)
4103 pre_shift = floor_log2 (d & -d);
4104 mh = choose_multiplier (d >> pre_shift, size,
4105 size - pre_shift,
4106 &ml, &post_shift, &dummy);
4107 gcc_assert (!mh);
4109 else
4110 pre_shift = 0;
4112 if (mh != 0)
4114 rtx t1, t2, t3, t4;
4116 if (post_shift - 1 >= BITS_PER_WORD)
4117 goto fail1;
4119 extra_cost
4120 = (shift_cost (speed, compute_mode, post_shift - 1)
4121 + shift_cost (speed, compute_mode, 1)
4122 + 2 * add_cost (speed, compute_mode));
4123 t1 = expmed_mult_highpart
4124 (compute_mode, op0,
4125 gen_int_mode (ml, compute_mode),
4126 NULL_RTX, 1, max_cost - extra_cost);
4127 if (t1 == 0)
4128 goto fail1;
4129 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4130 op0, t1),
4131 NULL_RTX);
4132 t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4133 t2, 1, NULL_RTX, 1);
4134 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4135 t1, t3),
4136 NULL_RTX);
4137 quotient = expand_shift
4138 (RSHIFT_EXPR, compute_mode, t4,
4139 post_shift - 1, tquotient, 1);
4141 else
4143 rtx t1, t2;
4145 if (pre_shift >= BITS_PER_WORD
4146 || post_shift >= BITS_PER_WORD)
4147 goto fail1;
4149 t1 = expand_shift
4150 (RSHIFT_EXPR, compute_mode, op0,
4151 pre_shift, NULL_RTX, 1);
4152 extra_cost
4153 = (shift_cost (speed, compute_mode, pre_shift)
4154 + shift_cost (speed, compute_mode, post_shift));
4155 t2 = expmed_mult_highpart
4156 (compute_mode, t1,
4157 gen_int_mode (ml, compute_mode),
4158 NULL_RTX, 1, max_cost - extra_cost);
4159 if (t2 == 0)
4160 goto fail1;
4161 quotient = expand_shift
4162 (RSHIFT_EXPR, compute_mode, t2,
4163 post_shift, tquotient, 1);
4167 else /* Too wide mode to use tricky code */
4168 break;
4170 insn = get_last_insn ();
4171 if (insn != last)
4172 set_dst_reg_note (insn, REG_EQUAL,
4173 gen_rtx_UDIV (compute_mode, op0, op1),
4174 quotient);
4176 else /* TRUNC_DIV, signed */
4178 unsigned HOST_WIDE_INT ml;
4179 int lgup, post_shift;
4180 rtx mlr;
4181 HOST_WIDE_INT d = INTVAL (op1);
4182 unsigned HOST_WIDE_INT abs_d;
4184 /* Since d might be INT_MIN, we have to cast to
4185 unsigned HOST_WIDE_INT before negating to avoid
4186 undefined signed overflow. */
4187 abs_d = (d >= 0
4188 ? (unsigned HOST_WIDE_INT) d
4189 : - (unsigned HOST_WIDE_INT) d);
4191 /* n rem d = n rem -d */
4192 if (rem_flag && d < 0)
4194 d = abs_d;
4195 op1 = gen_int_mode (abs_d, compute_mode);
4198 if (d == 1)
4199 quotient = op0;
4200 else if (d == -1)
4201 quotient = expand_unop (compute_mode, neg_optab, op0,
4202 tquotient, 0);
4203 else if (HOST_BITS_PER_WIDE_INT >= size
4204 && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4206 /* This case is not handled correctly below. */
4207 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4208 compute_mode, 1, 1);
4209 if (quotient == 0)
4210 goto fail1;
4212 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4213 && (rem_flag
4214 ? smod_pow2_cheap (speed, compute_mode)
4215 : sdiv_pow2_cheap (speed, compute_mode))
4216 /* We assume that the cheap metric is true if the
4217 optab has an expander for this mode. */
4218 && ((optab_handler ((rem_flag ? smod_optab
4219 : sdiv_optab),
4220 compute_mode)
4221 != CODE_FOR_nothing)
4222 || (optab_handler (sdivmod_optab,
4223 compute_mode)
4224 != CODE_FOR_nothing)))
4226 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4228 if (rem_flag)
4230 remainder = expand_smod_pow2 (compute_mode, op0, d);
4231 if (remainder)
4232 return gen_lowpart (mode, remainder);
4235 if (sdiv_pow2_cheap (speed, compute_mode)
4236 && ((optab_handler (sdiv_optab, compute_mode)
4237 != CODE_FOR_nothing)
4238 || (optab_handler (sdivmod_optab, compute_mode)
4239 != CODE_FOR_nothing)))
4240 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4241 compute_mode, op0,
4242 gen_int_mode (abs_d,
4243 compute_mode),
4244 NULL_RTX, 0);
4245 else
4246 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4248 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4249 negate the quotient. */
4250 if (d < 0)
4252 insn = get_last_insn ();
4253 if (insn != last
4254 && abs_d < ((unsigned HOST_WIDE_INT) 1
4255 << (HOST_BITS_PER_WIDE_INT - 1)))
4256 set_dst_reg_note (insn, REG_EQUAL,
4257 gen_rtx_DIV (compute_mode, op0,
4258 gen_int_mode
4259 (abs_d,
4260 compute_mode)),
4261 quotient);
4263 quotient = expand_unop (compute_mode, neg_optab,
4264 quotient, quotient, 0);
4267 else if (size <= HOST_BITS_PER_WIDE_INT)
4269 choose_multiplier (abs_d, size, size - 1,
4270 &ml, &post_shift, &lgup);
4271 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4273 rtx t1, t2, t3;
4275 if (post_shift >= BITS_PER_WORD
4276 || size - 1 >= BITS_PER_WORD)
4277 goto fail1;
4279 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4280 + shift_cost (speed, compute_mode, size - 1)
4281 + add_cost (speed, compute_mode));
4282 t1 = expmed_mult_highpart
4283 (compute_mode, op0, gen_int_mode (ml, compute_mode),
4284 NULL_RTX, 0, max_cost - extra_cost);
4285 if (t1 == 0)
4286 goto fail1;
4287 t2 = expand_shift
4288 (RSHIFT_EXPR, compute_mode, t1,
4289 post_shift, NULL_RTX, 0);
4290 t3 = expand_shift
4291 (RSHIFT_EXPR, compute_mode, op0,
4292 size - 1, NULL_RTX, 0);
4293 if (d < 0)
4294 quotient
4295 = force_operand (gen_rtx_MINUS (compute_mode,
4296 t3, t2),
4297 tquotient);
4298 else
4299 quotient
4300 = force_operand (gen_rtx_MINUS (compute_mode,
4301 t2, t3),
4302 tquotient);
4304 else
4306 rtx t1, t2, t3, t4;
4308 if (post_shift >= BITS_PER_WORD
4309 || size - 1 >= BITS_PER_WORD)
4310 goto fail1;
4312 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4313 mlr = gen_int_mode (ml, compute_mode);
4314 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4315 + shift_cost (speed, compute_mode, size - 1)
4316 + 2 * add_cost (speed, compute_mode));
4317 t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4318 NULL_RTX, 0,
4319 max_cost - extra_cost);
4320 if (t1 == 0)
4321 goto fail1;
4322 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4323 t1, op0),
4324 NULL_RTX);
4325 t3 = expand_shift
4326 (RSHIFT_EXPR, compute_mode, t2,
4327 post_shift, NULL_RTX, 0);
4328 t4 = expand_shift
4329 (RSHIFT_EXPR, compute_mode, op0,
4330 size - 1, NULL_RTX, 0);
4331 if (d < 0)
4332 quotient
4333 = force_operand (gen_rtx_MINUS (compute_mode,
4334 t4, t3),
4335 tquotient);
4336 else
4337 quotient
4338 = force_operand (gen_rtx_MINUS (compute_mode,
4339 t3, t4),
4340 tquotient);
4343 else /* Too wide mode to use tricky code */
4344 break;
4346 insn = get_last_insn ();
4347 if (insn != last)
4348 set_dst_reg_note (insn, REG_EQUAL,
4349 gen_rtx_DIV (compute_mode, op0, op1),
4350 quotient);
4352 break;
4354 fail1:
4355 delete_insns_since (last);
4356 break;
4358 case FLOOR_DIV_EXPR:
4359 case FLOOR_MOD_EXPR:
4360 /* We will come here only for signed operations. */
4361 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4363 unsigned HOST_WIDE_INT mh, ml;
4364 int pre_shift, lgup, post_shift;
4365 HOST_WIDE_INT d = INTVAL (op1);
4367 if (d > 0)
4369 /* We could just as easily deal with negative constants here,
4370 but it does not seem worth the trouble for GCC 2.6. */
4371 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4373 pre_shift = floor_log2 (d);
4374 if (rem_flag)
4376 unsigned HOST_WIDE_INT mask
4377 = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4378 remainder = expand_binop
4379 (compute_mode, and_optab, op0,
4380 gen_int_mode (mask, compute_mode),
4381 remainder, 0, OPTAB_LIB_WIDEN);
4382 if (remainder)
4383 return gen_lowpart (mode, remainder);
4385 quotient = expand_shift
4386 (RSHIFT_EXPR, compute_mode, op0,
4387 pre_shift, tquotient, 0);
4389 else
4391 rtx t1, t2, t3, t4;
4393 mh = choose_multiplier (d, size, size - 1,
4394 &ml, &post_shift, &lgup);
4395 gcc_assert (!mh);
4397 if (post_shift < BITS_PER_WORD
4398 && size - 1 < BITS_PER_WORD)
4400 t1 = expand_shift
4401 (RSHIFT_EXPR, compute_mode, op0,
4402 size - 1, NULL_RTX, 0);
4403 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4404 NULL_RTX, 0, OPTAB_WIDEN);
4405 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4406 + shift_cost (speed, compute_mode, size - 1)
4407 + 2 * add_cost (speed, compute_mode));
4408 t3 = expmed_mult_highpart
4409 (compute_mode, t2, gen_int_mode (ml, compute_mode),
4410 NULL_RTX, 1, max_cost - extra_cost);
4411 if (t3 != 0)
4413 t4 = expand_shift
4414 (RSHIFT_EXPR, compute_mode, t3,
4415 post_shift, NULL_RTX, 1);
4416 quotient = expand_binop (compute_mode, xor_optab,
4417 t4, t1, tquotient, 0,
4418 OPTAB_WIDEN);
4423 else
4425 rtx nsign, t1, t2, t3, t4;
4426 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4427 op0, constm1_rtx), NULL_RTX);
4428 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4429 0, OPTAB_WIDEN);
4430 nsign = expand_shift
4431 (RSHIFT_EXPR, compute_mode, t2,
4432 size - 1, NULL_RTX, 0);
4433 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4434 NULL_RTX);
4435 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4436 NULL_RTX, 0);
4437 if (t4)
4439 rtx t5;
4440 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4441 NULL_RTX, 0);
4442 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4443 t4, t5),
4444 tquotient);
4449 if (quotient != 0)
4450 break;
4451 delete_insns_since (last);
4453 /* Try using an instruction that produces both the quotient and
4454 remainder, using truncation. We can easily compensate the quotient
4455 or remainder to get floor rounding, once we have the remainder.
4456 Notice that we compute also the final remainder value here,
4457 and return the result right away. */
4458 if (target == 0 || GET_MODE (target) != compute_mode)
4459 target = gen_reg_rtx (compute_mode);
4461 if (rem_flag)
4463 remainder
4464 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4465 quotient = gen_reg_rtx (compute_mode);
4467 else
4469 quotient
4470 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4471 remainder = gen_reg_rtx (compute_mode);
4474 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4475 quotient, remainder, 0))
4477 /* This could be computed with a branch-less sequence.
4478 Save that for later. */
4479 rtx tem;
4480 rtx label = gen_label_rtx ();
4481 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4482 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4483 NULL_RTX, 0, OPTAB_WIDEN);
4484 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4485 expand_dec (quotient, const1_rtx);
4486 expand_inc (remainder, op1);
4487 emit_label (label);
4488 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4491 /* No luck with division elimination or divmod. Have to do it
4492 by conditionally adjusting op0 *and* the result. */
4494 rtx label1, label2, label3, label4, label5;
4495 rtx adjusted_op0;
4496 rtx tem;
4498 quotient = gen_reg_rtx (compute_mode);
4499 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4500 label1 = gen_label_rtx ();
4501 label2 = gen_label_rtx ();
4502 label3 = gen_label_rtx ();
4503 label4 = gen_label_rtx ();
4504 label5 = gen_label_rtx ();
4505 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4506 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4507 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4508 quotient, 0, OPTAB_LIB_WIDEN);
4509 if (tem != quotient)
4510 emit_move_insn (quotient, tem);
4511 emit_jump_insn (gen_jump (label5));
4512 emit_barrier ();
4513 emit_label (label1);
4514 expand_inc (adjusted_op0, const1_rtx);
4515 emit_jump_insn (gen_jump (label4));
4516 emit_barrier ();
4517 emit_label (label2);
4518 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4519 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4520 quotient, 0, OPTAB_LIB_WIDEN);
4521 if (tem != quotient)
4522 emit_move_insn (quotient, tem);
4523 emit_jump_insn (gen_jump (label5));
4524 emit_barrier ();
4525 emit_label (label3);
4526 expand_dec (adjusted_op0, const1_rtx);
4527 emit_label (label4);
4528 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4529 quotient, 0, OPTAB_LIB_WIDEN);
4530 if (tem != quotient)
4531 emit_move_insn (quotient, tem);
4532 expand_dec (quotient, const1_rtx);
4533 emit_label (label5);
4535 break;
4537 case CEIL_DIV_EXPR:
4538 case CEIL_MOD_EXPR:
4539 if (unsignedp)
4541 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4543 rtx t1, t2, t3;
4544 unsigned HOST_WIDE_INT d = INTVAL (op1);
4545 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4546 floor_log2 (d), tquotient, 1);
4547 t2 = expand_binop (compute_mode, and_optab, op0,
4548 gen_int_mode (d - 1, compute_mode),
4549 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4550 t3 = gen_reg_rtx (compute_mode);
4551 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4552 compute_mode, 1, 1);
4553 if (t3 == 0)
4555 rtx lab;
4556 lab = gen_label_rtx ();
4557 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4558 expand_inc (t1, const1_rtx);
4559 emit_label (lab);
4560 quotient = t1;
4562 else
4563 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4564 t1, t3),
4565 tquotient);
4566 break;
4569 /* Try using an instruction that produces both the quotient and
4570 remainder, using truncation. We can easily compensate the
4571 quotient or remainder to get ceiling rounding, once we have the
4572 remainder. Notice that we compute also the final remainder
4573 value here, and return the result right away. */
4574 if (target == 0 || GET_MODE (target) != compute_mode)
4575 target = gen_reg_rtx (compute_mode);
4577 if (rem_flag)
4579 remainder = (REG_P (target)
4580 ? target : gen_reg_rtx (compute_mode));
4581 quotient = gen_reg_rtx (compute_mode);
4583 else
4585 quotient = (REG_P (target)
4586 ? target : gen_reg_rtx (compute_mode));
4587 remainder = gen_reg_rtx (compute_mode);
4590 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4591 remainder, 1))
4593 /* This could be computed with a branch-less sequence.
4594 Save that for later. */
4595 rtx label = gen_label_rtx ();
4596 do_cmp_and_jump (remainder, const0_rtx, EQ,
4597 compute_mode, label);
4598 expand_inc (quotient, const1_rtx);
4599 expand_dec (remainder, op1);
4600 emit_label (label);
4601 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4604 /* No luck with division elimination or divmod. Have to do it
4605 by conditionally adjusting op0 *and* the result. */
4607 rtx label1, label2;
4608 rtx adjusted_op0, tem;
4610 quotient = gen_reg_rtx (compute_mode);
4611 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4612 label1 = gen_label_rtx ();
4613 label2 = gen_label_rtx ();
4614 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4615 compute_mode, label1);
4616 emit_move_insn (quotient, const0_rtx);
4617 emit_jump_insn (gen_jump (label2));
4618 emit_barrier ();
4619 emit_label (label1);
4620 expand_dec (adjusted_op0, const1_rtx);
4621 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4622 quotient, 1, OPTAB_LIB_WIDEN);
4623 if (tem != quotient)
4624 emit_move_insn (quotient, tem);
4625 expand_inc (quotient, const1_rtx);
4626 emit_label (label2);
4629 else /* signed */
4631 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4632 && INTVAL (op1) >= 0)
4634 /* This is extremely similar to the code for the unsigned case
4635 above. For 2.7 we should merge these variants, but for
4636 2.6.1 I don't want to touch the code for unsigned since that
4637 gets used in C. The signed case will only be used by other
4638 languages (Ada). */
4640 rtx t1, t2, t3;
4641 unsigned HOST_WIDE_INT d = INTVAL (op1);
4642 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4643 floor_log2 (d), tquotient, 0);
4644 t2 = expand_binop (compute_mode, and_optab, op0,
4645 gen_int_mode (d - 1, compute_mode),
4646 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4647 t3 = gen_reg_rtx (compute_mode);
4648 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4649 compute_mode, 1, 1);
4650 if (t3 == 0)
4652 rtx lab;
4653 lab = gen_label_rtx ();
4654 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4655 expand_inc (t1, const1_rtx);
4656 emit_label (lab);
4657 quotient = t1;
4659 else
4660 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4661 t1, t3),
4662 tquotient);
4663 break;
4666 /* Try using an instruction that produces both the quotient and
4667 remainder, using truncation. We can easily compensate the
4668 quotient or remainder to get ceiling rounding, once we have the
4669 remainder. Notice that we compute also the final remainder
4670 value here, and return the result right away. */
4671 if (target == 0 || GET_MODE (target) != compute_mode)
4672 target = gen_reg_rtx (compute_mode);
4673 if (rem_flag)
4675 remainder = (REG_P (target)
4676 ? target : gen_reg_rtx (compute_mode));
4677 quotient = gen_reg_rtx (compute_mode);
4679 else
4681 quotient = (REG_P (target)
4682 ? target : gen_reg_rtx (compute_mode));
4683 remainder = gen_reg_rtx (compute_mode);
4686 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4687 remainder, 0))
4689 /* This could be computed with a branch-less sequence.
4690 Save that for later. */
4691 rtx tem;
4692 rtx label = gen_label_rtx ();
4693 do_cmp_and_jump (remainder, const0_rtx, EQ,
4694 compute_mode, label);
4695 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4696 NULL_RTX, 0, OPTAB_WIDEN);
4697 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4698 expand_inc (quotient, const1_rtx);
4699 expand_dec (remainder, op1);
4700 emit_label (label);
4701 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4704 /* No luck with division elimination or divmod. Have to do it
4705 by conditionally adjusting op0 *and* the result. */
4707 rtx label1, label2, label3, label4, label5;
4708 rtx adjusted_op0;
4709 rtx tem;
4711 quotient = gen_reg_rtx (compute_mode);
4712 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4713 label1 = gen_label_rtx ();
4714 label2 = gen_label_rtx ();
4715 label3 = gen_label_rtx ();
4716 label4 = gen_label_rtx ();
4717 label5 = gen_label_rtx ();
4718 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4719 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4720 compute_mode, label1);
4721 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4722 quotient, 0, OPTAB_LIB_WIDEN);
4723 if (tem != quotient)
4724 emit_move_insn (quotient, tem);
4725 emit_jump_insn (gen_jump (label5));
4726 emit_barrier ();
4727 emit_label (label1);
4728 expand_dec (adjusted_op0, const1_rtx);
4729 emit_jump_insn (gen_jump (label4));
4730 emit_barrier ();
4731 emit_label (label2);
4732 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4733 compute_mode, label3);
4734 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4735 quotient, 0, OPTAB_LIB_WIDEN);
4736 if (tem != quotient)
4737 emit_move_insn (quotient, tem);
4738 emit_jump_insn (gen_jump (label5));
4739 emit_barrier ();
4740 emit_label (label3);
4741 expand_inc (adjusted_op0, const1_rtx);
4742 emit_label (label4);
4743 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4744 quotient, 0, OPTAB_LIB_WIDEN);
4745 if (tem != quotient)
4746 emit_move_insn (quotient, tem);
4747 expand_inc (quotient, const1_rtx);
4748 emit_label (label5);
4751 break;
4753 case EXACT_DIV_EXPR:
4754 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4756 HOST_WIDE_INT d = INTVAL (op1);
4757 unsigned HOST_WIDE_INT ml;
4758 int pre_shift;
4759 rtx t1;
4761 pre_shift = floor_log2 (d & -d);
4762 ml = invert_mod2n (d >> pre_shift, size);
4763 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4764 pre_shift, NULL_RTX, unsignedp);
4765 quotient = expand_mult (compute_mode, t1,
4766 gen_int_mode (ml, compute_mode),
4767 NULL_RTX, 1);
4769 insn = get_last_insn ();
4770 set_dst_reg_note (insn, REG_EQUAL,
4771 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4772 compute_mode, op0, op1),
4773 quotient);
4775 break;
4777 case ROUND_DIV_EXPR:
4778 case ROUND_MOD_EXPR:
4779 if (unsignedp)
4781 rtx tem;
4782 rtx label;
4783 label = gen_label_rtx ();
4784 quotient = gen_reg_rtx (compute_mode);
4785 remainder = gen_reg_rtx (compute_mode);
4786 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4788 rtx tem;
4789 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4790 quotient, 1, OPTAB_LIB_WIDEN);
4791 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4792 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4793 remainder, 1, OPTAB_LIB_WIDEN);
4795 tem = plus_constant (compute_mode, op1, -1);
4796 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4797 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4798 expand_inc (quotient, const1_rtx);
4799 expand_dec (remainder, op1);
4800 emit_label (label);
4802 else
4804 rtx abs_rem, abs_op1, tem, mask;
4805 rtx label;
4806 label = gen_label_rtx ();
4807 quotient = gen_reg_rtx (compute_mode);
4808 remainder = gen_reg_rtx (compute_mode);
4809 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4811 rtx tem;
4812 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4813 quotient, 0, OPTAB_LIB_WIDEN);
4814 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4815 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4816 remainder, 0, OPTAB_LIB_WIDEN);
4818 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4819 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4820 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4821 1, NULL_RTX, 1);
4822 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4823 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4824 NULL_RTX, 0, OPTAB_WIDEN);
4825 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4826 size - 1, NULL_RTX, 0);
4827 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4828 NULL_RTX, 0, OPTAB_WIDEN);
4829 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4830 NULL_RTX, 0, OPTAB_WIDEN);
4831 expand_inc (quotient, tem);
4832 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4833 NULL_RTX, 0, OPTAB_WIDEN);
4834 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4835 NULL_RTX, 0, OPTAB_WIDEN);
4836 expand_dec (remainder, tem);
4837 emit_label (label);
4839 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4841 default:
4842 gcc_unreachable ();
4845 if (quotient == 0)
4847 if (target && GET_MODE (target) != compute_mode)
4848 target = 0;
4850 if (rem_flag)
4852 /* Try to produce the remainder without producing the quotient.
4853 If we seem to have a divmod pattern that does not require widening,
4854 don't try widening here. We should really have a WIDEN argument
4855 to expand_twoval_binop, since what we'd really like to do here is
4856 1) try a mod insn in compute_mode
4857 2) try a divmod insn in compute_mode
4858 3) try a div insn in compute_mode and multiply-subtract to get
4859 remainder
4860 4) try the same things with widening allowed. */
4861 remainder
4862 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4863 op0, op1, target,
4864 unsignedp,
4865 ((optab_handler (optab2, compute_mode)
4866 != CODE_FOR_nothing)
4867 ? OPTAB_DIRECT : OPTAB_WIDEN));
4868 if (remainder == 0)
4870 /* No luck there. Can we do remainder and divide at once
4871 without a library call? */
4872 remainder = gen_reg_rtx (compute_mode);
4873 if (! expand_twoval_binop ((unsignedp
4874 ? udivmod_optab
4875 : sdivmod_optab),
4876 op0, op1,
4877 NULL_RTX, remainder, unsignedp))
4878 remainder = 0;
4881 if (remainder)
4882 return gen_lowpart (mode, remainder);
4885 /* Produce the quotient. Try a quotient insn, but not a library call.
4886 If we have a divmod in this mode, use it in preference to widening
4887 the div (for this test we assume it will not fail). Note that optab2
4888 is set to the one of the two optabs that the call below will use. */
4889 quotient
4890 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4891 op0, op1, rem_flag ? NULL_RTX : target,
4892 unsignedp,
4893 ((optab_handler (optab2, compute_mode)
4894 != CODE_FOR_nothing)
4895 ? OPTAB_DIRECT : OPTAB_WIDEN));
4897 if (quotient == 0)
4899 /* No luck there. Try a quotient-and-remainder insn,
4900 keeping the quotient alone. */
4901 quotient = gen_reg_rtx (compute_mode);
4902 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4903 op0, op1,
4904 quotient, NULL_RTX, unsignedp))
4906 quotient = 0;
4907 if (! rem_flag)
4908 /* Still no luck. If we are not computing the remainder,
4909 use a library call for the quotient. */
4910 quotient = sign_expand_binop (compute_mode,
4911 udiv_optab, sdiv_optab,
4912 op0, op1, target,
4913 unsignedp, OPTAB_LIB_WIDEN);
4918 if (rem_flag)
4920 if (target && GET_MODE (target) != compute_mode)
4921 target = 0;
4923 if (quotient == 0)
4925 /* No divide instruction either. Use library for remainder. */
4926 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4927 op0, op1, target,
4928 unsignedp, OPTAB_LIB_WIDEN);
4929 /* No remainder function. Try a quotient-and-remainder
4930 function, keeping the remainder. */
4931 if (!remainder)
4933 remainder = gen_reg_rtx (compute_mode);
4934 if (!expand_twoval_binop_libfunc
4935 (unsignedp ? udivmod_optab : sdivmod_optab,
4936 op0, op1,
4937 NULL_RTX, remainder,
4938 unsignedp ? UMOD : MOD))
4939 remainder = NULL_RTX;
4942 else
4944 /* We divided. Now finish doing X - Y * (X / Y). */
4945 remainder = expand_mult (compute_mode, quotient, op1,
4946 NULL_RTX, unsignedp);
4947 remainder = expand_binop (compute_mode, sub_optab, op0,
4948 remainder, target, unsignedp,
4949 OPTAB_LIB_WIDEN);
4953 return gen_lowpart (mode, rem_flag ? remainder : quotient);
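/* Illustrative sketch (not part of this file): the "mh != 0" branch of
   the unsigned TRUNC_DIV_EXPR case above, instantiated for division by
   7 in 32 bits.  choose_multiplier would pick the 33-bit multiplier
   2^32 + 0x24924925 with post_shift 3, which expands to a high-part
   multiply, a subtract, two shifts and an add.  Invented name; 32-bit
   unsigned int and 64-bit unsigned long long assumed.  */

static unsigned int
udiv7_sketch (unsigned int x)
{
  unsigned int t1
    = (unsigned int) (((unsigned long long) x * 0x24924925u) >> 32);
  unsigned int t2 = (x - t1) >> 1;

  return (t1 + t2) >> 2;	/* post_shift - 1 == 2; equals x / 7 */
}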
4956 /* Return a tree node with data type TYPE, describing the value of X.
4957 Usually this is a VAR_DECL, if there is no obvious better choice.
4958 X may be an expression; however, we only support those expressions
4959 generated by loop.c. */
4961 tree
4962 make_tree (tree type, rtx x)
4964 tree t;
4966 switch (GET_CODE (x))
4968 case CONST_INT:
4969 case CONST_WIDE_INT:
4970 t = wide_int_to_tree (type, std::make_pair (x, TYPE_MODE (type)));
4971 return t;
4973 case CONST_DOUBLE:
4974 STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
4975 if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
4976 t = wide_int_to_tree (type,
4977 wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
4978 HOST_BITS_PER_WIDE_INT * 2));
4979 else
4981 REAL_VALUE_TYPE d;
4983 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4984 t = build_real (type, d);
4987 return t;
4989 case CONST_VECTOR:
4991 int units = CONST_VECTOR_NUNITS (x);
4992 tree itype = TREE_TYPE (type);
4993 tree *elts;
4994 int i;
4996 /* Build a tree with vector elements. */
4997 elts = XALLOCAVEC (tree, units);
4998 for (i = units - 1; i >= 0; --i)
5000 rtx elt = CONST_VECTOR_ELT (x, i);
5001 elts[i] = make_tree (itype, elt);
5004 return build_vector (type, elts);
5007 case PLUS:
5008 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5009 make_tree (type, XEXP (x, 1)));
5011 case MINUS:
5012 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5013 make_tree (type, XEXP (x, 1)));
5015 case NEG:
5016 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5018 case MULT:
5019 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5020 make_tree (type, XEXP (x, 1)));
5022 case ASHIFT:
5023 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5024 make_tree (type, XEXP (x, 1)));
5026 case LSHIFTRT:
5027 t = unsigned_type_for (type);
5028 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5029 make_tree (t, XEXP (x, 0)),
5030 make_tree (type, XEXP (x, 1))));
5032 case ASHIFTRT:
5033 t = signed_type_for (type);
5034 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5035 make_tree (t, XEXP (x, 0)),
5036 make_tree (type, XEXP (x, 1))));
5038 case DIV:
5039 if (TREE_CODE (type) != REAL_TYPE)
5040 t = signed_type_for (type);
5041 else
5042 t = type;
5044 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5045 make_tree (t, XEXP (x, 0)),
5046 make_tree (t, XEXP (x, 1))));
5047 case UDIV:
5048 t = unsigned_type_for (type);
5049 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5050 make_tree (t, XEXP (x, 0)),
5051 make_tree (t, XEXP (x, 1))));
5053 case SIGN_EXTEND:
5054 case ZERO_EXTEND:
5055 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5056 GET_CODE (x) == ZERO_EXTEND);
5057 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5059 case CONST:
5060 return make_tree (type, XEXP (x, 0));
5062 case SYMBOL_REF:
5063 t = SYMBOL_REF_DECL (x);
5064 if (t)
5065 return fold_convert (type, build_fold_addr_expr (t));
5066 /* else fall through. */
5068 default:
5069 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5071 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5072 address mode to pointer mode. */
5073 if (POINTER_TYPE_P (type))
5074 x = convert_memory_address_addr_space
5075 (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5077 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5078 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5079 t->decl_with_rtl.rtl = x;
5081 return t;
5085 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5086 and returning TARGET.
5088 If TARGET is 0, a pseudo-register or constant is returned. */
5091 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5093 rtx tem = 0;
5095 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5096 tem = simplify_binary_operation (AND, mode, op0, op1);
5097 if (tem == 0)
5098 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5100 if (target == 0)
5101 target = tem;
5102 else if (tem != target)
5103 emit_move_insn (target, tem);
5104 return target;
5107 /* Helper function for emit_store_flag. */
5108 static rtx
5109 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5110 enum machine_mode mode, enum machine_mode compare_mode,
5111 int unsignedp, rtx x, rtx y, int normalizep,
5112 enum machine_mode target_mode)
5114 struct expand_operand ops[4];
5115 rtx op0, last, comparison, subtarget;
5116 enum machine_mode result_mode = targetm.cstore_mode (icode);
5118 last = get_last_insn ();
5119 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5120 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5121 if (!x || !y)
5123 delete_insns_since (last);
5124 return NULL_RTX;
5127 if (target_mode == VOIDmode)
5128 target_mode = result_mode;
5129 if (!target)
5130 target = gen_reg_rtx (target_mode);
5132 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5134 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5135 create_fixed_operand (&ops[1], comparison);
5136 create_fixed_operand (&ops[2], x);
5137 create_fixed_operand (&ops[3], y);
5138 if (!maybe_expand_insn (icode, 4, ops))
5140 delete_insns_since (last);
5141 return NULL_RTX;
5143 subtarget = ops[0].value;
5145 /* If we are converting to a wider mode, first convert to
5146 TARGET_MODE, then normalize. This produces better combining
5147 opportunities on machines that have a SIGN_EXTRACT when we are
5148 testing a single bit. This mostly benefits the 68k.
5150 If STORE_FLAG_VALUE does not have the sign bit set when
5151 interpreted in MODE, we can do this conversion as unsigned, which
5152 is usually more efficient. */
5153 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5155 convert_move (target, subtarget,
5156 val_signbit_known_clear_p (result_mode,
5157 STORE_FLAG_VALUE));
5158 op0 = target;
5159 result_mode = target_mode;
5161 else
5162 op0 = subtarget;
5164 /* If we want to keep subexpressions around, don't reuse our last
5165 target. */
5166 if (optimize)
5167 subtarget = 0;
5169 /* Now normalize to the proper value in MODE. Sometimes we don't
5170 have to do anything. */
5171 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5173 /* STORE_FLAG_VALUE might be the most negative number, so write
5174 the comparison this way to avoid a compile-time warning. */
5175 else if (- normalizep == STORE_FLAG_VALUE)
5176 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5178 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5179 it hard to use a value of just the sign bit due to ANSI integer
5180 constant typing rules. */
5181 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5182 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5183 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5184 normalizep == 1);
5185 else
5187 gcc_assert (STORE_FLAG_VALUE & 1);
5189 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5190 if (normalizep == -1)
5191 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5194 /* If we were converting to a smaller mode, do the conversion now. */
5195 if (target_mode != result_mode)
5197 convert_move (target, op0, 0);
5198 return target;
5200 else
5201 return op0;
5205 /* A subroutine of emit_store_flag only including "tricks" that do not
5206 need a recursive call. These are kept separate to avoid infinite
5207 loops. */
5209 static rtx
5210 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5211 enum machine_mode mode, int unsignedp, int normalizep,
5212 enum machine_mode target_mode)
5214 rtx subtarget;
5215 enum insn_code icode;
5216 enum machine_mode compare_mode;
5217 enum mode_class mclass;
5218 enum rtx_code scode;
5219 rtx tem;
5221 if (unsignedp)
5222 code = unsigned_condition (code);
5223 scode = swap_condition (code);
5225 /* If one operand is constant, make it the second one. Only do this
5226 if the other operand is not constant as well. */
5228 if (swap_commutative_operands_p (op0, op1))
5230 tem = op0;
5231 op0 = op1;
5232 op1 = tem;
5233 code = swap_condition (code);
5236 if (mode == VOIDmode)
5237 mode = GET_MODE (op0);
5239 /* For some comparisons with 1 and -1, we can convert this to
5240 comparisons with zero. This will often produce more opportunities for
5241 store-flag insns. */
5243 switch (code)
5245 case LT:
5246 if (op1 == const1_rtx)
5247 op1 = const0_rtx, code = LE;
5248 break;
5249 case LE:
5250 if (op1 == constm1_rtx)
5251 op1 = const0_rtx, code = LT;
5252 break;
5253 case GE:
5254 if (op1 == const1_rtx)
5255 op1 = const0_rtx, code = GT;
5256 break;
5257 case GT:
5258 if (op1 == constm1_rtx)
5259 op1 = const0_rtx, code = GE;
5260 break;
5261 case GEU:
5262 if (op1 == const1_rtx)
5263 op1 = const0_rtx, code = NE;
5264 break;
5265 case LTU:
5266 if (op1 == const1_rtx)
5267 op1 = const0_rtx, code = EQ;
5268 break;
5269 default:
5270 break;
5273 /* If we are comparing a double-word integer with zero or -1, we can
5274 convert the comparison into one involving a single word. */
5275 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5276 && GET_MODE_CLASS (mode) == MODE_INT
5277 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5279 if ((code == EQ || code == NE)
5280 && (op1 == const0_rtx || op1 == constm1_rtx))
5282 rtx op00, op01;
5284 /* Do a logical OR or AND of the two words and compare the
5285 result. */
5286 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5287 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5288 tem = expand_binop (word_mode,
5289 op1 == const0_rtx ? ior_optab : and_optab,
5290 op00, op01, NULL_RTX, unsignedp,
5291 OPTAB_DIRECT);
5293 if (tem != 0)
5294 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5295 unsignedp, normalizep);
5297 else if ((code == LT || code == GE) && op1 == const0_rtx)
5299 rtx op0h;
5301 /* If testing the sign bit, can just test on high word. */
5302 op0h = simplify_gen_subreg (word_mode, op0, mode,
5303 subreg_highpart_offset (word_mode,
5304 mode));
5305 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5306 unsignedp, normalizep);
5308 else
5309 tem = NULL_RTX;
5311 if (tem)
5313 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5314 return tem;
5315 if (!target)
5316 target = gen_reg_rtx (target_mode);
5318 convert_move (target, tem,
5319 !val_signbit_known_set_p (word_mode,
5320 (normalizep ? normalizep
5321 : STORE_FLAG_VALUE)));
5322 return target;
5326 /* If this is A < 0 or A >= 0, we can do this by taking the one's
5327 complement of A (for GE) and shifting the sign bit to the low-order bit. */
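/* For illustration, assuming a 32-bit MODE:  "A < 0" normalized to 0/1
   is the logical shift "(unsigned) A >> 31", e.g. A = -5 gives 1 and
   A = 7 gives 0; normalized to 0/-1 it is the arithmetic shift
   "A >> 31".  For "A >= 0" the same shifts are applied to ~A.  */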
5328 if (op1 == const0_rtx && (code == LT || code == GE)
5329 && GET_MODE_CLASS (mode) == MODE_INT
5330 && (normalizep || STORE_FLAG_VALUE == 1
5331 || val_signbit_p (mode, STORE_FLAG_VALUE)))
5333 subtarget = target;
5335 if (!target)
5336 target_mode = mode;
5338 /* If the result is to be wider than OP0, it is best to convert it
5339 first. If it is to be narrower, it is *incorrect* to convert it
5340 first. */
5341 else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5343 op0 = convert_modes (target_mode, mode, op0, 0);
5344 mode = target_mode;
5347 if (target_mode != mode)
5348 subtarget = 0;
5350 if (code == GE)
5351 op0 = expand_unop (mode, one_cmpl_optab, op0,
5352 ((STORE_FLAG_VALUE == 1 || normalizep)
5353 ? 0 : subtarget), 0);
5355 if (STORE_FLAG_VALUE == 1 || normalizep)
5356 /* If we are supposed to produce a 0/1 value, we want to do
5357 a logical shift from the sign bit to the low-order bit; for
5358 a -1/0 value, we do an arithmetic shift. */
5359 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5360 GET_MODE_BITSIZE (mode) - 1,
5361 subtarget, normalizep != -1);
5363 if (mode != target_mode)
5364 op0 = convert_modes (target_mode, mode, op0, 0);
5366 return op0;
5369 mclass = GET_MODE_CLASS (mode);
5370 for (compare_mode = mode; compare_mode != VOIDmode;
5371 compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5373 enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5374 icode = optab_handler (cstore_optab, optab_mode);
5375 if (icode != CODE_FOR_nothing)
5377 do_pending_stack_adjust ();
5378 tem = emit_cstore (target, icode, code, mode, compare_mode,
5379 unsignedp, op0, op1, normalizep, target_mode);
5380 if (tem)
5381 return tem;
5383 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5385 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5386 unsignedp, op1, op0, normalizep, target_mode);
5387 if (tem)
5388 return tem;
5390 break;
5394 return 0;
5397 /* Emit a store-flag instruction for comparison CODE on OP0 and OP1,
5398 storing the result in TARGET. Normally return TARGET.
5399 Return 0 if that cannot be done.
5401 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5402 it is VOIDmode, they cannot both be CONST_INT.
5404 UNSIGNEDP is for the case where we have to widen the operands
5405 to perform the operation. It says to use zero-extension.
5407 NORMALIZEP is 1 if we should convert the result to be either zero
5408 or one. NORMALIZEP is -1 if we should convert the result to be
5409 either zero or -1. If NORMALIZEP is zero, the result will be left
5410 "raw" out of the scc insn. */
5412 rtx
5413 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5414 enum machine_mode mode, int unsignedp, int normalizep)
5416 enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5417 enum rtx_code rcode;
5418 rtx subtarget;
5419 rtx tem, last, trueval;
5421 /* If we compare constants, we shouldn't use a store-flag operation,
5422 but a constant load. We can get there via the vanilla route that
5423 usually generates a compare-branch sequence, but will in this case
5424 fold the comparison to a constant, and thus elide the branch. */
5425 if (CONSTANT_P (op0) && CONSTANT_P (op1))
5426 return NULL_RTX;
5428 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5429 target_mode);
5430 if (tem)
5431 return tem;
5433 /* If we reached here, we can't do this with a scc insn, however there
5434 are some comparisons that can be done in other ways. Don't do any
5435 of these cases if branches are very cheap. */
5436 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5437 return 0;
5439 /* See what we need to return. We can only return a 1, -1, or the
5440 sign bit. */
5442 if (normalizep == 0)
5444 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5445 normalizep = STORE_FLAG_VALUE;
5447 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5448 ;
5449 else
5450 return 0;
5453 last = get_last_insn ();
5455 /* If optimizing, use different pseudo registers for each insn, instead
5456 of reusing the same pseudo. This leads to better CSE, but slows
5457 down the compiler, since there are more pseudos. */
5458 subtarget = (!optimize
5459 && (target_mode == mode)) ? target : NULL_RTX;
5460 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5462 /* For floating-point comparisons, try the reverse comparison or try
5463 changing the "orderedness" of the comparison. */
5464 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5466 enum rtx_code first_code;
5467 bool and_them;
5469 rcode = reverse_condition_maybe_unordered (code);
5470 if (can_compare_p (rcode, mode, ccp_store_flag)
5471 && (code == ORDERED || code == UNORDERED
5472 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5473 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5475 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5476 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5478 /* For the reverse comparison, use either an addition or a XOR. */
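/* For illustration, assume STORE_FLAG_VALUE is 1 and NORMALIZEP is -1.
   The reverse comparison RCODE is computed as 1 when it holds and 0 when
   it does not; adding NORMALIZEP (-1) then yields 0 when CODE is false
   and -1 when CODE is true, which is the requested result.  The XOR
   variant instead flips a result already normalized to TRUEVAL.  */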
5479 if (want_add
5480 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5481 optimize_insn_for_speed_p ()) == 0)
5483 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5484 STORE_FLAG_VALUE, target_mode);
5485 if (tem)
5486 return expand_binop (target_mode, add_optab, tem,
5487 gen_int_mode (normalizep, target_mode),
5488 target, 0, OPTAB_WIDEN);
5490 else if (!want_add
5491 && rtx_cost (trueval, XOR, 1,
5492 optimize_insn_for_speed_p ()) == 0)
5494 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5495 normalizep, target_mode);
5496 if (tem)
5497 return expand_binop (target_mode, xor_optab, tem, trueval,
5498 target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5502 delete_insns_since (last);
5504 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5505 if (code == ORDERED || code == UNORDERED)
5506 return 0;
5508 and_them = split_comparison (code, mode, &first_code, &code);
5510 /* If there are no NaNs, the first comparison should always fall through.
5511 Effectively change the comparison to the other one. */
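/* For illustration, "a < b" splits into "ordered (a, b) && a < b" while
   "unlt (a, b)" splits into "unordered (a, b) || a < b"; without NaNs
   the ORDERED/UNORDERED half is constant, so only the other half needs
   to be emitted.  */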
5512 if (!HONOR_NANS (mode))
5514 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5515 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5516 target_mode);
5519 #ifdef HAVE_conditional_move
5520 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5521 conditional move. */
5522 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5523 normalizep, target_mode);
5524 if (tem == 0)
5525 return 0;
5527 if (and_them)
5528 tem = emit_conditional_move (target, code, op0, op1, mode,
5529 tem, const0_rtx, GET_MODE (tem), 0);
5530 else
5531 tem = emit_conditional_move (target, code, op0, op1, mode,
5532 trueval, tem, GET_MODE (tem), 0);
5534 if (tem == 0)
5535 delete_insns_since (last);
5536 return tem;
5537 #else
5538 return 0;
5539 #endif
5542 /* The remaining tricks only apply to integer comparisons. */
5544 if (GET_MODE_CLASS (mode) != MODE_INT)
5545 return 0;
5547 /* If this is an equality comparison of integers, we can try to exclusive-or
5548 (or subtract) the two operands and use a recursive call to try the
5549 comparison with zero. Don't do any of these cases if branches are
5550 very cheap. */
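/* For illustration, "a == b" is equivalent to "(a ^ b) == 0" and to
   "(a - b) == 0" (the subtraction wraps, so the difference is zero
   exactly when the operands are equal), and likewise for "!=".  */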
5552 if ((code == EQ || code == NE) && op1 != const0_rtx)
5554 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5555 OPTAB_WIDEN);
5557 if (tem == 0)
5558 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5559 OPTAB_WIDEN);
5560 if (tem != 0)
5561 tem = emit_store_flag (target, code, tem, const0_rtx,
5562 mode, unsignedp, normalizep);
5563 if (tem != 0)
5564 return tem;
5566 delete_insns_since (last);
5569 /* For integer comparisons, try the reverse comparison. However, for
5570 small X, if we would have to extend it anyway, implementing "X != 0"
5571 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5572 rcode = reverse_condition (code);
5573 if (can_compare_p (rcode, mode, ccp_store_flag)
5574 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5575 && code == NE
5576 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5577 && op1 == const0_rtx))
5579 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5580 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5582 /* Again, for the reverse comparison, use either an addition or a XOR. */
5583 if (want_add
5584 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5585 optimize_insn_for_speed_p ()) == 0)
5587 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5588 STORE_FLAG_VALUE, target_mode);
5589 if (tem != 0)
5590 tem = expand_binop (target_mode, add_optab, tem,
5591 gen_int_mode (normalizep, target_mode),
5592 target, 0, OPTAB_WIDEN);
5594 else if (!want_add
5595 && rtx_cost (trueval, XOR, 1,
5596 optimize_insn_for_speed_p ()) == 0)
5598 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5599 normalizep, target_mode);
5600 if (tem != 0)
5601 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5602 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5605 if (tem != 0)
5606 return tem;
5607 delete_insns_since (last);
5610 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5611 the constant zero. Reject all other comparisons at this point. Only
5612 do LE and GT if branches are expensive since they are expensive on
5613 2-operand machines. */
5615 if (op1 != const0_rtx
5616 || (code != EQ && code != NE
5617 && (BRANCH_COST (optimize_insn_for_speed_p (),
5618 false) <= 1 || (code != LE && code != GT))))
5619 return 0;
5621 /* Try to put the result of the comparison in the sign bit. Assume we can't
5622 do the necessary operation below. */
5624 tem = 0;
5626 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5627 the sign bit set. */
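/* For illustration, with 32-bit A:  A = 3 gives 3 | 2 = 3 (sign clear,
   A > 0); A = 0 gives 0 | -1 = -1 (sign set); A = -4 gives -4 | -5 = -1
   (sign set).  So the sign bit of A | (A - 1) is set exactly when
   A <= 0.  */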
5629 if (code == LE)
5631 /* This is destructive, so SUBTARGET can't be OP0. */
5632 if (rtx_equal_p (subtarget, op0))
5633 subtarget = 0;
5635 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5636 OPTAB_WIDEN);
5637 if (tem)
5638 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5639 OPTAB_WIDEN);
5642 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5643 number of bits in the mode of OP0, minus one. */
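/* For illustration, with 32-bit A and BITS = 31:  A = 5 gives
   (5 >> 31) - 5 = -5 (sign set, A > 0); A = 0 gives 0 - 0 = 0 (sign
   clear); A = -3 gives (-3 >> 31) - (-3) = -1 + 3 = 2 (sign clear).
   So the sign bit of the result is set exactly when A > 0.  */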
5645 if (code == GT)
5647 if (rtx_equal_p (subtarget, op0))
5648 subtarget = 0;
5650 tem = expand_shift (RSHIFT_EXPR, mode, op0,
5651 GET_MODE_BITSIZE (mode) - 1,
5652 subtarget, 0);
5653 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5654 OPTAB_WIDEN);
5657 if (code == EQ || code == NE)
5659 /* For EQ or NE, one way to do the comparison is to apply an operation
5660 that converts the operand into a positive number if it is nonzero
5661 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5662 for NE we negate. This puts the result in the sign bit. Then we
5663 normalize with a shift, if needed.
5665 Two operations that can do the above actions are ABS and FFS, so try
5666 them. If that doesn't work, and MODE is smaller than a full word,
5667 we can use zero-extension to the wider mode (an unsigned conversion)
5668 as the operation. */
5670 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5671 that is compensated by the subsequent overflow when subtracting
5672 one / negating. */
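/* For illustration, with ABS and 32-bit A:  A = -7 gives abs = 7, so
   7 - 1 = 6 (sign clear, "A == 0" false) and -7 (sign set, "A != 0"
   true); A = 0 gives abs = 0, so 0 - 1 = -1 (sign set, "A == 0" true)
   and -0 = 0 (sign clear, "A != 0" false).  */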
5674 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5675 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5676 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5677 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5678 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5680 tem = convert_modes (word_mode, mode, op0, 1);
5681 mode = word_mode;
5684 if (tem != 0)
5686 if (code == EQ)
5687 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5688 0, OPTAB_WIDEN);
5689 else
5690 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5693 /* If we couldn't do it that way, for NE we can "or" the two's complement
5694 of the value with itself. For EQ, we take the one's complement of
5695 that "or", which is an extra insn, so we only handle EQ if branches
5696 are expensive. */
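/* For illustration, with 32-bit A:  A = 6 gives -6 | 6 = -2 (sign set,
   "A != 0" true) and A = 0 gives 0 | 0 = 0 (sign clear); taking the
   one's complement of that result flips the sign bit, which handles the
   "A == 0" case.  */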
5698 if (tem == 0
5699 && (code == NE
5700 || BRANCH_COST (optimize_insn_for_speed_p (),
5701 false) > 1))
5703 if (rtx_equal_p (subtarget, op0))
5704 subtarget = 0;
5706 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5707 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5708 OPTAB_WIDEN);
5710 if (tem && code == EQ)
5711 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5715 if (tem && normalizep)
5716 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5717 GET_MODE_BITSIZE (mode) - 1,
5718 subtarget, normalizep == 1);
5720 if (tem)
5722 if (!target)
5723 ;
5724 else if (GET_MODE (tem) != target_mode)
5726 convert_move (target, tem, 0);
5727 tem = target;
5729 else if (!subtarget)
5731 emit_move_insn (target, tem);
5732 tem = target;
5735 else
5736 delete_insns_since (last);
5738 return tem;
5741 /* Like emit_store_flag, but always succeeds. */
5743 rtx
5744 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5745 enum machine_mode mode, int unsignedp, int normalizep)
5747 rtx tem, label;
5748 rtx trueval, falseval;
5750 /* First see if emit_store_flag can do the job. */
5751 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5752 if (tem != 0)
5753 return tem;
5755 if (!target)
5756 target = gen_reg_rtx (word_mode);
5758 /* If this failed, we have to do this with set/compare/jump/set code.
5759 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
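/* Illustrative shape of the sequence emitted for this special case
   (a sketch):

       if (target == 0) goto L;
       target = trueval;
     L:

   i.e. a nonzero TARGET is simply overwritten with TRUEVAL.  */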
5760 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5761 if (code == NE
5762 && GET_MODE_CLASS (mode) == MODE_INT
5763 && REG_P (target)
5764 && op0 == target
5765 && op1 == const0_rtx)
5767 label = gen_label_rtx ();
5768 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5769 mode, NULL_RTX, NULL_RTX, label, -1);
5770 emit_move_insn (target, trueval);
5771 emit_label (label);
5772 return target;
5775 if (!REG_P (target)
5776 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5777 target = gen_reg_rtx (GET_MODE (target));
5779 /* Jump in the right direction if the target cannot implement CODE
5780 but can jump on its reverse condition. */
5781 falseval = const0_rtx;
5782 if (! can_compare_p (code, mode, ccp_jump)
5783 && (! FLOAT_MODE_P (mode)
5784 || code == ORDERED || code == UNORDERED
5785 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5786 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5788 enum rtx_code rcode;
5789 if (FLOAT_MODE_P (mode))
5790 rcode = reverse_condition_maybe_unordered (code);
5791 else
5792 rcode = reverse_condition (code);
5794 /* Canonicalize to UNORDERED for the libcall. */
5795 if (can_compare_p (rcode, mode, ccp_jump)
5796 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5798 falseval = trueval;
5799 trueval = const0_rtx;
5800 code = rcode;
5804 emit_move_insn (target, trueval);
5805 label = gen_label_rtx ();
5806 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5807 NULL_RTX, label, -1);
5809 emit_move_insn (target, falseval);
5810 emit_label (label);
5812 return target;
5815 /* Perform possibly multi-word comparison and conditional jump to LABEL
5816 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
5817 now a thin wrapper around do_compare_rtx_and_jump. */
5819 static void
5820 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5821 rtx label)
5823 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5824 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5825 NULL_RTX, NULL_RTX, label, -1);