1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
4 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
5 2011, 2012
6 Free Software Foundation, Inc.
8 This file is part of GCC.
10 GCC is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free
12 Software Foundation; either version 3, or (at your option) any later
13 version.
15 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 for more details.
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "diagnostic-core.h"
30 #include "rtl.h"
31 #include "tree.h"
32 #include "tm_p.h"
33 #include "flags.h"
34 #include "insn-config.h"
35 #include "expr.h"
36 #include "optabs.h"
37 #include "recog.h"
38 #include "langhooks.h"
39 #include "df.h"
40 #include "target.h"
41 #include "expmed.h"
43 struct target_expmed default_target_expmed;
44 #if SWITCHABLE_TARGET
45 struct target_expmed *this_target_expmed = &default_target_expmed;
46 #endif
48 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
49 unsigned HOST_WIDE_INT,
50 unsigned HOST_WIDE_INT,
51 unsigned HOST_WIDE_INT,
52 rtx);
53 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
54 unsigned HOST_WIDE_INT,
55 unsigned HOST_WIDE_INT,
56 unsigned HOST_WIDE_INT,
57 rtx);
58 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
59 unsigned HOST_WIDE_INT,
60 unsigned HOST_WIDE_INT, rtx, int, bool);
61 static rtx mask_rtx (enum machine_mode, int, int, int);
62 static rtx lshift_value (enum machine_mode, rtx, int, int);
63 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
64 unsigned HOST_WIDE_INT, int);
65 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
66 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
67 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
69 /* Test whether a value is zero or a power of two. */
70 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
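/* Illustrative note (not in the original source): x & (x - 1) clears the
   lowest set bit of x, so the test holds exactly when x has at most one bit
   set -- e.g. 0 & -1 == 0 and 8 & 7 == 0, but 6 & 5 == 4.  */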
72 struct init_expmed_rtl
74 struct rtx_def reg; rtunion reg_fld[2];
75 struct rtx_def plus; rtunion plus_fld1;
76 struct rtx_def neg;
77 struct rtx_def mult; rtunion mult_fld1;
78 struct rtx_def sdiv; rtunion sdiv_fld1;
79 struct rtx_def udiv; rtunion udiv_fld1;
80 struct rtx_def sdiv_32; rtunion sdiv_32_fld1;
81 struct rtx_def smod_32; rtunion smod_32_fld1;
82 struct rtx_def wide_mult; rtunion wide_mult_fld1;
83 struct rtx_def wide_lshr; rtunion wide_lshr_fld1;
84 struct rtx_def wide_trunc;
85 struct rtx_def shift; rtunion shift_fld1;
86 struct rtx_def shift_mult; rtunion shift_mult_fld1;
87 struct rtx_def shift_add; rtunion shift_add_fld1;
88 struct rtx_def shift_sub0; rtunion shift_sub0_fld1;
89 struct rtx_def shift_sub1; rtunion shift_sub1_fld1;
90 struct rtx_def zext;
91 struct rtx_def trunc;
93 rtx pow2[MAX_BITS_PER_WORD];
94 rtx cint[MAX_BITS_PER_WORD];
97 static void
98 init_expmed_one_conv (struct init_expmed_rtl *all, enum machine_mode to_mode,
99 enum machine_mode from_mode, bool speed)
101 int to_size, from_size;
102 rtx which;
104 /* We're given no information about the true size of a partial integer,
105 only the size of the "full" integer it requires for storage. For
106 comparison purposes here, reduce the bit size by one in that case. */
107 to_size = (GET_MODE_BITSIZE (to_mode)
108 - (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT));
109 from_size = (GET_MODE_BITSIZE (from_mode)
110 - (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT));
112 /* Assume cost of zero-extend and sign-extend is the same. */
113 which = (to_size < from_size ? &all->trunc : &all->zext);
115 PUT_MODE (&all->reg, from_mode);
116 set_convert_cost (to_mode, from_mode, speed, set_src_cost (which, speed));
119 static void
120 init_expmed_one_mode (struct init_expmed_rtl *all,
121 enum machine_mode mode, int speed)
123 int m, n, mode_bitsize;
124 enum machine_mode mode_from;
126 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
128 PUT_MODE (&all->reg, mode);
129 PUT_MODE (&all->plus, mode);
130 PUT_MODE (&all->neg, mode);
131 PUT_MODE (&all->mult, mode);
132 PUT_MODE (&all->sdiv, mode);
133 PUT_MODE (&all->udiv, mode);
134 PUT_MODE (&all->sdiv_32, mode);
135 PUT_MODE (&all->smod_32, mode);
136 PUT_MODE (&all->wide_trunc, mode);
137 PUT_MODE (&all->shift, mode);
138 PUT_MODE (&all->shift_mult, mode);
139 PUT_MODE (&all->shift_add, mode);
140 PUT_MODE (&all->shift_sub0, mode);
141 PUT_MODE (&all->shift_sub1, mode);
142 PUT_MODE (&all->zext, mode);
143 PUT_MODE (&all->trunc, mode);
145 set_add_cost (speed, mode, set_src_cost (&all->plus, speed));
146 set_neg_cost (speed, mode, set_src_cost (&all->neg, speed));
147 set_mul_cost (speed, mode, set_src_cost (&all->mult, speed));
148 set_sdiv_cost (speed, mode, set_src_cost (&all->sdiv, speed));
149 set_udiv_cost (speed, mode, set_src_cost (&all->udiv, speed));
151 set_sdiv_pow2_cheap (speed, mode, (set_src_cost (&all->sdiv_32, speed)
152 <= 2 * add_cost (speed, mode)));
153 set_smod_pow2_cheap (speed, mode, (set_src_cost (&all->smod_32, speed)
154 <= 4 * add_cost (speed, mode)));
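/* Roughly speaking, the two flags above record whether dividing by the
   representative power-of-two constant 32 rtx-costs no more than two
   additions, and whether the corresponding remainder costs no more than
   four; later expansion consults them when choosing between a real
   division and a shift/mask sequence.  */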
156 set_shift_cost (speed, mode, 0, 0);
158 int cost = add_cost (speed, mode);
159 set_shiftadd_cost (speed, mode, 0, cost);
160 set_shiftsub0_cost (speed, mode, 0, cost);
161 set_shiftsub1_cost (speed, mode, 0, cost);
164 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
165 for (m = 1; m < n; m++)
167 XEXP (&all->shift, 1) = all->cint[m];
168 XEXP (&all->shift_mult, 1) = all->pow2[m];
170 set_shift_cost (speed, mode, m, set_src_cost (&all->shift, speed));
171 set_shiftadd_cost (speed, mode, m, set_src_cost (&all->shift_add, speed));
172 set_shiftsub0_cost (speed, mode, m, set_src_cost (&all->shift_sub0, speed));
173 set_shiftsub1_cost (speed, mode, m, set_src_cost (&all->shift_sub1, speed));
176 if (SCALAR_INT_MODE_P (mode))
178 for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
179 mode_from = (enum machine_mode)(mode_from + 1))
180 init_expmed_one_conv (all, mode, mode_from, speed);
182 if (GET_MODE_CLASS (mode) == MODE_INT)
184 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
185 if (wider_mode != VOIDmode)
187 PUT_MODE (&all->zext, wider_mode);
188 PUT_MODE (&all->wide_mult, wider_mode);
189 PUT_MODE (&all->wide_lshr, wider_mode);
190 XEXP (&all->wide_lshr, 1) = GEN_INT (mode_bitsize);
192 set_mul_widen_cost (speed, wider_mode,
193 set_src_cost (&all->wide_mult, speed));
194 set_mul_highpart_cost (speed, mode,
195 set_src_cost (&all->wide_trunc, speed));
200 void
201 init_expmed (void)
203 struct init_expmed_rtl all;
204 enum machine_mode mode;
205 int m, speed;
207 memset (&all, 0, sizeof all);
208 for (m = 1; m < MAX_BITS_PER_WORD; m++)
210 all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
211 all.cint[m] = GEN_INT (m);
214 PUT_CODE (&all.reg, REG);
215 /* Avoid using hard regs in ways which may be unsupported. */
216 SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
218 PUT_CODE (&all.plus, PLUS);
219 XEXP (&all.plus, 0) = &all.reg;
220 XEXP (&all.plus, 1) = &all.reg;
222 PUT_CODE (&all.neg, NEG);
223 XEXP (&all.neg, 0) = &all.reg;
225 PUT_CODE (&all.mult, MULT);
226 XEXP (&all.mult, 0) = &all.reg;
227 XEXP (&all.mult, 1) = &all.reg;
229 PUT_CODE (&all.sdiv, DIV);
230 XEXP (&all.sdiv, 0) = &all.reg;
231 XEXP (&all.sdiv, 1) = &all.reg;
233 PUT_CODE (&all.udiv, UDIV);
234 XEXP (&all.udiv, 0) = &all.reg;
235 XEXP (&all.udiv, 1) = &all.reg;
237 PUT_CODE (&all.sdiv_32, DIV);
238 XEXP (&all.sdiv_32, 0) = &all.reg;
239 XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? all.cint[32] : GEN_INT (32);
241 PUT_CODE (&all.smod_32, MOD);
242 XEXP (&all.smod_32, 0) = &all.reg;
243 XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
245 PUT_CODE (&all.zext, ZERO_EXTEND);
246 XEXP (&all.zext, 0) = &all.reg;
248 PUT_CODE (&all.wide_mult, MULT);
249 XEXP (&all.wide_mult, 0) = &all.zext;
250 XEXP (&all.wide_mult, 1) = &all.zext;
252 PUT_CODE (&all.wide_lshr, LSHIFTRT);
253 XEXP (&all.wide_lshr, 0) = &all.wide_mult;
255 PUT_CODE (&all.wide_trunc, TRUNCATE);
256 XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
258 PUT_CODE (&all.shift, ASHIFT);
259 XEXP (&all.shift, 0) = &all.reg;
261 PUT_CODE (&all.shift_mult, MULT);
262 XEXP (&all.shift_mult, 0) = &all.reg;
264 PUT_CODE (&all.shift_add, PLUS);
265 XEXP (&all.shift_add, 0) = &all.shift_mult;
266 XEXP (&all.shift_add, 1) = &all.reg;
268 PUT_CODE (&all.shift_sub0, MINUS);
269 XEXP (&all.shift_sub0, 0) = &all.shift_mult;
270 XEXP (&all.shift_sub0, 1) = &all.reg;
272 PUT_CODE (&all.shift_sub1, MINUS);
273 XEXP (&all.shift_sub1, 0) = &all.reg;
274 XEXP (&all.shift_sub1, 1) = &all.shift_mult;
276 PUT_CODE (&all.trunc, TRUNCATE);
277 XEXP (&all.trunc, 0) = &all.reg;
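/* At this point ALL holds stack-allocated skeleton rtxes -- e.g.
   (plus reg reg), (mult (zero_extend reg) (zero_extend reg)),
   (truncate (lshiftrt ...)) -- whose modes are filled in per mode by
   init_expmed_one_mode above; they exist only so set_src_cost can price
   each shape.  */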
279 for (speed = 0; speed < 2; speed++)
281 crtl->maybe_hot_insn_p = speed;
282 set_zero_cost (speed, set_src_cost (const0_rtx, speed));
284 for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
285 mode = (enum machine_mode)(mode + 1))
286 init_expmed_one_mode (&all, mode, speed);
288 if (MIN_MODE_PARTIAL_INT != VOIDmode)
289 for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
290 mode = (enum machine_mode)(mode + 1))
291 init_expmed_one_mode (&all, mode, speed);
293 if (MIN_MODE_VECTOR_INT != VOIDmode)
294 for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
295 mode = (enum machine_mode)(mode + 1))
296 init_expmed_one_mode (&all, mode, speed);
299 if (alg_hash_used_p ())
301 struct alg_hash_entry *p = alg_hash_entry_ptr (0);
302 memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
304 else
305 set_alg_hash_used_p (true);
306 default_rtl_profile ();
309 /* Return an rtx representing minus the value of X.
310 MODE is the intended mode of the result,
311 useful if X is a CONST_INT. */
313 rtx
314 negate_rtx (enum machine_mode mode, rtx x)
316 rtx result = simplify_unary_operation (NEG, mode, x, mode);
318 if (result == 0)
319 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
321 return result;
324 /* Adjust bitfield memory MEM so that it points to the first unit of mode
325 MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
326 If MODE is BLKmode, return a reference to every byte in the bitfield.
327 Set *NEW_BITNUM to the bit position of the field within the new memory. */
329 static rtx
330 narrow_bit_field_mem (rtx mem, enum machine_mode mode,
331 unsigned HOST_WIDE_INT bitsize,
332 unsigned HOST_WIDE_INT bitnum,
333 unsigned HOST_WIDE_INT *new_bitnum)
335 if (mode == BLKmode)
337 *new_bitnum = bitnum % BITS_PER_UNIT;
338 HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
339 HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
340 / BITS_PER_UNIT);
341 return adjust_bitfield_address_size (mem, mode, offset, size);
343 else
345 unsigned int unit = GET_MODE_BITSIZE (mode);
346 *new_bitnum = bitnum % unit;
347 HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
348 return adjust_bitfield_address (mem, mode, offset);
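/* Worked example (values assumed, not from the source): narrowing a 3-bit
   field at BITNUM 37 to a 16-bit mode gives *NEW_BITNUM = 37 % 16 = 5 and a
   byte offset of (37 - 5) / 8 = 4, i.e. the MEM is advanced four bytes and
   the field starts five bits into the new unit.  */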
352 /* The caller wants to perform insertion or extraction PATTERN on a
353 bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
354 BITREGION_START and BITREGION_END are as for store_bit_field
355 and FIELDMODE is the natural mode of the field.
357 Search for a mode that is compatible with the memory access
358 restrictions and (where applicable) with a register insertion or
359 extraction. Return the new memory on success, storing the adjusted
360 bit position in *NEW_BITNUM. Return null otherwise. */
362 static rtx
363 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
364 rtx op0, HOST_WIDE_INT bitsize,
365 HOST_WIDE_INT bitnum,
366 unsigned HOST_WIDE_INT bitregion_start,
367 unsigned HOST_WIDE_INT bitregion_end,
368 enum machine_mode fieldmode,
369 unsigned HOST_WIDE_INT *new_bitnum)
371 bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
372 bitregion_end, MEM_ALIGN (op0),
373 MEM_VOLATILE_P (op0));
374 enum machine_mode best_mode;
375 if (iter.next_mode (&best_mode))
377 /* We can use a memory in BEST_MODE. See whether this is true for
378 any wider modes. All other things being equal, we prefer to
379 use the widest mode possible because it tends to expose more
380 CSE opportunities. */
381 if (!iter.prefer_smaller_modes ())
383 /* Limit the search to the mode required by the corresponding
384 register insertion or extraction instruction, if any. */
385 enum machine_mode limit_mode = word_mode;
386 extraction_insn insn;
387 if (get_best_reg_extraction_insn (&insn, pattern,
388 GET_MODE_BITSIZE (best_mode),
389 fieldmode))
390 limit_mode = insn.field_mode;
392 enum machine_mode wider_mode;
393 while (iter.next_mode (&wider_mode)
394 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
395 best_mode = wider_mode;
397 return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
398 new_bitnum);
400 return NULL_RTX;
403 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
404 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
405 offset is then BITNUM / BITS_PER_UNIT. */
407 static bool
408 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
409 unsigned HOST_WIDE_INT bitsize,
410 enum machine_mode struct_mode)
412 if (BYTES_BIG_ENDIAN)
413 return (bitnum % BITS_PER_UNIT == 0
414 && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
415 || (bitnum + bitsize) % BITS_PER_WORD == 0));
416 else
417 return bitnum % BITS_PER_WORD == 0;
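/* For example (assumed targets): on a little-endian machine with 32-bit
   words, a field starting at bit 0 or bit 32 of a DImode structure is a
   lowpart; on a big-endian machine the field must instead be byte-aligned
   and end at the structure or a word boundary, e.g. an 8-bit field at
   bits 56..63 of DImode.  */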
420 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
421 bit number BITNUM can be treated as a simple value of mode MODE. */
423 static bool
424 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
425 unsigned HOST_WIDE_INT bitnum, enum machine_mode mode)
427 return (MEM_P (op0)
428 && bitnum % BITS_PER_UNIT == 0
429 && bitsize == GET_MODE_BITSIZE (mode)
430 && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
431 || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
432 && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
435 /* Try to use instruction INSV to store VALUE into a field of OP0.
436 BITSIZE and BITNUM are as for store_bit_field. */
438 static bool
439 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
440 unsigned HOST_WIDE_INT bitsize,
441 unsigned HOST_WIDE_INT bitnum, rtx value)
443 struct expand_operand ops[4];
444 rtx value1;
445 rtx xop0 = op0;
446 rtx last = get_last_insn ();
447 bool copy_back = false;
449 enum machine_mode op_mode = insv->field_mode;
450 unsigned int unit = GET_MODE_BITSIZE (op_mode);
451 if (bitsize == 0 || bitsize > unit)
452 return false;
454 if (MEM_P (xop0))
455 /* Get a reference to the first byte of the field. */
456 xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
457 &bitnum);
458 else
460 /* Convert from counting within OP0 to counting in OP_MODE. */
461 if (BYTES_BIG_ENDIAN)
462 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
464 /* If xop0 is a register, we need it in OP_MODE
465 to make it acceptable to the format of insv. */
466 if (GET_CODE (xop0) == SUBREG)
467 /* We can't just change the mode, because this might clobber op0,
468 and we will need the original value of op0 if insv fails. */
469 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
470 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
471 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
474 /* If the destination is a paradoxical subreg such that we need a
475 truncate to the inner mode, perform the insertion on a temporary and
476 truncate the result to the original destination. Note that we can't
477 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
478 X) 0)) is (reg:N X). */
479 if (GET_CODE (xop0) == SUBREG
480 && REG_P (SUBREG_REG (xop0))
481 && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
482 op_mode))
484 rtx tem = gen_reg_rtx (op_mode);
485 emit_move_insn (tem, xop0);
486 xop0 = tem;
487 copy_back = true;
490 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
491 "backwards" from the size of the unit we are inserting into.
492 Otherwise, we count bits from the most significant on a
493 BYTES/BITS_BIG_ENDIAN machine. */
495 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
496 bitnum = unit - bitsize - bitnum;
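/* Illustrative numbers (assumed): when the two endianness macros differ and
   UNIT == 32, an 8-bit field at BITNUM 4 is renumbered to 32 - 8 - 4 == 20,
   i.e. the same field counted from the opposite end of the unit.  */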
498 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
499 value1 = value;
500 if (GET_MODE (value) != op_mode)
502 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
504 /* Optimization: Don't bother really extending VALUE
505 if it has all the bits we will actually use. However,
506 if we must narrow it, be sure we do it correctly. */
508 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
510 rtx tmp;
512 tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
513 if (! tmp)
514 tmp = simplify_gen_subreg (op_mode,
515 force_reg (GET_MODE (value),
516 value1),
517 GET_MODE (value), 0);
518 value1 = tmp;
520 else
521 value1 = gen_lowpart (op_mode, value1);
523 else if (CONST_INT_P (value))
524 value1 = gen_int_mode (INTVAL (value), op_mode);
525 else
526 /* Parse phase is supposed to make VALUE's data type
527 match that of the component reference, which is a type
528 at least as wide as the field; so VALUE should have
529 a mode that corresponds to that type. */
530 gcc_assert (CONSTANT_P (value));
533 create_fixed_operand (&ops[0], xop0);
534 create_integer_operand (&ops[1], bitsize);
535 create_integer_operand (&ops[2], bitnum);
536 create_input_operand (&ops[3], value1, op_mode);
537 if (maybe_expand_insn (insv->icode, 4, ops))
539 if (copy_back)
540 convert_move (op0, xop0, true);
541 return true;
543 delete_insns_since (last);
544 return false;
547 /* A subroutine of store_bit_field, with the same arguments. Return true
548 if the operation could be implemented.
550 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
551 no other way of implementing the operation. If FALLBACK_P is false,
552 return false instead. */
554 static bool
555 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
556 unsigned HOST_WIDE_INT bitnum,
557 unsigned HOST_WIDE_INT bitregion_start,
558 unsigned HOST_WIDE_INT bitregion_end,
559 enum machine_mode fieldmode,
560 rtx value, bool fallback_p)
562 rtx op0 = str_rtx;
563 rtx orig_value;
565 while (GET_CODE (op0) == SUBREG)
567 /* The following line once was done only if WORDS_BIG_ENDIAN,
568 but I think that is a mistake. WORDS_BIG_ENDIAN is
569 meaningful at a much higher level; when structures are copied
570 between memory and regs, the higher-numbered regs
571 always get higher addresses. */
572 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
573 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
574 int byte_offset = 0;
576 /* Paradoxical subregs need special handling on big endian machines. */
577 if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
579 int difference = inner_mode_size - outer_mode_size;
581 if (WORDS_BIG_ENDIAN)
582 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
583 if (BYTES_BIG_ENDIAN)
584 byte_offset += difference % UNITS_PER_WORD;
586 else
587 byte_offset = SUBREG_BYTE (op0);
589 bitnum += byte_offset * BITS_PER_UNIT;
590 op0 = SUBREG_REG (op0);
593 /* No action is needed if the target is a register and if the field
594 lies completely outside that register. This can occur if the source
595 code contains an out-of-bounds access to a small array. */
596 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
597 return true;
599 /* Use vec_set patterns for inserting parts of vectors whenever
600 available. */
601 if (VECTOR_MODE_P (GET_MODE (op0))
602 && !MEM_P (op0)
603 && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
604 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
605 && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
606 && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
608 struct expand_operand ops[3];
609 enum machine_mode outermode = GET_MODE (op0);
610 enum machine_mode innermode = GET_MODE_INNER (outermode);
611 enum insn_code icode = optab_handler (vec_set_optab, outermode);
612 int pos = bitnum / GET_MODE_BITSIZE (innermode);
614 create_fixed_operand (&ops[0], op0);
615 create_input_operand (&ops[1], value, innermode);
616 create_integer_operand (&ops[2], pos);
617 if (maybe_expand_insn (icode, 3, ops))
618 return true;
621 /* If the target is a register, overwriting the entire object, or storing
622 a full-word or multi-word field can be done with just a SUBREG. */
623 if (!MEM_P (op0)
624 && bitsize == GET_MODE_BITSIZE (fieldmode)
625 && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
626 || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
628 /* Use the subreg machinery either to narrow OP0 to the required
629 words or to cope with mode punning between equal-sized modes. */
630 rtx sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
631 bitnum / BITS_PER_UNIT);
632 if (sub)
634 emit_move_insn (sub, value);
635 return true;
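/* E.g. (an assumed case) storing a full SImode value at bit 0 of a DImode
   pseudo on a little-endian target reduces to a plain move into
   (subreg:SI (reg:DI ...) 0).  */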
639 /* If the target is memory, storing any naturally aligned field can be
640 done with a simple store. For targets that support fast unaligned
641 memory, any naturally sized, unit aligned field can be done directly. */
642 if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
644 op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
645 emit_move_insn (op0, value);
646 return true;
649 /* Make sure we are playing with integral modes. Pun with subregs
650 if we aren't. This must come after the entire register case above,
651 since that case is valid for any mode. The following cases are only
652 valid for integral modes. */
654 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
655 if (imode != GET_MODE (op0))
657 if (MEM_P (op0))
658 op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
659 else
661 gcc_assert (imode != BLKmode);
662 op0 = gen_lowpart (imode, op0);
667 /* Storing an lsb-aligned field in a register
668 can be done with a movstrict instruction. */
670 if (!MEM_P (op0)
671 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
672 && bitsize == GET_MODE_BITSIZE (fieldmode)
673 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
675 struct expand_operand ops[2];
676 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
677 rtx arg0 = op0;
678 unsigned HOST_WIDE_INT subreg_off;
680 if (GET_CODE (arg0) == SUBREG)
682 /* Else we've got some float mode source being extracted into
683 a different float mode destination -- this combination of
684 subregs results in Severe Tire Damage. */
685 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
686 || GET_MODE_CLASS (fieldmode) == MODE_INT
687 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
688 arg0 = SUBREG_REG (arg0);
691 subreg_off = bitnum / BITS_PER_UNIT;
692 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
694 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
696 create_fixed_operand (&ops[0], arg0);
697 /* Shrink the source operand to FIELDMODE. */
698 create_convert_operand_to (&ops[1], value, fieldmode, false);
699 if (maybe_expand_insn (icode, 2, ops))
700 return true;
704 /* Handle fields bigger than a word. */
706 if (bitsize > BITS_PER_WORD)
708 /* Here we transfer the words of the field
709 in the order least significant first.
710 This is because the most significant word is the one which may
711 be less than full.
712 However, only do that if the value is not BLKmode. */
714 unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
715 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
716 unsigned int i;
717 rtx last;
719 /* This is the mode we must force value to, so that there will be enough
720 subwords to extract. Note that fieldmode will often (always?) be
721 VOIDmode, because that is what store_field uses to indicate that this
722 is a bit field, but passing VOIDmode to operand_subword_force
723 is not allowed. */
724 fieldmode = GET_MODE (value);
725 if (fieldmode == VOIDmode)
726 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
728 last = get_last_insn ();
729 for (i = 0; i < nwords; i++)
731 /* If I is 0, use the low-order word in both field and target;
732 if I is 1, use the next to lowest word; and so on. */
733 unsigned int wordnum = (backwards
734 ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
735 - i - 1
736 : i);
737 unsigned int bit_offset = (backwards
738 ? MAX ((int) bitsize - ((int) i + 1)
739 * BITS_PER_WORD,
740 0)
741 : (int) i * BITS_PER_WORD);
742 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
743 unsigned HOST_WIDE_INT new_bitsize =
744 MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
746 /* If the remaining chunk doesn't have full wordsize we have
747 to make sure that for big endian machines the higher order
748 bits are used. */
749 if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
750 value_word = simplify_expand_binop (word_mode, lshr_optab,
751 value_word,
752 GEN_INT (BITS_PER_WORD
753 - new_bitsize),
754 NULL_RTX, true,
755 OPTAB_LIB_WIDEN);
757 if (!store_bit_field_1 (op0, new_bitsize,
758 bitnum + bit_offset,
759 bitregion_start, bitregion_end,
760 word_mode,
761 value_word, fallback_p))
763 delete_insns_since (last);
764 return false;
767 return true;
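/* For instance (illustrative only), on a target with 32-bit words a 70-bit
   field is split into nwords == 3 word-sized pieces (32, 32 and the
   remaining 6 bits), each stored by a recursive call to
   store_bit_field_1.  */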
770 /* If VALUE has a floating-point or complex mode, access it as an
771 integer of the corresponding size. This can occur on a machine
772 with 64 bit registers that uses SFmode for float. It can also
773 occur for unaligned float or complex fields. */
774 orig_value = value;
775 if (GET_MODE (value) != VOIDmode
776 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
777 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
779 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
780 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
783 /* If OP0 is a multi-word register, narrow it to the affected word.
784 If the region spans two words, defer to store_split_bit_field. */
785 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
787 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
788 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
789 gcc_assert (op0);
790 bitnum %= BITS_PER_WORD;
791 if (bitnum + bitsize > BITS_PER_WORD)
793 if (!fallback_p)
794 return false;
796 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
797 bitregion_end, value);
798 return true;
802 /* From here on we can assume that the field to be stored in fits
803 within a word. If the destination is a register, it too fits
804 in a word. */
806 extraction_insn insv;
807 if (!MEM_P (op0)
808 && get_best_reg_extraction_insn (&insv, EP_insv,
809 GET_MODE_BITSIZE (GET_MODE (op0)),
810 fieldmode)
811 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
812 return true;
814 /* If OP0 is a memory, try copying it to a register and seeing if a
815 cheap register alternative is available. */
816 if (MEM_P (op0))
818 /* Do not use unaligned memory insvs for volatile bitfields when
819 -fstrict-volatile-bitfields is in effect. */
820 if (!(MEM_VOLATILE_P (op0)
821 && flag_strict_volatile_bitfields > 0)
822 && get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
823 fieldmode)
824 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
825 return true;
827 rtx last = get_last_insn ();
829 /* Try loading part of OP0 into a register, inserting the bitfield
830 into that, and then copying the result back to OP0. */
831 unsigned HOST_WIDE_INT bitpos;
832 rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
833 bitregion_start, bitregion_end,
834 fieldmode, &bitpos);
835 if (xop0)
837 rtx tempreg = copy_to_reg (xop0);
838 if (store_bit_field_1 (tempreg, bitsize, bitpos,
839 bitregion_start, bitregion_end,
840 fieldmode, orig_value, false))
842 emit_move_insn (xop0, tempreg);
843 return true;
845 delete_insns_since (last);
849 if (!fallback_p)
850 return false;
852 store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
853 bitregion_end, value);
854 return true;
857 /* Generate code to store value from rtx VALUE
858 into a bit-field within structure STR_RTX
859 containing BITSIZE bits starting at bit BITNUM.
861 BITREGION_START is bitpos of the first bitfield in this region.
862 BITREGION_END is the bitpos of the ending bitfield in this region.
863 These two fields are 0 if the C++ memory model does not apply,
864 or if we are not interested in keeping track of bitfield regions.
866 FIELDMODE is the machine-mode of the FIELD_DECL node for this field. */
868 void
869 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
870 unsigned HOST_WIDE_INT bitnum,
871 unsigned HOST_WIDE_INT bitregion_start,
872 unsigned HOST_WIDE_INT bitregion_end,
873 enum machine_mode fieldmode,
874 rtx value)
876 /* Under the C++0x memory model, we must not touch bits outside the
877 bit region. Adjust the address to start at the beginning of the
878 bit region. */
879 if (MEM_P (str_rtx) && bitregion_start > 0)
881 enum machine_mode bestmode;
882 HOST_WIDE_INT offset, size;
884 gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
886 offset = bitregion_start / BITS_PER_UNIT;
887 bitnum -= bitregion_start;
888 size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
889 bitregion_end -= bitregion_start;
890 bitregion_start = 0;
891 bestmode = get_best_mode (bitsize, bitnum,
892 bitregion_start, bitregion_end,
893 MEM_ALIGN (str_rtx), VOIDmode,
894 MEM_VOLATILE_P (str_rtx));
895 str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
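/* Worked example (values assumed): with BITREGION_START == 32 the MEM is
   advanced 32 / 8 == 4 bytes, BITNUM and BITREGION_END are reduced by 32,
   and the region is renumbered to start at 0, so the code below can never
   touch storage before the region.  */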
898 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
899 bitregion_start, bitregion_end,
900 fieldmode, value, true))
901 gcc_unreachable ();
904 /* Use shifts and boolean operations to store VALUE into a bit field of
905 width BITSIZE in OP0, starting at bit BITNUM. */
907 static void
908 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
909 unsigned HOST_WIDE_INT bitnum,
910 unsigned HOST_WIDE_INT bitregion_start,
911 unsigned HOST_WIDE_INT bitregion_end,
912 rtx value)
914 enum machine_mode mode;
915 rtx temp;
916 int all_zero = 0;
917 int all_one = 0;
919 /* There is a case not handled here:
920 a structure with a known alignment of just a halfword
921 and a field split across two aligned halfwords within the structure.
922 Or likewise a structure with a known alignment of just a byte
923 and a field split across two bytes.
924 Such cases are not supposed to be able to occur. */
926 if (MEM_P (op0))
928 unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;
930 if (bitregion_end)
931 maxbits = bitregion_end - bitregion_start + 1;
933 /* Get the proper mode to use for this field. We want a mode that
934 includes the entire field. If such a mode would be larger than
935 a word, we won't be doing the extraction the normal way.
936 We don't want a mode bigger than the destination. */
938 mode = GET_MODE (op0);
939 if (GET_MODE_BITSIZE (mode) == 0
940 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
941 mode = word_mode;
943 if (MEM_VOLATILE_P (op0)
944 && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
945 && GET_MODE_BITSIZE (GET_MODE (op0)) <= maxbits
946 && flag_strict_volatile_bitfields > 0)
947 mode = GET_MODE (op0);
948 else
949 mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
950 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
952 if (mode == VOIDmode)
954 /* The only way this should occur is if the field spans word
955 boundaries. */
956 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
957 bitregion_end, value);
958 return;
961 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
964 mode = GET_MODE (op0);
965 gcc_assert (SCALAR_INT_MODE_P (mode));
967 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
968 for invalid input, such as f5 from gcc.dg/pr48335-2.c. */
970 if (BYTES_BIG_ENDIAN)
971 /* BITNUM is the distance between our msb
972 and that of the containing datum.
973 Convert it to the distance from the lsb. */
974 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
976 /* Now BITNUM is always the distance between our lsb
977 and that of OP0. */
979 /* Shift VALUE left by BITNUM bits. If VALUE is not constant,
980 we must first convert its mode to MODE. */
982 if (CONST_INT_P (value))
984 HOST_WIDE_INT v = INTVAL (value);
986 if (bitsize < HOST_BITS_PER_WIDE_INT)
987 v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
989 if (v == 0)
990 all_zero = 1;
991 else if ((bitsize < HOST_BITS_PER_WIDE_INT
992 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
993 || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
994 all_one = 1;
996 value = lshift_value (mode, value, bitnum, bitsize);
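/* Worked example (assumed constants): storing the value 5 into a 3-bit
   field at BITNUM 4 keeps v == 5 (neither all-zero nor all-one) and shifts
   it into place as 5 << 4 == 0x50, ready to be IORed below.  */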
998 else
1000 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1001 && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1003 if (GET_MODE (value) != mode)
1004 value = convert_to_mode (mode, value, 1);
1006 if (must_and)
1007 value = expand_binop (mode, and_optab, value,
1008 mask_rtx (mode, 0, bitsize, 0),
1009 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1010 if (bitnum > 0)
1011 value = expand_shift (LSHIFT_EXPR, mode, value,
1012 bitnum, NULL_RTX, 1);
1015 /* Now clear the chosen bits in OP0,
1016 except that if VALUE is -1 we need not bother. */
1017 /* We keep the intermediates in registers to allow CSE to combine
1018 consecutive bitfield assignments. */
1020 temp = force_reg (mode, op0);
1022 if (! all_one)
1024 temp = expand_binop (mode, and_optab, temp,
1025 mask_rtx (mode, bitnum, bitsize, 1),
1026 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1027 temp = force_reg (mode, temp);
1030 /* Now logical-or VALUE into OP0, unless it is zero. */
1032 if (! all_zero)
1034 temp = expand_binop (mode, ior_optab, temp, value,
1035 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1036 temp = force_reg (mode, temp);
1039 if (op0 != temp)
1041 op0 = copy_rtx (op0);
1042 emit_move_insn (op0, temp);
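/* For the assumed example above, the net effect is roughly
   op0 = (op0 & ~(0x7 << 4)) | 0x50; the AND is skipped when the stored
   value is all ones and the IOR when it is all zeros.  */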
1046 /* Store a bit field that is split across multiple accessible memory objects.
1048 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1049 BITSIZE is the field width; BITPOS the position of its first bit
1050 (within the word).
1051 VALUE is the value to store.
1053 This does not yet handle fields wider than BITS_PER_WORD. */
1055 static void
1056 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1057 unsigned HOST_WIDE_INT bitpos,
1058 unsigned HOST_WIDE_INT bitregion_start,
1059 unsigned HOST_WIDE_INT bitregion_end,
1060 rtx value)
1062 unsigned int unit;
1063 unsigned int bitsdone = 0;
1065 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1066 much at a time. */
1067 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1068 unit = BITS_PER_WORD;
1069 else
1070 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1072 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1073 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1074 that VALUE might be a floating-point constant. */
1075 if (CONSTANT_P (value) && !CONST_INT_P (value))
1077 rtx word = gen_lowpart_common (word_mode, value);
1079 if (word && (value != word))
1080 value = word;
1081 else
1082 value = gen_lowpart_common (word_mode,
1083 force_reg (GET_MODE (value) != VOIDmode
1084 ? GET_MODE (value)
1085 : word_mode, value));
1088 while (bitsdone < bitsize)
1090 unsigned HOST_WIDE_INT thissize;
1091 rtx part, word;
1092 unsigned HOST_WIDE_INT thispos;
1093 unsigned HOST_WIDE_INT offset;
1095 offset = (bitpos + bitsdone) / unit;
1096 thispos = (bitpos + bitsdone) % unit;
1098 /* When region of bytes we can touch is restricted, decrease
1099 UNIT close to the end of the region as needed. */
1100 if (bitregion_end
1101 && unit > BITS_PER_UNIT
1102 && bitpos + bitsdone - thispos + unit > bitregion_end + 1)
1104 unit = unit / 2;
1105 continue;
1108 /* THISSIZE must not overrun a word boundary. Otherwise,
1109 store_fixed_bit_field will call us again, and we will mutually
1110 recurse forever. */
1111 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1112 thissize = MIN (thissize, unit - thispos);
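/* Illustrative iteration (values assumed): for a 10-bit field at BITPOS 28
   with UNIT == 32, the first pass stores 4 bits at offset 0, bit 28, and
   the second pass stores the remaining 6 bits at offset 1, bit 0.  */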
1114 if (BYTES_BIG_ENDIAN)
1116 /* Fetch successively less significant portions. */
1117 if (CONST_INT_P (value))
1118 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1119 >> (bitsize - bitsdone - thissize))
1120 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1121 else
1123 int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1124 /* The args are chosen so that the last part includes the
1125 lsb. Give extract_bit_field the value it needs (with
1126 endianness compensation) to fetch the piece we want. */
1127 part = extract_fixed_bit_field (word_mode, value, thissize,
1128 total_bits - bitsize + bitsdone,
1129 NULL_RTX, 1, false);
1132 else
1134 /* Fetch successively more significant portions. */
1135 if (CONST_INT_P (value))
1136 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1137 >> bitsdone)
1138 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1139 else
1140 part = extract_fixed_bit_field (word_mode, value, thissize,
1141 bitsdone, NULL_RTX, 1, false);
1144 /* If OP0 is a register, then handle OFFSET here.
1146 When handling multiword bitfields, extract_bit_field may pass
1147 down a word_mode SUBREG of a larger REG for a bitfield that actually
1148 crosses a word boundary. Thus, for a SUBREG, we must find
1149 the current word starting from the base register. */
1150 if (GET_CODE (op0) == SUBREG)
1152 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1153 enum machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
1154 if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1155 word = word_offset ? const0_rtx : op0;
1156 else
1157 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1158 GET_MODE (SUBREG_REG (op0)));
1159 offset = 0;
1161 else if (REG_P (op0))
1163 enum machine_mode op0_mode = GET_MODE (op0);
1164 if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1165 word = offset ? const0_rtx : op0;
1166 else
1167 word = operand_subword_force (op0, offset, GET_MODE (op0));
1168 offset = 0;
1170 else
1171 word = op0;
1173 /* OFFSET is in UNITs, and UNIT is in bits. If WORD is const0_rtx,
1174 it is just an out-of-bounds access. Ignore it. */
1175 if (word != const0_rtx)
1176 store_fixed_bit_field (word, thissize, offset * unit + thispos,
1177 bitregion_start, bitregion_end, part);
1178 bitsdone += thissize;
1182 /* A subroutine of extract_bit_field_1 that converts return value X
1183 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1184 to extract_bit_field. */
1186 static rtx
1187 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1188 enum machine_mode tmode, bool unsignedp)
1190 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1191 return x;
1193 /* If the x mode is not a scalar integral, first convert to the
1194 integer mode of that size and then access it as a floating-point
1195 value via a SUBREG. */
1196 if (!SCALAR_INT_MODE_P (tmode))
1198 enum machine_mode smode;
1200 smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1201 x = convert_to_mode (smode, x, unsignedp);
1202 x = force_reg (smode, x);
1203 return gen_lowpart (tmode, x);
1206 return convert_to_mode (tmode, x, unsignedp);
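/* E.g. (an assumed case) a value destined for SFmode is first converted to
   the integer mode of the same width (typically SImode) and then
   reinterpreted with gen_lowpart, since converting directly would change
   the bit pattern.  */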
1209 /* Try to use an ext(z)v pattern to extract a field from OP0.
1210 Return the extracted value on success, otherwise return null.
1211 EXT_MODE is the mode of the extraction and the other arguments
1212 are as for extract_bit_field. */
1214 static rtx
1215 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1216 unsigned HOST_WIDE_INT bitsize,
1217 unsigned HOST_WIDE_INT bitnum,
1218 int unsignedp, rtx target,
1219 enum machine_mode mode, enum machine_mode tmode)
1221 struct expand_operand ops[4];
1222 rtx spec_target = target;
1223 rtx spec_target_subreg = 0;
1224 enum machine_mode ext_mode = extv->field_mode;
1225 unsigned unit = GET_MODE_BITSIZE (ext_mode);
1227 if (bitsize == 0 || unit < bitsize)
1228 return NULL_RTX;
1230 if (MEM_P (op0))
1231 /* Get a reference to the first byte of the field. */
1232 op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1233 &bitnum);
1234 else
1236 /* Convert from counting within OP0 to counting in EXT_MODE. */
1237 if (BYTES_BIG_ENDIAN)
1238 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1240 /* If op0 is a register, we need it in EXT_MODE to make it
1241 acceptable to the format of ext(z)v. */
1242 if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1243 return NULL_RTX;
1244 if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1245 op0 = gen_lowpart_SUBREG (ext_mode, op0);
1248 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1249 "backwards" from the size of the unit we are extracting from.
1250 Otherwise, we count bits from the most significant on a
1251 BYTES/BITS_BIG_ENDIAN machine. */
1253 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1254 bitnum = unit - bitsize - bitnum;
1256 if (target == 0)
1257 target = spec_target = gen_reg_rtx (tmode);
1259 if (GET_MODE (target) != ext_mode)
1261 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1262 between the mode of the extraction (word_mode) and the target
1263 mode. Instead, create a temporary and use convert_move to set
1264 the target. */
1265 if (REG_P (target)
1266 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1268 target = gen_lowpart (ext_mode, target);
1269 if (GET_MODE_PRECISION (ext_mode)
1270 > GET_MODE_PRECISION (GET_MODE (spec_target)))
1271 spec_target_subreg = target;
1273 else
1274 target = gen_reg_rtx (ext_mode);
1277 create_output_operand (&ops[0], target, ext_mode);
1278 create_fixed_operand (&ops[1], op0);
1279 create_integer_operand (&ops[2], bitsize);
1280 create_integer_operand (&ops[3], bitnum);
1281 if (maybe_expand_insn (extv->icode, 4, ops))
1283 target = ops[0].value;
1284 if (target == spec_target)
1285 return target;
1286 if (target == spec_target_subreg)
1287 return spec_target;
1288 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1290 return NULL_RTX;
1293 /* A subroutine of extract_bit_field, with the same arguments.
1294 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1295 if we can find no other means of implementing the operation.
1296 If FALLBACK_P is false, return NULL instead. */
1298 static rtx
1299 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1300 unsigned HOST_WIDE_INT bitnum,
1301 int unsignedp, bool packedp, rtx target,
1302 enum machine_mode mode, enum machine_mode tmode,
1303 bool fallback_p)
1305 rtx op0 = str_rtx;
1306 enum machine_mode int_mode;
1307 enum machine_mode mode1;
1309 if (tmode == VOIDmode)
1310 tmode = mode;
1312 while (GET_CODE (op0) == SUBREG)
1314 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1315 op0 = SUBREG_REG (op0);
1318 /* If we have an out-of-bounds access to a register, just return an
1319 uninitialized register of the required mode. This can occur if the
1320 source code contains an out-of-bounds access to a small array. */
1321 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1322 return gen_reg_rtx (tmode);
1324 if (REG_P (op0)
1325 && mode == GET_MODE (op0)
1326 && bitnum == 0
1327 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1329 /* We're trying to extract a full register from itself. */
1330 return op0;
1333 /* See if we can get a better vector mode before extracting. */
1334 if (VECTOR_MODE_P (GET_MODE (op0))
1335 && !MEM_P (op0)
1336 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1338 enum machine_mode new_mode;
1340 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1341 new_mode = MIN_MODE_VECTOR_FLOAT;
1342 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1343 new_mode = MIN_MODE_VECTOR_FRACT;
1344 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1345 new_mode = MIN_MODE_VECTOR_UFRACT;
1346 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1347 new_mode = MIN_MODE_VECTOR_ACCUM;
1348 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1349 new_mode = MIN_MODE_VECTOR_UACCUM;
1350 else
1351 new_mode = MIN_MODE_VECTOR_INT;
1353 for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1354 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1355 && targetm.vector_mode_supported_p (new_mode))
1356 break;
1357 if (new_mode != VOIDmode)
1358 op0 = gen_lowpart (new_mode, op0);
1361 /* Use vec_extract patterns for extracting parts of vectors whenever
1362 available. */
1363 if (VECTOR_MODE_P (GET_MODE (op0))
1364 && !MEM_P (op0)
1365 && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1366 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1367 == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1369 struct expand_operand ops[3];
1370 enum machine_mode outermode = GET_MODE (op0);
1371 enum machine_mode innermode = GET_MODE_INNER (outermode);
1372 enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1373 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1375 create_output_operand (&ops[0], target, innermode);
1376 create_input_operand (&ops[1], op0, outermode);
1377 create_integer_operand (&ops[2], pos);
1378 if (maybe_expand_insn (icode, 3, ops))
1380 target = ops[0].value;
1381 if (GET_MODE (target) != mode)
1382 return gen_lowpart (tmode, target);
1383 return target;
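/* For illustration (values assumed): extracting bits 32..63 of a V4SImode
   vector passes the divisibility check above and becomes a vec_extract of
   element POS == 1.  */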
1387 /* Make sure we are playing with integral modes. Pun with subregs
1388 if we aren't. */
1390 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1391 if (imode != GET_MODE (op0))
1393 if (MEM_P (op0))
1394 op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
1395 else if (imode != BLKmode)
1397 op0 = gen_lowpart (imode, op0);
1399 /* If we got a SUBREG, force it into a register since we
1400 aren't going to be able to do another SUBREG on it. */
1401 if (GET_CODE (op0) == SUBREG)
1402 op0 = force_reg (imode, op0);
1404 else if (REG_P (op0))
1406 rtx reg, subreg;
1407 imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1408 MODE_INT);
1409 reg = gen_reg_rtx (imode);
1410 subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1411 emit_move_insn (subreg, op0);
1412 op0 = reg;
1413 bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1415 else
1417 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1418 rtx mem = assign_stack_temp (GET_MODE (op0), size);
1419 emit_move_insn (mem, op0);
1420 op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1425 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1426 If that's wrong, the solution is to test for it and set TARGET to 0
1427 if needed. */
1429 /* If the bitfield is volatile, we need to make sure the access
1430 remains on a type-aligned boundary. */
1431 if (GET_CODE (op0) == MEM
1432 && MEM_VOLATILE_P (op0)
1433 && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
1434 && flag_strict_volatile_bitfields > 0)
1435 goto no_subreg_mode_swap;
1437 /* Only scalar integer modes can be converted via subregs. There is an
1438 additional problem for FP modes here in that they can have a precision
1439 which is different from the size. mode_for_size uses precision, but
1440 we want a mode based on the size, so we must avoid calling it for FP
1441 modes. */
1442 mode1 = mode;
1443 if (SCALAR_INT_MODE_P (tmode))
1445 enum machine_mode try_mode = mode_for_size (bitsize,
1446 GET_MODE_CLASS (tmode), 0);
1447 if (try_mode != BLKmode)
1448 mode1 = try_mode;
1450 gcc_assert (mode1 != BLKmode);
1452 /* Extraction of a full MODE1 value can be done with a subreg as long
1453 as the least significant bit of the value is the least significant
1454 bit of either OP0 or a word of OP0. */
1455 if (!MEM_P (op0)
1456 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1457 && bitsize == GET_MODE_BITSIZE (mode1)
1458 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1460 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1461 bitnum / BITS_PER_UNIT);
1462 if (sub)
1463 return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1466 /* Extraction of a full MODE1 value can be done with a load as long as
1467 the field is on a byte boundary and is sufficiently aligned. */
1468 if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1470 op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1471 return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1474 no_subreg_mode_swap:
1476 /* Handle fields bigger than a word. */
1478 if (bitsize > BITS_PER_WORD)
1480 /* Here we transfer the words of the field
1481 in the order least significant first.
1482 This is because the most significant word is the one which may
1483 be less than full. */
1485 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1486 unsigned int i;
1487 rtx last;
1489 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1490 target = gen_reg_rtx (mode);
1492 /* Indicate for flow that the entire target reg is being set. */
1493 emit_clobber (target);
1495 last = get_last_insn ();
1496 for (i = 0; i < nwords; i++)
1498 /* If I is 0, use the low-order word in both field and target;
1499 if I is 1, use the next to lowest word; and so on. */
1500 /* Word number in TARGET to use. */
1501 unsigned int wordnum
1502 = (WORDS_BIG_ENDIAN
1503 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1504 : i);
1505 /* Offset from start of field in OP0. */
1506 unsigned int bit_offset = (WORDS_BIG_ENDIAN
1507 ? MAX (0, ((int) bitsize - ((int) i + 1)
1508 * (int) BITS_PER_WORD))
1509 : (int) i * BITS_PER_WORD);
1510 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1511 rtx result_part
1512 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1513 bitsize - i * BITS_PER_WORD),
1514 bitnum + bit_offset, 1, false, target_part,
1515 mode, word_mode, fallback_p);
1517 gcc_assert (target_part);
1518 if (!result_part)
1520 delete_insns_since (last);
1521 return NULL;
1524 if (result_part != target_part)
1525 emit_move_insn (target_part, result_part);
1528 if (unsignedp)
1530 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1531 need to be zero'd out. */
1532 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1534 unsigned int i, total_words;
1536 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1537 for (i = nwords; i < total_words; i++)
1538 emit_move_insn
1539 (operand_subword (target,
1540 WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1541 1, VOIDmode),
1542 const0_rtx);
1544 return target;
1547 /* Signed bit field: sign-extend with two arithmetic shifts. */
1548 target = expand_shift (LSHIFT_EXPR, mode, target,
1549 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1550 return expand_shift (RSHIFT_EXPR, mode, target,
1551 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
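/* E.g. (assumed sizes) on a target with 32-bit words, a 40-bit signed field
   extracted into DImode uses shift counts of 64 - 40 == 24: shifting left
   and then arithmetically right by 24 replicates the field's sign bit
   through the upper bits.  */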
1554 /* If OP0 is a multi-word register, narrow it to the affected word.
1555 If the region spans two words, defer to extract_split_bit_field. */
1556 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1558 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1559 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1560 bitnum %= BITS_PER_WORD;
1561 if (bitnum + bitsize > BITS_PER_WORD)
1563 if (!fallback_p)
1564 return NULL_RTX;
1565 target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1566 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1570 /* From here on we know the desired field is smaller than a word.
1571 If OP0 is a register, it too fits within a word. */
1572 enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1573 extraction_insn extv;
1574 if (!MEM_P (op0)
1575 && get_best_reg_extraction_insn (&extv, pattern, bitnum + bitsize,
1576 tmode))
1578 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1579 unsignedp, target, mode,
1580 tmode);
1581 if (result)
1582 return result;
1585 /* If OP0 is a memory, try copying it to a register and seeing if a
1586 cheap register alternative is available. */
1587 if (MEM_P (op0))
1589 /* Do not use extv/extzv for volatile bitfields when
1590 -fstrict-volatile-bitfields is in effect. */
1591 if (!(MEM_VOLATILE_P (op0) && flag_strict_volatile_bitfields > 0)
1592 && get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1593 tmode))
1595 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1596 bitnum, unsignedp,
1597 target, mode,
1598 tmode);
1599 if (result)
1600 return result;
1603 rtx last = get_last_insn ();
1605 /* Try loading part of OP0 into a register and extracting the
1606 bitfield from that. */
1607 unsigned HOST_WIDE_INT bitpos;
1608 rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1609 0, 0, tmode, &bitpos);
1610 if (xop0)
1612 xop0 = copy_to_reg (xop0);
1613 rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1614 unsignedp, packedp, target,
1615 mode, tmode, false);
1616 if (result)
1617 return result;
1618 delete_insns_since (last);
1622 if (!fallback_p)
1623 return NULL;
1625 /* Find a correspondingly-sized integer field, so we can apply
1626 shifts and masks to it. */
1627 int_mode = int_mode_for_mode (tmode);
1628 if (int_mode == BLKmode)
1629 int_mode = int_mode_for_mode (mode);
1630 /* Should probably push op0 out to memory and then do a load. */
1631 gcc_assert (int_mode != BLKmode);
1633 target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum,
1634 target, unsignedp, packedp);
1635 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1638 /* Generate code to extract a byte-field from STR_RTX
1639 containing BITSIZE bits, starting at BITNUM,
1640 and put it in TARGET if possible (if TARGET is nonzero).
1641 Regardless of TARGET, we return the rtx for where the value is placed.
1643 STR_RTX is the structure containing the byte (a REG or MEM).
1644 UNSIGNEDP is nonzero if this is an unsigned bit field.
1645 PACKEDP is nonzero if the field has the packed attribute.
1646 MODE is the natural mode of the field value once extracted.
1647 TMODE is the mode the caller would like the value to have;
1648 but the value may be returned with type MODE instead.
1650 If a TARGET is specified and we can store in it at no extra cost,
1651 we do so, and return TARGET.
1652 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1653 if they are equally easy. */
1655 rtx
1656 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1657 unsigned HOST_WIDE_INT bitnum, int unsignedp, bool packedp,
1658 rtx target, enum machine_mode mode, enum machine_mode tmode)
1660 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp, packedp,
1661 target, mode, tmode, true);
1664 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1665 from bit BITNUM of OP0.
1667 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1668 PACKEDP is true if the field has the packed attribute.
1670 If TARGET is nonzero, attempts to store the value there
1671 and return TARGET, but this is not guaranteed.
1672 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
1674 static rtx
1675 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1676 unsigned HOST_WIDE_INT bitsize,
1677 unsigned HOST_WIDE_INT bitnum, rtx target,
1678 int unsignedp, bool packedp)
1680 enum machine_mode mode;
1682 if (MEM_P (op0))
1684 /* Get the proper mode to use for this field. We want a mode that
1685 includes the entire field. If such a mode would be larger than
1686 a word, we won't be doing the extraction the normal way. */
1688 if (MEM_VOLATILE_P (op0)
1689 && flag_strict_volatile_bitfields > 0)
1691 if (GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1692 mode = GET_MODE (op0);
1693 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1694 mode = GET_MODE (target);
1695 else
1696 mode = tmode;
1698 else
1699 mode = get_best_mode (bitsize, bitnum, 0, 0,
1700 MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1702 if (mode == VOIDmode)
1703 /* The only way this should occur is if the field spans word
1704 boundaries. */
1705 return extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1707 unsigned int total_bits = GET_MODE_BITSIZE (mode);
1708 HOST_WIDE_INT bit_offset = bitnum - bitnum % total_bits;
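/* Illustrative numbers (assumed): with TOTAL_BITS == 32 and BITNUM == 70,
   BIT_OFFSET becomes 70 - 70 % 32 == 64 bits, i.e. an eight-byte advance,
   leaving BITNUM == 6 within the new unit once the address is adjusted
   below.  */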
1710 /* If we're accessing a volatile MEM, we can't apply BIT_OFFSET
1711 if it results in a multi-word access where we otherwise wouldn't
1712 have one. So, check for that case here. */
1713 if (MEM_P (op0)
1714 && MEM_VOLATILE_P (op0)
1715 && flag_strict_volatile_bitfields > 0
1716 && bitnum % BITS_PER_UNIT + bitsize <= total_bits
1717 && bitnum % GET_MODE_BITSIZE (mode) + bitsize > total_bits)
1719 if (STRICT_ALIGNMENT)
1721 static bool informed_about_misalignment = false;
1723 if (packedp)
1725 if (bitsize == total_bits)
1726 warning_at (input_location, OPT_fstrict_volatile_bitfields,
1727 "multiple accesses to volatile structure"
1728 " member because of packed attribute");
1729 else
1730 warning_at (input_location, OPT_fstrict_volatile_bitfields,
1731 "multiple accesses to volatile structure"
1732 " bitfield because of packed attribute");
1734 return extract_split_bit_field (op0, bitsize, bitnum,
1735 unsignedp);
1738 if (bitsize == total_bits)
1739 warning_at (input_location, OPT_fstrict_volatile_bitfields,
1740 "mis-aligned access used for structure member");
1741 else
1742 warning_at (input_location, OPT_fstrict_volatile_bitfields,
1743 "mis-aligned access used for structure bitfield");
1745 if (! informed_about_misalignment)
1747 informed_about_misalignment = true;
1748 inform (input_location,
1749 "when a volatile object spans multiple type-sized"
1750 " locations, the compiler must choose between using"
1751 " a single mis-aligned access to preserve the"
1752 " volatility, or using multiple aligned accesses"
1753 " to avoid runtime faults; this code may fail at"
1754 " runtime if the hardware does not allow this"
1755 " access");
1758 bit_offset = bitnum - bitnum % BITS_PER_UNIT;
1760 op0 = adjust_bitfield_address (op0, mode, bit_offset / BITS_PER_UNIT);
1761 bitnum -= bit_offset;
1764 mode = GET_MODE (op0);
1765 gcc_assert (SCALAR_INT_MODE_P (mode));
1767 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1768 for invalid input, such as extract equivalent of f5 from
1769 gcc.dg/pr48335-2.c. */
1771 if (BYTES_BIG_ENDIAN)
1772 /* BITNUM is the distance between our msb and that of OP0.
1773 Convert it to the distance from the lsb. */
1774 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1776 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
1777 We have reduced the big-endian case to the little-endian case. */
1779 if (unsignedp)
1781 if (bitnum)
1783 /* If the field does not already start at the lsb,
1784 shift it so it does. */
1785 /* Maybe propagate the target for the shift. */
1786 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1787 if (tmode != mode)
1788 subtarget = 0;
1789 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
1791 /* Convert the value to the desired mode. */
1792 if (mode != tmode)
1793 op0 = convert_to_mode (tmode, op0, 1);
1795 /* Unless the msb of the field used to be the msb when we shifted,
1796 mask out the upper bits. */
1798 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
1799 return expand_binop (GET_MODE (op0), and_optab, op0,
1800 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1801 target, 1, OPTAB_LIB_WIDEN);
1802 return op0;
1805 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1806 then arithmetic-shift its lsb to the lsb of the word. */
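/* Illustrative example only: a 3-bit signed field starting at bit 2 of an
   8-bit value is shifted left by 8 - (3 + 2) = 3 so its msb lands in
   bit 7, then arithmetic-shifted right by 8 - 3 = 5.  */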
1807 op0 = force_reg (mode, op0);
1809 /* Find the narrowest integer mode that contains the field. */
1811 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1812 mode = GET_MODE_WIDER_MODE (mode))
1813 if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
1815 op0 = convert_to_mode (mode, op0, 0);
1816 break;
1819 if (mode != tmode)
1820 target = 0;
1822 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
1824 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
1825 /* Maybe propagate the target for the shift. */
1826 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1827 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1830 return expand_shift (RSHIFT_EXPR, mode, op0,
1831 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1834 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1835 of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1836 complement of that if COMPLEMENT. The mask is truncated if
1837 necessary to the width of mode MODE. The mask is zero-extended if
1838 BITSIZE+BITPOS is too small for MODE. */
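/* Example (illustrative values): mask_rtx (SImode, 8, 4, 0) yields the
   constant 0xf00, and with COMPLEMENT nonzero it yields 0xfffff0ff.  */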
1840 static rtx
1841 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1843 double_int mask;
1845 mask = double_int::mask (bitsize);
1846 mask = mask.llshift (bitpos, HOST_BITS_PER_DOUBLE_INT);
1848 if (complement)
1849 mask = ~mask;
1851 return immed_double_int_const (mask, mode);
1854 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1855 VALUE truncated to BITSIZE bits and then shifted left BITPOS bits. */
1857 static rtx
1858 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1860 double_int val;
1862 val = double_int::from_uhwi (INTVAL (value)).zext (bitsize);
1863 val = val.llshift (bitpos, HOST_BITS_PER_DOUBLE_INT);
1865 return immed_double_int_const (val, mode);
1868 /* Extract a bit field that is split across two words
1869 and return an RTX for the result.
1871 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1872 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1873 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend. */
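/* Rough example: on a 32-bit-word target, a 16-bit field starting at
   bit 24 is assembled from bits 24..31 of the first word and bits 0..7
   of the second word, each part shifted into place and then OR'ed.  */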
1875 static rtx
1876 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1877 unsigned HOST_WIDE_INT bitpos, int unsignedp)
1879 unsigned int unit;
1880 unsigned int bitsdone = 0;
1881 rtx result = NULL_RTX;
1882 int first = 1;
1884 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1885 much at a time. */
1886 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1887 unit = BITS_PER_WORD;
1888 else
1889 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1891 while (bitsdone < bitsize)
1893 unsigned HOST_WIDE_INT thissize;
1894 rtx part, word;
1895 unsigned HOST_WIDE_INT thispos;
1896 unsigned HOST_WIDE_INT offset;
1898 offset = (bitpos + bitsdone) / unit;
1899 thispos = (bitpos + bitsdone) % unit;
1901 /* THISSIZE must not overrun a word boundary. Otherwise,
1902 extract_fixed_bit_field will call us again, and we will mutually
1903 recurse forever. */
1904 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1905 thissize = MIN (thissize, unit - thispos);
1907 /* If OP0 is a register, then handle OFFSET here.
1909 When handling multiword bitfields, extract_bit_field may pass
1910 down a word_mode SUBREG of a larger REG for a bitfield that actually
1911 crosses a word boundary. Thus, for a SUBREG, we must find
1912 the current word starting from the base register. */
1913 if (GET_CODE (op0) == SUBREG)
1915 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1916 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1917 GET_MODE (SUBREG_REG (op0)));
1918 offset = 0;
1920 else if (REG_P (op0))
1922 word = operand_subword_force (op0, offset, GET_MODE (op0));
1923 offset = 0;
1925 else
1926 word = op0;
1928 /* Extract the parts in bit-counting order,
1929 whose meaning is determined by BYTES_BIG_ENDIAN.
1930 OFFSET is in UNITs, and UNIT is in bits. */
1931 part = extract_fixed_bit_field (word_mode, word, thissize,
1932 offset * unit + thispos, 0, 1, false);
1933 bitsdone += thissize;
1935 /* Shift this part into place for the result. */
1936 if (BYTES_BIG_ENDIAN)
1938 if (bitsize != bitsdone)
1939 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1940 bitsize - bitsdone, 0, 1);
1942 else
1944 if (bitsdone != thissize)
1945 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1946 bitsdone - thissize, 0, 1);
1949 if (first)
1950 result = part;
1951 else
1952 /* Combine the parts with bitwise or. This works
1953 because we extracted each part as an unsigned bit field. */
1954 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1955 OPTAB_LIB_WIDEN);
1957 first = 0;
1960 /* Unsigned bit field: we are done. */
1961 if (unsignedp)
1962 return result;
1963 /* Signed bit field: sign-extend with two arithmetic shifts. */
1964 result = expand_shift (LSHIFT_EXPR, word_mode, result,
1965 BITS_PER_WORD - bitsize, NULL_RTX, 0);
1966 return expand_shift (RSHIFT_EXPR, word_mode, result,
1967 BITS_PER_WORD - bitsize, NULL_RTX, 0);
1970 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
1971 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
1972 MODE, fill the upper bits with zeros. Fail if the layout of either
1973 mode is unknown (as for CC modes) or if the extraction would involve
1974 unprofitable mode punning. Return the value on success, otherwise
1975 return null.
1977 This is different from gen_lowpart* in these respects:
1979 - the returned value must always be considered an rvalue
1981 - when MODE is wider than SRC_MODE, the extraction involves
1982 a zero extension
1984 - when MODE is smaller than SRC_MODE, the extraction involves
1985 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
1987 In other words, this routine performs a computation, whereas the
1988 gen_lowpart* routines are conceptually lvalue or rvalue subreg
1989 operations. */
1992 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
1994 enum machine_mode int_mode, src_int_mode;
1996 if (mode == src_mode)
1997 return src;
1999 if (CONSTANT_P (src))
2001 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2002 fails, it will happily create (subreg (symbol_ref)) or similar
2003 invalid SUBREGs. */
2004 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2005 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2006 if (ret)
2007 return ret;
2009 if (GET_MODE (src) == VOIDmode
2010 || !validate_subreg (mode, src_mode, src, byte))
2011 return NULL_RTX;
2013 src = force_reg (GET_MODE (src), src);
2014 return gen_rtx_SUBREG (mode, src, byte);
2017 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2018 return NULL_RTX;
2020 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2021 && MODES_TIEABLE_P (mode, src_mode))
2023 rtx x = gen_lowpart_common (mode, src);
2024 if (x)
2025 return x;
2028 src_int_mode = int_mode_for_mode (src_mode);
2029 int_mode = int_mode_for_mode (mode);
2030 if (src_int_mode == BLKmode || int_mode == BLKmode)
2031 return NULL_RTX;
2033 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2034 return NULL_RTX;
2035 if (!MODES_TIEABLE_P (int_mode, mode))
2036 return NULL_RTX;
2038 src = gen_lowpart (src_int_mode, src);
2039 src = convert_modes (int_mode, src_int_mode, src, true);
2040 src = gen_lowpart (mode, src);
2041 return src;
2044 /* Add INC into TARGET. */
2046 void
2047 expand_inc (rtx target, rtx inc)
2049 rtx value = expand_binop (GET_MODE (target), add_optab,
2050 target, inc,
2051 target, 0, OPTAB_LIB_WIDEN);
2052 if (value != target)
2053 emit_move_insn (target, value);
2056 /* Subtract DEC from TARGET. */
2058 void
2059 expand_dec (rtx target, rtx dec)
2061 rtx value = expand_binop (GET_MODE (target), sub_optab,
2062 target, dec,
2063 target, 0, OPTAB_LIB_WIDEN);
2064 if (value != target)
2065 emit_move_insn (target, value);
2068 /* Output a shift instruction for expression code CODE,
2069 with SHIFTED being the rtx for the value to shift,
2070 and AMOUNT the rtx for the amount to shift by.
2071 Store the result in the rtx TARGET, if that is convenient.
2072 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2073 Return the rtx for where the value is. */
2075 static rtx
2076 expand_shift_1 (enum tree_code code, enum machine_mode mode, rtx shifted,
2077 rtx amount, rtx target, int unsignedp)
2079 rtx op1, temp = 0;
2080 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2081 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2082 optab lshift_optab = ashl_optab;
2083 optab rshift_arith_optab = ashr_optab;
2084 optab rshift_uns_optab = lshr_optab;
2085 optab lrotate_optab = rotl_optab;
2086 optab rrotate_optab = rotr_optab;
2087 enum machine_mode op1_mode;
2088 int attempt;
2089 bool speed = optimize_insn_for_speed_p ();
2091 op1 = amount;
2092 op1_mode = GET_MODE (op1);
2094 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2095 shift amount is a vector, use the vector/vector shift patterns. */
2096 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2098 lshift_optab = vashl_optab;
2099 rshift_arith_optab = vashr_optab;
2100 rshift_uns_optab = vlshr_optab;
2101 lrotate_optab = vrotl_optab;
2102 rrotate_optab = vrotr_optab;
2105 /* Previously detected shift-counts computed by NEGATE_EXPR
2106 and shifted in the other direction; but that does not work
2107 on all machines. */
2109 if (SHIFT_COUNT_TRUNCATED)
2111 if (CONST_INT_P (op1)
2112 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2113 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2114 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2115 % GET_MODE_BITSIZE (mode));
2116 else if (GET_CODE (op1) == SUBREG
2117 && subreg_lowpart_p (op1)
2118 && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (op1)))
2119 && INTEGRAL_MODE_P (GET_MODE (op1)))
2120 op1 = SUBREG_REG (op1);
2123 if (op1 == const0_rtx)
2124 return shifted;
2126 /* Check whether it's cheaper to implement a left shift by a constant
2127 bit count by a sequence of additions. */
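/* Sketch with made-up costs: if an addition costs 1 and a shift by 3
   costs 4, then x << 3 is cheaper as three successive doublings
   (x + x, then doubling the result twice more).  */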
2128 if (code == LSHIFT_EXPR
2129 && CONST_INT_P (op1)
2130 && INTVAL (op1) > 0
2131 && INTVAL (op1) < GET_MODE_PRECISION (mode)
2132 && INTVAL (op1) < MAX_BITS_PER_WORD
2133 && (shift_cost (speed, mode, INTVAL (op1))
2134 > INTVAL (op1) * add_cost (speed, mode))
2135 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2137 int i;
2138 for (i = 0; i < INTVAL (op1); i++)
2140 temp = force_reg (mode, shifted);
2141 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2142 unsignedp, OPTAB_LIB_WIDEN);
2144 return shifted;
2147 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2149 enum optab_methods methods;
2151 if (attempt == 0)
2152 methods = OPTAB_DIRECT;
2153 else if (attempt == 1)
2154 methods = OPTAB_WIDEN;
2155 else
2156 methods = OPTAB_LIB_WIDEN;
2158 if (rotate)
2160 /* Widening does not work for rotation. */
2161 if (methods == OPTAB_WIDEN)
2162 continue;
2163 else if (methods == OPTAB_LIB_WIDEN)
2165 /* If we have been unable to open-code this by a rotation,
2166 do it as the IOR of two shifts. I.e., to rotate A
2167 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2168 where C is the bitsize of A.
2170 It is theoretically possible that the target machine might
2171 not be able to perform either shift and hence we would
2172 be making two libcalls rather than just the one for the
2173 shift (similarly if IOR could not be done). We will allow
2174 this extremely unlikely lossage to avoid complicating the
2175 code below. */
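/* Example with illustrative numbers: rotating A left by 3 in a 32-bit
   mode becomes (A << 3) | ((unsigned) A >> 29).  */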
2177 rtx subtarget = target == shifted ? 0 : target;
2178 rtx new_amount, other_amount;
2179 rtx temp1;
2181 new_amount = op1;
2182 if (CONST_INT_P (op1))
2183 other_amount = GEN_INT (GET_MODE_BITSIZE (mode)
2184 - INTVAL (op1));
2185 else
2186 other_amount
2187 = simplify_gen_binary (MINUS, GET_MODE (op1),
2188 GEN_INT (GET_MODE_PRECISION (mode)),
2189 op1);
2191 shifted = force_reg (mode, shifted);
2193 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2194 mode, shifted, new_amount, 0, 1);
2195 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2196 mode, shifted, other_amount,
2197 subtarget, 1);
2198 return expand_binop (mode, ior_optab, temp, temp1, target,
2199 unsignedp, methods);
2202 temp = expand_binop (mode,
2203 left ? lrotate_optab : rrotate_optab,
2204 shifted, op1, target, unsignedp, methods);
2206 else if (unsignedp)
2207 temp = expand_binop (mode,
2208 left ? lshift_optab : rshift_uns_optab,
2209 shifted, op1, target, unsignedp, methods);
2211 /* Do arithmetic shifts.
2212 Also, if we are going to widen the operand, we can just as well
2213 use an arithmetic right-shift instead of a logical one. */
2214 if (temp == 0 && ! rotate
2215 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2217 enum optab_methods methods1 = methods;
2219 /* If trying to widen a log shift to an arithmetic shift,
2220 don't accept an arithmetic shift of the same size. */
2221 if (unsignedp)
2222 methods1 = OPTAB_MUST_WIDEN;
2224 /* Arithmetic shift */
2226 temp = expand_binop (mode,
2227 left ? lshift_optab : rshift_arith_optab,
2228 shifted, op1, target, unsignedp, methods1);
2231 /* We used to try extzv here for logical right shifts, but that was
2232 only useful for one machine, the VAX, and caused poor code
2233 generation there for lshrdi3, so the code was deleted and a
2234 define_expand for lshrsi3 was added to vax.md. */
2237 gcc_assert (temp);
2238 return temp;
2241 /* Output a shift instruction for expression code CODE,
2242 with SHIFTED being the rtx for the value to shift,
2243 and AMOUNT the amount to shift by.
2244 Store the result in the rtx TARGET, if that is convenient.
2245 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2246 Return the rtx for where the value is. */
2249 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2250 int amount, rtx target, int unsignedp)
2252 return expand_shift_1 (code, mode,
2253 shifted, GEN_INT (amount), target, unsignedp);
2256 /* Output a shift instruction for expression code CODE,
2257 with SHIFTED being the rtx for the value to shift,
2258 and AMOUNT the tree for the amount to shift by.
2259 Store the result in the rtx TARGET, if that is convenient.
2260 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2261 Return the rtx for where the value is. */
2264 expand_variable_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2265 tree amount, rtx target, int unsignedp)
2267 return expand_shift_1 (code, mode,
2268 shifted, expand_normal (amount), target, unsignedp);
2272 /* Indicates the type of fixup needed after a constant multiplication.
2273 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2274 the result should be negated, and ADD_VARIANT means that the
2275 multiplicand should be added to the result. */
2276 enum mult_variant {basic_variant, negate_variant, add_variant};
2278 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2279 const struct mult_cost *, enum machine_mode mode);
2280 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2281 struct algorithm *, enum mult_variant *, int);
2282 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2283 const struct algorithm *, enum mult_variant);
2284 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2285 static rtx extract_high_half (enum machine_mode, rtx);
2286 static rtx expmed_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2287 static rtx expmed_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2288 int, int);
2289 /* Compute and return the best algorithm for multiplying by T.
2290 The algorithm must cost less than COST_LIMIT.
2291 If retval.cost >= COST_LIMIT, no algorithm was found and all
2292 other fields of the returned struct are undefined.
2293 MODE is the machine mode of the multiplication. */
2295 static void
2296 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2297 const struct mult_cost *cost_limit, enum machine_mode mode)
2299 int m;
2300 struct algorithm *alg_in, *best_alg;
2301 struct mult_cost best_cost;
2302 struct mult_cost new_limit;
2303 int op_cost, op_latency;
2304 unsigned HOST_WIDE_INT orig_t = t;
2305 unsigned HOST_WIDE_INT q;
2306 int maxm, hash_index;
2307 bool cache_hit = false;
2308 enum alg_code cache_alg = alg_zero;
2309 bool speed = optimize_insn_for_speed_p ();
2310 enum machine_mode imode;
2311 struct alg_hash_entry *entry_ptr;
2313 /* Indicate that no algorithm is yet found. If no algorithm
2314 is found, this value will be returned and indicate failure. */
2315 alg_out->cost.cost = cost_limit->cost + 1;
2316 alg_out->cost.latency = cost_limit->latency + 1;
2318 if (cost_limit->cost < 0
2319 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2320 return;
2322 /* Be prepared for vector modes. */
2323 imode = GET_MODE_INNER (mode);
2324 if (imode == VOIDmode)
2325 imode = mode;
2327 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2329 /* Restrict the bits of "t" to the multiplication's mode. */
2330 t &= GET_MODE_MASK (imode);
2332 /* t == 1 can be done in zero cost. */
2333 if (t == 1)
2335 alg_out->ops = 1;
2336 alg_out->cost.cost = 0;
2337 alg_out->cost.latency = 0;
2338 alg_out->op[0] = alg_m;
2339 return;
2342 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2343 fail now. */
2344 if (t == 0)
2346 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2347 return;
2348 else
2350 alg_out->ops = 1;
2351 alg_out->cost.cost = zero_cost (speed);
2352 alg_out->cost.latency = zero_cost (speed);
2353 alg_out->op[0] = alg_zero;
2354 return;
2358 /* We'll be needing a couple extra algorithm structures now. */
2360 alg_in = XALLOCA (struct algorithm);
2361 best_alg = XALLOCA (struct algorithm);
2362 best_cost = *cost_limit;
2364 /* Compute the hash index. */
2365 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2367 /* See if we already know what to do for T. */
2368 entry_ptr = alg_hash_entry_ptr (hash_index);
2369 if (entry_ptr->t == t
2370 && entry_ptr->mode == mode
2372 && entry_ptr->speed == speed
2373 && entry_ptr->alg != alg_unknown)
2375 cache_alg = entry_ptr->alg;
2377 if (cache_alg == alg_impossible)
2379 /* The cache tells us that it's impossible to synthesize
2380 multiplication by T within entry_ptr->cost. */
2381 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2382 /* COST_LIMIT is at least as restrictive as the one
2383 recorded in the hash table, in which case we have no
2384 hope of synthesizing a multiplication. Just
2385 return. */
2386 return;
2388 /* If we get here, COST_LIMIT is less restrictive than the
2389 one recorded in the hash table, so we may be able to
2390 synthesize a multiplication. Proceed as if we didn't
2391 have the cache entry. */
2393 else
2395 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2396 /* The cached algorithm shows that this multiplication
2397 requires more cost than COST_LIMIT. Just return. This
2398 way, we don't clobber this cache entry with
2399 alg_impossible but retain useful information. */
2400 return;
2402 cache_hit = true;
2404 switch (cache_alg)
2406 case alg_shift:
2407 goto do_alg_shift;
2409 case alg_add_t_m2:
2410 case alg_sub_t_m2:
2411 goto do_alg_addsub_t_m2;
2413 case alg_add_factor:
2414 case alg_sub_factor:
2415 goto do_alg_addsub_factor;
2417 case alg_add_t2_m:
2418 goto do_alg_add_t2_m;
2420 case alg_sub_t2_m:
2421 goto do_alg_sub_t2_m;
2423 default:
2424 gcc_unreachable ();
2429 /* If we have a group of zero bits at the low-order part of T, try
2430 multiplying by the remaining bits and then doing a shift. */
2432 if ((t & 1) == 0)
2434 do_alg_shift:
2435 m = floor_log2 (t & -t); /* m = number of low zero bits */
2436 if (m < maxm)
2438 q = t >> m;
2439 /* The function expand_shift will choose between a shift and
2440 a sequence of additions, so the observed cost is given as
2441 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2442 op_cost = m * add_cost (speed, mode);
2443 if (shift_cost (speed, mode, m) < op_cost)
2444 op_cost = shift_cost (speed, mode, m);
2445 new_limit.cost = best_cost.cost - op_cost;
2446 new_limit.latency = best_cost.latency - op_cost;
2447 synth_mult (alg_in, q, &new_limit, mode);
2449 alg_in->cost.cost += op_cost;
2450 alg_in->cost.latency += op_cost;
2451 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2453 struct algorithm *x;
2454 best_cost = alg_in->cost;
2455 x = alg_in, alg_in = best_alg, best_alg = x;
2456 best_alg->log[best_alg->ops] = m;
2457 best_alg->op[best_alg->ops] = alg_shift;
2460 /* See if treating ORIG_T as a signed number yields a better
2461 sequence. Try this sequence only for a negative ORIG_T
2462 as it would be useless for a non-negative ORIG_T. */
2463 if ((HOST_WIDE_INT) orig_t < 0)
2465 /* Shift ORIG_T as follows because a right shift of a
2466 negative-valued signed type is implementation
2467 defined. */
2468 q = ~(~orig_t >> m);
2469 /* The function expand_shift will choose between a shift
2470 and a sequence of additions, so the observed cost is
2471 given as MIN (m * add_cost(speed, mode),
2472 shift_cost(speed, mode, m)). */
2473 op_cost = m * add_cost (speed, mode);
2474 if (shift_cost (speed, mode, m) < op_cost)
2475 op_cost = shift_cost (speed, mode, m);
2476 new_limit.cost = best_cost.cost - op_cost;
2477 new_limit.latency = best_cost.latency - op_cost;
2478 synth_mult (alg_in, q, &new_limit, mode);
2480 alg_in->cost.cost += op_cost;
2481 alg_in->cost.latency += op_cost;
2482 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2484 struct algorithm *x;
2485 best_cost = alg_in->cost;
2486 x = alg_in, alg_in = best_alg, best_alg = x;
2487 best_alg->log[best_alg->ops] = m;
2488 best_alg->op[best_alg->ops] = alg_shift;
2492 if (cache_hit)
2493 goto done;
2496 /* If we have an odd number, add or subtract one. */
2497 if ((t & 1) != 0)
2499 unsigned HOST_WIDE_INT w;
2501 do_alg_addsub_t_m2:
2502 for (w = 1; (w & t) != 0; w <<= 1)
2504 /* If T was -1, then W will be zero after the loop. This is another
2505 case where T ends with ...111. Handling this by multiplying by (T + 1)
2506 and subtracting 1 produces slightly better code and selects an
2507 algorithm much faster than treating it like the ...0111 case
2508 below. */
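/* For instance (example value only), T == 7 is ...111, so it is handled
   by recursing on T + 1 == 8 and appending one subtraction,
   i.e. a * 7 == a * 8 - a.  */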
2509 if (w == 0
2510 || (w > 2
2511 /* Reject the case where t is 3.
2512 Thus we prefer addition in that case. */
2513 && t != 3))
2515 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2517 op_cost = add_cost (speed, mode);
2518 new_limit.cost = best_cost.cost - op_cost;
2519 new_limit.latency = best_cost.latency - op_cost;
2520 synth_mult (alg_in, t + 1, &new_limit, mode);
2522 alg_in->cost.cost += op_cost;
2523 alg_in->cost.latency += op_cost;
2524 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2526 struct algorithm *x;
2527 best_cost = alg_in->cost;
2528 x = alg_in, alg_in = best_alg, best_alg = x;
2529 best_alg->log[best_alg->ops] = 0;
2530 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2533 else
2535 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2537 op_cost = add_cost (speed, mode);
2538 new_limit.cost = best_cost.cost - op_cost;
2539 new_limit.latency = best_cost.latency - op_cost;
2540 synth_mult (alg_in, t - 1, &new_limit, mode);
2542 alg_in->cost.cost += op_cost;
2543 alg_in->cost.latency += op_cost;
2544 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2546 struct algorithm *x;
2547 best_cost = alg_in->cost;
2548 x = alg_in, alg_in = best_alg, best_alg = x;
2549 best_alg->log[best_alg->ops] = 0;
2550 best_alg->op[best_alg->ops] = alg_add_t_m2;
2554 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2555 quickly with a - a * n for some appropriate constant n. */
2556 m = exact_log2 (-orig_t + 1);
2557 if (m >= 0 && m < maxm)
2559 op_cost = shiftsub1_cost (speed, mode, m);
2560 new_limit.cost = best_cost.cost - op_cost;
2561 new_limit.latency = best_cost.latency - op_cost;
2562 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2563 &new_limit, mode);
2565 alg_in->cost.cost += op_cost;
2566 alg_in->cost.latency += op_cost;
2567 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2569 struct algorithm *x;
2570 best_cost = alg_in->cost;
2571 x = alg_in, alg_in = best_alg, best_alg = x;
2572 best_alg->log[best_alg->ops] = m;
2573 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2577 if (cache_hit)
2578 goto done;
2581 /* Look for factors of t of the form
2582 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2583 If we find such a factor, we can multiply by t using an algorithm that
2584 multiplies by q, shift the result by m and add/subtract it to itself.
2586 We search for large factors first and loop down, even if large factors
2587 are less probable than small; if we find a large factor we will find a
2588 good sequence quickly, and therefore be able to prune (by decreasing
2589 COST_LIMIT) the search. */
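/* Worked example (illustrative only): t = 45 factors as 9 * (2**2 + 1),
   so a*45 can be done as b = a*9 followed by (b << 2) + b, and a*9 in
   turn as (a << 3) + a.  */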
2591 do_alg_addsub_factor:
2592 for (m = floor_log2 (t - 1); m >= 2; m--)
2594 unsigned HOST_WIDE_INT d;
2596 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2597 if (t % d == 0 && t > d && m < maxm
2598 && (!cache_hit || cache_alg == alg_add_factor))
2600 /* If the target has a cheap shift-and-add instruction use
2601 that in preference to a shift insn followed by an add insn.
2602 Assume that the shift-and-add is "atomic" with a latency
2603 equal to its cost, otherwise assume that on superscalar
2604 hardware the shift may be executed concurrently with the
2605 earlier steps in the algorithm. */
2606 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2607 if (shiftadd_cost (speed, mode, m) < op_cost)
2609 op_cost = shiftadd_cost (speed, mode, m);
2610 op_latency = op_cost;
2612 else
2613 op_latency = add_cost (speed, mode);
2615 new_limit.cost = best_cost.cost - op_cost;
2616 new_limit.latency = best_cost.latency - op_latency;
2617 synth_mult (alg_in, t / d, &new_limit, mode);
2619 alg_in->cost.cost += op_cost;
2620 alg_in->cost.latency += op_latency;
2621 if (alg_in->cost.latency < op_cost)
2622 alg_in->cost.latency = op_cost;
2623 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2625 struct algorithm *x;
2626 best_cost = alg_in->cost;
2627 x = alg_in, alg_in = best_alg, best_alg = x;
2628 best_alg->log[best_alg->ops] = m;
2629 best_alg->op[best_alg->ops] = alg_add_factor;
2631 /* Other factors will have been taken care of in the recursion. */
2632 break;
2635 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2636 if (t % d == 0 && t > d && m < maxm
2637 && (!cache_hit || cache_alg == alg_sub_factor))
2639 /* If the target has a cheap shift-and-subtract insn use
2640 that in preference to a shift insn followed by a sub insn.
2641 Assume that the shift-and-sub is "atomic" with a latency
2642 equal to its cost, otherwise assume that on superscalar
2643 hardware the shift may be executed concurrently with the
2644 earlier steps in the algorithm. */
2645 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2646 if (shiftsub0_cost (speed, mode, m) < op_cost)
2648 op_cost = shiftsub0_cost (speed, mode, m);
2649 op_latency = op_cost;
2651 else
2652 op_latency = add_cost (speed, mode);
2654 new_limit.cost = best_cost.cost - op_cost;
2655 new_limit.latency = best_cost.latency - op_latency;
2656 synth_mult (alg_in, t / d, &new_limit, mode);
2658 alg_in->cost.cost += op_cost;
2659 alg_in->cost.latency += op_latency;
2660 if (alg_in->cost.latency < op_cost)
2661 alg_in->cost.latency = op_cost;
2662 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2664 struct algorithm *x;
2665 best_cost = alg_in->cost;
2666 x = alg_in, alg_in = best_alg, best_alg = x;
2667 best_alg->log[best_alg->ops] = m;
2668 best_alg->op[best_alg->ops] = alg_sub_factor;
2670 break;
2673 if (cache_hit)
2674 goto done;
2676 /* Try shift-and-add (load effective address) instructions,
2677 i.e. do a*3, a*5, a*9. */
2678 if ((t & 1) != 0)
2680 do_alg_add_t2_m:
2681 q = t - 1;
2682 q = q & -q;
2683 m = exact_log2 (q);
2684 if (m >= 0 && m < maxm)
2686 op_cost = shiftadd_cost (speed, mode, m);
2687 new_limit.cost = best_cost.cost - op_cost;
2688 new_limit.latency = best_cost.latency - op_cost;
2689 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2691 alg_in->cost.cost += op_cost;
2692 alg_in->cost.latency += op_cost;
2693 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2695 struct algorithm *x;
2696 best_cost = alg_in->cost;
2697 x = alg_in, alg_in = best_alg, best_alg = x;
2698 best_alg->log[best_alg->ops] = m;
2699 best_alg->op[best_alg->ops] = alg_add_t2_m;
2702 if (cache_hit)
2703 goto done;
2705 do_alg_sub_t2_m:
2706 q = t + 1;
2707 q = q & -q;
2708 m = exact_log2 (q);
2709 if (m >= 0 && m < maxm)
2711 op_cost = shiftsub0_cost (speed, mode, m);
2712 new_limit.cost = best_cost.cost - op_cost;
2713 new_limit.latency = best_cost.latency - op_cost;
2714 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2716 alg_in->cost.cost += op_cost;
2717 alg_in->cost.latency += op_cost;
2718 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2720 struct algorithm *x;
2721 best_cost = alg_in->cost;
2722 x = alg_in, alg_in = best_alg, best_alg = x;
2723 best_alg->log[best_alg->ops] = m;
2724 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2727 if (cache_hit)
2728 goto done;
2731 done:
2732 /* If best_cost has not decreased, we have not found any algorithm. */
2733 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2735 /* We failed to find an algorithm. Record alg_impossible for
2736 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2737 we are asked to find an algorithm for T within the same or
2738 lower COST_LIMIT, we can immediately return to the
2739 caller. */
2740 entry_ptr->t = t;
2741 entry_ptr->mode = mode;
2742 entry_ptr->speed = speed;
2743 entry_ptr->alg = alg_impossible;
2744 entry_ptr->cost = *cost_limit;
2745 return;
2748 /* Cache the result. */
2749 if (!cache_hit)
2751 entry_ptr->t = t;
2752 entry_ptr->mode = mode;
2753 entry_ptr->speed = speed;
2754 entry_ptr->alg = best_alg->op[best_alg->ops];
2755 entry_ptr->cost.cost = best_cost.cost;
2756 entry_ptr->cost.latency = best_cost.latency;
2759 /* If the sequence is too long for `struct algorithm' to record,
2760 make this search fail. */
2761 if (best_alg->ops == MAX_BITS_PER_WORD)
2762 return;
2764 /* Copy the algorithm from temporary space to the space at alg_out.
2765 We avoid using structure assignment because the majority of
2766 best_alg is normally undefined, and this is a critical function. */
2767 alg_out->ops = best_alg->ops + 1;
2768 alg_out->cost = best_cost;
2769 memcpy (alg_out->op, best_alg->op,
2770 alg_out->ops * sizeof *alg_out->op);
2771 memcpy (alg_out->log, best_alg->log,
2772 alg_out->ops * sizeof *alg_out->log);
2775 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2776 Try three variations:
2778 - a shift/add sequence based on VAL itself
2779 - a shift/add sequence based on -VAL, followed by a negation
2780 - a shift/add sequence based on VAL - 1, followed by an addition.
2782 Return true if the cheapest of these cost less than MULT_COST,
2783 describing the algorithm in *ALG and final fixup in *VARIANT. */
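/* For example (values chosen only for illustration): multiplying by -3
   may be cheapest as the shift/add sequence for 3 followed by a negation
   (negate_variant), and multiplying by 7 may be cheapest as the sequence
   for 6 followed by one addition of the multiplicand (add_variant).  */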
2785 static bool
2786 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2787 struct algorithm *alg, enum mult_variant *variant,
2788 int mult_cost)
2790 struct algorithm alg2;
2791 struct mult_cost limit;
2792 int op_cost;
2793 bool speed = optimize_insn_for_speed_p ();
2795 /* Fail quickly for impossible bounds. */
2796 if (mult_cost < 0)
2797 return false;
2799 /* Ensure that mult_cost provides a reasonable upper bound.
2800 Any constant multiplication can be performed with less
2801 than 2 * bits additions. */
2802 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
2803 if (mult_cost > op_cost)
2804 mult_cost = op_cost;
2806 *variant = basic_variant;
2807 limit.cost = mult_cost;
2808 limit.latency = mult_cost;
2809 synth_mult (alg, val, &limit, mode);
2811 /* This works only if the inverted value actually fits in an
2812 `unsigned int' */
2813 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
2815 op_cost = neg_cost(speed, mode);
2816 if (MULT_COST_LESS (&alg->cost, mult_cost))
2818 limit.cost = alg->cost.cost - op_cost;
2819 limit.latency = alg->cost.latency - op_cost;
2821 else
2823 limit.cost = mult_cost - op_cost;
2824 limit.latency = mult_cost - op_cost;
2827 synth_mult (&alg2, -val, &limit, mode);
2828 alg2.cost.cost += op_cost;
2829 alg2.cost.latency += op_cost;
2830 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2831 *alg = alg2, *variant = negate_variant;
2834 /* This proves very useful for division-by-constant. */
2835 op_cost = add_cost (speed, mode);
2836 if (MULT_COST_LESS (&alg->cost, mult_cost))
2838 limit.cost = alg->cost.cost - op_cost;
2839 limit.latency = alg->cost.latency - op_cost;
2841 else
2843 limit.cost = mult_cost - op_cost;
2844 limit.latency = mult_cost - op_cost;
2847 synth_mult (&alg2, val - 1, &limit, mode);
2848 alg2.cost.cost += op_cost;
2849 alg2.cost.latency += op_cost;
2850 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2851 *alg = alg2, *variant = add_variant;
2853 return MULT_COST_LESS (&alg->cost, mult_cost);
2856 /* A subroutine of expand_mult, used for constant multiplications.
2857 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2858 convenient. Use the shift/add sequence described by ALG and apply
2859 the final fixup specified by VARIANT. */
2861 static rtx
2862 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2863 rtx target, const struct algorithm *alg,
2864 enum mult_variant variant)
2866 HOST_WIDE_INT val_so_far;
2867 rtx insn, accum, tem;
2868 int opno;
2869 enum machine_mode nmode;
2871 /* Avoid referencing memory over and over and invalid sharing
2872 on SUBREGs. */
2873 op0 = force_reg (mode, op0);
2875 /* ACCUM starts out either as OP0 or as a zero, depending on
2876 the first operation. */
2878 if (alg->op[0] == alg_zero)
2880 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
2881 val_so_far = 0;
2883 else if (alg->op[0] == alg_m)
2885 accum = copy_to_mode_reg (mode, op0);
2886 val_so_far = 1;
2888 else
2889 gcc_unreachable ();
2891 for (opno = 1; opno < alg->ops; opno++)
2893 int log = alg->log[opno];
2894 rtx shift_subtarget = optimize ? 0 : accum;
2895 rtx add_target
2896 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2897 && !optimize)
2898 ? target : 0;
2899 rtx accum_target = optimize ? 0 : accum;
2900 rtx accum_inner;
2902 switch (alg->op[opno])
2904 case alg_shift:
2905 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2906 /* REG_EQUAL note will be attached to the following insn. */
2907 emit_move_insn (accum, tem);
2908 val_so_far <<= log;
2909 break;
2911 case alg_add_t_m2:
2912 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2913 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2914 add_target ? add_target : accum_target);
2915 val_so_far += (HOST_WIDE_INT) 1 << log;
2916 break;
2918 case alg_sub_t_m2:
2919 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2920 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2921 add_target ? add_target : accum_target);
2922 val_so_far -= (HOST_WIDE_INT) 1 << log;
2923 break;
2925 case alg_add_t2_m:
2926 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2927 log, shift_subtarget, 0);
2928 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2929 add_target ? add_target : accum_target);
2930 val_so_far = (val_so_far << log) + 1;
2931 break;
2933 case alg_sub_t2_m:
2934 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2935 log, shift_subtarget, 0);
2936 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2937 add_target ? add_target : accum_target);
2938 val_so_far = (val_so_far << log) - 1;
2939 break;
2941 case alg_add_factor:
2942 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2943 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2944 add_target ? add_target : accum_target);
2945 val_so_far += val_so_far << log;
2946 break;
2948 case alg_sub_factor:
2949 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2950 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2951 (add_target
2952 ? add_target : (optimize ? 0 : tem)));
2953 val_so_far = (val_so_far << log) - val_so_far;
2954 break;
2956 default:
2957 gcc_unreachable ();
2960 if (SCALAR_INT_MODE_P (mode))
2962 /* Write a REG_EQUAL note on the last insn so that we can cse
2963 multiplication sequences. Note that if ACCUM is a SUBREG,
2964 we've set the inner register and must properly indicate that. */
2965 tem = op0, nmode = mode;
2966 accum_inner = accum;
2967 if (GET_CODE (accum) == SUBREG)
2969 accum_inner = SUBREG_REG (accum);
2970 nmode = GET_MODE (accum_inner);
2971 tem = gen_lowpart (nmode, op0);
2974 insn = get_last_insn ();
2975 set_dst_reg_note (insn, REG_EQUAL,
2976 gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)),
2977 accum_inner);
2981 if (variant == negate_variant)
2983 val_so_far = -val_so_far;
2984 accum = expand_unop (mode, neg_optab, accum, target, 0);
2986 else if (variant == add_variant)
2988 val_so_far = val_so_far + 1;
2989 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
2992 /* Compare only the bits of val and val_so_far that are significant
2993 in the result mode, to avoid sign-/zero-extension confusion. */
2994 nmode = GET_MODE_INNER (mode);
2995 if (nmode == VOIDmode)
2996 nmode = mode;
2997 val &= GET_MODE_MASK (nmode);
2998 val_so_far &= GET_MODE_MASK (nmode);
2999 gcc_assert (val == val_so_far);
3001 return accum;
3004 /* Perform a multiplication and return an rtx for the result.
3005 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3006 TARGET is a suggestion for where to store the result (an rtx).
3008 We check specially for a constant integer as OP1.
3009 If you want this check for OP0 as well, then before calling
3010 you should swap the two operands if OP0 would be constant. */
3013 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3014 int unsignedp)
3016 enum mult_variant variant;
3017 struct algorithm algorithm;
3018 rtx scalar_op1;
3019 int max_cost;
3020 bool speed = optimize_insn_for_speed_p ();
3021 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3023 if (CONSTANT_P (op0))
3025 rtx temp = op0;
3026 op0 = op1;
3027 op1 = temp;
3030 /* For vectors, there are several simplifications that can be made if
3031 all elements of the vector constant are identical. */
3032 scalar_op1 = op1;
3033 if (GET_CODE (op1) == CONST_VECTOR)
3035 int i, n = CONST_VECTOR_NUNITS (op1);
3036 scalar_op1 = CONST_VECTOR_ELT (op1, 0);
3037 for (i = 1; i < n; ++i)
3038 if (!rtx_equal_p (scalar_op1, CONST_VECTOR_ELT (op1, i)))
3039 goto skip_scalar;
3042 if (INTEGRAL_MODE_P (mode))
3044 rtx fake_reg;
3045 HOST_WIDE_INT coeff;
3046 bool is_neg;
3047 int mode_bitsize;
3049 if (op1 == CONST0_RTX (mode))
3050 return op1;
3051 if (op1 == CONST1_RTX (mode))
3052 return op0;
3053 if (op1 == CONSTM1_RTX (mode))
3054 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3055 op0, target, 0);
3057 if (do_trapv)
3058 goto skip_synth;
3060 /* These are the operations that are potentially turned into
3061 a sequence of shifts and additions. */
3062 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3064 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3065 less than or equal in size to `unsigned int' this doesn't matter.
3066 If the mode is larger than `unsigned int', then synth_mult works
3067 only if the constant value exactly fits in an `unsigned int' without
3068 any truncation. This means that multiplying by negative values does
3069 not work; results are off by 2^32 on a 32 bit machine. */
3071 if (CONST_INT_P (scalar_op1))
3073 coeff = INTVAL (scalar_op1);
3074 is_neg = coeff < 0;
3076 else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3078 /* If we are multiplying in DImode, it may still be a win
3079 to try to work with shifts and adds. */
3080 if (CONST_DOUBLE_HIGH (scalar_op1) == 0
3081 && CONST_DOUBLE_LOW (scalar_op1) > 0)
3083 coeff = CONST_DOUBLE_LOW (scalar_op1);
3084 is_neg = false;
3086 else if (CONST_DOUBLE_LOW (scalar_op1) == 0)
3088 coeff = CONST_DOUBLE_HIGH (scalar_op1);
3089 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3091 int shift = floor_log2 (coeff) + HOST_BITS_PER_WIDE_INT;
3092 if (shift < HOST_BITS_PER_DOUBLE_INT - 1
3093 || mode_bitsize <= HOST_BITS_PER_DOUBLE_INT)
3094 return expand_shift (LSHIFT_EXPR, mode, op0,
3095 shift, target, unsignedp);
3097 goto skip_synth;
3099 else
3100 goto skip_synth;
3102 else
3103 goto skip_synth;
3105 /* We used to test optimize here, on the grounds that it's better to
3106 produce a smaller program when -O is not used. But this causes
3107 such a terrible slowdown sometimes that it seems better to always
3108 use synth_mult. */
3110 /* Special case powers of two. */
3111 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3112 return expand_shift (LSHIFT_EXPR, mode, op0,
3113 floor_log2 (coeff), target, unsignedp);
3115 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3117 /* Attempt to handle multiplication of DImode values by negative
3118 coefficients, by performing the multiplication by a positive
3119 multiplier and then inverting the result. */
3120 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3122 /* It's safe to use -coeff even for INT_MIN, as the
3123 result is interpreted as an unsigned coefficient.
3124 Exclude cost of op0 from max_cost to match the cost
3125 calculation of the synth_mult. */
3126 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed)
3127 - neg_cost(speed, mode));
3128 if (max_cost > 0
3129 && choose_mult_variant (mode, -coeff, &algorithm,
3130 &variant, max_cost))
3132 rtx temp = expand_mult_const (mode, op0, -coeff, NULL_RTX,
3133 &algorithm, variant);
3134 return expand_unop (mode, neg_optab, temp, target, 0);
3136 goto skip_synth;
3139 /* Exclude cost of op0 from max_cost to match the cost
3140 calculation of the synth_mult. */
3141 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
3142 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3143 return expand_mult_const (mode, op0, coeff, target,
3144 &algorithm, variant);
3146 skip_synth:
3148 /* Expand x*2.0 as x+x. */
3149 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1))
3151 REAL_VALUE_TYPE d;
3152 REAL_VALUE_FROM_CONST_DOUBLE (d, scalar_op1);
3154 if (REAL_VALUES_EQUAL (d, dconst2))
3156 op0 = force_reg (GET_MODE (op0), op0);
3157 return expand_binop (mode, add_optab, op0, op0,
3158 target, unsignedp, OPTAB_LIB_WIDEN);
3161 skip_scalar:
3163 /* This used to use umul_optab if unsigned, but for non-widening multiply
3164 there is no difference between signed and unsigned. */
3165 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3166 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3167 gcc_assert (op0);
3168 return op0;
3171 /* Return a cost estimate for multiplying a register by the given
3172 COEFFicient in the given MODE and SPEED. */
3175 mult_by_coeff_cost (HOST_WIDE_INT coeff, enum machine_mode mode, bool speed)
3177 int max_cost;
3178 struct algorithm algorithm;
3179 enum mult_variant variant;
3181 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3182 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg), speed);
3183 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3184 return algorithm.cost.cost;
3185 else
3186 return max_cost;
3189 /* Perform a widening multiplication and return an rtx for the result.
3190 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3191 TARGET is a suggestion for where to store the result (an rtx).
3192 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3193 or smul_widen_optab.
3195 We check specially for a constant integer as OP1, comparing the
3196 cost of a widening multiply against the cost of a sequence of shifts
3197 and adds. */
3200 expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3201 int unsignedp, optab this_optab)
3203 bool speed = optimize_insn_for_speed_p ();
3204 rtx cop1;
3206 if (CONST_INT_P (op1)
3207 && GET_MODE (op0) != VOIDmode
3208 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3209 this_optab == umul_widen_optab))
3210 && CONST_INT_P (cop1)
3211 && (INTVAL (cop1) >= 0
3212 || HWI_COMPUTABLE_MODE_P (mode)))
3214 HOST_WIDE_INT coeff = INTVAL (cop1);
3215 int max_cost;
3216 enum mult_variant variant;
3217 struct algorithm algorithm;
3219 /* Special case powers of two. */
3220 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3222 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3223 return expand_shift (LSHIFT_EXPR, mode, op0,
3224 floor_log2 (coeff), target, unsignedp);
3227 /* Exclude cost of op0 from max_cost to match the cost
3228 calculation of the synth_mult. */
3229 max_cost = mul_widen_cost (speed, mode);
3230 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3231 max_cost))
3233 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3234 return expand_mult_const (mode, op0, coeff, target,
3235 &algorithm, variant);
3238 return expand_binop (mode, this_optab, op0, op1, target,
3239 unsignedp, OPTAB_LIB_WIDEN);
3242 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3243 replace division by D, and put the least significant N bits of the result
3244 in *MULTIPLIER_PTR and return the most significant bit.
3246 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3247 needed precision is in PRECISION (should be <= N).
3249 PRECISION should be as small as possible so this function can choose
3250 multiplier more freely.
3252 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3253 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3255 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3256 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
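/* Illustrative example, assuming N == PRECISION == 32: for D == 3 this
   chooses multiplier 0xAAAAAAAB with *POST_SHIFT_PTR == 1 and returns 0,
   so an unsigned 32-bit x/3 == (x * 0xAAAAAAAB) >> 32 >> 1.  */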
3258 unsigned HOST_WIDE_INT
3259 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3260 unsigned HOST_WIDE_INT *multiplier_ptr,
3261 int *post_shift_ptr, int *lgup_ptr)
3263 double_int mhigh, mlow;
3264 int lgup, post_shift;
3265 int pow, pow2;
3267 /* lgup = ceil(log2(divisor)); */
3268 lgup = ceil_log2 (d);
3270 gcc_assert (lgup <= n);
3272 pow = n + lgup;
3273 pow2 = n + lgup - precision;
3275 /* We could handle this with some effort, but this case is much
3276 better handled directly with a scc insn, so rely on caller using
3277 that. */
3278 gcc_assert (pow != HOST_BITS_PER_DOUBLE_INT);
3280 /* mlow = 2^(N + lgup)/d */
3281 double_int val = double_int_zero.set_bit (pow);
3282 mlow = val.div (double_int::from_uhwi (d), true, TRUNC_DIV_EXPR);
3284 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3285 val |= double_int_zero.set_bit (pow2);
3286 mhigh = val.div (double_int::from_uhwi (d), true, TRUNC_DIV_EXPR);
3288 gcc_assert (!mhigh.high || val.high - d < d);
3289 gcc_assert (mhigh.high <= 1 && mlow.high <= 1);
3290 /* Assert that mlow < mhigh. */
3291 gcc_assert (mlow.ult (mhigh));
3293 /* If precision == N, then mlow, mhigh exceed 2^N
3294 (but they do not exceed 2^(N+1)). */
3296 /* Reduce to lowest terms. */
3297 for (post_shift = lgup; post_shift > 0; post_shift--)
3299 int shft = HOST_BITS_PER_WIDE_INT - 1;
3300 unsigned HOST_WIDE_INT ml_lo = (mlow.high << shft) | (mlow.low >> 1);
3301 unsigned HOST_WIDE_INT mh_lo = (mhigh.high << shft) | (mhigh.low >> 1);
3302 if (ml_lo >= mh_lo)
3303 break;
3305 mlow = double_int::from_uhwi (ml_lo);
3306 mhigh = double_int::from_uhwi (mh_lo);
3309 *post_shift_ptr = post_shift;
3310 *lgup_ptr = lgup;
3311 if (n < HOST_BITS_PER_WIDE_INT)
3313 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3314 *multiplier_ptr = mhigh.low & mask;
3315 return mhigh.low >= mask;
3317 else
3319 *multiplier_ptr = mhigh.low;
3320 return mhigh.high;
3324 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3325 congruent to 1 (mod 2**N). */
3327 static unsigned HOST_WIDE_INT
3328 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3330 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3332 /* The algorithm notes that the choice y = x satisfies
3333 x*y == 1 mod 2^3, since x is assumed odd.
3334 Each iteration doubles the number of bits of significance in y. */
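/* Small example (illustrative): with x == 3 and n == 8 the result is
   171, since 3 * 171 == 513 == 2 * 256 + 1 == 1 (mod 2**8).  */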
3336 unsigned HOST_WIDE_INT mask;
3337 unsigned HOST_WIDE_INT y = x;
3338 int nbit = 3;
3340 mask = (n == HOST_BITS_PER_WIDE_INT
3341 ? ~(unsigned HOST_WIDE_INT) 0
3342 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3344 while (nbit < n)
3346 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3347 nbit *= 2;
3349 return y;
3352 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3353 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3354 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3355 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3356 become signed.
3358 The result is put in TARGET if that is convenient.
3360 MODE is the mode of operation. */
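/* Sketch of the identity this relies on (working modulo 2**N, where N is
   the mode width and X, Y are the unsigned readings of OP0, OP1):
   signed_high (OP0 * OP1) == unsigned_high (X * Y)
   - (OP0 < 0 ? Y : 0) - (OP1 < 0 ? X : 0),
   and the unsigned direction adds those terms back instead.  */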
3363 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3364 rtx op1, rtx target, int unsignedp)
3366 rtx tem;
3367 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3369 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3370 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3371 tem = expand_and (mode, tem, op1, NULL_RTX);
3372 adj_operand
3373 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3374 adj_operand);
3376 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3377 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3378 tem = expand_and (mode, tem, op0, NULL_RTX);
3379 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3380 target);
3382 return target;
3385 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3387 static rtx
3388 extract_high_half (enum machine_mode mode, rtx op)
3390 enum machine_mode wider_mode;
3392 if (mode == word_mode)
3393 return gen_highpart (mode, op);
3395 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3397 wider_mode = GET_MODE_WIDER_MODE (mode);
3398 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3399 GET_MODE_BITSIZE (mode), 0, 1);
3400 return convert_modes (mode, wider_mode, op, 0);
3403 /* Like expmed_mult_highpart, but only consider using a multiplication
3404 optab. OP1 is an rtx for the constant operand. */
3406 static rtx
3407 expmed_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3408 rtx target, int unsignedp, int max_cost)
3410 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3411 enum machine_mode wider_mode;
3412 optab moptab;
3413 rtx tem;
3414 int size;
3415 bool speed = optimize_insn_for_speed_p ();
3417 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3419 wider_mode = GET_MODE_WIDER_MODE (mode);
3420 size = GET_MODE_BITSIZE (mode);
3422 /* Firstly, try using a multiplication insn that only generates the needed
3423 high part of the product, and in the sign flavor of unsignedp. */
3424 if (mul_highpart_cost (speed, mode) < max_cost)
3426 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3427 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3428 unsignedp, OPTAB_DIRECT);
3429 if (tem)
3430 return tem;
3433 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3434 Need to adjust the result after the multiplication. */
3435 if (size - 1 < BITS_PER_WORD
3436 && (mul_highpart_cost (speed, mode)
3437 + 2 * shift_cost (speed, mode, size-1)
3438 + 4 * add_cost (speed, mode) < max_cost))
3440 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3441 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3442 unsignedp, OPTAB_DIRECT);
3443 if (tem)
3444 /* We used the wrong signedness. Adjust the result. */
3445 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3446 tem, unsignedp);
3449 /* Try widening multiplication. */
3450 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3451 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3452 && mul_widen_cost (speed, wider_mode) < max_cost)
3454 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3455 unsignedp, OPTAB_WIDEN);
3456 if (tem)
3457 return extract_high_half (mode, tem);
3460 /* Try widening the mode and perform a non-widening multiplication. */
3461 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3462 && size - 1 < BITS_PER_WORD
3463 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3464 < max_cost))
3466 rtx insns, wop0, wop1;
3468 /* We need to widen the operands, for example to ensure the
3469 constant multiplier is correctly sign or zero extended.
3470 Use a sequence to clean-up any instructions emitted by
3471 the conversions if things don't work out. */
3472 start_sequence ();
3473 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3474 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3475 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3476 unsignedp, OPTAB_WIDEN);
3477 insns = get_insns ();
3478 end_sequence ();
3480 if (tem)
3482 emit_insn (insns);
3483 return extract_high_half (mode, tem);
3487 /* Try widening multiplication of opposite signedness, and adjust. */
3488 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3489 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3490 && size - 1 < BITS_PER_WORD
3491 && (mul_widen_cost (speed, wider_mode)
3492 + 2 * shift_cost (speed, mode, size-1)
3493 + 4 * add_cost (speed, mode) < max_cost))
3495 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3496 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3497 if (tem != 0)
3499 tem = extract_high_half (mode, tem);
3500 /* We used the wrong signedness. Adjust the result. */
3501 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3502 target, unsignedp);
3506 return 0;
3509 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3510 putting the high half of the result in TARGET if that is convenient,
3511    and return where the result is.  If the operation cannot be performed,
3512 0 is returned.
3514 MODE is the mode of operation and result.
3516 UNSIGNEDP nonzero means unsigned multiply.
3518 MAX_COST is the total allowed cost for the expanded RTL. */
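/* Informally, and only as a sketch: for a 32-bit MODE the value computed
   here is the upper half of the full 64-bit product, roughly

     result == (op0 * op1) >> 32,   computed in 64 bits,

   with the operands sign- or zero-extended according to UNSIGNEDP.  */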
3520 static rtx
3521 expmed_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3522 rtx target, int unsignedp, int max_cost)
3524 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3525 unsigned HOST_WIDE_INT cnst1;
3526 int extra_cost;
3527 bool sign_adjust = false;
3528 enum mult_variant variant;
3529 struct algorithm alg;
3530 rtx tem;
3531 bool speed = optimize_insn_for_speed_p ();
3533 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3534   /* We can't support modes wider than HOST_BITS_PER_WIDE_INT.  */
3535 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3537 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3539 /* We can't optimize modes wider than BITS_PER_WORD.
3540 ??? We might be able to perform double-word arithmetic if
3541      mode == word_mode; however, all the cost calculations in
3542 synth_mult etc. assume single-word operations. */
3543 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3544 return expmed_mult_highpart_optab (mode, op0, op1, target,
3545 unsignedp, max_cost);
3547 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3549 /* Check whether we try to multiply by a negative constant. */
3550 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3552 sign_adjust = true;
3553 extra_cost += add_cost (speed, mode);
3556 /* See whether shift/add multiplication is cheap enough. */
3557 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3558 max_cost - extra_cost))
3560 /* See whether the specialized multiplication optabs are
3561 cheaper than the shift/add version. */
3562 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3563 alg.cost.cost + extra_cost);
3564 if (tem)
3565 return tem;
3567 tem = convert_to_mode (wider_mode, op0, unsignedp);
3568 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3569 tem = extract_high_half (mode, tem);
3571 /* Adjust result for signedness. */
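/* Why the subtraction below is enough: CNST1 is OP1 reduced modulo
   2**size, so when the original constant was negative the product formed
   above actually used CNST1 + 2**size; its high half is therefore too
   large by exactly OP0 (modulo the mode), i.e. roughly
     high (op0 * c) == high (op0 * (c + 2**size)) - op0.  */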
3572 if (sign_adjust)
3573 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3575 return tem;
3577 return expmed_mult_highpart_optab (mode, op0, op1, target,
3578 unsignedp, max_cost);
3582 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
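/* Informally: the result follows the truncating (C-style) convention and
   so has the sign of OP0; e.g. with D == 4, 7 % 4 == 3 and -7 % 4 == -3.  */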
3584 static rtx
3585 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3587 unsigned HOST_WIDE_INT masklow, maskhigh;
3588 rtx result, temp, shift, label;
3589 int logd;
3591 logd = floor_log2 (d);
3592 result = gen_reg_rtx (mode);
3594 /* Avoid conditional branches when they're expensive. */
3595 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3596 && optimize_insn_for_speed_p ())
3598 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3599 mode, 0, -1);
3600 if (signmask)
3602 signmask = force_reg (mode, signmask);
3603 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3604 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3606 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3607 which instruction sequence to use. If logical right shifts
3608          are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3609          use an LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
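/* A sketch of the two sequences, writing S for the 0/-1 sign mask
   computed above, M for D - 1 and BITS for the mode bitsize:

     expensive shifts:  ((((OP0 ^ S) - S) & M) ^ S) - S
     cheap shifts:      let S2 = S >> (BITS - LOGD)   (logical shift),
                        then compute ((OP0 + S2) & M) - S2

   e.g. OP0 == -7, D == 4: both yield -3.  */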
3611 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3612 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3613 || (set_src_cost (temp, optimize_insn_for_speed_p ())
3614 > COSTS_N_INSNS (2)))
3616 temp = expand_binop (mode, xor_optab, op0, signmask,
3617 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3618 temp = expand_binop (mode, sub_optab, temp, signmask,
3619 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3620 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3621 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3622 temp = expand_binop (mode, xor_optab, temp, signmask,
3623 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3624 temp = expand_binop (mode, sub_optab, temp, signmask,
3625 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3627 else
3629 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3630 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3631 signmask = force_reg (mode, signmask);
3633 temp = expand_binop (mode, add_optab, op0, signmask,
3634 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3635 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3636 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3637 temp = expand_binop (mode, sub_optab, temp, signmask,
3638 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3640 return temp;
3644 /* Mask contains the mode's signbit and the significant bits of the
3645 modulus. By including the signbit in the operation, many targets
3646 can avoid an explicit compare operation in the following comparison
3647 against zero. */
3649 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3650 if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3652 masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3653 maskhigh = -1;
3655 else
3656 maskhigh = (HOST_WIDE_INT) -1
3657 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3659 temp = expand_binop (mode, and_optab, op0,
3660 immed_double_const (masklow, maskhigh, mode),
3661 result, 1, OPTAB_LIB_WIDEN);
3662 if (temp != result)
3663 emit_move_insn (result, temp);
3665 label = gen_label_rtx ();
3666 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3668 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3669 0, OPTAB_LIB_WIDEN);
3670 masklow = (HOST_WIDE_INT) -1 << logd;
3671 maskhigh = -1;
3672 temp = expand_binop (mode, ior_optab, temp,
3673 immed_double_const (masklow, maskhigh, mode),
3674 result, 1, OPTAB_LIB_WIDEN);
3675 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3676 0, OPTAB_LIB_WIDEN);
3677 if (temp != result)
3678 emit_move_insn (result, temp);
3679 emit_label (label);
3680 return result;
3683 /* Expand signed division of OP0 by a power of two D in mode MODE.
3684 This routine is only called for positive values of D. */
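/* Informally: this is the truncating (round-towards-zero) quotient.
   The branch-free variants below bias negative dividends by D - 1 before
   the arithmetic shift; e.g. with D == 4,
     7 / 4  ==  7 >> 2        ==  1
    -7 / 4  == (-7 + 3) >> 2  == -1   (a bare shift would give -2).  */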
3686 static rtx
3687 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3689 rtx temp, label;
3690 int logd;
3692 logd = floor_log2 (d);
3694 if (d == 2
3695 && BRANCH_COST (optimize_insn_for_speed_p (),
3696 false) >= 1)
3698 temp = gen_reg_rtx (mode);
3699 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3700 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3701 0, OPTAB_LIB_WIDEN);
3702 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3705 #ifdef HAVE_conditional_move
3706 if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3707 >= 2)
3709 rtx temp2;
3711 /* ??? emit_conditional_move forces a stack adjustment via
3712 compare_from_rtx so, if the sequence is discarded, it will
3713 be lost. Do it now instead. */
3714 do_pending_stack_adjust ();
3716 start_sequence ();
3717 temp2 = copy_to_mode_reg (mode, op0);
3718 temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3719 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3720 temp = force_reg (mode, temp);
3722 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3723 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3724 mode, temp, temp2, mode, 0);
3725 if (temp2)
3727 rtx seq = get_insns ();
3728 end_sequence ();
3729 emit_insn (seq);
3730 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3732 end_sequence ();
3734 #endif
3736 if (BRANCH_COST (optimize_insn_for_speed_p (),
3737 false) >= 2)
3739 int ushift = GET_MODE_BITSIZE (mode) - logd;
3741 temp = gen_reg_rtx (mode);
3742 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3743 if (shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3744 > COSTS_N_INSNS (1))
3745 temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3746 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3747 else
3748 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3749 ushift, NULL_RTX, 1);
3750 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3751 0, OPTAB_LIB_WIDEN);
3752 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3755 label = gen_label_rtx ();
3756 temp = copy_to_mode_reg (mode, op0);
3757 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3758 expand_inc (temp, GEN_INT (d - 1));
3759 emit_label (label);
3760 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3763 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3764 if that is convenient, and returning where the result is.
3765 You may request either the quotient or the remainder as the result;
3766 specify REM_FLAG nonzero to get the remainder.
3768 CODE is the expression code for which kind of division this is;
3769 it controls how rounding is done. MODE is the machine mode to use.
3770 UNSIGNEDP nonzero means do unsigned division. */
3772 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3773 and then correct it by or'ing in missing high bits
3774 if result of ANDI is nonzero.
3775 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3776 This could optimize to a bfexts instruction.
3777 But C doesn't use these operations, so their optimizations are
3778 left for later. */
3779 /* ??? For modulo, we don't actually need the highpart of the first product,
3780 the low part will do nicely. And for small divisors, the second multiply
3781 can also be a low-part only multiply or even be completely left out.
3782 E.g. to calculate the remainder of a division by 3 with a 32 bit
3783 multiply, multiply with 0x55555556 and extract the upper two bits;
3784 the result is exact for inputs up to 0x1fffffff.
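   (As a quick check of that claim: for x == 7 the low 32 bits of
   7 * 0x55555556 are 0x5555555A, whose top two bits are 01, i.e. 1 == 7 % 3.)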
3785 The input range can be reduced by using cross-sum rules.
3786 For odd divisors >= 3, the following table gives right shift counts
3787 so that if a number is shifted by an integer multiple of the given
3788 amount, the remainder stays the same:
3789 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3790 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3791 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3792 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3793 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3795 Cross-sum rules for even numbers can be derived by leaving as many bits
3796 to the right alone as the divisor has zeros to the right.
3797 E.g. if x is an unsigned 32 bit number:
3798 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3802 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3803 rtx op0, rtx op1, rtx target, int unsignedp)
3805 enum machine_mode compute_mode;
3806 rtx tquotient;
3807 rtx quotient = 0, remainder = 0;
3808 rtx last;
3809 int size;
3810 rtx insn;
3811 optab optab1, optab2;
3812 int op1_is_constant, op1_is_pow2 = 0;
3813 int max_cost, extra_cost;
3814 static HOST_WIDE_INT last_div_const = 0;
3815 static HOST_WIDE_INT ext_op1;
3816 bool speed = optimize_insn_for_speed_p ();
3818 op1_is_constant = CONST_INT_P (op1);
3819 if (op1_is_constant)
3821 ext_op1 = INTVAL (op1);
3822 if (unsignedp)
3823 ext_op1 &= GET_MODE_MASK (mode);
3824 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3825 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3829 This is the structure of expand_divmod:
3831 First comes code to fix up the operands so we can perform the operations
3832 correctly and efficiently.
3834 Second comes a switch statement with code specific for each rounding mode.
3835 For some special operands this code emits all RTL for the desired
3836 operation, for other cases, it generates only a quotient and stores it in
3837 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3838 to indicate that it has not done anything.
3840 Last comes code that finishes the operation. If QUOTIENT is set and
3841 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3842 QUOTIENT is not set, it is computed using trunc rounding.
3844 We try to generate special code for division and remainder when OP1 is a
3845 constant. If |OP1| = 2**n we can use shifts and some other fast
3846 operations. For other values of OP1, we compute a carefully selected
3847 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3848 by m.
3850 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3851 half of the product. Different strategies for generating the product are
3852 implemented in expmed_mult_highpart.
3854 If what we actually want is the remainder, we generate that by another
3855 by-constant multiplication and a subtraction. */
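/* One concrete instance of the reciprocal idea (a sketch only, not tied
   to any particular target): for a 32-bit mode, unsigned division by 3
   can be done as the high part of a widening multiply,

     x / 3 == (32x32->64 product of x and 0xAAAAAAAB) >> 33,

   which is exact for every 32-bit x.  The code below picks such a
   multiplier with choose_multiplier and emits the high-part multiply
   through expmed_mult_highpart.  */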
3857 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3858 code below will malfunction if we are, so check here and handle
3859 the special case if so. */
3860 if (op1 == const1_rtx)
3861 return rem_flag ? const0_rtx : op0;
3863 /* When dividing by -1, we could get an overflow.
3864 negv_optab can handle overflows. */
3865 if (! unsignedp && op1 == constm1_rtx)
3867 if (rem_flag)
3868 return const0_rtx;
3869 return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3870 ? negv_optab : neg_optab, op0, target, 0);
3873 if (target
3874 /* Don't use the function value register as a target
3875 since we have to read it as well as write it,
3876 and function-inlining gets confused by this. */
3877 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3878 /* Don't clobber an operand while doing a multi-step calculation. */
3879 || ((rem_flag || op1_is_constant)
3880 && (reg_mentioned_p (target, op0)
3881 || (MEM_P (op0) && MEM_P (target))))
3882 || reg_mentioned_p (target, op1)
3883 || (MEM_P (op1) && MEM_P (target))))
3884 target = 0;
3886 /* Get the mode in which to perform this computation. Normally it will
3887 be MODE, but sometimes we can't do the desired operation in MODE.
3888 If so, pick a wider mode in which we can do the operation. Convert
3889 to that mode at the start to avoid repeated conversions.
3891 First see what operations we need. These depend on the expression
3892 we are evaluating. (We assume that divxx3 insns exist under the
3893      same conditions as modxx3 insns, and that these insns don't normally
3894 fail. If these assumptions are not correct, we may generate less
3895 efficient code in some cases.)
3897 Then see if we find a mode in which we can open-code that operation
3898 (either a division, modulus, or shift). Finally, check for the smallest
3899 mode for which we can do the operation with a library call. */
3901 /* We might want to refine this now that we have division-by-constant
3902 optimization. Since expmed_mult_highpart tries so many variants, it is
3903 not straightforward to generalize this. Maybe we should make an array
3904 of possible modes in init_expmed? Save this for GCC 2.7. */
3906 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3907 ? (unsignedp ? lshr_optab : ashr_optab)
3908 : (unsignedp ? udiv_optab : sdiv_optab));
3909 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3910 ? optab1
3911 : (unsignedp ? udivmod_optab : sdivmod_optab));
3913 for (compute_mode = mode; compute_mode != VOIDmode;
3914 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3915 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
3916 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
3917 break;
3919 if (compute_mode == VOIDmode)
3920 for (compute_mode = mode; compute_mode != VOIDmode;
3921 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3922 if (optab_libfunc (optab1, compute_mode)
3923 || optab_libfunc (optab2, compute_mode))
3924 break;
3926 /* If we still couldn't find a mode, use MODE, but expand_binop will
3927 probably die. */
3928 if (compute_mode == VOIDmode)
3929 compute_mode = mode;
3931 if (target && GET_MODE (target) == compute_mode)
3932 tquotient = target;
3933 else
3934 tquotient = gen_reg_rtx (compute_mode);
3936 size = GET_MODE_BITSIZE (compute_mode);
3937 #if 0
3938 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3939 (mode), and thereby get better code when OP1 is a constant. Do that
3940 later. It will require going over all usages of SIZE below. */
3941 size = GET_MODE_BITSIZE (mode);
3942 #endif
3944 /* Only deduct something for a REM if the last divide done was
3945 for a different constant. Then set the constant of the last
3946 divide. */
3947 max_cost = (unsignedp
3948 ? udiv_cost (speed, compute_mode)
3949 : sdiv_cost (speed, compute_mode));
3950 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
3951 && INTVAL (op1) == last_div_const))
3952 max_cost -= (mul_cost (speed, compute_mode)
3953 + add_cost (speed, compute_mode));
3955 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3957 /* Now convert to the best mode to use. */
3958 if (compute_mode != mode)
3960 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3961 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3963 /* convert_modes may have placed op1 into a register, so we
3964 must recompute the following. */
3965 op1_is_constant = CONST_INT_P (op1);
3966 op1_is_pow2 = (op1_is_constant
3967 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3968 || (! unsignedp
3969 && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3972 /* If one of the operands is a volatile MEM, copy it into a register. */
3974 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
3975 op0 = force_reg (compute_mode, op0);
3976 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
3977 op1 = force_reg (compute_mode, op1);
3979 /* If we need the remainder or if OP1 is constant, we need to
3980 put OP0 in a register in case it has any queued subexpressions. */
3981 if (rem_flag || op1_is_constant)
3982 op0 = force_reg (compute_mode, op0);
3984 last = get_last_insn ();
3986 /* Promote floor rounding to trunc rounding for unsigned operations. */
3987 if (unsignedp)
3989 if (code == FLOOR_DIV_EXPR)
3990 code = TRUNC_DIV_EXPR;
3991 if (code == FLOOR_MOD_EXPR)
3992 code = TRUNC_MOD_EXPR;
3993 if (code == EXACT_DIV_EXPR && op1_is_pow2)
3994 code = TRUNC_DIV_EXPR;
3997 if (op1 != const0_rtx)
3998 switch (code)
4000 case TRUNC_MOD_EXPR:
4001 case TRUNC_DIV_EXPR:
4002 if (op1_is_constant)
4004 if (unsignedp)
4006 unsigned HOST_WIDE_INT mh, ml;
4007 int pre_shift, post_shift;
4008 int dummy;
4009 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4010 & GET_MODE_MASK (compute_mode));
4012 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4014 pre_shift = floor_log2 (d);
4015 if (rem_flag)
4017 remainder
4018 = expand_binop (compute_mode, and_optab, op0,
4019 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4020 remainder, 1,
4021 OPTAB_LIB_WIDEN);
4022 if (remainder)
4023 return gen_lowpart (mode, remainder);
4025 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4026 pre_shift, tquotient, 1);
4028 else if (size <= HOST_BITS_PER_WIDE_INT)
4030 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4032 /* Most significant bit of divisor is set; emit an scc
4033 insn. */
4034 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4035 compute_mode, 1, 1);
4037 else
4039 /* Find a suitable multiplier and right shift count
4040 instead of multiplying with D. */
4042 mh = choose_multiplier (d, size, size,
4043 &ml, &post_shift, &dummy);
4045 /* If the suggested multiplier is more than SIZE bits,
4046 we can do better for even divisors, using an
4047 initial right shift. */
4048 if (mh != 0 && (d & 1) == 0)
4050 pre_shift = floor_log2 (d & -d);
4051 mh = choose_multiplier (d >> pre_shift, size,
4052 size - pre_shift,
4053 &ml, &post_shift, &dummy);
4054 gcc_assert (!mh);
4056 else
4057 pre_shift = 0;
4059 if (mh != 0)
4061 rtx t1, t2, t3, t4;
4063 if (post_shift - 1 >= BITS_PER_WORD)
4064 goto fail1;
4066 extra_cost
4067 = (shift_cost (speed, compute_mode, post_shift - 1)
4068 + shift_cost (speed, compute_mode, 1)
4069 + 2 * add_cost (speed, compute_mode));
4070 t1 = expmed_mult_highpart (compute_mode, op0,
4071 GEN_INT (ml),
4072 NULL_RTX, 1,
4073 max_cost - extra_cost);
4074 if (t1 == 0)
4075 goto fail1;
4076 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4077 op0, t1),
4078 NULL_RTX);
4079 t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4080 t2, 1, NULL_RTX, 1);
4081 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4082 t1, t3),
4083 NULL_RTX);
4084 quotient = expand_shift
4085 (RSHIFT_EXPR, compute_mode, t4,
4086 post_shift - 1, tquotient, 1);
4088 else
4090 rtx t1, t2;
4092 if (pre_shift >= BITS_PER_WORD
4093 || post_shift >= BITS_PER_WORD)
4094 goto fail1;
4096 t1 = expand_shift
4097 (RSHIFT_EXPR, compute_mode, op0,
4098 pre_shift, NULL_RTX, 1);
4099 extra_cost
4100 = (shift_cost (speed, compute_mode, pre_shift)
4101 + shift_cost (speed, compute_mode, post_shift));
4102 t2 = expmed_mult_highpart (compute_mode, t1,
4103 GEN_INT (ml),
4104 NULL_RTX, 1,
4105 max_cost - extra_cost);
4106 if (t2 == 0)
4107 goto fail1;
4108 quotient = expand_shift
4109 (RSHIFT_EXPR, compute_mode, t2,
4110 post_shift, tquotient, 1);
4114 else /* Too wide mode to use tricky code */
4115 break;
4117 insn = get_last_insn ();
4118 if (insn != last)
4119 set_dst_reg_note (insn, REG_EQUAL,
4120 gen_rtx_UDIV (compute_mode, op0, op1),
4121 quotient);
4123 else /* TRUNC_DIV, signed */
4125 unsigned HOST_WIDE_INT ml;
4126 int lgup, post_shift;
4127 rtx mlr;
4128 HOST_WIDE_INT d = INTVAL (op1);
4129 unsigned HOST_WIDE_INT abs_d;
4131 /* Since d might be INT_MIN, we have to cast to
4132 unsigned HOST_WIDE_INT before negating to avoid
4133 undefined signed overflow. */
4134 abs_d = (d >= 0
4135 ? (unsigned HOST_WIDE_INT) d
4136 : - (unsigned HOST_WIDE_INT) d);
4138 /* n rem d = n rem -d */
4139 if (rem_flag && d < 0)
4141 d = abs_d;
4142 op1 = gen_int_mode (abs_d, compute_mode);
4145 if (d == 1)
4146 quotient = op0;
4147 else if (d == -1)
4148 quotient = expand_unop (compute_mode, neg_optab, op0,
4149 tquotient, 0);
4150 else if (HOST_BITS_PER_WIDE_INT >= size
4151 && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4153 /* This case is not handled correctly below. */
4154 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4155 compute_mode, 1, 1);
4156 if (quotient == 0)
4157 goto fail1;
4159 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4160 && (rem_flag
4161 ? smod_pow2_cheap (speed, compute_mode)
4162 : sdiv_pow2_cheap (speed, compute_mode))
4163                     /* We assume that the cheap metric is true if the
4164 optab has an expander for this mode. */
4165 && ((optab_handler ((rem_flag ? smod_optab
4166 : sdiv_optab),
4167 compute_mode)
4168 != CODE_FOR_nothing)
4169 || (optab_handler (sdivmod_optab,
4170 compute_mode)
4171 != CODE_FOR_nothing)))
4173 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4175 if (rem_flag)
4177 remainder = expand_smod_pow2 (compute_mode, op0, d);
4178 if (remainder)
4179 return gen_lowpart (mode, remainder);
4182 if (sdiv_pow2_cheap (speed, compute_mode)
4183 && ((optab_handler (sdiv_optab, compute_mode)
4184 != CODE_FOR_nothing)
4185 || (optab_handler (sdivmod_optab, compute_mode)
4186 != CODE_FOR_nothing)))
4187 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4188 compute_mode, op0,
4189 gen_int_mode (abs_d,
4190 compute_mode),
4191 NULL_RTX, 0);
4192 else
4193 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4195 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4196 negate the quotient. */
4197 if (d < 0)
4199 insn = get_last_insn ();
4200 if (insn != last
4201 && abs_d < ((unsigned HOST_WIDE_INT) 1
4202 << (HOST_BITS_PER_WIDE_INT - 1)))
4203 set_dst_reg_note (insn, REG_EQUAL,
4204 gen_rtx_DIV (compute_mode, op0,
4205 gen_int_mode
4206 (abs_d,
4207 compute_mode)),
4208 quotient);
4210 quotient = expand_unop (compute_mode, neg_optab,
4211 quotient, quotient, 0);
4214 else if (size <= HOST_BITS_PER_WIDE_INT)
4216 choose_multiplier (abs_d, size, size - 1,
4217 &ml, &post_shift, &lgup);
4218 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4220 rtx t1, t2, t3;
4222 if (post_shift >= BITS_PER_WORD
4223 || size - 1 >= BITS_PER_WORD)
4224 goto fail1;
4226 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4227 + shift_cost (speed, compute_mode, size - 1)
4228 + add_cost (speed, compute_mode));
4229 t1 = expmed_mult_highpart (compute_mode, op0,
4230 GEN_INT (ml), NULL_RTX, 0,
4231 max_cost - extra_cost);
4232 if (t1 == 0)
4233 goto fail1;
4234 t2 = expand_shift
4235 (RSHIFT_EXPR, compute_mode, t1,
4236 post_shift, NULL_RTX, 0);
4237 t3 = expand_shift
4238 (RSHIFT_EXPR, compute_mode, op0,
4239 size - 1, NULL_RTX, 0);
4240 if (d < 0)
4241 quotient
4242 = force_operand (gen_rtx_MINUS (compute_mode,
4243 t3, t2),
4244 tquotient);
4245 else
4246 quotient
4247 = force_operand (gen_rtx_MINUS (compute_mode,
4248 t2, t3),
4249 tquotient);
4251 else
4253 rtx t1, t2, t3, t4;
4255 if (post_shift >= BITS_PER_WORD
4256 || size - 1 >= BITS_PER_WORD)
4257 goto fail1;
4259 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4260 mlr = gen_int_mode (ml, compute_mode);
4261 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4262 + shift_cost (speed, compute_mode, size - 1)
4263 + 2 * add_cost (speed, compute_mode));
4264 t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4265 NULL_RTX, 0,
4266 max_cost - extra_cost);
4267 if (t1 == 0)
4268 goto fail1;
4269 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4270 t1, op0),
4271 NULL_RTX);
4272 t3 = expand_shift
4273 (RSHIFT_EXPR, compute_mode, t2,
4274 post_shift, NULL_RTX, 0);
4275 t4 = expand_shift
4276 (RSHIFT_EXPR, compute_mode, op0,
4277 size - 1, NULL_RTX, 0);
4278 if (d < 0)
4279 quotient
4280 = force_operand (gen_rtx_MINUS (compute_mode,
4281 t4, t3),
4282 tquotient);
4283 else
4284 quotient
4285 = force_operand (gen_rtx_MINUS (compute_mode,
4286 t3, t4),
4287 tquotient);
4290 else /* Too wide mode to use tricky code */
4291 break;
4293 insn = get_last_insn ();
4294 if (insn != last)
4295 set_dst_reg_note (insn, REG_EQUAL,
4296 gen_rtx_DIV (compute_mode, op0, op1),
4297 quotient);
4299 break;
4301 fail1:
4302 delete_insns_since (last);
4303 break;
4305 case FLOOR_DIV_EXPR:
4306 case FLOOR_MOD_EXPR:
4307 /* We will come here only for signed operations. */
4308 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4310 unsigned HOST_WIDE_INT mh, ml;
4311 int pre_shift, lgup, post_shift;
4312 HOST_WIDE_INT d = INTVAL (op1);
4314 if (d > 0)
4316 /* We could just as easily deal with negative constants here,
4317 but it does not seem worth the trouble for GCC 2.6. */
4318 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4320 pre_shift = floor_log2 (d);
4321 if (rem_flag)
4323 remainder = expand_binop (compute_mode, and_optab, op0,
4324 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4325 remainder, 0, OPTAB_LIB_WIDEN);
4326 if (remainder)
4327 return gen_lowpart (mode, remainder);
4329 quotient = expand_shift
4330 (RSHIFT_EXPR, compute_mode, op0,
4331 pre_shift, tquotient, 0);
4333 else
4335 rtx t1, t2, t3, t4;
4337 mh = choose_multiplier (d, size, size - 1,
4338 &ml, &post_shift, &lgup);
4339 gcc_assert (!mh);
4341 if (post_shift < BITS_PER_WORD
4342 && size - 1 < BITS_PER_WORD)
4344 t1 = expand_shift
4345 (RSHIFT_EXPR, compute_mode, op0,
4346 size - 1, NULL_RTX, 0);
4347 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4348 NULL_RTX, 0, OPTAB_WIDEN);
4349 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4350 + shift_cost (speed, compute_mode, size - 1)
4351 + 2 * add_cost (speed, compute_mode));
4352 t3 = expmed_mult_highpart (compute_mode, t2,
4353 GEN_INT (ml), NULL_RTX, 1,
4354 max_cost - extra_cost);
4355 if (t3 != 0)
4357 t4 = expand_shift
4358 (RSHIFT_EXPR, compute_mode, t3,
4359 post_shift, NULL_RTX, 1);
4360 quotient = expand_binop (compute_mode, xor_optab,
4361 t4, t1, tquotient, 0,
4362 OPTAB_WIDEN);
4367 else
4369 rtx nsign, t1, t2, t3, t4;
4370 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4371 op0, constm1_rtx), NULL_RTX);
4372 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4373 0, OPTAB_WIDEN);
4374 nsign = expand_shift
4375 (RSHIFT_EXPR, compute_mode, t2,
4376 size - 1, NULL_RTX, 0);
4377 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4378 NULL_RTX);
4379 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4380 NULL_RTX, 0);
4381 if (t4)
4383 rtx t5;
4384 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4385 NULL_RTX, 0);
4386 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4387 t4, t5),
4388 tquotient);
4393 if (quotient != 0)
4394 break;
4395 delete_insns_since (last);
4397 /* Try using an instruction that produces both the quotient and
4398 remainder, using truncation. We can easily compensate the quotient
4399 or remainder to get floor rounding, once we have the remainder.
4400 Notice that we compute also the final remainder value here,
4401 and return the result right away. */
4402 if (target == 0 || GET_MODE (target) != compute_mode)
4403 target = gen_reg_rtx (compute_mode);
4405 if (rem_flag)
4407 remainder
4408 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4409 quotient = gen_reg_rtx (compute_mode);
4411 else
4413 quotient
4414 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4415 remainder = gen_reg_rtx (compute_mode);
4418 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4419 quotient, remainder, 0))
4421 /* This could be computed with a branch-less sequence.
4422 Save that for later. */
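/* What the adjustment below does: after a truncating divmod, floor
   rounding differs only when the remainder is nonzero and OP0 and OP1
   have opposite signs; then the quotient is decremented and OP1 added to
   the remainder.  E.g. -7 floor-div 2: trunc gives q == -3, r == -1;
   the signs differ, so the result is q == -4, r == 1.  */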
4423 rtx tem;
4424 rtx label = gen_label_rtx ();
4425 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4426 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4427 NULL_RTX, 0, OPTAB_WIDEN);
4428 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4429 expand_dec (quotient, const1_rtx);
4430 expand_inc (remainder, op1);
4431 emit_label (label);
4432 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4435 /* No luck with division elimination or divmod. Have to do it
4436 by conditionally adjusting op0 *and* the result. */
4438 rtx label1, label2, label3, label4, label5;
4439 rtx adjusted_op0;
4440 rtx tem;
4442 quotient = gen_reg_rtx (compute_mode);
4443 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4444 label1 = gen_label_rtx ();
4445 label2 = gen_label_rtx ();
4446 label3 = gen_label_rtx ();
4447 label4 = gen_label_rtx ();
4448 label5 = gen_label_rtx ();
4449 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4450 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4451 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4452 quotient, 0, OPTAB_LIB_WIDEN);
4453 if (tem != quotient)
4454 emit_move_insn (quotient, tem);
4455 emit_jump_insn (gen_jump (label5));
4456 emit_barrier ();
4457 emit_label (label1);
4458 expand_inc (adjusted_op0, const1_rtx);
4459 emit_jump_insn (gen_jump (label4));
4460 emit_barrier ();
4461 emit_label (label2);
4462 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4463 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4464 quotient, 0, OPTAB_LIB_WIDEN);
4465 if (tem != quotient)
4466 emit_move_insn (quotient, tem);
4467 emit_jump_insn (gen_jump (label5));
4468 emit_barrier ();
4469 emit_label (label3);
4470 expand_dec (adjusted_op0, const1_rtx);
4471 emit_label (label4);
4472 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4473 quotient, 0, OPTAB_LIB_WIDEN);
4474 if (tem != quotient)
4475 emit_move_insn (quotient, tem);
4476 expand_dec (quotient, const1_rtx);
4477 emit_label (label5);
4479 break;
4481 case CEIL_DIV_EXPR:
4482 case CEIL_MOD_EXPR:
4483 if (unsignedp)
4485 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4487 rtx t1, t2, t3;
4488 unsigned HOST_WIDE_INT d = INTVAL (op1);
4489 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4490 floor_log2 (d), tquotient, 1);
4491 t2 = expand_binop (compute_mode, and_optab, op0,
4492 GEN_INT (d - 1),
4493 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4494 t3 = gen_reg_rtx (compute_mode);
4495 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4496 compute_mode, 1, 1);
4497 if (t3 == 0)
4499 rtx lab;
4500 lab = gen_label_rtx ();
4501 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4502 expand_inc (t1, const1_rtx);
4503 emit_label (lab);
4504 quotient = t1;
4506 else
4507 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4508 t1, t3),
4509 tquotient);
4510 break;
4513 /* Try using an instruction that produces both the quotient and
4514 remainder, using truncation. We can easily compensate the
4515 quotient or remainder to get ceiling rounding, once we have the
4516 remainder. Notice that we compute also the final remainder
4517 value here, and return the result right away. */
4518 if (target == 0 || GET_MODE (target) != compute_mode)
4519 target = gen_reg_rtx (compute_mode);
4521 if (rem_flag)
4523 remainder = (REG_P (target)
4524 ? target : gen_reg_rtx (compute_mode));
4525 quotient = gen_reg_rtx (compute_mode);
4527 else
4529 quotient = (REG_P (target)
4530 ? target : gen_reg_rtx (compute_mode));
4531 remainder = gen_reg_rtx (compute_mode);
4534 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4535 remainder, 1))
4537 /* This could be computed with a branch-less sequence.
4538 Save that for later. */
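/* Here the fix-up is simpler: for unsigned operands, ceiling rounding
   differs from truncation exactly when the remainder is nonzero, in
   which case the quotient is incremented and OP1 subtracted from the
   remainder.  E.g. 7 ceil-div 2: trunc gives q == 3, r == 1, so the
   result is q == 4 with remainder 7 - 4*2.  */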
4539 rtx label = gen_label_rtx ();
4540 do_cmp_and_jump (remainder, const0_rtx, EQ,
4541 compute_mode, label);
4542 expand_inc (quotient, const1_rtx);
4543 expand_dec (remainder, op1);
4544 emit_label (label);
4545 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4548 /* No luck with division elimination or divmod. Have to do it
4549 by conditionally adjusting op0 *and* the result. */
4551 rtx label1, label2;
4552 rtx adjusted_op0, tem;
4554 quotient = gen_reg_rtx (compute_mode);
4555 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4556 label1 = gen_label_rtx ();
4557 label2 = gen_label_rtx ();
4558 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4559 compute_mode, label1);
4560 emit_move_insn (quotient, const0_rtx);
4561 emit_jump_insn (gen_jump (label2));
4562 emit_barrier ();
4563 emit_label (label1);
4564 expand_dec (adjusted_op0, const1_rtx);
4565 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4566 quotient, 1, OPTAB_LIB_WIDEN);
4567 if (tem != quotient)
4568 emit_move_insn (quotient, tem);
4569 expand_inc (quotient, const1_rtx);
4570 emit_label (label2);
4573 else /* signed */
4575 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4576 && INTVAL (op1) >= 0)
4578 /* This is extremely similar to the code for the unsigned case
4579 above. For 2.7 we should merge these variants, but for
4580 2.6.1 I don't want to touch the code for unsigned since that
4581            gets used in C.  The signed case will only be used by other
4582 languages (Ada). */
4584 rtx t1, t2, t3;
4585 unsigned HOST_WIDE_INT d = INTVAL (op1);
4586 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4587 floor_log2 (d), tquotient, 0);
4588 t2 = expand_binop (compute_mode, and_optab, op0,
4589 GEN_INT (d - 1),
4590 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4591 t3 = gen_reg_rtx (compute_mode);
4592 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4593 compute_mode, 1, 1);
4594 if (t3 == 0)
4596 rtx lab;
4597 lab = gen_label_rtx ();
4598 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4599 expand_inc (t1, const1_rtx);
4600 emit_label (lab);
4601 quotient = t1;
4603 else
4604 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4605 t1, t3),
4606 tquotient);
4607 break;
4610 /* Try using an instruction that produces both the quotient and
4611 remainder, using truncation. We can easily compensate the
4612 quotient or remainder to get ceiling rounding, once we have the
4613 remainder. Notice that we compute also the final remainder
4614 value here, and return the result right away. */
4615 if (target == 0 || GET_MODE (target) != compute_mode)
4616 target = gen_reg_rtx (compute_mode);
4617 if (rem_flag)
4619            remainder = (REG_P (target)
4620 ? target : gen_reg_rtx (compute_mode));
4621 quotient = gen_reg_rtx (compute_mode);
4623 else
4625 quotient = (REG_P (target)
4626 ? target : gen_reg_rtx (compute_mode));
4627 remainder = gen_reg_rtx (compute_mode);
4630 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4631 remainder, 0))
4633 /* This could be computed with a branch-less sequence.
4634 Save that for later. */
4635 rtx tem;
4636 rtx label = gen_label_rtx ();
4637 do_cmp_and_jump (remainder, const0_rtx, EQ,
4638 compute_mode, label);
4639 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4640 NULL_RTX, 0, OPTAB_WIDEN);
4641 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4642 expand_inc (quotient, const1_rtx);
4643 expand_dec (remainder, op1);
4644 emit_label (label);
4645 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4648 /* No luck with division elimination or divmod. Have to do it
4649 by conditionally adjusting op0 *and* the result. */
4651 rtx label1, label2, label3, label4, label5;
4652 rtx adjusted_op0;
4653 rtx tem;
4655 quotient = gen_reg_rtx (compute_mode);
4656 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4657 label1 = gen_label_rtx ();
4658 label2 = gen_label_rtx ();
4659 label3 = gen_label_rtx ();
4660 label4 = gen_label_rtx ();
4661 label5 = gen_label_rtx ();
4662 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4663 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4664 compute_mode, label1);
4665 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4666 quotient, 0, OPTAB_LIB_WIDEN);
4667 if (tem != quotient)
4668 emit_move_insn (quotient, tem);
4669 emit_jump_insn (gen_jump (label5));
4670 emit_barrier ();
4671 emit_label (label1);
4672 expand_dec (adjusted_op0, const1_rtx);
4673 emit_jump_insn (gen_jump (label4));
4674 emit_barrier ();
4675 emit_label (label2);
4676 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4677 compute_mode, label3);
4678 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4679 quotient, 0, OPTAB_LIB_WIDEN);
4680 if (tem != quotient)
4681 emit_move_insn (quotient, tem);
4682 emit_jump_insn (gen_jump (label5));
4683 emit_barrier ();
4684 emit_label (label3);
4685 expand_inc (adjusted_op0, const1_rtx);
4686 emit_label (label4);
4687 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4688 quotient, 0, OPTAB_LIB_WIDEN);
4689 if (tem != quotient)
4690 emit_move_insn (quotient, tem);
4691 expand_inc (quotient, const1_rtx);
4692 emit_label (label5);
4695 break;
4697 case EXACT_DIV_EXPR:
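/* When the division is known to be exact, a multiplicative inverse modulo
   2**size replaces the division.  Sketch for a 32-bit mode and D == 3:
   3 * 0xAAAAAAAB == 1 (mod 2**32), so for any x divisible by 3,
   x / 3 == (x * 0xAAAAAAAB) mod 2**32, e.g. 15 * 0xAAAAAAAB mod 2**32 == 5.
   Even divisors are first reduced by the right shift computed below.  */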
4698 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4700 HOST_WIDE_INT d = INTVAL (op1);
4701 unsigned HOST_WIDE_INT ml;
4702 int pre_shift;
4703 rtx t1;
4705 pre_shift = floor_log2 (d & -d);
4706 ml = invert_mod2n (d >> pre_shift, size);
4707 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4708 pre_shift, NULL_RTX, unsignedp);
4709 quotient = expand_mult (compute_mode, t1,
4710 gen_int_mode (ml, compute_mode),
4711 NULL_RTX, 1);
4713 insn = get_last_insn ();
4714 set_dst_reg_note (insn, REG_EQUAL,
4715 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4716 compute_mode, op0, op1),
4717 quotient);
4719 break;
4721 case ROUND_DIV_EXPR:
4722 case ROUND_MOD_EXPR:
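/* Rounds to nearest, with halves rounded away from zero: both branches
   first form a truncating quotient and remainder and then adjust by one
   when 2*|remainder| >= |OP1|; e.g. 7 round-div 2 == 4 and
   -7 round-div 2 == -4.  */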
4723 if (unsignedp)
4725 rtx tem;
4726 rtx label;
4727 label = gen_label_rtx ();
4728 quotient = gen_reg_rtx (compute_mode);
4729 remainder = gen_reg_rtx (compute_mode);
4730 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4732 rtx tem;
4733 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4734 quotient, 1, OPTAB_LIB_WIDEN);
4735 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4736 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4737 remainder, 1, OPTAB_LIB_WIDEN);
4739 tem = plus_constant (compute_mode, op1, -1);
4740 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4741 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4742 expand_inc (quotient, const1_rtx);
4743 expand_dec (remainder, op1);
4744 emit_label (label);
4746 else
4748 rtx abs_rem, abs_op1, tem, mask;
4749 rtx label;
4750 label = gen_label_rtx ();
4751 quotient = gen_reg_rtx (compute_mode);
4752 remainder = gen_reg_rtx (compute_mode);
4753 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4755 rtx tem;
4756 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4757 quotient, 0, OPTAB_LIB_WIDEN);
4758 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4759 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4760 remainder, 0, OPTAB_LIB_WIDEN);
4762 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4763 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4764 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4765 1, NULL_RTX, 1);
4766 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4767 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4768 NULL_RTX, 0, OPTAB_WIDEN);
4769 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4770 size - 1, NULL_RTX, 0);
4771 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4772 NULL_RTX, 0, OPTAB_WIDEN);
4773 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4774 NULL_RTX, 0, OPTAB_WIDEN);
4775 expand_inc (quotient, tem);
4776 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4777 NULL_RTX, 0, OPTAB_WIDEN);
4778 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4779 NULL_RTX, 0, OPTAB_WIDEN);
4780 expand_dec (remainder, tem);
4781 emit_label (label);
4783 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4785 default:
4786 gcc_unreachable ();
4789 if (quotient == 0)
4791 if (target && GET_MODE (target) != compute_mode)
4792 target = 0;
4794 if (rem_flag)
4796 /* Try to produce the remainder without producing the quotient.
4797 If we seem to have a divmod pattern that does not require widening,
4798 don't try widening here. We should really have a WIDEN argument
4799 to expand_twoval_binop, since what we'd really like to do here is
4800 1) try a mod insn in compute_mode
4801 2) try a divmod insn in compute_mode
4802 3) try a div insn in compute_mode and multiply-subtract to get
4803 remainder
4804 4) try the same things with widening allowed. */
4805 remainder
4806 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4807 op0, op1, target,
4808 unsignedp,
4809 ((optab_handler (optab2, compute_mode)
4810 != CODE_FOR_nothing)
4811 ? OPTAB_DIRECT : OPTAB_WIDEN));
4812 if (remainder == 0)
4814 /* No luck there. Can we do remainder and divide at once
4815 without a library call? */
4816 remainder = gen_reg_rtx (compute_mode);
4817 if (! expand_twoval_binop ((unsignedp
4818 ? udivmod_optab
4819 : sdivmod_optab),
4820 op0, op1,
4821 NULL_RTX, remainder, unsignedp))
4822 remainder = 0;
4825 if (remainder)
4826 return gen_lowpart (mode, remainder);
4829 /* Produce the quotient. Try a quotient insn, but not a library call.
4830 If we have a divmod in this mode, use it in preference to widening
4831 the div (for this test we assume it will not fail). Note that optab2
4832 is set to the one of the two optabs that the call below will use. */
4833 quotient
4834 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4835 op0, op1, rem_flag ? NULL_RTX : target,
4836 unsignedp,
4837 ((optab_handler (optab2, compute_mode)
4838 != CODE_FOR_nothing)
4839 ? OPTAB_DIRECT : OPTAB_WIDEN));
4841 if (quotient == 0)
4843 /* No luck there. Try a quotient-and-remainder insn,
4844 keeping the quotient alone. */
4845 quotient = gen_reg_rtx (compute_mode);
4846 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4847 op0, op1,
4848 quotient, NULL_RTX, unsignedp))
4850 quotient = 0;
4851 if (! rem_flag)
4852 /* Still no luck. If we are not computing the remainder,
4853 use a library call for the quotient. */
4854 quotient = sign_expand_binop (compute_mode,
4855 udiv_optab, sdiv_optab,
4856 op0, op1, target,
4857 unsignedp, OPTAB_LIB_WIDEN);
4862 if (rem_flag)
4864 if (target && GET_MODE (target) != compute_mode)
4865 target = 0;
4867 if (quotient == 0)
4869 /* No divide instruction either. Use library for remainder. */
4870 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4871 op0, op1, target,
4872 unsignedp, OPTAB_LIB_WIDEN);
4873 /* No remainder function. Try a quotient-and-remainder
4874 function, keeping the remainder. */
4875 if (!remainder)
4877 remainder = gen_reg_rtx (compute_mode);
4878 if (!expand_twoval_binop_libfunc
4879 (unsignedp ? udivmod_optab : sdivmod_optab,
4880 op0, op1,
4881 NULL_RTX, remainder,
4882 unsignedp ? UMOD : MOD))
4883 remainder = NULL_RTX;
4886 else
4888 /* We divided. Now finish doing X - Y * (X / Y). */
4889 remainder = expand_mult (compute_mode, quotient, op1,
4890 NULL_RTX, unsignedp);
4891 remainder = expand_binop (compute_mode, sub_optab, op0,
4892 remainder, target, unsignedp,
4893 OPTAB_LIB_WIDEN);
4897 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4900 /* Return a tree node with data type TYPE, describing the value of X.
4901    Usually this is a VAR_DECL, if there is no obvious better choice.
4902    X may be an expression; however, we only support those expressions
4903 generated by loop.c. */
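/* A sketch only: given the rtx (plus:SI (reg:SI 60) (const_int 4)) and a
   32-bit integer TYPE, the result is a PLUS_EXPR of a VAR_DECL standing
   for the register (with its rtl set to that REG) and the integer
   constant 4.  */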
4905 tree
4906 make_tree (tree type, rtx x)
4908 tree t;
4910 switch (GET_CODE (x))
4912 case CONST_INT:
4914 HOST_WIDE_INT hi = 0;
4916 if (INTVAL (x) < 0
4917 && !(TYPE_UNSIGNED (type)
4918 && (GET_MODE_BITSIZE (TYPE_MODE (type))
4919 < HOST_BITS_PER_WIDE_INT)))
4920 hi = -1;
4922 t = build_int_cst_wide (type, INTVAL (x), hi);
4924 return t;
4927 case CONST_DOUBLE:
4928 if (GET_MODE (x) == VOIDmode)
4929 t = build_int_cst_wide (type,
4930 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4931 else
4933 REAL_VALUE_TYPE d;
4935 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4936 t = build_real (type, d);
4939 return t;
4941 case CONST_VECTOR:
4943 int units = CONST_VECTOR_NUNITS (x);
4944 tree itype = TREE_TYPE (type);
4945 tree *elts;
4946 int i;
4948 /* Build a tree with vector elements. */
4949 elts = XALLOCAVEC (tree, units);
4950 for (i = units - 1; i >= 0; --i)
4952 rtx elt = CONST_VECTOR_ELT (x, i);
4953 elts[i] = make_tree (itype, elt);
4956 return build_vector (type, elts);
4959 case PLUS:
4960 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4961 make_tree (type, XEXP (x, 1)));
4963 case MINUS:
4964 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4965 make_tree (type, XEXP (x, 1)));
4967 case NEG:
4968 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
4970 case MULT:
4971 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4972 make_tree (type, XEXP (x, 1)));
4974 case ASHIFT:
4975 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4976 make_tree (type, XEXP (x, 1)));
4978 case LSHIFTRT:
4979 t = unsigned_type_for (type);
4980 return fold_convert (type, build2 (RSHIFT_EXPR, t,
4981 make_tree (t, XEXP (x, 0)),
4982 make_tree (type, XEXP (x, 1))));
4984 case ASHIFTRT:
4985 t = signed_type_for (type);
4986 return fold_convert (type, build2 (RSHIFT_EXPR, t,
4987 make_tree (t, XEXP (x, 0)),
4988 make_tree (type, XEXP (x, 1))));
4990 case DIV:
4991 if (TREE_CODE (type) != REAL_TYPE)
4992 t = signed_type_for (type);
4993 else
4994 t = type;
4996 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
4997 make_tree (t, XEXP (x, 0)),
4998 make_tree (t, XEXP (x, 1))));
4999 case UDIV:
5000 t = unsigned_type_for (type);
5001 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5002 make_tree (t, XEXP (x, 0)),
5003 make_tree (t, XEXP (x, 1))));
5005 case SIGN_EXTEND:
5006 case ZERO_EXTEND:
5007 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5008 GET_CODE (x) == ZERO_EXTEND);
5009 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5011 case CONST:
5012 return make_tree (type, XEXP (x, 0));
5014 case SYMBOL_REF:
5015 t = SYMBOL_REF_DECL (x);
5016 if (t)
5017 return fold_convert (type, build_fold_addr_expr (t));
5018 /* else fall through. */
5020 default:
5021 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5023 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5024 address mode to pointer mode. */
5025 if (POINTER_TYPE_P (type))
5026 x = convert_memory_address_addr_space
5027 (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5029 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5030 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5031 t->decl_with_rtl.rtl = x;
5033 return t;
5037 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5038 and returning TARGET.
5040 If TARGET is 0, a pseudo-register or constant is returned. */
5043 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5045 rtx tem = 0;
5047 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5048 tem = simplify_binary_operation (AND, mode, op0, op1);
5049 if (tem == 0)
5050 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5052 if (target == 0)
5053 target = tem;
5054 else if (tem != target)
5055 emit_move_insn (target, tem);
5056 return target;
5059 /* Helper function for emit_store_flag. */
5060 static rtx
5061 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5062 enum machine_mode mode, enum machine_mode compare_mode,
5063 int unsignedp, rtx x, rtx y, int normalizep,
5064 enum machine_mode target_mode)
5066 struct expand_operand ops[4];
5067 rtx op0, last, comparison, subtarget;
5068 enum machine_mode result_mode = insn_data[(int) icode].operand[0].mode;
5070 last = get_last_insn ();
5071 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5072 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5073 if (!x || !y)
5075 delete_insns_since (last);
5076 return NULL_RTX;
5079 if (target_mode == VOIDmode)
5080 target_mode = result_mode;
5081 if (!target)
5082 target = gen_reg_rtx (target_mode);
5084 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5086 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5087 create_fixed_operand (&ops[1], comparison);
5088 create_fixed_operand (&ops[2], x);
5089 create_fixed_operand (&ops[3], y);
5090 if (!maybe_expand_insn (icode, 4, ops))
5092 delete_insns_since (last);
5093 return NULL_RTX;
5095 subtarget = ops[0].value;
5097 /* If we are converting to a wider mode, first convert to
5098 TARGET_MODE, then normalize. This produces better combining
5099 opportunities on machines that have a SIGN_EXTRACT when we are
5100 testing a single bit. This mostly benefits the 68k.
5102 If STORE_FLAG_VALUE does not have the sign bit set when
5103 interpreted in MODE, we can do this conversion as unsigned, which
5104 is usually more efficient. */
5105 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5107 convert_move (target, subtarget,
5108 val_signbit_known_clear_p (result_mode,
5109 STORE_FLAG_VALUE));
5110 op0 = target;
5111 result_mode = target_mode;
5113 else
5114 op0 = subtarget;
5116 /* If we want to keep subexpressions around, don't reuse our last
5117 target. */
5118 if (optimize)
5119 subtarget = 0;
5121 /* Now normalize to the proper value in MODE. Sometimes we don't
5122 have to do anything. */
5123 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5125 /* STORE_FLAG_VALUE might be the most negative number, so write
5126      the comparison this way to avoid a compile-time warning.  */
5127 else if (- normalizep == STORE_FLAG_VALUE)
5128 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5130 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5131 it hard to use a value of just the sign bit due to ANSI integer
5132 constant typing rules. */
5133 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5134 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5135 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5136 normalizep == 1);
5137 else
5139 gcc_assert (STORE_FLAG_VALUE & 1);
5141 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5142 if (normalizep == -1)
5143 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5146 /* If we were converting to a smaller mode, do the conversion now. */
5147 if (target_mode != result_mode)
5149 convert_move (target, op0, 0);
5150 return target;
5152 else
5153 return op0;
5157 /* A subroutine of emit_store_flag only including "tricks" that do not
5158 need a recursive call. These are kept separate to avoid infinite
5159 loops. */
5161 static rtx
5162 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5163 enum machine_mode mode, int unsignedp, int normalizep,
5164 enum machine_mode target_mode)
5166 rtx subtarget;
5167 enum insn_code icode;
5168 enum machine_mode compare_mode;
5169 enum mode_class mclass;
5170 enum rtx_code scode;
5171 rtx tem;
5173 if (unsignedp)
5174 code = unsigned_condition (code);
5175 scode = swap_condition (code);
5177 /* If one operand is constant, make it the second one. Only do this
5178 if the other operand is not constant as well. */
5180 if (swap_commutative_operands_p (op0, op1))
5182 tem = op0;
5183 op0 = op1;
5184 op1 = tem;
5185 code = swap_condition (code);
5188 if (mode == VOIDmode)
5189 mode = GET_MODE (op0);
5191 /* For some comparisons with 1 and -1, we can convert this to
5192 comparisons with zero. This will often produce more opportunities for
5193 store-flag insns. */
5195 switch (code)
5197 case LT:
5198 if (op1 == const1_rtx)
5199 op1 = const0_rtx, code = LE;
5200 break;
5201 case LE:
5202 if (op1 == constm1_rtx)
5203 op1 = const0_rtx, code = LT;
5204 break;
5205 case GE:
5206 if (op1 == const1_rtx)
5207 op1 = const0_rtx, code = GT;
5208 break;
5209 case GT:
5210 if (op1 == constm1_rtx)
5211 op1 = const0_rtx, code = GE;
5212 break;
5213 case GEU:
5214 if (op1 == const1_rtx)
5215 op1 = const0_rtx, code = NE;
5216 break;
5217 case LTU:
5218 if (op1 == const1_rtx)
5219 op1 = const0_rtx, code = EQ;
5220 break;
5221 default:
5222 break;
5225 /* If we are comparing a double-word integer with zero or -1, we can
5226 convert the comparison into one involving a single word. */
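/* Roughly: (hi:lo) == 0 iff (hi | lo) == 0, (hi:lo) == -1 iff
   (hi & lo) == -1, and a signed test against zero only needs the sign
   bit of the high word; those are the three cases handled below.  */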
5227 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5228 && GET_MODE_CLASS (mode) == MODE_INT
5229 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5231 if ((code == EQ || code == NE)
5232 && (op1 == const0_rtx || op1 == constm1_rtx))
5234 rtx op00, op01;
5236 /* Do a logical OR or AND of the two words and compare the
5237 result. */
5238 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5239 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5240 tem = expand_binop (word_mode,
5241 op1 == const0_rtx ? ior_optab : and_optab,
5242 op00, op01, NULL_RTX, unsignedp,
5243 OPTAB_DIRECT);
5245 if (tem != 0)
5246 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5247 unsignedp, normalizep);
5249 else if ((code == LT || code == GE) && op1 == const0_rtx)
5251 rtx op0h;
5253 /* If testing the sign bit, can just test on high word. */
5254 op0h = simplify_gen_subreg (word_mode, op0, mode,
5255 subreg_highpart_offset (word_mode,
5256 mode));
5257 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5258 unsignedp, normalizep);
5260 else
5261 tem = NULL_RTX;
5263 if (tem)
5265 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5266 return tem;
5267 if (!target)
5268 target = gen_reg_rtx (target_mode);
5270 convert_move (target, tem,
5271 !val_signbit_known_set_p (word_mode,
5272 (normalizep ? normalizep
5273 : STORE_FLAG_VALUE)));
5274 return target;
5278 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5279 complement of A (for GE) and shifting the sign bit to the low bit. */
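/* E.g. in a 32-bit mode, (A < 0) as a 0/1 value is simply the logical
   shift (unsigned) A >> 31, and (A >= 0) is (unsigned) ~A >> 31; an
   arithmetic shift gives the 0/-1 flavor instead.  */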
5280 if (op1 == const0_rtx && (code == LT || code == GE)
5281 && GET_MODE_CLASS (mode) == MODE_INT
5282 && (normalizep || STORE_FLAG_VALUE == 1
5283 || val_signbit_p (mode, STORE_FLAG_VALUE)))
5285 subtarget = target;
5287 if (!target)
5288 target_mode = mode;
5290 /* If the result is to be wider than OP0, it is best to convert it
5291 first. If it is to be narrower, it is *incorrect* to convert it
5292 first. */
5293 else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5295 op0 = convert_modes (target_mode, mode, op0, 0);
5296 mode = target_mode;
5299 if (target_mode != mode)
5300 subtarget = 0;
5302 if (code == GE)
5303 op0 = expand_unop (mode, one_cmpl_optab, op0,
5304 ((STORE_FLAG_VALUE == 1 || normalizep)
5305 ? 0 : subtarget), 0);
5307 if (STORE_FLAG_VALUE == 1 || normalizep)
5308 /* If we are supposed to produce a 0/1 value, we want to do
5309 a logical shift from the sign bit to the low-order bit; for
5310 a -1/0 value, we do an arithmetic shift. */
5311 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5312 GET_MODE_BITSIZE (mode) - 1,
5313 subtarget, normalizep != -1);
5315 if (mode != target_mode)
5316 op0 = convert_modes (target_mode, mode, op0, 0);
5318 return op0;
5321 mclass = GET_MODE_CLASS (mode);
5322 for (compare_mode = mode; compare_mode != VOIDmode;
5323 compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5325 enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5326 icode = optab_handler (cstore_optab, optab_mode);
5327 if (icode != CODE_FOR_nothing)
5329 do_pending_stack_adjust ();
5330 tem = emit_cstore (target, icode, code, mode, compare_mode,
5331 unsignedp, op0, op1, normalizep, target_mode);
5332 if (tem)
5333 return tem;
5335 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5337 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5338 unsignedp, op1, op0, normalizep, target_mode);
5339 if (tem)
5340 return tem;
5342 break;
5346 return 0;
5349 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5350 and storing in TARGET. Normally return TARGET.
5351 Return 0 if that cannot be done.
5353 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5354 it is VOIDmode, they cannot both be CONST_INT.
5356 UNSIGNEDP is for the case where we have to widen the operands
5357 to perform the operation. It says to use zero-extension.
5359 NORMALIZEP is 1 if we should convert the result to be either zero
5360 or one. NORMALIZEP is -1 if we should convert the result to be
5361 either zero or -1. If NORMALIZEP is zero, the result will be left
5362 "raw" out of the scc insn. */
5364 rtx
5365 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5366 enum machine_mode mode, int unsignedp, int normalizep)
5368 enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5369 enum rtx_code rcode;
5370 rtx subtarget;
5371 rtx tem, last, trueval;
5373 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5374 target_mode);
5375 if (tem)
5376 return tem;
5378 /* If we reached here, we can't do this with a scc insn, however there
5379 are some comparisons that can be done in other ways. Don't do any
5380 of these cases if branches are very cheap. */
5381 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5382 return 0;
5384 /* See what we need to return. We can only return a 1, -1, or the
5385 sign bit. */
5387 if (normalizep == 0)
5389 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5390 normalizep = STORE_FLAG_VALUE;
5392 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5393 ;
5394 else
5395 return 0;
5398 last = get_last_insn ();
5400 /* If optimizing, use different pseudo registers for each insn, instead
5401 of reusing the same pseudo. This leads to better CSE, but slows
5402 down the compiler, since there are more pseudos */
5403 subtarget = (!optimize
5404 && (target_mode == mode)) ? target : NULL_RTX;
5405 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5407 /* For floating-point comparisons, try the reverse comparison or try
5408 changing the "orderedness" of the comparison. */
5409 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5411 enum rtx_code first_code;
5412 bool and_them;
5414 rcode = reverse_condition_maybe_unordered (code);
5415 if (can_compare_p (rcode, mode, ccp_store_flag)
5416 && (code == ORDERED || code == UNORDERED
5417 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5418 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5420 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5421 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5423 /* For the reverse comparison, use either an addition or a XOR. */
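/* E.g. with STORE_FLAG_VALUE == 1 and NORMALIZEP == -1, the reversed
   comparison yields 0/1 and adding -1 turns that into the desired -1/0
   for the original condition; with NORMALIZEP == 1 instead, XORing the
   0/1 result with TRUEVAL (1) simply inverts it.  */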
5424 if (want_add
5425 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5426 optimize_insn_for_speed_p ()) == 0)
5428 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5429 STORE_FLAG_VALUE, target_mode);
5430 if (tem)
5431 return expand_binop (target_mode, add_optab, tem,
5432 GEN_INT (normalizep),
5433 target, 0, OPTAB_WIDEN);
5435 else if (!want_add
5436 && rtx_cost (trueval, XOR, 1,
5437 optimize_insn_for_speed_p ()) == 0)
5439 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5440 normalizep, target_mode);
5441 if (tem)
5442 return expand_binop (target_mode, xor_optab, tem, trueval,
5443 target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5447 delete_insns_since (last);
5449 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5450 if (code == ORDERED || code == UNORDERED)
5451 return 0;
5453 and_them = split_comparison (code, mode, &first_code, &code);
5455 /* If there are no NaNs, the first comparison should always fall through.
5456 Effectively change the comparison to the other one. */
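/* E.g. an ordered comparison splits into "ORDERED && <unordered form>";
   when NaNs cannot occur the ORDERED half is always true, so only the
   second half needs to be emitted.  */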
5457 if (!HONOR_NANS (mode))
5459 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5460 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5461 target_mode);
5464 #ifdef HAVE_conditional_move
5465 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5466 conditional move. */
5467 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5468 normalizep, target_mode);
5469 if (tem == 0)
5470 return 0;
5472 if (and_them)
5473 tem = emit_conditional_move (target, code, op0, op1, mode,
5474 tem, const0_rtx, GET_MODE (tem), 0);
5475 else
5476 tem = emit_conditional_move (target, code, op0, op1, mode,
5477 trueval, tem, GET_MODE (tem), 0);
5479 if (tem == 0)
5480 delete_insns_since (last);
5481 return tem;
5482 #else
5483 return 0;
5484 #endif
5487 /* The remaining tricks only apply to integer comparisons. */
5489 if (GET_MODE_CLASS (mode) != MODE_INT)
5490 return 0;
5492 /* If this is an equality comparison of integers, we can try to exclusive-or
5493 (or subtract) the two operands and use a recursive call to try the
5494 comparison with zero. Don't do any of these cases if branches are
5495 very cheap. */
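/* For example, "A == B" becomes "(A ^ B) == 0" and "A != B" becomes
   "(A ^ B) != 0"; "A - B" serves equally well, since only equality with
   zero is tested and wraparound therefore does not matter.  */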
5497 if ((code == EQ || code == NE) && op1 != const0_rtx)
5499 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5500 OPTAB_WIDEN);
5502 if (tem == 0)
5503 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5504 OPTAB_WIDEN);
5505 if (tem != 0)
5506 tem = emit_store_flag (target, code, tem, const0_rtx,
5507 mode, unsignedp, normalizep);
5508 if (tem != 0)
5509 return tem;
5511 delete_insns_since (last);
5514 /* For integer comparisons, try the reverse comparison. However, for
5515 small X, if we would have to extend it anyway, implementing "X != 0"
5516 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
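/* As an illustration (assuming the "small X" above means a zero-extended
   subword value), -(int)X is negative exactly when X is nonzero, so one
   negation plus one arithmetic shift produces the -1/0 result directly.  */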
5517 rcode = reverse_condition (code);
5518 if (can_compare_p (rcode, mode, ccp_store_flag)
5519 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5520 && code == NE
5521 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5522 && op1 == const0_rtx))
5524 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5525 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5527 /* Again, for the reverse comparison, use either an addition or a XOR. */
5528 if (want_add
5529 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5530 optimize_insn_for_speed_p ()) == 0)
5532 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5533 STORE_FLAG_VALUE, target_mode);
5534 if (tem != 0)
5535 tem = expand_binop (target_mode, add_optab, tem,
5536 GEN_INT (normalizep), target, 0, OPTAB_WIDEN);
5538 else if (!want_add
5539 && rtx_cost (trueval, XOR, 1,
5540 optimize_insn_for_speed_p ()) == 0)
5542 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5543 normalizep, target_mode);
5544 if (tem != 0)
5545 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5546 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5549 if (tem != 0)
5550 return tem;
5551 delete_insns_since (last);
5554 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5555 the constant zero. Reject all other comparisons at this point. Only
5556 do LE and GT if branches are expensive since they are expensive on
5557 2-operand machines. */
5559 if (op1 != const0_rtx
5560 || (code != EQ && code != NE
5561 && (BRANCH_COST (optimize_insn_for_speed_p (),
5562 false) <= 1 || (code != LE && code != GT))))
5563 return 0;
5565 /* Try to put the result of the comparison in the sign bit. Assume we can't
5566 do the necessary operation below. */
5568 tem = 0;
5570 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5571 the sign bit set. */
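/* Worked example with 32-bit values:  A = 5 gives 5 | 4 = 5 (sign clear),
   A = 0 gives 0 | -1 = -1 (sign set), and A = -3 gives -3 | -4 = -3
   (sign set).  */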
5573 if (code == LE)
5575 /* This is destructive, so SUBTARGET can't be OP0. */
5576 if (rtx_equal_p (subtarget, op0))
5577 subtarget = 0;
5579 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5580 OPTAB_WIDEN);
5581 if (tem)
5582 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5583 OPTAB_WIDEN);
5586 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5587 number of bits in the mode of OP0, minus one. */
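/* Worked example with BITS = 31:  A = 5 gives (5 >> 31) - 5 = -5 (sign
   set), A = 0 gives 0 - 0 = 0 (sign clear), and A = -3 gives -1 - (-3)
   = 2 (sign clear), so the sign bit ends up set exactly when A > 0.  */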
5589 if (code == GT)
5591 if (rtx_equal_p (subtarget, op0))
5592 subtarget = 0;
5594 tem = expand_shift (RSHIFT_EXPR, mode, op0,
5595 GET_MODE_BITSIZE (mode) - 1,
5596 subtarget, 0);
5597 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5598 OPTAB_WIDEN);
5601 if (code == EQ || code == NE)
5603 /* For EQ or NE, one way to do the comparison is to apply an operation
5604 that converts the operand into a positive number if it is nonzero
5605 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5606 for NE we negate. This puts the result in the sign bit. Then we
5607 normalize with a shift, if needed.
5609 Two operations that can do the above actions are ABS and FFS, so try
5610 them. If that doesn't work, and MODE is smaller than a full word,
5611 we can use zero-extension to the wider mode (an unsigned conversion)
5612 as the operation. */
5614 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5615 that is compensated by the subsequent overflow when subtracting
5616 one / negating. */
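/* For example, using ABS:  A = 0 gives abs (A) - 1 = -1 (sign set) for EQ
   and -abs (A) = 0 (sign clear) for NE; A = 7 gives 6 (clear) for EQ and
   -7 (set) for NE; A = INT_MIN wraps to INT_MAX (clear) for EQ and stays
   INT_MIN (set) for NE.  */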
5618 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5619 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5620 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5621 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5622 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5624 tem = convert_modes (word_mode, mode, op0, 1);
5625 mode = word_mode;
5628 if (tem != 0)
5630 if (code == EQ)
5631 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5632 0, OPTAB_WIDEN);
5633 else
5634 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5637 /* If we couldn't do it that way, for NE we can "or" the two's complement
5638 of the value with itself. For EQ, we take the one's complement of
5639 that "or", which is an extra insn, so we only handle EQ if branches
5640 are expensive. */
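/* For example, A = 5 gives -5 | 5 = -1 (sign set, so A != 0), while A = 0
   gives 0 | 0 = 0 (sign clear); for EQ the subsequent one's complement
   flips the sign bit.  */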
5642 if (tem == 0
5643 && (code == NE
5644 || BRANCH_COST (optimize_insn_for_speed_p (),
5645 false) > 1))
5647 if (rtx_equal_p (subtarget, op0))
5648 subtarget = 0;
5650 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5651 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5652 OPTAB_WIDEN);
5654 if (tem && code == EQ)
5655 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5659 if (tem && normalizep)
5660 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5661 GET_MODE_BITSIZE (mode) - 1,
5662 subtarget, normalizep == 1);
5664 if (tem)
5666 if (!target)
5667 ;
5668 else if (GET_MODE (tem) != target_mode)
5670 convert_move (target, tem, 0);
5671 tem = target;
5673 else if (!subtarget)
5675 emit_move_insn (target, tem);
5676 tem = target;
5679 else
5680 delete_insns_since (last);
5682 return tem;
5685 /* Like emit_store_flag, but always succeeds. */
5687 rtx
5688 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5689 enum machine_mode mode, int unsignedp, int normalizep)
5691 rtx tem, label;
5692 rtx trueval, falseval;
5694 /* First see if emit_store_flag can do the job. */
5695 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5696 if (tem != 0)
5697 return tem;
5699 if (!target)
5700 target = gen_reg_rtx (word_mode);
5702 /* If this failed, we have to do this with set/compare/jump/set code.
5703 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
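/* That is, the sequence emitted below is roughly
     if (target == 0) goto L;  target = TRUEVAL;  L:
   leaving TARGET untouched when it is already zero.  */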
5704 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5705 if (code == NE
5706 && GET_MODE_CLASS (mode) == MODE_INT
5707 && REG_P (target)
5708 && op0 == target
5709 && op1 == const0_rtx)
5711 label = gen_label_rtx ();
5712 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5713 mode, NULL_RTX, NULL_RTX, label, -1);
5714 emit_move_insn (target, trueval);
5715 emit_label (label);
5716 return target;
5719 if (!REG_P (target)
5720 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5721 target = gen_reg_rtx (GET_MODE (target));
5723 /* Jump in the right direction if the target cannot implement CODE
5724 but can jump on its reverse condition. */
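/* E.g. for "A < B" on a target that can only branch on "A >= B", store 0
   first and branch over the store of the "true" value when "A >= B"
   holds.  */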
5725 falseval = const0_rtx;
5726 if (! can_compare_p (code, mode, ccp_jump)
5727 && (! FLOAT_MODE_P (mode)
5728 || code == ORDERED || code == UNORDERED
5729 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5730 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5732 enum rtx_code rcode;
5733 if (FLOAT_MODE_P (mode))
5734 rcode = reverse_condition_maybe_unordered (code);
5735 else
5736 rcode = reverse_condition (code);
5738 /* Canonicalize to UNORDERED for the libcall. */
5739 if (can_compare_p (rcode, mode, ccp_jump)
5740 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5742 falseval = trueval;
5743 trueval = const0_rtx;
5744 code = rcode;
5748 emit_move_insn (target, trueval);
5749 label = gen_label_rtx ();
5750 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5751 NULL_RTX, label, -1);
5753 emit_move_insn (target, falseval);
5754 emit_label (label);
5756 return target;
5759 /* Perform possibly multi-word comparison and conditional jump to LABEL
5760 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
5761 now a thin wrapper around do_compare_rtx_and_jump. */
5763 static void
5764 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5765 rtx label)
5767 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5768 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5769 NULL_RTX, NULL_RTX, label, -1);