2012-11-04 Janus Weil <janus@gcc.gnu.org>
[official-gcc.git] / gcc / expmed.c
blob5b697a1cd2d765d1b6832e901973533711adaca4
1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
4 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
5 2011, 2012
6 Free Software Foundation, Inc.
8 This file is part of GCC.
10 GCC is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free
12 Software Foundation; either version 3, or (at your option) any later
13 version.
15 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 for more details.
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "diagnostic-core.h"
30 #include "rtl.h"
31 #include "tree.h"
32 #include "tm_p.h"
33 #include "flags.h"
34 #include "insn-config.h"
35 #include "expr.h"
36 #include "optabs.h"
37 #include "recog.h"
38 #include "langhooks.h"
39 #include "df.h"
40 #include "target.h"
41 #include "expmed.h"
43 struct target_expmed default_target_expmed;
44 #if SWITCHABLE_TARGET
45 struct target_expmed *this_target_expmed = &default_target_expmed;
46 #endif
48 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
49 unsigned HOST_WIDE_INT,
50 unsigned HOST_WIDE_INT,
51 unsigned HOST_WIDE_INT,
52 rtx);
53 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
54 unsigned HOST_WIDE_INT,
55 unsigned HOST_WIDE_INT,
56 unsigned HOST_WIDE_INT,
57 rtx);
58 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
59 unsigned HOST_WIDE_INT,
60 unsigned HOST_WIDE_INT, rtx, int, bool);
61 static rtx mask_rtx (enum machine_mode, int, int, int);
62 static rtx lshift_value (enum machine_mode, rtx, int, int);
63 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
64 unsigned HOST_WIDE_INT, int);
65 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
66 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
67 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
69 /* Test whether a value is zero of a power of two. */
70 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
72 /* Reduce conditional compilation elsewhere. */
73 #ifndef HAVE_insv
74 #define HAVE_insv 0
75 #define CODE_FOR_insv CODE_FOR_nothing
76 #define gen_insv(a,b,c,d) NULL_RTX
77 #endif
78 #ifndef HAVE_extv
79 #define HAVE_extv 0
80 #define CODE_FOR_extv CODE_FOR_nothing
81 #define gen_extv(a,b,c,d) NULL_RTX
82 #endif
83 #ifndef HAVE_extzv
84 #define HAVE_extzv 0
85 #define CODE_FOR_extzv CODE_FOR_nothing
86 #define gen_extzv(a,b,c,d) NULL_RTX
87 #endif
89 struct init_expmed_rtl
91 struct rtx_def reg; rtunion reg_fld[2];
92 struct rtx_def plus; rtunion plus_fld1;
93 struct rtx_def neg;
94 struct rtx_def mult; rtunion mult_fld1;
95 struct rtx_def sdiv; rtunion sdiv_fld1;
96 struct rtx_def udiv; rtunion udiv_fld1;
97 struct rtx_def sdiv_32; rtunion sdiv_32_fld1;
98 struct rtx_def smod_32; rtunion smod_32_fld1;
99 struct rtx_def wide_mult; rtunion wide_mult_fld1;
100 struct rtx_def wide_lshr; rtunion wide_lshr_fld1;
101 struct rtx_def wide_trunc;
102 struct rtx_def shift; rtunion shift_fld1;
103 struct rtx_def shift_mult; rtunion shift_mult_fld1;
104 struct rtx_def shift_add; rtunion shift_add_fld1;
105 struct rtx_def shift_sub0; rtunion shift_sub0_fld1;
106 struct rtx_def shift_sub1; rtunion shift_sub1_fld1;
107 struct rtx_def zext;
108 struct rtx_def trunc;
110 rtx pow2[MAX_BITS_PER_WORD];
111 rtx cint[MAX_BITS_PER_WORD];
114 static void
115 init_expmed_one_conv (struct init_expmed_rtl *all, enum machine_mode to_mode,
116 enum machine_mode from_mode, bool speed)
118 int to_size, from_size;
119 rtx which;
121 /* We're given no information about the true size of a partial integer,
122 only the size of the "full" integer it requires for storage. For
123 comparison purposes here, reduce the bit size by one in that case. */
124 to_size = (GET_MODE_BITSIZE (to_mode)
125 - (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT));
126 from_size = (GET_MODE_BITSIZE (from_mode)
127 - (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT));
129 /* Assume cost of zero-extend and sign-extend is the same. */
130 which = (to_size < from_size ? &all->trunc : &all->zext);
132 PUT_MODE (&all->reg, from_mode);
133 set_convert_cost (to_mode, from_mode, speed, set_src_cost (which, speed));
136 static void
137 init_expmed_one_mode (struct init_expmed_rtl *all,
138 enum machine_mode mode, int speed)
140 int m, n, mode_bitsize;
141 enum machine_mode mode_from;
143 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
145 PUT_MODE (&all->reg, mode);
146 PUT_MODE (&all->plus, mode);
147 PUT_MODE (&all->neg, mode);
148 PUT_MODE (&all->mult, mode);
149 PUT_MODE (&all->sdiv, mode);
150 PUT_MODE (&all->udiv, mode);
151 PUT_MODE (&all->sdiv_32, mode);
152 PUT_MODE (&all->smod_32, mode);
153 PUT_MODE (&all->wide_trunc, mode);
154 PUT_MODE (&all->shift, mode);
155 PUT_MODE (&all->shift_mult, mode);
156 PUT_MODE (&all->shift_add, mode);
157 PUT_MODE (&all->shift_sub0, mode);
158 PUT_MODE (&all->shift_sub1, mode);
159 PUT_MODE (&all->zext, mode);
160 PUT_MODE (&all->trunc, mode);
162 set_add_cost (speed, mode, set_src_cost (&all->plus, speed));
163 set_neg_cost (speed, mode, set_src_cost (&all->neg, speed));
164 set_mul_cost (speed, mode, set_src_cost (&all->mult, speed));
165 set_sdiv_cost (speed, mode, set_src_cost (&all->sdiv, speed));
166 set_udiv_cost (speed, mode, set_src_cost (&all->udiv, speed));
168 set_sdiv_pow2_cheap (speed, mode, (set_src_cost (&all->sdiv_32, speed)
169 <= 2 * add_cost (speed, mode)));
170 set_smod_pow2_cheap (speed, mode, (set_src_cost (&all->smod_32, speed)
171 <= 4 * add_cost (speed, mode)));
173 set_shift_cost (speed, mode, 0, 0);
175 int cost = add_cost (speed, mode);
176 set_shiftadd_cost (speed, mode, 0, cost);
177 set_shiftsub0_cost (speed, mode, 0, cost);
178 set_shiftsub1_cost (speed, mode, 0, cost);
181 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
182 for (m = 1; m < n; m++)
184 XEXP (&all->shift, 1) = all->cint[m];
185 XEXP (&all->shift_mult, 1) = all->pow2[m];
187 set_shift_cost (speed, mode, m, set_src_cost (&all->shift, speed));
188 set_shiftadd_cost (speed, mode, m, set_src_cost (&all->shift_add, speed));
189 set_shiftsub0_cost (speed, mode, m, set_src_cost (&all->shift_sub0, speed));
190 set_shiftsub1_cost (speed, mode, m, set_src_cost (&all->shift_sub1, speed));
193 if (SCALAR_INT_MODE_P (mode))
195 for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
196 mode_from = (enum machine_mode)(mode_from + 1))
197 init_expmed_one_conv (all, mode, mode_from, speed);
199 if (GET_MODE_CLASS (mode) == MODE_INT)
201 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
202 if (wider_mode != VOIDmode)
204 PUT_MODE (&all->zext, wider_mode);
205 PUT_MODE (&all->wide_mult, wider_mode);
206 PUT_MODE (&all->wide_lshr, wider_mode);
207 XEXP (&all->wide_lshr, 1) = GEN_INT (mode_bitsize);
209 set_mul_widen_cost (speed, wider_mode,
210 set_src_cost (&all->wide_mult, speed));
211 set_mul_highpart_cost (speed, mode,
212 set_src_cost (&all->wide_trunc, speed));
217 void
218 init_expmed (void)
220 struct init_expmed_rtl all;
221 enum machine_mode mode;
222 int m, speed;
224 memset (&all, 0, sizeof all);
225 for (m = 1; m < MAX_BITS_PER_WORD; m++)
227 all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
228 all.cint[m] = GEN_INT (m);
231 PUT_CODE (&all.reg, REG);
232 /* Avoid using hard regs in ways which may be unsupported. */
233 SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
235 PUT_CODE (&all.plus, PLUS);
236 XEXP (&all.plus, 0) = &all.reg;
237 XEXP (&all.plus, 1) = &all.reg;
239 PUT_CODE (&all.neg, NEG);
240 XEXP (&all.neg, 0) = &all.reg;
242 PUT_CODE (&all.mult, MULT);
243 XEXP (&all.mult, 0) = &all.reg;
244 XEXP (&all.mult, 1) = &all.reg;
246 PUT_CODE (&all.sdiv, DIV);
247 XEXP (&all.sdiv, 0) = &all.reg;
248 XEXP (&all.sdiv, 1) = &all.reg;
250 PUT_CODE (&all.udiv, UDIV);
251 XEXP (&all.udiv, 0) = &all.reg;
252 XEXP (&all.udiv, 1) = &all.reg;
254 PUT_CODE (&all.sdiv_32, DIV);
255 XEXP (&all.sdiv_32, 0) = &all.reg;
256 XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? all.cint[32] : GEN_INT (32);
258 PUT_CODE (&all.smod_32, MOD);
259 XEXP (&all.smod_32, 0) = &all.reg;
260 XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
262 PUT_CODE (&all.zext, ZERO_EXTEND);
263 XEXP (&all.zext, 0) = &all.reg;
265 PUT_CODE (&all.wide_mult, MULT);
266 XEXP (&all.wide_mult, 0) = &all.zext;
267 XEXP (&all.wide_mult, 1) = &all.zext;
269 PUT_CODE (&all.wide_lshr, LSHIFTRT);
270 XEXP (&all.wide_lshr, 0) = &all.wide_mult;
272 PUT_CODE (&all.wide_trunc, TRUNCATE);
273 XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
275 PUT_CODE (&all.shift, ASHIFT);
276 XEXP (&all.shift, 0) = &all.reg;
278 PUT_CODE (&all.shift_mult, MULT);
279 XEXP (&all.shift_mult, 0) = &all.reg;
281 PUT_CODE (&all.shift_add, PLUS);
282 XEXP (&all.shift_add, 0) = &all.shift_mult;
283 XEXP (&all.shift_add, 1) = &all.reg;
285 PUT_CODE (&all.shift_sub0, MINUS);
286 XEXP (&all.shift_sub0, 0) = &all.shift_mult;
287 XEXP (&all.shift_sub0, 1) = &all.reg;
289 PUT_CODE (&all.shift_sub1, MINUS);
290 XEXP (&all.shift_sub1, 0) = &all.reg;
291 XEXP (&all.shift_sub1, 1) = &all.shift_mult;
293 PUT_CODE (&all.trunc, TRUNCATE);
294 XEXP (&all.trunc, 0) = &all.reg;
296 for (speed = 0; speed < 2; speed++)
298 crtl->maybe_hot_insn_p = speed;
299 set_zero_cost (speed, set_src_cost (const0_rtx, speed));
301 for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
302 mode = (enum machine_mode)(mode + 1))
303 init_expmed_one_mode (&all, mode, speed);
305 if (MIN_MODE_PARTIAL_INT != VOIDmode)
306 for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
307 mode = (enum machine_mode)(mode + 1))
308 init_expmed_one_mode (&all, mode, speed);
310 if (MIN_MODE_VECTOR_INT != VOIDmode)
311 for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
312 mode = (enum machine_mode)(mode + 1))
313 init_expmed_one_mode (&all, mode, speed);
316 if (alg_hash_used_p ())
318 struct alg_hash_entry *p = alg_hash_entry_ptr (0);
319 memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
321 else
322 set_alg_hash_used_p (true);
323 default_rtl_profile ();
326 /* Return an rtx representing minus the value of X.
327 MODE is the intended mode of the result,
328 useful if X is a CONST_INT. */
331 negate_rtx (enum machine_mode mode, rtx x)
333 rtx result = simplify_unary_operation (NEG, mode, x, mode);
335 if (result == 0)
336 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
338 return result;
341 /* Report on the availability of insv/extv/extzv and the desired mode
342 of each of their operands. Returns MAX_MACHINE_MODE if HAVE_foo
343 is false; else the mode of the specified operand. If OPNO is -1,
344 all the caller cares about is whether the insn is available. */
345 enum machine_mode
346 mode_for_extraction (enum extraction_pattern pattern, int opno)
348 const struct insn_data_d *data;
350 switch (pattern)
352 case EP_insv:
353 if (HAVE_insv)
355 data = &insn_data[CODE_FOR_insv];
356 break;
358 return MAX_MACHINE_MODE;
360 case EP_extv:
361 if (HAVE_extv)
363 data = &insn_data[CODE_FOR_extv];
364 break;
366 return MAX_MACHINE_MODE;
368 case EP_extzv:
369 if (HAVE_extzv)
371 data = &insn_data[CODE_FOR_extzv];
372 break;
374 return MAX_MACHINE_MODE;
376 default:
377 gcc_unreachable ();
380 if (opno == -1)
381 return VOIDmode;
383 /* Everyone who uses this function used to follow it with
384 if (result == VOIDmode) result = word_mode; */
385 if (data->operand[opno].mode == VOIDmode)
386 return word_mode;
387 return data->operand[opno].mode;
390 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
391 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
392 offset is then BITNUM / BITS_PER_UNIT. */
394 static bool
395 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
396 unsigned HOST_WIDE_INT bitsize,
397 enum machine_mode struct_mode)
399 if (BYTES_BIG_ENDIAN)
400 return (bitnum % BITS_PER_UNIT == 0
401 && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
402 || (bitnum + bitsize) % BITS_PER_WORD == 0));
403 else
404 return bitnum % BITS_PER_WORD == 0;
407 /* Try to use an insv pattern to store VALUE into a field of OP0.
408 OP_MODE is the mode of the insertion and BITSIZE and BITNUM are
409 as for store_bit_field. */
411 static bool
412 store_bit_field_using_insv (rtx op0, unsigned HOST_WIDE_INT bitsize,
413 unsigned HOST_WIDE_INT bitnum, rtx value,
414 enum machine_mode op_mode)
416 struct expand_operand ops[4];
417 rtx value1;
418 rtx xop0 = op0;
419 rtx last = get_last_insn ();
420 bool copy_back = false;
422 unsigned int unit = GET_MODE_BITSIZE (op_mode);
423 if (bitsize == 0 || bitsize > unit)
424 return false;
426 if (MEM_P (xop0))
428 /* Get a reference to the first byte of the field. */
429 xop0 = adjust_bitfield_address (xop0, byte_mode, bitnum / BITS_PER_UNIT);
430 bitnum %= BITS_PER_UNIT;
432 else
434 /* Convert from counting within OP0 to counting in OP_MODE. */
435 if (BYTES_BIG_ENDIAN)
436 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
438 /* If xop0 is a register, we need it in OP_MODE
439 to make it acceptable to the format of insv. */
440 if (GET_CODE (xop0) == SUBREG)
441 /* We can't just change the mode, because this might clobber op0,
442 and we will need the original value of op0 if insv fails. */
443 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
444 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
445 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
448 /* If the destination is a paradoxical subreg such that we need a
449 truncate to the inner mode, perform the insertion on a temporary and
450 truncate the result to the original destination. Note that we can't
451 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
452 X) 0)) is (reg:N X). */
453 if (GET_CODE (xop0) == SUBREG
454 && REG_P (SUBREG_REG (xop0))
455 && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
456 op_mode))
458 rtx tem = gen_reg_rtx (op_mode);
459 emit_move_insn (tem, xop0);
460 xop0 = tem;
461 copy_back = true;
464 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
465 "backwards" from the size of the unit we are inserting into.
466 Otherwise, we count bits from the most significant on a
467 BYTES/BITS_BIG_ENDIAN machine. */
469 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
470 bitnum = unit - bitsize - bitnum;
472 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
473 value1 = value;
474 if (GET_MODE (value) != op_mode)
476 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
478 /* Optimization: Don't bother really extending VALUE
479 if it has all the bits we will actually use. However,
480 if we must narrow it, be sure we do it correctly. */
482 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
484 rtx tmp;
486 tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
487 if (! tmp)
488 tmp = simplify_gen_subreg (op_mode,
489 force_reg (GET_MODE (value),
490 value1),
491 GET_MODE (value), 0);
492 value1 = tmp;
494 else
495 value1 = gen_lowpart (op_mode, value1);
497 else if (CONST_INT_P (value))
498 value1 = gen_int_mode (INTVAL (value), op_mode);
499 else
500 /* Parse phase is supposed to make VALUE's data type
501 match that of the component reference, which is a type
502 at least as wide as the field; so VALUE should have
503 a mode that corresponds to that type. */
504 gcc_assert (CONSTANT_P (value));
507 create_fixed_operand (&ops[0], xop0);
508 create_integer_operand (&ops[1], bitsize);
509 create_integer_operand (&ops[2], bitnum);
510 create_input_operand (&ops[3], value1, op_mode);
511 if (maybe_expand_insn (CODE_FOR_insv, 4, ops))
513 if (copy_back)
514 convert_move (op0, xop0, true);
515 return true;
517 delete_insns_since (last);
518 return false;
521 /* A subroutine of store_bit_field, with the same arguments. Return true
522 if the operation could be implemented.
524 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
525 no other way of implementing the operation. If FALLBACK_P is false,
526 return false instead. */
528 static bool
529 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
530 unsigned HOST_WIDE_INT bitnum,
531 unsigned HOST_WIDE_INT bitregion_start,
532 unsigned HOST_WIDE_INT bitregion_end,
533 enum machine_mode fieldmode,
534 rtx value, bool fallback_p)
536 rtx op0 = str_rtx;
537 rtx orig_value;
539 while (GET_CODE (op0) == SUBREG)
541 /* The following line once was done only if WORDS_BIG_ENDIAN,
542 but I think that is a mistake. WORDS_BIG_ENDIAN is
543 meaningful at a much higher level; when structures are copied
544 between memory and regs, the higher-numbered regs
545 always get higher addresses. */
546 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
547 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
548 int byte_offset = 0;
550 /* Paradoxical subregs need special handling on big endian machines. */
551 if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
553 int difference = inner_mode_size - outer_mode_size;
555 if (WORDS_BIG_ENDIAN)
556 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
557 if (BYTES_BIG_ENDIAN)
558 byte_offset += difference % UNITS_PER_WORD;
560 else
561 byte_offset = SUBREG_BYTE (op0);
563 bitnum += byte_offset * BITS_PER_UNIT;
564 op0 = SUBREG_REG (op0);
567 /* No action is needed if the target is a register and if the field
568 lies completely outside that register. This can occur if the source
569 code contains an out-of-bounds access to a small array. */
570 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
571 return true;
573 /* Use vec_set patterns for inserting parts of vectors whenever
574 available. */
575 if (VECTOR_MODE_P (GET_MODE (op0))
576 && !MEM_P (op0)
577 && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
578 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
579 && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
580 && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
582 struct expand_operand ops[3];
583 enum machine_mode outermode = GET_MODE (op0);
584 enum machine_mode innermode = GET_MODE_INNER (outermode);
585 enum insn_code icode = optab_handler (vec_set_optab, outermode);
586 int pos = bitnum / GET_MODE_BITSIZE (innermode);
588 create_fixed_operand (&ops[0], op0);
589 create_input_operand (&ops[1], value, innermode);
590 create_integer_operand (&ops[2], pos);
591 if (maybe_expand_insn (icode, 3, ops))
592 return true;
595 /* If the target is a register, overwriting the entire object, or storing
596 a full-word or multi-word field can be done with just a SUBREG. */
597 if (!MEM_P (op0)
598 && bitsize == GET_MODE_BITSIZE (fieldmode)
599 && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
600 || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
602 /* Use the subreg machinery either to narrow OP0 to the required
603 words or to cope with mode punning between equal-sized modes. */
604 rtx sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
605 bitnum / BITS_PER_UNIT);
606 if (sub)
608 emit_move_insn (sub, value);
609 return true;
613 /* If the target is memory, storing any naturally aligned field can be
614 done with a simple store. For targets that support fast unaligned
615 memory, any naturally sized, unit aligned field can be done directly. */
616 if (MEM_P (op0)
617 && bitnum % BITS_PER_UNIT == 0
618 && bitsize == GET_MODE_BITSIZE (fieldmode)
619 && (!SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
620 || (bitnum % bitsize == 0
621 && MEM_ALIGN (op0) % bitsize == 0)))
623 op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
624 emit_move_insn (op0, value);
625 return true;
628 /* Make sure we are playing with integral modes. Pun with subregs
629 if we aren't. This must come after the entire register case above,
630 since that case is valid for any mode. The following cases are only
631 valid for integral modes. */
633 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
634 if (imode != GET_MODE (op0))
636 if (MEM_P (op0))
637 op0 = adjust_bitfield_address (op0, imode, 0);
638 else
640 gcc_assert (imode != BLKmode);
641 op0 = gen_lowpart (imode, op0);
646 /* Storing an lsb-aligned field in a register
647 can be done with a movstrict instruction. */
649 if (!MEM_P (op0)
650 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
651 && bitsize == GET_MODE_BITSIZE (fieldmode)
652 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
654 struct expand_operand ops[2];
655 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
656 rtx arg0 = op0;
657 unsigned HOST_WIDE_INT subreg_off;
659 if (GET_CODE (arg0) == SUBREG)
661 /* Else we've got some float mode source being extracted into
662 a different float mode destination -- this combination of
663 subregs results in Severe Tire Damage. */
664 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
665 || GET_MODE_CLASS (fieldmode) == MODE_INT
666 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
667 arg0 = SUBREG_REG (arg0);
670 subreg_off = bitnum / BITS_PER_UNIT;
671 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
673 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
675 create_fixed_operand (&ops[0], arg0);
676 /* Shrink the source operand to FIELDMODE. */
677 create_convert_operand_to (&ops[1], value, fieldmode, false);
678 if (maybe_expand_insn (icode, 2, ops))
679 return true;
683 /* Handle fields bigger than a word. */
685 if (bitsize > BITS_PER_WORD)
687 /* Here we transfer the words of the field
688 in the order least significant first.
689 This is because the most significant word is the one which may
690 be less than full.
691 However, only do that if the value is not BLKmode. */
693 unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
694 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
695 unsigned int i;
696 rtx last;
698 /* This is the mode we must force value to, so that there will be enough
699 subwords to extract. Note that fieldmode will often (always?) be
700 VOIDmode, because that is what store_field uses to indicate that this
701 is a bit field, but passing VOIDmode to operand_subword_force
702 is not allowed. */
703 fieldmode = GET_MODE (value);
704 if (fieldmode == VOIDmode)
705 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
707 last = get_last_insn ();
708 for (i = 0; i < nwords; i++)
710 /* If I is 0, use the low-order word in both field and target;
711 if I is 1, use the next to lowest word; and so on. */
712 unsigned int wordnum = (backwards
713 ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
714 - i - 1
715 : i);
716 unsigned int bit_offset = (backwards
717 ? MAX ((int) bitsize - ((int) i + 1)
718 * BITS_PER_WORD,
720 : (int) i * BITS_PER_WORD);
721 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
722 unsigned HOST_WIDE_INT new_bitsize =
723 MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
725 /* If the remaining chunk doesn't have full wordsize we have
726 to make sure that for big endian machines the higher order
727 bits are used. */
728 if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
729 value_word = simplify_expand_binop (word_mode, lshr_optab,
730 value_word,
731 GEN_INT (BITS_PER_WORD
732 - new_bitsize),
733 NULL_RTX, true,
734 OPTAB_LIB_WIDEN);
736 if (!store_bit_field_1 (op0, new_bitsize,
737 bitnum + bit_offset,
738 bitregion_start, bitregion_end,
739 word_mode,
740 value_word, fallback_p))
742 delete_insns_since (last);
743 return false;
746 return true;
749 /* If VALUE has a floating-point or complex mode, access it as an
750 integer of the corresponding size. This can occur on a machine
751 with 64 bit registers that uses SFmode for float. It can also
752 occur for unaligned float or complex fields. */
753 orig_value = value;
754 if (GET_MODE (value) != VOIDmode
755 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
756 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
758 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
759 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
762 /* If OP0 is a multi-word register, narrow it to the affected word.
763 If the region spans two words, defer to store_split_bit_field. */
764 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
766 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
767 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
768 gcc_assert (op0);
769 bitnum %= BITS_PER_WORD;
770 if (bitnum + bitsize > BITS_PER_WORD)
772 if (!fallback_p)
773 return false;
775 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
776 bitregion_end, value);
777 return true;
781 /* From here on we can assume that the field to be stored in fits
782 within a word. If the destination is a register, it too fits
783 in a word. */
785 enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
786 if (op_mode != MAX_MACHINE_MODE
787 && !MEM_P (op0)
788 && store_bit_field_using_insv (op0, bitsize, bitnum, value, op_mode))
789 return true;
791 /* If OP0 is a memory, try copying it to a register and seeing if a
792 cheap register alternative is available. */
793 if (op_mode != MAX_MACHINE_MODE && MEM_P (op0))
795 enum machine_mode bestmode;
796 unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;
798 /* Do not use insv for volatile bitfields when
799 -fstrict-volatile-bitfields is in effect. */
800 if (!(MEM_VOLATILE_P (op0) && flag_strict_volatile_bitfields > 0)
801 /* Do not use insv if the bit region is restricted and
802 an op_mode integer doesn't fit into the restricted region. */
803 && !(bitregion_end
804 && (bitnum - (bitnum % BITS_PER_UNIT)
805 + GET_MODE_BITSIZE (op_mode)
806 > bitregion_end + 1))
807 && store_bit_field_using_insv (op0, bitsize, bitnum, value, op_mode))
808 return true;
810 if (bitregion_end)
811 maxbits = bitregion_end - bitregion_start + 1;
813 /* Get the mode to use for inserting into this field. If OP0 is
814 BLKmode, get the smallest mode consistent with the alignment. If
815 OP0 is a non-BLKmode object that is no wider than OP_MODE, use its
816 mode. Otherwise, use the smallest mode containing the field. */
818 if (GET_MODE (op0) == BLKmode
819 || GET_MODE_BITSIZE (GET_MODE (op0)) > maxbits
820 || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode))
821 bestmode = get_best_mode (bitsize, bitnum,
822 bitregion_start, bitregion_end,
823 MEM_ALIGN (op0), op_mode,
824 MEM_VOLATILE_P (op0));
825 else
826 bestmode = GET_MODE (op0);
828 if (bestmode != VOIDmode
829 && GET_MODE_SIZE (bestmode) >= GET_MODE_SIZE (fieldmode)
830 && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
831 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
833 rtx last, tempreg, xop0;
834 unsigned int unit;
835 unsigned HOST_WIDE_INT offset, bitpos;
837 last = get_last_insn ();
839 /* Adjust address to point to the containing unit of
840 that mode. Compute the offset as a multiple of this unit,
841 counting in bytes. */
842 unit = GET_MODE_BITSIZE (bestmode);
843 offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
844 bitpos = bitnum % unit;
845 xop0 = adjust_bitfield_address (op0, bestmode, offset);
847 /* Fetch that unit, store the bitfield in it, then store
848 the unit. */
849 tempreg = copy_to_reg (xop0);
850 if (store_bit_field_1 (tempreg, bitsize, bitpos,
851 bitregion_start, bitregion_end,
852 fieldmode, orig_value, false))
854 emit_move_insn (xop0, tempreg);
855 return true;
857 delete_insns_since (last);
861 if (!fallback_p)
862 return false;
864 store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
865 bitregion_end, value);
866 return true;
869 /* Generate code to store value from rtx VALUE
870 into a bit-field within structure STR_RTX
871 containing BITSIZE bits starting at bit BITNUM.
873 BITREGION_START is bitpos of the first bitfield in this region.
874 BITREGION_END is the bitpos of the ending bitfield in this region.
875 These two fields are 0, if the C++ memory model does not apply,
876 or we are not interested in keeping track of bitfield regions.
878 FIELDMODE is the machine-mode of the FIELD_DECL node for this field. */
880 void
881 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
882 unsigned HOST_WIDE_INT bitnum,
883 unsigned HOST_WIDE_INT bitregion_start,
884 unsigned HOST_WIDE_INT bitregion_end,
885 enum machine_mode fieldmode,
886 rtx value)
888 /* Under the C++0x memory model, we must not touch bits outside the
889 bit region. Adjust the address to start at the beginning of the
890 bit region. */
891 if (MEM_P (str_rtx) && bitregion_start > 0)
893 enum machine_mode bestmode;
894 enum machine_mode op_mode;
895 unsigned HOST_WIDE_INT offset;
897 op_mode = mode_for_extraction (EP_insv, 3);
898 if (op_mode == MAX_MACHINE_MODE)
899 op_mode = VOIDmode;
901 gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
903 offset = bitregion_start / BITS_PER_UNIT;
904 bitnum -= bitregion_start;
905 bitregion_end -= bitregion_start;
906 bitregion_start = 0;
907 bestmode = get_best_mode (bitsize, bitnum,
908 bitregion_start, bitregion_end,
909 MEM_ALIGN (str_rtx),
910 op_mode,
911 MEM_VOLATILE_P (str_rtx));
912 str_rtx = adjust_address (str_rtx, bestmode, offset);
915 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
916 bitregion_start, bitregion_end,
917 fieldmode, value, true))
918 gcc_unreachable ();
921 /* Use shifts and boolean operations to store VALUE into a bit field of
922 width BITSIZE in OP0, starting at bit BITNUM. */
924 static void
925 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
926 unsigned HOST_WIDE_INT bitnum,
927 unsigned HOST_WIDE_INT bitregion_start,
928 unsigned HOST_WIDE_INT bitregion_end,
929 rtx value)
931 enum machine_mode mode;
932 rtx temp;
933 int all_zero = 0;
934 int all_one = 0;
936 /* There is a case not handled here:
937 a structure with a known alignment of just a halfword
938 and a field split across two aligned halfwords within the structure.
939 Or likewise a structure with a known alignment of just a byte
940 and a field split across two bytes.
941 Such cases are not supposed to be able to occur. */
943 if (MEM_P (op0))
945 unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;
947 if (bitregion_end)
948 maxbits = bitregion_end - bitregion_start + 1;
950 /* Get the proper mode to use for this field. We want a mode that
951 includes the entire field. If such a mode would be larger than
952 a word, we won't be doing the extraction the normal way.
953 We don't want a mode bigger than the destination. */
955 mode = GET_MODE (op0);
956 if (GET_MODE_BITSIZE (mode) == 0
957 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
958 mode = word_mode;
960 if (MEM_VOLATILE_P (op0)
961 && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
962 && GET_MODE_BITSIZE (GET_MODE (op0)) <= maxbits
963 && flag_strict_volatile_bitfields > 0)
964 mode = GET_MODE (op0);
965 else
966 mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
967 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
969 if (mode == VOIDmode)
971 /* The only way this should occur is if the field spans word
972 boundaries. */
973 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
974 bitregion_end, value);
975 return;
978 HOST_WIDE_INT bit_offset = bitnum - bitnum % GET_MODE_BITSIZE (mode);
979 op0 = adjust_bitfield_address (op0, mode, bit_offset / BITS_PER_UNIT);
980 bitnum -= bit_offset;
983 mode = GET_MODE (op0);
984 gcc_assert (SCALAR_INT_MODE_P (mode));
986 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
987 for invalid input, such as f5 from gcc.dg/pr48335-2.c. */
989 if (BYTES_BIG_ENDIAN)
990 /* BITNUM is the distance between our msb
991 and that of the containing datum.
992 Convert it to the distance from the lsb. */
993 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
995 /* Now BITNUM is always the distance between our lsb
996 and that of OP0. */
998 /* Shift VALUE left by BITNUM bits. If VALUE is not constant,
999 we must first convert its mode to MODE. */
1001 if (CONST_INT_P (value))
1003 HOST_WIDE_INT v = INTVAL (value);
1005 if (bitsize < HOST_BITS_PER_WIDE_INT)
1006 v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
1008 if (v == 0)
1009 all_zero = 1;
1010 else if ((bitsize < HOST_BITS_PER_WIDE_INT
1011 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
1012 || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
1013 all_one = 1;
1015 value = lshift_value (mode, value, bitnum, bitsize);
1017 else
1019 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1020 && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1022 if (GET_MODE (value) != mode)
1023 value = convert_to_mode (mode, value, 1);
1025 if (must_and)
1026 value = expand_binop (mode, and_optab, value,
1027 mask_rtx (mode, 0, bitsize, 0),
1028 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1029 if (bitnum > 0)
1030 value = expand_shift (LSHIFT_EXPR, mode, value,
1031 bitnum, NULL_RTX, 1);
1034 /* Now clear the chosen bits in OP0,
1035 except that if VALUE is -1 we need not bother. */
1036 /* We keep the intermediates in registers to allow CSE to combine
1037 consecutive bitfield assignments. */
1039 temp = force_reg (mode, op0);
1041 if (! all_one)
1043 temp = expand_binop (mode, and_optab, temp,
1044 mask_rtx (mode, bitnum, bitsize, 1),
1045 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1046 temp = force_reg (mode, temp);
1049 /* Now logical-or VALUE into OP0, unless it is zero. */
1051 if (! all_zero)
1053 temp = expand_binop (mode, ior_optab, temp, value,
1054 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1055 temp = force_reg (mode, temp);
1058 if (op0 != temp)
1060 op0 = copy_rtx (op0);
1061 emit_move_insn (op0, temp);
1065 /* Store a bit field that is split across multiple accessible memory objects.
1067 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1068 BITSIZE is the field width; BITPOS the position of its first bit
1069 (within the word).
1070 VALUE is the value to store.
1072 This does not yet handle fields wider than BITS_PER_WORD. */
1074 static void
1075 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1076 unsigned HOST_WIDE_INT bitpos,
1077 unsigned HOST_WIDE_INT bitregion_start,
1078 unsigned HOST_WIDE_INT bitregion_end,
1079 rtx value)
1081 unsigned int unit;
1082 unsigned int bitsdone = 0;
1084 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1085 much at a time. */
1086 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1087 unit = BITS_PER_WORD;
1088 else
1089 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1091 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1092 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1093 that VALUE might be a floating-point constant. */
1094 if (CONSTANT_P (value) && !CONST_INT_P (value))
1096 rtx word = gen_lowpart_common (word_mode, value);
1098 if (word && (value != word))
1099 value = word;
1100 else
1101 value = gen_lowpart_common (word_mode,
1102 force_reg (GET_MODE (value) != VOIDmode
1103 ? GET_MODE (value)
1104 : word_mode, value));
1107 while (bitsdone < bitsize)
1109 unsigned HOST_WIDE_INT thissize;
1110 rtx part, word;
1111 unsigned HOST_WIDE_INT thispos;
1112 unsigned HOST_WIDE_INT offset;
1114 offset = (bitpos + bitsdone) / unit;
1115 thispos = (bitpos + bitsdone) % unit;
1117 /* When region of bytes we can touch is restricted, decrease
1118 UNIT close to the end of the region as needed. */
1119 if (bitregion_end
1120 && unit > BITS_PER_UNIT
1121 && bitpos + bitsdone - thispos + unit > bitregion_end + 1)
1123 unit = unit / 2;
1124 continue;
1127 /* THISSIZE must not overrun a word boundary. Otherwise,
1128 store_fixed_bit_field will call us again, and we will mutually
1129 recurse forever. */
1130 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1131 thissize = MIN (thissize, unit - thispos);
1133 if (BYTES_BIG_ENDIAN)
1135 /* Fetch successively less significant portions. */
1136 if (CONST_INT_P (value))
1137 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1138 >> (bitsize - bitsdone - thissize))
1139 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1140 else
1142 int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1143 /* The args are chosen so that the last part includes the
1144 lsb. Give extract_bit_field the value it needs (with
1145 endianness compensation) to fetch the piece we want. */
1146 part = extract_fixed_bit_field (word_mode, value, thissize,
1147 total_bits - bitsize + bitsdone,
1148 NULL_RTX, 1, false);
1151 else
1153 /* Fetch successively more significant portions. */
1154 if (CONST_INT_P (value))
1155 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1156 >> bitsdone)
1157 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1158 else
1159 part = extract_fixed_bit_field (word_mode, value, thissize,
1160 bitsdone, NULL_RTX, 1, false);
1163 /* If OP0 is a register, then handle OFFSET here.
1165 When handling multiword bitfields, extract_bit_field may pass
1166 down a word_mode SUBREG of a larger REG for a bitfield that actually
1167 crosses a word boundary. Thus, for a SUBREG, we must find
1168 the current word starting from the base register. */
1169 if (GET_CODE (op0) == SUBREG)
1171 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1172 enum machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
1173 if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1174 word = word_offset ? const0_rtx : op0;
1175 else
1176 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1177 GET_MODE (SUBREG_REG (op0)));
1178 offset = 0;
1180 else if (REG_P (op0))
1182 enum machine_mode op0_mode = GET_MODE (op0);
1183 if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1184 word = offset ? const0_rtx : op0;
1185 else
1186 word = operand_subword_force (op0, offset, GET_MODE (op0));
1187 offset = 0;
1189 else
1190 word = op0;
1192 /* OFFSET is in UNITs, and UNIT is in bits. If WORD is const0_rtx,
1193 it is just an out-of-bounds access. Ignore it. */
1194 if (word != const0_rtx)
1195 store_fixed_bit_field (word, thissize, offset * unit + thispos,
1196 bitregion_start, bitregion_end, part);
1197 bitsdone += thissize;
1201 /* A subroutine of extract_bit_field_1 that converts return value X
1202 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1203 to extract_bit_field. */
1205 static rtx
1206 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1207 enum machine_mode tmode, bool unsignedp)
1209 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1210 return x;
1212 /* If the x mode is not a scalar integral, first convert to the
1213 integer mode of that size and then access it as a floating-point
1214 value via a SUBREG. */
1215 if (!SCALAR_INT_MODE_P (tmode))
1217 enum machine_mode smode;
1219 smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1220 x = convert_to_mode (smode, x, unsignedp);
1221 x = force_reg (smode, x);
1222 return gen_lowpart (tmode, x);
1225 return convert_to_mode (tmode, x, unsignedp);
1228 /* Try to use an ext(z)v pattern to extract a field from OP0.
1229 Return the extracted value on success, otherwise return null.
1230 EXT_MODE is the mode of the extraction and the other arguments
1231 are as for extract_bit_field. */
1233 static rtx
1234 extract_bit_field_using_extv (rtx op0, unsigned HOST_WIDE_INT bitsize,
1235 unsigned HOST_WIDE_INT bitnum,
1236 int unsignedp, rtx target,
1237 enum machine_mode mode, enum machine_mode tmode,
1238 enum machine_mode ext_mode)
1240 struct expand_operand ops[4];
1241 rtx spec_target = target;
1242 rtx spec_target_subreg = 0;
1243 unsigned unit = GET_MODE_BITSIZE (ext_mode);
1245 if (bitsize == 0 || unit < bitsize)
1246 return NULL_RTX;
1248 if (MEM_P (op0))
1250 /* Get a reference to the first byte of the field. */
1251 op0 = adjust_bitfield_address (op0, byte_mode, bitnum / BITS_PER_UNIT);
1252 bitnum %= BITS_PER_UNIT;
1254 else
1256 /* Convert from counting within OP0 to counting in EXT_MODE. */
1257 if (BYTES_BIG_ENDIAN)
1258 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1260 /* If op0 is a register, we need it in EXT_MODE to make it
1261 acceptable to the format of ext(z)v. */
1262 if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1263 return NULL_RTX;
1264 if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1265 op0 = gen_lowpart_SUBREG (ext_mode, op0);
1268 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1269 "backwards" from the size of the unit we are extracting from.
1270 Otherwise, we count bits from the most significant on a
1271 BYTES/BITS_BIG_ENDIAN machine. */
1273 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1274 bitnum = unit - bitsize - bitnum;
1276 if (target == 0)
1277 target = spec_target = gen_reg_rtx (tmode);
1279 if (GET_MODE (target) != ext_mode)
1281 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1282 between the mode of the extraction (word_mode) and the target
1283 mode. Instead, create a temporary and use convert_move to set
1284 the target. */
1285 if (REG_P (target)
1286 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1288 target = gen_lowpart (ext_mode, target);
1289 if (GET_MODE_PRECISION (ext_mode)
1290 > GET_MODE_PRECISION (GET_MODE (spec_target)))
1291 spec_target_subreg = target;
1293 else
1294 target = gen_reg_rtx (ext_mode);
1297 create_output_operand (&ops[0], target, ext_mode);
1298 create_fixed_operand (&ops[1], op0);
1299 create_integer_operand (&ops[2], bitsize);
1300 create_integer_operand (&ops[3], bitnum);
1301 if (maybe_expand_insn (unsignedp ? CODE_FOR_extzv : CODE_FOR_extv, 4, ops))
1303 target = ops[0].value;
1304 if (target == spec_target)
1305 return target;
1306 if (target == spec_target_subreg)
1307 return spec_target;
1308 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1310 return NULL_RTX;
1313 /* A subroutine of extract_bit_field, with the same arguments.
1314 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1315 if we can find no other means of implementing the operation.
1316 if FALLBACK_P is false, return NULL instead. */
1318 static rtx
1319 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1320 unsigned HOST_WIDE_INT bitnum,
1321 int unsignedp, bool packedp, rtx target,
1322 enum machine_mode mode, enum machine_mode tmode,
1323 bool fallback_p)
1325 rtx op0 = str_rtx;
1326 enum machine_mode int_mode;
1327 enum machine_mode ext_mode;
1328 enum machine_mode mode1;
1330 if (tmode == VOIDmode)
1331 tmode = mode;
1333 while (GET_CODE (op0) == SUBREG)
1335 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1336 op0 = SUBREG_REG (op0);
1339 /* If we have an out-of-bounds access to a register, just return an
1340 uninitialized register of the required mode. This can occur if the
1341 source code contains an out-of-bounds access to a small array. */
1342 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1343 return gen_reg_rtx (tmode);
1345 if (REG_P (op0)
1346 && mode == GET_MODE (op0)
1347 && bitnum == 0
1348 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1350 /* We're trying to extract a full register from itself. */
1351 return op0;
1354 /* See if we can get a better vector mode before extracting. */
1355 if (VECTOR_MODE_P (GET_MODE (op0))
1356 && !MEM_P (op0)
1357 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1359 enum machine_mode new_mode;
1361 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1362 new_mode = MIN_MODE_VECTOR_FLOAT;
1363 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1364 new_mode = MIN_MODE_VECTOR_FRACT;
1365 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1366 new_mode = MIN_MODE_VECTOR_UFRACT;
1367 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1368 new_mode = MIN_MODE_VECTOR_ACCUM;
1369 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1370 new_mode = MIN_MODE_VECTOR_UACCUM;
1371 else
1372 new_mode = MIN_MODE_VECTOR_INT;
1374 for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1375 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1376 && targetm.vector_mode_supported_p (new_mode))
1377 break;
1378 if (new_mode != VOIDmode)
1379 op0 = gen_lowpart (new_mode, op0);
1382 /* Use vec_extract patterns for extracting parts of vectors whenever
1383 available. */
1384 if (VECTOR_MODE_P (GET_MODE (op0))
1385 && !MEM_P (op0)
1386 && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1387 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1388 == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1390 struct expand_operand ops[3];
1391 enum machine_mode outermode = GET_MODE (op0);
1392 enum machine_mode innermode = GET_MODE_INNER (outermode);
1393 enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1394 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1396 create_output_operand (&ops[0], target, innermode);
1397 create_input_operand (&ops[1], op0, outermode);
1398 create_integer_operand (&ops[2], pos);
1399 if (maybe_expand_insn (icode, 3, ops))
1401 target = ops[0].value;
1402 if (GET_MODE (target) != mode)
1403 return gen_lowpart (tmode, target);
1404 return target;
1408 /* Make sure we are playing with integral modes. Pun with subregs
1409 if we aren't. */
1411 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1412 if (imode != GET_MODE (op0))
1414 if (MEM_P (op0))
1415 op0 = adjust_bitfield_address (op0, imode, 0);
1416 else if (imode != BLKmode)
1418 op0 = gen_lowpart (imode, op0);
1420 /* If we got a SUBREG, force it into a register since we
1421 aren't going to be able to do another SUBREG on it. */
1422 if (GET_CODE (op0) == SUBREG)
1423 op0 = force_reg (imode, op0);
1425 else if (REG_P (op0))
1427 rtx reg, subreg;
1428 imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1429 MODE_INT);
1430 reg = gen_reg_rtx (imode);
1431 subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1432 emit_move_insn (subreg, op0);
1433 op0 = reg;
1434 bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1436 else
1438 rtx mem = assign_stack_temp (GET_MODE (op0),
1439 GET_MODE_SIZE (GET_MODE (op0)));
1440 emit_move_insn (mem, op0);
1441 op0 = adjust_bitfield_address (mem, BLKmode, 0);
1446 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1447 If that's wrong, the solution is to test for it and set TARGET to 0
1448 if needed. */
1450 /* If the bitfield is volatile, we need to make sure the access
1451 remains on a type-aligned boundary. */
1452 if (GET_CODE (op0) == MEM
1453 && MEM_VOLATILE_P (op0)
1454 && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
1455 && flag_strict_volatile_bitfields > 0)
1456 goto no_subreg_mode_swap;
1458 /* Only scalar integer modes can be converted via subregs. There is an
1459 additional problem for FP modes here in that they can have a precision
1460 which is different from the size. mode_for_size uses precision, but
1461 we want a mode based on the size, so we must avoid calling it for FP
1462 modes. */
1463 mode1 = mode;
1464 if (SCALAR_INT_MODE_P (tmode))
1466 enum machine_mode try_mode = mode_for_size (bitsize,
1467 GET_MODE_CLASS (tmode), 0);
1468 if (try_mode != BLKmode)
1469 mode1 = try_mode;
1471 gcc_assert (mode1 != BLKmode);
1473 /* Extraction of a full MODE1 value can be done with a subreg as long
1474 as the least significant bit of the value is the least significant
1475 bit of either OP0 or a word of OP0. */
1476 if (!MEM_P (op0)
1477 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1478 && bitsize == GET_MODE_BITSIZE (mode1)
1479 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1481 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1482 bitnum / BITS_PER_UNIT);
1483 if (sub)
1484 return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1487 /* Extraction of a full MODE1 value can be done with a load as long as
1488 the field is on a byte boundary and is sufficiently aligned. */
1489 if (MEM_P (op0)
1490 && bitnum % BITS_PER_UNIT == 0
1491 && bitsize == GET_MODE_BITSIZE (mode1)
1492 && (!SLOW_UNALIGNED_ACCESS (mode1, MEM_ALIGN (op0))
1493 || (bitnum % bitsize == 0
1494 && MEM_ALIGN (op0) % bitsize == 0)))
1496 op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1497 return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1500 no_subreg_mode_swap:
1502 /* Handle fields bigger than a word. */
1504 if (bitsize > BITS_PER_WORD)
1506 /* Here we transfer the words of the field
1507 in the order least significant first.
1508 This is because the most significant word is the one which may
1509 be less than full. */
1511 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1512 unsigned int i;
1513 rtx last;
1515 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1516 target = gen_reg_rtx (mode);
1518 /* Indicate for flow that the entire target reg is being set. */
1519 emit_clobber (target);
1521 last = get_last_insn ();
1522 for (i = 0; i < nwords; i++)
1524 /* If I is 0, use the low-order word in both field and target;
1525 if I is 1, use the next to lowest word; and so on. */
1526 /* Word number in TARGET to use. */
1527 unsigned int wordnum
1528 = (WORDS_BIG_ENDIAN
1529 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1530 : i);
1531 /* Offset from start of field in OP0. */
1532 unsigned int bit_offset = (WORDS_BIG_ENDIAN
1533 ? MAX (0, ((int) bitsize - ((int) i + 1)
1534 * (int) BITS_PER_WORD))
1535 : (int) i * BITS_PER_WORD);
1536 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1537 rtx result_part
1538 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1539 bitsize - i * BITS_PER_WORD),
1540 bitnum + bit_offset, 1, false, target_part,
1541 mode, word_mode, fallback_p);
1543 gcc_assert (target_part);
1544 if (!result_part)
1546 delete_insns_since (last);
1547 return NULL;
1550 if (result_part != target_part)
1551 emit_move_insn (target_part, result_part);
1554 if (unsignedp)
1556 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1557 need to be zero'd out. */
1558 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1560 unsigned int i, total_words;
1562 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1563 for (i = nwords; i < total_words; i++)
1564 emit_move_insn
1565 (operand_subword (target,
1566 WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1567 1, VOIDmode),
1568 const0_rtx);
1570 return target;
1573 /* Signed bit field: sign-extend with two arithmetic shifts. */
1574 target = expand_shift (LSHIFT_EXPR, mode, target,
1575 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1576 return expand_shift (RSHIFT_EXPR, mode, target,
1577 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1580 /* If OP0 is a multi-word register, narrow it to the affected word.
1581 If the region spans two words, defer to extract_split_bit_field. */
1582 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1584 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1585 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1586 bitnum %= BITS_PER_WORD;
1587 if (bitnum + bitsize > BITS_PER_WORD)
1589 if (!fallback_p)
1590 return NULL_RTX;
1591 target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1592 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1596 /* From here on we know the desired field is smaller than a word.
1597 If OP0 is a register, it too fits within a word. */
1599 ext_mode = mode_for_extraction (unsignedp ? EP_extzv : EP_extv, 0);
1600 if (ext_mode != MAX_MACHINE_MODE && !MEM_P (op0))
1602 rtx result = extract_bit_field_using_extv (op0, bitsize, bitnum,
1603 unsignedp, target, mode,
1604 tmode, ext_mode);
1605 if (result)
1606 return result;
1609 /* If OP0 is a memory, try copying it to a register and seeing if a
1610 cheap register alternative is available. */
1611 if (ext_mode != MAX_MACHINE_MODE && MEM_P (op0))
1613 enum machine_mode bestmode;
1615 /* Do not use extv/extzv for volatile bitfields when
1616 -fstrict-volatile-bitfields is in effect. */
1617 if (!(MEM_VOLATILE_P (op0) && flag_strict_volatile_bitfields > 0))
1619 rtx result = extract_bit_field_using_extv (op0, bitsize, bitnum,
1620 unsignedp, target, mode,
1621 tmode, ext_mode);
1622 if (result)
1623 return result;
1626 /* Get the mode to use for inserting into this field. If
1627 OP0 is BLKmode, get the smallest mode consistent with the
1628 alignment. If OP0 is a non-BLKmode object that is no
1629 wider than EXT_MODE, use its mode. Otherwise, use the
1630 smallest mode containing the field. */
1632 if (GET_MODE (op0) == BLKmode
1633 || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (ext_mode))
1634 bestmode = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
1635 ext_mode, MEM_VOLATILE_P (op0));
1636 else
1637 bestmode = GET_MODE (op0);
1639 if (bestmode != VOIDmode
1640 && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
1641 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
1643 unsigned HOST_WIDE_INT offset, bitpos;
1645 /* Compute the offset as a multiple of this unit,
1646 counting in bytes. */
1647 unsigned int unit = GET_MODE_BITSIZE (bestmode);
1648 offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1649 bitpos = bitnum % unit;
1651 /* Make sure the register is big enough for the whole field. */
1652 if (bitpos + bitsize <= unit)
1654 rtx last, result, xop0;
1656 last = get_last_insn ();
1658 /* Fetch it to a register in that size. */
1659 xop0 = adjust_bitfield_address (op0, bestmode, offset);
1660 xop0 = force_reg (bestmode, xop0);
1661 result = extract_bit_field_1 (xop0, bitsize, bitpos,
1662 unsignedp, packedp, target,
1663 mode, tmode, false);
1664 if (result)
1665 return result;
1667 delete_insns_since (last);
1672 if (!fallback_p)
1673 return NULL;
1675 /* Find a correspondingly-sized integer field, so we can apply
1676 shifts and masks to it. */
1677 int_mode = int_mode_for_mode (tmode);
1678 if (int_mode == BLKmode)
1679 int_mode = int_mode_for_mode (mode);
1680 /* Should probably push op0 out to memory and then do a load. */
1681 gcc_assert (int_mode != BLKmode);
1683 target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum,
1684 target, unsignedp, packedp);
1685 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1688 /* Generate code to extract a byte-field from STR_RTX
1689 containing BITSIZE bits, starting at BITNUM,
1690 and put it in TARGET if possible (if TARGET is nonzero).
1691 Regardless of TARGET, we return the rtx for where the value is placed.
1693 STR_RTX is the structure containing the byte (a REG or MEM).
1694 UNSIGNEDP is nonzero if this is an unsigned bit field.
1695 PACKEDP is nonzero if the field has the packed attribute.
1696 MODE is the natural mode of the field value once extracted.
1697 TMODE is the mode the caller would like the value to have;
1698 but the value may be returned with type MODE instead.
1700 If a TARGET is specified and we can store in it at no extra cost,
1701 we do so, and return TARGET.
1702 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1703 if they are equally easy. */
1706 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1707 unsigned HOST_WIDE_INT bitnum, int unsignedp, bool packedp,
1708 rtx target, enum machine_mode mode, enum machine_mode tmode)
1710 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp, packedp,
1711 target, mode, tmode, true);
1714 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1715 from bit BITNUM of OP0.
1717 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1718 PACKEDP is true if the field has the packed attribute.
1720 If TARGET is nonzero, attempts to store the value there
1721 and return TARGET, but this is not guaranteed.
1722 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
1724 static rtx
1725 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1726 unsigned HOST_WIDE_INT bitsize,
1727 unsigned HOST_WIDE_INT bitnum, rtx target,
1728 int unsignedp, bool packedp)
1730 enum machine_mode mode;
1732 if (MEM_P (op0))
1734 /* Get the proper mode to use for this field. We want a mode that
1735 includes the entire field. If such a mode would be larger than
1736 a word, we won't be doing the extraction the normal way. */
1738 if (MEM_VOLATILE_P (op0)
1739 && flag_strict_volatile_bitfields > 0)
1741 if (GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1742 mode = GET_MODE (op0);
1743 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1744 mode = GET_MODE (target);
1745 else
1746 mode = tmode;
1748 else
1749 mode = get_best_mode (bitsize, bitnum, 0, 0,
1750 MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1752 if (mode == VOIDmode)
1753 /* The only way this should occur is if the field spans word
1754 boundaries. */
1755 return extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1757 unsigned int total_bits = GET_MODE_BITSIZE (mode);
1758 HOST_WIDE_INT bit_offset = bitnum - bitnum % total_bits;
1760 /* If we're accessing a volatile MEM, we can't apply BIT_OFFSET
1761 if it results in a multi-word access where we otherwise wouldn't
1762 have one. So, check for that case here. */
1763 if (MEM_P (op0)
1764 && MEM_VOLATILE_P (op0)
1765 && flag_strict_volatile_bitfields > 0
1766 && bitnum % BITS_PER_UNIT + bitsize <= total_bits
1767 && bitnum % GET_MODE_BITSIZE (mode) + bitsize > total_bits)
1769 if (STRICT_ALIGNMENT)
1771 static bool informed_about_misalignment = false;
1773 if (packedp)
1775 if (bitsize == total_bits)
1776 warning_at (input_location, OPT_fstrict_volatile_bitfields,
1777 "multiple accesses to volatile structure"
1778 " member because of packed attribute");
1779 else
1780 warning_at (input_location, OPT_fstrict_volatile_bitfields,
1781 "multiple accesses to volatile structure"
1782 " bitfield because of packed attribute");
1784 return extract_split_bit_field (op0, bitsize, bitnum,
1785 unsignedp);
1788 if (bitsize == total_bits)
1789 warning_at (input_location, OPT_fstrict_volatile_bitfields,
1790 "mis-aligned access used for structure member");
1791 else
1792 warning_at (input_location, OPT_fstrict_volatile_bitfields,
1793 "mis-aligned access used for structure bitfield");
1795 if (! informed_about_misalignment)
1797 informed_about_misalignment = true;
1798 inform (input_location,
1799 "when a volatile object spans multiple type-sized"
1800 " locations, the compiler must choose between using"
1801 " a single mis-aligned access to preserve the"
1802 " volatility, or using multiple aligned accesses"
1803 " to avoid runtime faults; this code may fail at"
1804 " runtime if the hardware does not allow this"
1805 " access");
1808 bit_offset = bitnum - bitnum % BITS_PER_UNIT;
1810 op0 = adjust_bitfield_address (op0, mode, bit_offset / BITS_PER_UNIT);
1811 bitnum -= bit_offset;
1814 mode = GET_MODE (op0);
1815 gcc_assert (SCALAR_INT_MODE_P (mode));
1817 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1818 for invalid input, such as extract equivalent of f5 from
1819 gcc.dg/pr48335-2.c. */
1821 if (BYTES_BIG_ENDIAN)
1822 /* BITNUM is the distance between our msb and that of OP0.
1823 Convert it to the distance from the lsb. */
1824 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1826 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
1827 We have reduced the big-endian case to the little-endian case. */
1829 if (unsignedp)
1831 if (bitnum)
1833 /* If the field does not already start at the lsb,
1834 shift it so it does. */
1835 /* Maybe propagate the target for the shift. */
1836 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1837 if (tmode != mode)
1838 subtarget = 0;
1839 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
1841 /* Convert the value to the desired mode. */
1842 if (mode != tmode)
1843 op0 = convert_to_mode (tmode, op0, 1);
1845 /* Unless the msb of the field used to be the msb when we shifted,
1846 mask out the upper bits. */
1848 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
1849 return expand_binop (GET_MODE (op0), and_optab, op0,
1850 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1851 target, 1, OPTAB_LIB_WIDEN);
1852 return op0;
1855 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1856 then arithmetic-shift its lsb to the lsb of the word. */
1857 op0 = force_reg (mode, op0);
1859 /* Find the narrowest integer mode that contains the field. */
1861 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1862 mode = GET_MODE_WIDER_MODE (mode))
1863 if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
1865 op0 = convert_to_mode (mode, op0, 0);
1866 break;
1869 if (mode != tmode)
1870 target = 0;
1872 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
1874 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
1875 /* Maybe propagate the target for the shift. */
1876 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1877 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1880 return expand_shift (RSHIFT_EXPR, mode, op0,
1881 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1884 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1885 of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1886 complement of that if COMPLEMENT. The mask is truncated if
1887 necessary to the width of mode MODE. The mask is zero-extended if
1888 BITSIZE+BITPOS is too small for MODE. */
1890 static rtx
1891 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1893 double_int mask;
1895 mask = double_int::mask (bitsize);
1896 mask = mask.llshift (bitpos, HOST_BITS_PER_DOUBLE_INT);
1898 if (complement)
1899 mask = ~mask;
1901 return immed_double_int_const (mask, mode);
1904 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1905 VALUE truncated to BITSIZE bits and then shifted left BITPOS bits. */
1907 static rtx
1908 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1910 double_int val;
1912 val = double_int::from_uhwi (INTVAL (value)).zext (bitsize);
1913 val = val.llshift (bitpos, HOST_BITS_PER_DOUBLE_INT);
1915 return immed_double_int_const (val, mode);
1918 /* Extract a bit field that is split across two words
1919 and return an RTX for the result.
1921 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1922 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1923 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend. */
1925 static rtx
1926 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1927 unsigned HOST_WIDE_INT bitpos, int unsignedp)
1929 unsigned int unit;
1930 unsigned int bitsdone = 0;
1931 rtx result = NULL_RTX;
1932 int first = 1;
1934 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1935 much at a time. */
1936 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1937 unit = BITS_PER_WORD;
1938 else
1939 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1941 while (bitsdone < bitsize)
1943 unsigned HOST_WIDE_INT thissize;
1944 rtx part, word;
1945 unsigned HOST_WIDE_INT thispos;
1946 unsigned HOST_WIDE_INT offset;
1948 offset = (bitpos + bitsdone) / unit;
1949 thispos = (bitpos + bitsdone) % unit;
1951 /* THISSIZE must not overrun a word boundary. Otherwise,
1952 extract_fixed_bit_field will call us again, and we will mutually
1953 recurse forever. */
1954 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1955 thissize = MIN (thissize, unit - thispos);
1957 /* If OP0 is a register, then handle OFFSET here.
1959 When handling multiword bitfields, extract_bit_field may pass
1960 down a word_mode SUBREG of a larger REG for a bitfield that actually
1961 crosses a word boundary. Thus, for a SUBREG, we must find
1962 the current word starting from the base register. */
1963 if (GET_CODE (op0) == SUBREG)
1965 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1966 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1967 GET_MODE (SUBREG_REG (op0)));
1968 offset = 0;
1970 else if (REG_P (op0))
1972 word = operand_subword_force (op0, offset, GET_MODE (op0));
1973 offset = 0;
1975 else
1976 word = op0;
1978 /* Extract the parts in bit-counting order,
1979 whose meaning is determined by BYTES_PER_UNIT.
1980 OFFSET is in UNITs, and UNIT is in bits. */
1981 part = extract_fixed_bit_field (word_mode, word, thissize,
1982 offset * unit + thispos, 0, 1, false);
1983 bitsdone += thissize;
1985 /* Shift this part into place for the result. */
1986 if (BYTES_BIG_ENDIAN)
1988 if (bitsize != bitsdone)
1989 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1990 bitsize - bitsdone, 0, 1);
1992 else
1994 if (bitsdone != thissize)
1995 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1996 bitsdone - thissize, 0, 1);
1999 if (first)
2000 result = part;
2001 else
2002 /* Combine the parts with bitwise or. This works
2003 because we extracted each part as an unsigned bit field. */
2004 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2005 OPTAB_LIB_WIDEN);
2007 first = 0;
2010 /* Unsigned bit field: we are done. */
2011 if (unsignedp)
2012 return result;
2013 /* Signed bit field: sign-extend with two arithmetic shifts. */
2014 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2015 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2016 return expand_shift (RSHIFT_EXPR, word_mode, result,
2017 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2020 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2021 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2022 MODE, fill the upper bits with zeros. Fail if the layout of either
2023 mode is unknown (as for CC modes) or if the extraction would involve
2024 unprofitable mode punning. Return the value on success, otherwise
2025 return null.
2027 This is different from gen_lowpart* in these respects:
2029 - the returned value must always be considered an rvalue
2031 - when MODE is wider than SRC_MODE, the extraction involves
2032 a zero extension
2034 - when MODE is smaller than SRC_MODE, the extraction involves
2035 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2037 In other words, this routine performs a computation, whereas the
2038 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2039 operations. */
2042 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
2044 enum machine_mode int_mode, src_int_mode;
2046 if (mode == src_mode)
2047 return src;
2049 if (CONSTANT_P (src))
2051 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2052 fails, it will happily create (subreg (symbol_ref)) or similar
2053 invalid SUBREGs. */
2054 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2055 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2056 if (ret)
2057 return ret;
2059 if (GET_MODE (src) == VOIDmode
2060 || !validate_subreg (mode, src_mode, src, byte))
2061 return NULL_RTX;
2063 src = force_reg (GET_MODE (src), src);
2064 return gen_rtx_SUBREG (mode, src, byte);
2067 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2068 return NULL_RTX;
2070 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2071 && MODES_TIEABLE_P (mode, src_mode))
2073 rtx x = gen_lowpart_common (mode, src);
2074 if (x)
2075 return x;
2078 src_int_mode = int_mode_for_mode (src_mode);
2079 int_mode = int_mode_for_mode (mode);
2080 if (src_int_mode == BLKmode || int_mode == BLKmode)
2081 return NULL_RTX;
2083 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2084 return NULL_RTX;
2085 if (!MODES_TIEABLE_P (int_mode, mode))
2086 return NULL_RTX;
2088 src = gen_lowpart (src_int_mode, src);
2089 src = convert_modes (int_mode, src_int_mode, src, true);
2090 src = gen_lowpart (mode, src);
2091 return src;
2094 /* Add INC into TARGET. */
2096 void
2097 expand_inc (rtx target, rtx inc)
2099 rtx value = expand_binop (GET_MODE (target), add_optab,
2100 target, inc,
2101 target, 0, OPTAB_LIB_WIDEN);
2102 if (value != target)
2103 emit_move_insn (target, value);
2106 /* Subtract DEC from TARGET. */
2108 void
2109 expand_dec (rtx target, rtx dec)
2111 rtx value = expand_binop (GET_MODE (target), sub_optab,
2112 target, dec,
2113 target, 0, OPTAB_LIB_WIDEN);
2114 if (value != target)
2115 emit_move_insn (target, value);
2118 /* Output a shift instruction for expression code CODE,
2119 with SHIFTED being the rtx for the value to shift,
2120 and AMOUNT the rtx for the amount to shift by.
2121 Store the result in the rtx TARGET, if that is convenient.
2122 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2123 Return the rtx for where the value is. */
2125 static rtx
2126 expand_shift_1 (enum tree_code code, enum machine_mode mode, rtx shifted,
2127 rtx amount, rtx target, int unsignedp)
2129 rtx op1, temp = 0;
2130 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2131 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2132 optab lshift_optab = ashl_optab;
2133 optab rshift_arith_optab = ashr_optab;
2134 optab rshift_uns_optab = lshr_optab;
2135 optab lrotate_optab = rotl_optab;
2136 optab rrotate_optab = rotr_optab;
2137 enum machine_mode op1_mode;
2138 int attempt;
2139 bool speed = optimize_insn_for_speed_p ();
2141 op1 = amount;
2142 op1_mode = GET_MODE (op1);
2144 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2145 shift amount is a vector, use the vector/vector shift patterns. */
2146 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2148 lshift_optab = vashl_optab;
2149 rshift_arith_optab = vashr_optab;
2150 rshift_uns_optab = vlshr_optab;
2151 lrotate_optab = vrotl_optab;
2152 rrotate_optab = vrotr_optab;
2155 /* Previously detected shift-counts computed by NEGATE_EXPR
2156 and shifted in the other direction; but that does not work
2157 on all machines. */
2159 if (SHIFT_COUNT_TRUNCATED)
2161 if (CONST_INT_P (op1)
2162 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2163 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2164 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2165 % GET_MODE_BITSIZE (mode));
2166 else if (GET_CODE (op1) == SUBREG
2167 && subreg_lowpart_p (op1)
2168 && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (op1))))
2169 op1 = SUBREG_REG (op1);
2172 if (op1 == const0_rtx)
2173 return shifted;
2175 /* Check whether its cheaper to implement a left shift by a constant
2176 bit count by a sequence of additions. */
2177 if (code == LSHIFT_EXPR
2178 && CONST_INT_P (op1)
2179 && INTVAL (op1) > 0
2180 && INTVAL (op1) < GET_MODE_PRECISION (mode)
2181 && INTVAL (op1) < MAX_BITS_PER_WORD
2182 && (shift_cost (speed, mode, INTVAL (op1))
2183 > INTVAL (op1) * add_cost (speed, mode))
2184 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2186 int i;
2187 for (i = 0; i < INTVAL (op1); i++)
2189 temp = force_reg (mode, shifted);
2190 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2191 unsignedp, OPTAB_LIB_WIDEN);
2193 return shifted;
2196 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2198 enum optab_methods methods;
2200 if (attempt == 0)
2201 methods = OPTAB_DIRECT;
2202 else if (attempt == 1)
2203 methods = OPTAB_WIDEN;
2204 else
2205 methods = OPTAB_LIB_WIDEN;
2207 if (rotate)
2209 /* Widening does not work for rotation. */
2210 if (methods == OPTAB_WIDEN)
2211 continue;
2212 else if (methods == OPTAB_LIB_WIDEN)
2214 /* If we have been unable to open-code this by a rotation,
2215 do it as the IOR of two shifts. I.e., to rotate A
2216 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2217 where C is the bitsize of A.
2219 It is theoretically possible that the target machine might
2220 not be able to perform either shift and hence we would
2221 be making two libcalls rather than just the one for the
2222 shift (similarly if IOR could not be done). We will allow
2223 this extremely unlikely lossage to avoid complicating the
2224 code below. */
2226 rtx subtarget = target == shifted ? 0 : target;
2227 rtx new_amount, other_amount;
2228 rtx temp1;
2230 new_amount = op1;
2231 if (CONST_INT_P (op1))
2232 other_amount = GEN_INT (GET_MODE_BITSIZE (mode)
2233 - INTVAL (op1));
2234 else
2235 other_amount
2236 = simplify_gen_binary (MINUS, GET_MODE (op1),
2237 GEN_INT (GET_MODE_PRECISION (mode)),
2238 op1);
2240 shifted = force_reg (mode, shifted);
2242 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2243 mode, shifted, new_amount, 0, 1);
2244 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2245 mode, shifted, other_amount,
2246 subtarget, 1);
2247 return expand_binop (mode, ior_optab, temp, temp1, target,
2248 unsignedp, methods);
2251 temp = expand_binop (mode,
2252 left ? lrotate_optab : rrotate_optab,
2253 shifted, op1, target, unsignedp, methods);
2255 else if (unsignedp)
2256 temp = expand_binop (mode,
2257 left ? lshift_optab : rshift_uns_optab,
2258 shifted, op1, target, unsignedp, methods);
2260 /* Do arithmetic shifts.
2261 Also, if we are going to widen the operand, we can just as well
2262 use an arithmetic right-shift instead of a logical one. */
2263 if (temp == 0 && ! rotate
2264 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2266 enum optab_methods methods1 = methods;
2268 /* If trying to widen a log shift to an arithmetic shift,
2269 don't accept an arithmetic shift of the same size. */
2270 if (unsignedp)
2271 methods1 = OPTAB_MUST_WIDEN;
2273 /* Arithmetic shift */
2275 temp = expand_binop (mode,
2276 left ? lshift_optab : rshift_arith_optab,
2277 shifted, op1, target, unsignedp, methods1);
2280 /* We used to try extzv here for logical right shifts, but that was
2281 only useful for one machine, the VAX, and caused poor code
2282 generation there for lshrdi3, so the code was deleted and a
2283 define_expand for lshrsi3 was added to vax.md. */
2286 gcc_assert (temp);
2287 return temp;
2290 /* Output a shift instruction for expression code CODE,
2291 with SHIFTED being the rtx for the value to shift,
2292 and AMOUNT the amount to shift by.
2293 Store the result in the rtx TARGET, if that is convenient.
2294 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2295 Return the rtx for where the value is. */
2298 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2299 int amount, rtx target, int unsignedp)
2301 return expand_shift_1 (code, mode,
2302 shifted, GEN_INT (amount), target, unsignedp);
2305 /* Output a shift instruction for expression code CODE,
2306 with SHIFTED being the rtx for the value to shift,
2307 and AMOUNT the tree for the amount to shift by.
2308 Store the result in the rtx TARGET, if that is convenient.
2309 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2310 Return the rtx for where the value is. */
2313 expand_variable_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2314 tree amount, rtx target, int unsignedp)
2316 return expand_shift_1 (code, mode,
2317 shifted, expand_normal (amount), target, unsignedp);
2321 /* Indicates the type of fixup needed after a constant multiplication.
2322 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2323 the result should be negated, and ADD_VARIANT means that the
2324 multiplicand should be added to the result. */
2325 enum mult_variant {basic_variant, negate_variant, add_variant};
2327 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2328 const struct mult_cost *, enum machine_mode mode);
2329 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2330 struct algorithm *, enum mult_variant *, int);
2331 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2332 const struct algorithm *, enum mult_variant);
2333 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2334 static rtx extract_high_half (enum machine_mode, rtx);
2335 static rtx expmed_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2336 static rtx expmed_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2337 int, int);
2338 /* Compute and return the best algorithm for multiplying by T.
2339 The algorithm must cost less than cost_limit
2340 If retval.cost >= COST_LIMIT, no algorithm was found and all
2341 other field of the returned struct are undefined.
2342 MODE is the machine mode of the multiplication. */
2344 static void
2345 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2346 const struct mult_cost *cost_limit, enum machine_mode mode)
2348 int m;
2349 struct algorithm *alg_in, *best_alg;
2350 struct mult_cost best_cost;
2351 struct mult_cost new_limit;
2352 int op_cost, op_latency;
2353 unsigned HOST_WIDE_INT orig_t = t;
2354 unsigned HOST_WIDE_INT q;
2355 int maxm, hash_index;
2356 bool cache_hit = false;
2357 enum alg_code cache_alg = alg_zero;
2358 bool speed = optimize_insn_for_speed_p ();
2359 enum machine_mode imode;
2360 struct alg_hash_entry *entry_ptr;
2362 /* Indicate that no algorithm is yet found. If no algorithm
2363 is found, this value will be returned and indicate failure. */
2364 alg_out->cost.cost = cost_limit->cost + 1;
2365 alg_out->cost.latency = cost_limit->latency + 1;
2367 if (cost_limit->cost < 0
2368 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2369 return;
2371 /* Be prepared for vector modes. */
2372 imode = GET_MODE_INNER (mode);
2373 if (imode == VOIDmode)
2374 imode = mode;
2376 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2378 /* Restrict the bits of "t" to the multiplication's mode. */
2379 t &= GET_MODE_MASK (imode);
2381 /* t == 1 can be done in zero cost. */
2382 if (t == 1)
2384 alg_out->ops = 1;
2385 alg_out->cost.cost = 0;
2386 alg_out->cost.latency = 0;
2387 alg_out->op[0] = alg_m;
2388 return;
2391 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2392 fail now. */
2393 if (t == 0)
2395 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2396 return;
2397 else
2399 alg_out->ops = 1;
2400 alg_out->cost.cost = zero_cost (speed);
2401 alg_out->cost.latency = zero_cost (speed);
2402 alg_out->op[0] = alg_zero;
2403 return;
2407 /* We'll be needing a couple extra algorithm structures now. */
2409 alg_in = XALLOCA (struct algorithm);
2410 best_alg = XALLOCA (struct algorithm);
2411 best_cost = *cost_limit;
2413 /* Compute the hash index. */
2414 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2416 /* See if we already know what to do for T. */
2417 entry_ptr = alg_hash_entry_ptr (hash_index);
2418 if (entry_ptr->t == t
2419 && entry_ptr->mode == mode
2420 && entry_ptr->mode == mode
2421 && entry_ptr->speed == speed
2422 && entry_ptr->alg != alg_unknown)
2424 cache_alg = entry_ptr->alg;
2426 if (cache_alg == alg_impossible)
2428 /* The cache tells us that it's impossible to synthesize
2429 multiplication by T within entry_ptr->cost. */
2430 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2431 /* COST_LIMIT is at least as restrictive as the one
2432 recorded in the hash table, in which case we have no
2433 hope of synthesizing a multiplication. Just
2434 return. */
2435 return;
2437 /* If we get here, COST_LIMIT is less restrictive than the
2438 one recorded in the hash table, so we may be able to
2439 synthesize a multiplication. Proceed as if we didn't
2440 have the cache entry. */
2442 else
2444 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2445 /* The cached algorithm shows that this multiplication
2446 requires more cost than COST_LIMIT. Just return. This
2447 way, we don't clobber this cache entry with
2448 alg_impossible but retain useful information. */
2449 return;
2451 cache_hit = true;
2453 switch (cache_alg)
2455 case alg_shift:
2456 goto do_alg_shift;
2458 case alg_add_t_m2:
2459 case alg_sub_t_m2:
2460 goto do_alg_addsub_t_m2;
2462 case alg_add_factor:
2463 case alg_sub_factor:
2464 goto do_alg_addsub_factor;
2466 case alg_add_t2_m:
2467 goto do_alg_add_t2_m;
2469 case alg_sub_t2_m:
2470 goto do_alg_sub_t2_m;
2472 default:
2473 gcc_unreachable ();
2478 /* If we have a group of zero bits at the low-order part of T, try
2479 multiplying by the remaining bits and then doing a shift. */
2481 if ((t & 1) == 0)
2483 do_alg_shift:
2484 m = floor_log2 (t & -t); /* m = number of low zero bits */
2485 if (m < maxm)
2487 q = t >> m;
2488 /* The function expand_shift will choose between a shift and
2489 a sequence of additions, so the observed cost is given as
2490 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2491 op_cost = m * add_cost (speed, mode);
2492 if (shift_cost (speed, mode, m) < op_cost)
2493 op_cost = shift_cost (speed, mode, m);
2494 new_limit.cost = best_cost.cost - op_cost;
2495 new_limit.latency = best_cost.latency - op_cost;
2496 synth_mult (alg_in, q, &new_limit, mode);
2498 alg_in->cost.cost += op_cost;
2499 alg_in->cost.latency += op_cost;
2500 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2502 struct algorithm *x;
2503 best_cost = alg_in->cost;
2504 x = alg_in, alg_in = best_alg, best_alg = x;
2505 best_alg->log[best_alg->ops] = m;
2506 best_alg->op[best_alg->ops] = alg_shift;
2509 /* See if treating ORIG_T as a signed number yields a better
2510 sequence. Try this sequence only for a negative ORIG_T
2511 as it would be useless for a non-negative ORIG_T. */
2512 if ((HOST_WIDE_INT) orig_t < 0)
2514 /* Shift ORIG_T as follows because a right shift of a
2515 negative-valued signed type is implementation
2516 defined. */
2517 q = ~(~orig_t >> m);
2518 /* The function expand_shift will choose between a shift
2519 and a sequence of additions, so the observed cost is
2520 given as MIN (m * add_cost(speed, mode),
2521 shift_cost(speed, mode, m)). */
2522 op_cost = m * add_cost (speed, mode);
2523 if (shift_cost (speed, mode, m) < op_cost)
2524 op_cost = shift_cost (speed, mode, m);
2525 new_limit.cost = best_cost.cost - op_cost;
2526 new_limit.latency = best_cost.latency - op_cost;
2527 synth_mult (alg_in, q, &new_limit, mode);
2529 alg_in->cost.cost += op_cost;
2530 alg_in->cost.latency += op_cost;
2531 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2533 struct algorithm *x;
2534 best_cost = alg_in->cost;
2535 x = alg_in, alg_in = best_alg, best_alg = x;
2536 best_alg->log[best_alg->ops] = m;
2537 best_alg->op[best_alg->ops] = alg_shift;
2541 if (cache_hit)
2542 goto done;
2545 /* If we have an odd number, add or subtract one. */
2546 if ((t & 1) != 0)
2548 unsigned HOST_WIDE_INT w;
2550 do_alg_addsub_t_m2:
2551 for (w = 1; (w & t) != 0; w <<= 1)
2553 /* If T was -1, then W will be zero after the loop. This is another
2554 case where T ends with ...111. Handling this with (T + 1) and
2555 subtract 1 produces slightly better code and results in algorithm
2556 selection much faster than treating it like the ...0111 case
2557 below. */
2558 if (w == 0
2559 || (w > 2
2560 /* Reject the case where t is 3.
2561 Thus we prefer addition in that case. */
2562 && t != 3))
2564 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2566 op_cost = add_cost (speed, mode);
2567 new_limit.cost = best_cost.cost - op_cost;
2568 new_limit.latency = best_cost.latency - op_cost;
2569 synth_mult (alg_in, t + 1, &new_limit, mode);
2571 alg_in->cost.cost += op_cost;
2572 alg_in->cost.latency += op_cost;
2573 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2575 struct algorithm *x;
2576 best_cost = alg_in->cost;
2577 x = alg_in, alg_in = best_alg, best_alg = x;
2578 best_alg->log[best_alg->ops] = 0;
2579 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2582 else
2584 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2586 op_cost = add_cost (speed, mode);
2587 new_limit.cost = best_cost.cost - op_cost;
2588 new_limit.latency = best_cost.latency - op_cost;
2589 synth_mult (alg_in, t - 1, &new_limit, mode);
2591 alg_in->cost.cost += op_cost;
2592 alg_in->cost.latency += op_cost;
2593 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2595 struct algorithm *x;
2596 best_cost = alg_in->cost;
2597 x = alg_in, alg_in = best_alg, best_alg = x;
2598 best_alg->log[best_alg->ops] = 0;
2599 best_alg->op[best_alg->ops] = alg_add_t_m2;
2603 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2604 quickly with a - a * n for some appropriate constant n. */
2605 m = exact_log2 (-orig_t + 1);
2606 if (m >= 0 && m < maxm)
2608 op_cost = shiftsub1_cost (speed, mode, m);
2609 new_limit.cost = best_cost.cost - op_cost;
2610 new_limit.latency = best_cost.latency - op_cost;
2611 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2612 &new_limit, mode);
2614 alg_in->cost.cost += op_cost;
2615 alg_in->cost.latency += op_cost;
2616 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2618 struct algorithm *x;
2619 best_cost = alg_in->cost;
2620 x = alg_in, alg_in = best_alg, best_alg = x;
2621 best_alg->log[best_alg->ops] = m;
2622 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2626 if (cache_hit)
2627 goto done;
2630 /* Look for factors of t of the form
2631 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2632 If we find such a factor, we can multiply by t using an algorithm that
2633 multiplies by q, shift the result by m and add/subtract it to itself.
2635 We search for large factors first and loop down, even if large factors
2636 are less probable than small; if we find a large factor we will find a
2637 good sequence quickly, and therefore be able to prune (by decreasing
2638 COST_LIMIT) the search. */
2640 do_alg_addsub_factor:
2641 for (m = floor_log2 (t - 1); m >= 2; m--)
2643 unsigned HOST_WIDE_INT d;
2645 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2646 if (t % d == 0 && t > d && m < maxm
2647 && (!cache_hit || cache_alg == alg_add_factor))
2649 /* If the target has a cheap shift-and-add instruction use
2650 that in preference to a shift insn followed by an add insn.
2651 Assume that the shift-and-add is "atomic" with a latency
2652 equal to its cost, otherwise assume that on superscalar
2653 hardware the shift may be executed concurrently with the
2654 earlier steps in the algorithm. */
2655 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2656 if (shiftadd_cost (speed, mode, m) < op_cost)
2658 op_cost = shiftadd_cost (speed, mode, m);
2659 op_latency = op_cost;
2661 else
2662 op_latency = add_cost (speed, mode);
2664 new_limit.cost = best_cost.cost - op_cost;
2665 new_limit.latency = best_cost.latency - op_latency;
2666 synth_mult (alg_in, t / d, &new_limit, mode);
2668 alg_in->cost.cost += op_cost;
2669 alg_in->cost.latency += op_latency;
2670 if (alg_in->cost.latency < op_cost)
2671 alg_in->cost.latency = op_cost;
2672 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2674 struct algorithm *x;
2675 best_cost = alg_in->cost;
2676 x = alg_in, alg_in = best_alg, best_alg = x;
2677 best_alg->log[best_alg->ops] = m;
2678 best_alg->op[best_alg->ops] = alg_add_factor;
2680 /* Other factors will have been taken care of in the recursion. */
2681 break;
2684 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2685 if (t % d == 0 && t > d && m < maxm
2686 && (!cache_hit || cache_alg == alg_sub_factor))
2688 /* If the target has a cheap shift-and-subtract insn use
2689 that in preference to a shift insn followed by a sub insn.
2690 Assume that the shift-and-sub is "atomic" with a latency
2691 equal to it's cost, otherwise assume that on superscalar
2692 hardware the shift may be executed concurrently with the
2693 earlier steps in the algorithm. */
2694 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2695 if (shiftsub0_cost (speed, mode, m) < op_cost)
2697 op_cost = shiftsub0_cost (speed, mode, m);
2698 op_latency = op_cost;
2700 else
2701 op_latency = add_cost (speed, mode);
2703 new_limit.cost = best_cost.cost - op_cost;
2704 new_limit.latency = best_cost.latency - op_latency;
2705 synth_mult (alg_in, t / d, &new_limit, mode);
2707 alg_in->cost.cost += op_cost;
2708 alg_in->cost.latency += op_latency;
2709 if (alg_in->cost.latency < op_cost)
2710 alg_in->cost.latency = op_cost;
2711 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2713 struct algorithm *x;
2714 best_cost = alg_in->cost;
2715 x = alg_in, alg_in = best_alg, best_alg = x;
2716 best_alg->log[best_alg->ops] = m;
2717 best_alg->op[best_alg->ops] = alg_sub_factor;
2719 break;
2722 if (cache_hit)
2723 goto done;
2725 /* Try shift-and-add (load effective address) instructions,
2726 i.e. do a*3, a*5, a*9. */
2727 if ((t & 1) != 0)
2729 do_alg_add_t2_m:
2730 q = t - 1;
2731 q = q & -q;
2732 m = exact_log2 (q);
2733 if (m >= 0 && m < maxm)
2735 op_cost = shiftadd_cost (speed, mode, m);
2736 new_limit.cost = best_cost.cost - op_cost;
2737 new_limit.latency = best_cost.latency - op_cost;
2738 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2740 alg_in->cost.cost += op_cost;
2741 alg_in->cost.latency += op_cost;
2742 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2744 struct algorithm *x;
2745 best_cost = alg_in->cost;
2746 x = alg_in, alg_in = best_alg, best_alg = x;
2747 best_alg->log[best_alg->ops] = m;
2748 best_alg->op[best_alg->ops] = alg_add_t2_m;
2751 if (cache_hit)
2752 goto done;
2754 do_alg_sub_t2_m:
2755 q = t + 1;
2756 q = q & -q;
2757 m = exact_log2 (q);
2758 if (m >= 0 && m < maxm)
2760 op_cost = shiftsub0_cost (speed, mode, m);
2761 new_limit.cost = best_cost.cost - op_cost;
2762 new_limit.latency = best_cost.latency - op_cost;
2763 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2765 alg_in->cost.cost += op_cost;
2766 alg_in->cost.latency += op_cost;
2767 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2769 struct algorithm *x;
2770 best_cost = alg_in->cost;
2771 x = alg_in, alg_in = best_alg, best_alg = x;
2772 best_alg->log[best_alg->ops] = m;
2773 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2776 if (cache_hit)
2777 goto done;
2780 done:
2781 /* If best_cost has not decreased, we have not found any algorithm. */
2782 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2784 /* We failed to find an algorithm. Record alg_impossible for
2785 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2786 we are asked to find an algorithm for T within the same or
2787 lower COST_LIMIT, we can immediately return to the
2788 caller. */
2789 entry_ptr->t = t;
2790 entry_ptr->mode = mode;
2791 entry_ptr->speed = speed;
2792 entry_ptr->alg = alg_impossible;
2793 entry_ptr->cost = *cost_limit;
2794 return;
2797 /* Cache the result. */
2798 if (!cache_hit)
2800 entry_ptr->t = t;
2801 entry_ptr->mode = mode;
2802 entry_ptr->speed = speed;
2803 entry_ptr->alg = best_alg->op[best_alg->ops];
2804 entry_ptr->cost.cost = best_cost.cost;
2805 entry_ptr->cost.latency = best_cost.latency;
2808 /* If we are getting a too long sequence for `struct algorithm'
2809 to record, make this search fail. */
2810 if (best_alg->ops == MAX_BITS_PER_WORD)
2811 return;
2813 /* Copy the algorithm from temporary space to the space at alg_out.
2814 We avoid using structure assignment because the majority of
2815 best_alg is normally undefined, and this is a critical function. */
2816 alg_out->ops = best_alg->ops + 1;
2817 alg_out->cost = best_cost;
2818 memcpy (alg_out->op, best_alg->op,
2819 alg_out->ops * sizeof *alg_out->op);
2820 memcpy (alg_out->log, best_alg->log,
2821 alg_out->ops * sizeof *alg_out->log);
2824 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2825 Try three variations:
2827 - a shift/add sequence based on VAL itself
2828 - a shift/add sequence based on -VAL, followed by a negation
2829 - a shift/add sequence based on VAL - 1, followed by an addition.
2831 Return true if the cheapest of these cost less than MULT_COST,
2832 describing the algorithm in *ALG and final fixup in *VARIANT. */
2834 static bool
2835 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2836 struct algorithm *alg, enum mult_variant *variant,
2837 int mult_cost)
2839 struct algorithm alg2;
2840 struct mult_cost limit;
2841 int op_cost;
2842 bool speed = optimize_insn_for_speed_p ();
2844 /* Fail quickly for impossible bounds. */
2845 if (mult_cost < 0)
2846 return false;
2848 /* Ensure that mult_cost provides a reasonable upper bound.
2849 Any constant multiplication can be performed with less
2850 than 2 * bits additions. */
2851 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
2852 if (mult_cost > op_cost)
2853 mult_cost = op_cost;
2855 *variant = basic_variant;
2856 limit.cost = mult_cost;
2857 limit.latency = mult_cost;
2858 synth_mult (alg, val, &limit, mode);
2860 /* This works only if the inverted value actually fits in an
2861 `unsigned int' */
2862 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
2864 op_cost = neg_cost(speed, mode);
2865 if (MULT_COST_LESS (&alg->cost, mult_cost))
2867 limit.cost = alg->cost.cost - op_cost;
2868 limit.latency = alg->cost.latency - op_cost;
2870 else
2872 limit.cost = mult_cost - op_cost;
2873 limit.latency = mult_cost - op_cost;
2876 synth_mult (&alg2, -val, &limit, mode);
2877 alg2.cost.cost += op_cost;
2878 alg2.cost.latency += op_cost;
2879 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2880 *alg = alg2, *variant = negate_variant;
2883 /* This proves very useful for division-by-constant. */
2884 op_cost = add_cost (speed, mode);
2885 if (MULT_COST_LESS (&alg->cost, mult_cost))
2887 limit.cost = alg->cost.cost - op_cost;
2888 limit.latency = alg->cost.latency - op_cost;
2890 else
2892 limit.cost = mult_cost - op_cost;
2893 limit.latency = mult_cost - op_cost;
2896 synth_mult (&alg2, val - 1, &limit, mode);
2897 alg2.cost.cost += op_cost;
2898 alg2.cost.latency += op_cost;
2899 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2900 *alg = alg2, *variant = add_variant;
2902 return MULT_COST_LESS (&alg->cost, mult_cost);
2905 /* A subroutine of expand_mult, used for constant multiplications.
2906 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2907 convenient. Use the shift/add sequence described by ALG and apply
2908 the final fixup specified by VARIANT. */
2910 static rtx
2911 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2912 rtx target, const struct algorithm *alg,
2913 enum mult_variant variant)
2915 HOST_WIDE_INT val_so_far;
2916 rtx insn, accum, tem;
2917 int opno;
2918 enum machine_mode nmode;
2920 /* Avoid referencing memory over and over and invalid sharing
2921 on SUBREGs. */
2922 op0 = force_reg (mode, op0);
2924 /* ACCUM starts out either as OP0 or as a zero, depending on
2925 the first operation. */
2927 if (alg->op[0] == alg_zero)
2929 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
2930 val_so_far = 0;
2932 else if (alg->op[0] == alg_m)
2934 accum = copy_to_mode_reg (mode, op0);
2935 val_so_far = 1;
2937 else
2938 gcc_unreachable ();
2940 for (opno = 1; opno < alg->ops; opno++)
2942 int log = alg->log[opno];
2943 rtx shift_subtarget = optimize ? 0 : accum;
2944 rtx add_target
2945 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2946 && !optimize)
2947 ? target : 0;
2948 rtx accum_target = optimize ? 0 : accum;
2949 rtx accum_inner;
2951 switch (alg->op[opno])
2953 case alg_shift:
2954 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2955 /* REG_EQUAL note will be attached to the following insn. */
2956 emit_move_insn (accum, tem);
2957 val_so_far <<= log;
2958 break;
2960 case alg_add_t_m2:
2961 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2962 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2963 add_target ? add_target : accum_target);
2964 val_so_far += (HOST_WIDE_INT) 1 << log;
2965 break;
2967 case alg_sub_t_m2:
2968 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2969 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2970 add_target ? add_target : accum_target);
2971 val_so_far -= (HOST_WIDE_INT) 1 << log;
2972 break;
2974 case alg_add_t2_m:
2975 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2976 log, shift_subtarget, 0);
2977 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2978 add_target ? add_target : accum_target);
2979 val_so_far = (val_so_far << log) + 1;
2980 break;
2982 case alg_sub_t2_m:
2983 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2984 log, shift_subtarget, 0);
2985 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2986 add_target ? add_target : accum_target);
2987 val_so_far = (val_so_far << log) - 1;
2988 break;
2990 case alg_add_factor:
2991 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2992 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2993 add_target ? add_target : accum_target);
2994 val_so_far += val_so_far << log;
2995 break;
2997 case alg_sub_factor:
2998 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2999 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3000 (add_target
3001 ? add_target : (optimize ? 0 : tem)));
3002 val_so_far = (val_so_far << log) - val_so_far;
3003 break;
3005 default:
3006 gcc_unreachable ();
3009 if (SCALAR_INT_MODE_P (mode))
3011 /* Write a REG_EQUAL note on the last insn so that we can cse
3012 multiplication sequences. Note that if ACCUM is a SUBREG,
3013 we've set the inner register and must properly indicate that. */
3014 tem = op0, nmode = mode;
3015 accum_inner = accum;
3016 if (GET_CODE (accum) == SUBREG)
3018 accum_inner = SUBREG_REG (accum);
3019 nmode = GET_MODE (accum_inner);
3020 tem = gen_lowpart (nmode, op0);
3023 insn = get_last_insn ();
3024 set_dst_reg_note (insn, REG_EQUAL,
3025 gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)),
3026 accum_inner);
3030 if (variant == negate_variant)
3032 val_so_far = -val_so_far;
3033 accum = expand_unop (mode, neg_optab, accum, target, 0);
3035 else if (variant == add_variant)
3037 val_so_far = val_so_far + 1;
3038 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3041 /* Compare only the bits of val and val_so_far that are significant
3042 in the result mode, to avoid sign-/zero-extension confusion. */
3043 nmode = GET_MODE_INNER (mode);
3044 if (nmode == VOIDmode)
3045 nmode = mode;
3046 val &= GET_MODE_MASK (nmode);
3047 val_so_far &= GET_MODE_MASK (nmode);
3048 gcc_assert (val == val_so_far);
3050 return accum;
3053 /* Perform a multiplication and return an rtx for the result.
3054 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3055 TARGET is a suggestion for where to store the result (an rtx).
3057 We check specially for a constant integer as OP1.
3058 If you want this check for OP0 as well, then before calling
3059 you should swap the two operands if OP0 would be constant. */
3062 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3063 int unsignedp)
3065 enum mult_variant variant;
3066 struct algorithm algorithm;
3067 rtx scalar_op1;
3068 int max_cost;
3069 bool speed = optimize_insn_for_speed_p ();
3070 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3072 if (CONSTANT_P (op0))
3074 rtx temp = op0;
3075 op0 = op1;
3076 op1 = temp;
3079 /* For vectors, there are several simplifications that can be made if
3080 all elements of the vector constant are identical. */
3081 scalar_op1 = op1;
3082 if (GET_CODE (op1) == CONST_VECTOR)
3084 int i, n = CONST_VECTOR_NUNITS (op1);
3085 scalar_op1 = CONST_VECTOR_ELT (op1, 0);
3086 for (i = 1; i < n; ++i)
3087 if (!rtx_equal_p (scalar_op1, CONST_VECTOR_ELT (op1, i)))
3088 goto skip_scalar;
3091 if (INTEGRAL_MODE_P (mode))
3093 rtx fake_reg;
3094 HOST_WIDE_INT coeff;
3095 bool is_neg;
3096 int mode_bitsize;
3098 if (op1 == CONST0_RTX (mode))
3099 return op1;
3100 if (op1 == CONST1_RTX (mode))
3101 return op0;
3102 if (op1 == CONSTM1_RTX (mode))
3103 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3104 op0, target, 0);
3106 if (do_trapv)
3107 goto skip_synth;
3109 /* These are the operations that are potentially turned into
3110 a sequence of shifts and additions. */
3111 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3113 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3114 less than or equal in size to `unsigned int' this doesn't matter.
3115 If the mode is larger than `unsigned int', then synth_mult works
3116 only if the constant value exactly fits in an `unsigned int' without
3117 any truncation. This means that multiplying by negative values does
3118 not work; results are off by 2^32 on a 32 bit machine. */
3120 if (CONST_INT_P (scalar_op1))
3122 coeff = INTVAL (scalar_op1);
3123 is_neg = coeff < 0;
3125 else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3127 /* If we are multiplying in DImode, it may still be a win
3128 to try to work with shifts and adds. */
3129 if (CONST_DOUBLE_HIGH (scalar_op1) == 0
3130 && CONST_DOUBLE_LOW (scalar_op1) > 0)
3132 coeff = CONST_DOUBLE_LOW (scalar_op1);
3133 is_neg = false;
3135 else if (CONST_DOUBLE_LOW (scalar_op1) == 0)
3137 coeff = CONST_DOUBLE_HIGH (scalar_op1);
3138 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3140 int shift = floor_log2 (coeff) + HOST_BITS_PER_WIDE_INT;
3141 if (shift < HOST_BITS_PER_DOUBLE_INT - 1
3142 || mode_bitsize <= HOST_BITS_PER_DOUBLE_INT)
3143 return expand_shift (LSHIFT_EXPR, mode, op0,
3144 shift, target, unsignedp);
3146 goto skip_synth;
3148 else
3149 goto skip_synth;
3151 else
3152 goto skip_synth;
3154 /* We used to test optimize here, on the grounds that it's better to
3155 produce a smaller program when -O is not used. But this causes
3156 such a terrible slowdown sometimes that it seems better to always
3157 use synth_mult. */
3159 /* Special case powers of two. */
3160 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3161 return expand_shift (LSHIFT_EXPR, mode, op0,
3162 floor_log2 (coeff), target, unsignedp);
3164 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3166 /* Attempt to handle multiplication of DImode values by negative
3167 coefficients, by performing the multiplication by a positive
3168 multiplier and then inverting the result. */
3169 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3171 /* Its safe to use -coeff even for INT_MIN, as the
3172 result is interpreted as an unsigned coefficient.
3173 Exclude cost of op0 from max_cost to match the cost
3174 calculation of the synth_mult. */
3175 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed)
3176 - neg_cost(speed, mode));
3177 if (max_cost > 0
3178 && choose_mult_variant (mode, -coeff, &algorithm,
3179 &variant, max_cost))
3181 rtx temp = expand_mult_const (mode, op0, -coeff, NULL_RTX,
3182 &algorithm, variant);
3183 return expand_unop (mode, neg_optab, temp, target, 0);
3185 goto skip_synth;
3188 /* Exclude cost of op0 from max_cost to match the cost
3189 calculation of the synth_mult. */
3190 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
3191 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3192 return expand_mult_const (mode, op0, coeff, target,
3193 &algorithm, variant);
3195 skip_synth:
3197 /* Expand x*2.0 as x+x. */
3198 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1))
3200 REAL_VALUE_TYPE d;
3201 REAL_VALUE_FROM_CONST_DOUBLE (d, scalar_op1);
3203 if (REAL_VALUES_EQUAL (d, dconst2))
3205 op0 = force_reg (GET_MODE (op0), op0);
3206 return expand_binop (mode, add_optab, op0, op0,
3207 target, unsignedp, OPTAB_LIB_WIDEN);
3210 skip_scalar:
3212 /* This used to use umul_optab if unsigned, but for non-widening multiply
3213 there is no difference between signed and unsigned. */
3214 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3215 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3216 gcc_assert (op0);
3217 return op0;
3220 /* Return a cost estimate for multiplying a register by the given
3221 COEFFicient in the given MODE and SPEED. */
3224 mult_by_coeff_cost (HOST_WIDE_INT coeff, enum machine_mode mode, bool speed)
3226 int max_cost;
3227 struct algorithm algorithm;
3228 enum mult_variant variant;
3230 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3231 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg), speed);
3232 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3233 return algorithm.cost.cost;
3234 else
3235 return max_cost;
3238 /* Perform a widening multiplication and return an rtx for the result.
3239 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3240 TARGET is a suggestion for where to store the result (an rtx).
3241 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3242 or smul_widen_optab.
3244 We check specially for a constant integer as OP1, comparing the
3245 cost of a widening multiply against the cost of a sequence of shifts
3246 and adds. */
3249 expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3250 int unsignedp, optab this_optab)
3252 bool speed = optimize_insn_for_speed_p ();
3253 rtx cop1;
3255 if (CONST_INT_P (op1)
3256 && GET_MODE (op0) != VOIDmode
3257 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3258 this_optab == umul_widen_optab))
3259 && CONST_INT_P (cop1)
3260 && (INTVAL (cop1) >= 0
3261 || HWI_COMPUTABLE_MODE_P (mode)))
3263 HOST_WIDE_INT coeff = INTVAL (cop1);
3264 int max_cost;
3265 enum mult_variant variant;
3266 struct algorithm algorithm;
3268 /* Special case powers of two. */
3269 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3271 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3272 return expand_shift (LSHIFT_EXPR, mode, op0,
3273 floor_log2 (coeff), target, unsignedp);
3276 /* Exclude cost of op0 from max_cost to match the cost
3277 calculation of the synth_mult. */
3278 max_cost = mul_widen_cost (speed, mode);
3279 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3280 max_cost))
3282 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3283 return expand_mult_const (mode, op0, coeff, target,
3284 &algorithm, variant);
3287 return expand_binop (mode, this_optab, op0, op1, target,
3288 unsignedp, OPTAB_LIB_WIDEN);
3291 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3292 replace division by D, and put the least significant N bits of the result
3293 in *MULTIPLIER_PTR and return the most significant bit.
3295 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3296 needed precision is in PRECISION (should be <= N).
3298 PRECISION should be as small as possible so this function can choose
3299 multiplier more freely.
3301 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3302 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3304 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3305 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
3307 unsigned HOST_WIDE_INT
3308 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3309 unsigned HOST_WIDE_INT *multiplier_ptr,
3310 int *post_shift_ptr, int *lgup_ptr)
3312 double_int mhigh, mlow;
3313 int lgup, post_shift;
3314 int pow, pow2;
3316 /* lgup = ceil(log2(divisor)); */
3317 lgup = ceil_log2 (d);
3319 gcc_assert (lgup <= n);
3321 pow = n + lgup;
3322 pow2 = n + lgup - precision;
3324 /* We could handle this with some effort, but this case is much
3325 better handled directly with a scc insn, so rely on caller using
3326 that. */
3327 gcc_assert (pow != HOST_BITS_PER_DOUBLE_INT);
3329 /* mlow = 2^(N + lgup)/d */
3330 double_int val = double_int_zero.set_bit (pow);
3331 mlow = val.div (double_int::from_uhwi (d), true, TRUNC_DIV_EXPR);
3333 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3334 val |= double_int_zero.set_bit (pow2);
3335 mhigh = val.div (double_int::from_uhwi (d), true, TRUNC_DIV_EXPR);
3337 gcc_assert (!mhigh.high || val.high - d < d);
3338 gcc_assert (mhigh.high <= 1 && mlow.high <= 1);
3339 /* Assert that mlow < mhigh. */
3340 gcc_assert (mlow.ult (mhigh));
3342 /* If precision == N, then mlow, mhigh exceed 2^N
3343 (but they do not exceed 2^(N+1)). */
3345 /* Reduce to lowest terms. */
3346 for (post_shift = lgup; post_shift > 0; post_shift--)
3348 int shft = HOST_BITS_PER_WIDE_INT - 1;
3349 unsigned HOST_WIDE_INT ml_lo = (mlow.high << shft) | (mlow.low >> 1);
3350 unsigned HOST_WIDE_INT mh_lo = (mhigh.high << shft) | (mhigh.low >> 1);
3351 if (ml_lo >= mh_lo)
3352 break;
3354 mlow = double_int::from_uhwi (ml_lo);
3355 mhigh = double_int::from_uhwi (mh_lo);
3358 *post_shift_ptr = post_shift;
3359 *lgup_ptr = lgup;
3360 if (n < HOST_BITS_PER_WIDE_INT)
3362 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3363 *multiplier_ptr = mhigh.low & mask;
3364 return mhigh.low >= mask;
3366 else
3368 *multiplier_ptr = mhigh.low;
3369 return mhigh.high;
3373 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3374 congruent to 1 (mod 2**N). */
3376 static unsigned HOST_WIDE_INT
3377 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3379 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3381 /* The algorithm notes that the choice y = x satisfies
3382 x*y == 1 mod 2^3, since x is assumed odd.
3383 Each iteration doubles the number of bits of significance in y. */
3385 unsigned HOST_WIDE_INT mask;
3386 unsigned HOST_WIDE_INT y = x;
3387 int nbit = 3;
3389 mask = (n == HOST_BITS_PER_WIDE_INT
3390 ? ~(unsigned HOST_WIDE_INT) 0
3391 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3393 while (nbit < n)
3395 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3396 nbit *= 2;
3398 return y;
3401 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3402 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3403 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3404 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3405 become signed.
3407 The result is put in TARGET if that is convenient.
3409 MODE is the mode of operation. */
3412 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3413 rtx op1, rtx target, int unsignedp)
3415 rtx tem;
3416 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3418 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3419 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3420 tem = expand_and (mode, tem, op1, NULL_RTX);
3421 adj_operand
3422 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3423 adj_operand);
3425 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3426 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3427 tem = expand_and (mode, tem, op0, NULL_RTX);
3428 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3429 target);
3431 return target;
3434 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3436 static rtx
3437 extract_high_half (enum machine_mode mode, rtx op)
3439 enum machine_mode wider_mode;
3441 if (mode == word_mode)
3442 return gen_highpart (mode, op);
3444 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3446 wider_mode = GET_MODE_WIDER_MODE (mode);
3447 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3448 GET_MODE_BITSIZE (mode), 0, 1);
3449 return convert_modes (mode, wider_mode, op, 0);
3452 /* Like expmed_mult_highpart, but only consider using a multiplication
3453 optab. OP1 is an rtx for the constant operand. */
3455 static rtx
3456 expmed_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3457 rtx target, int unsignedp, int max_cost)
3459 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3460 enum machine_mode wider_mode;
3461 optab moptab;
3462 rtx tem;
3463 int size;
3464 bool speed = optimize_insn_for_speed_p ();
3466 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3468 wider_mode = GET_MODE_WIDER_MODE (mode);
3469 size = GET_MODE_BITSIZE (mode);
3471 /* Firstly, try using a multiplication insn that only generates the needed
3472 high part of the product, and in the sign flavor of unsignedp. */
3473 if (mul_highpart_cost (speed, mode) < max_cost)
3475 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3476 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3477 unsignedp, OPTAB_DIRECT);
3478 if (tem)
3479 return tem;
3482 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3483 Need to adjust the result after the multiplication. */
3484 if (size - 1 < BITS_PER_WORD
3485 && (mul_highpart_cost (speed, mode)
3486 + 2 * shift_cost (speed, mode, size-1)
3487 + 4 * add_cost (speed, mode) < max_cost))
3489 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3490 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3491 unsignedp, OPTAB_DIRECT);
3492 if (tem)
3493 /* We used the wrong signedness. Adjust the result. */
3494 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3495 tem, unsignedp);
3498 /* Try widening multiplication. */
3499 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3500 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3501 && mul_widen_cost (speed, wider_mode) < max_cost)
3503 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3504 unsignedp, OPTAB_WIDEN);
3505 if (tem)
3506 return extract_high_half (mode, tem);
3509 /* Try widening the mode and perform a non-widening multiplication. */
3510 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3511 && size - 1 < BITS_PER_WORD
3512 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3513 < max_cost))
3515 rtx insns, wop0, wop1;
3517 /* We need to widen the operands, for example to ensure the
3518 constant multiplier is correctly sign or zero extended.
3519 Use a sequence to clean-up any instructions emitted by
3520 the conversions if things don't work out. */
3521 start_sequence ();
3522 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3523 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3524 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3525 unsignedp, OPTAB_WIDEN);
3526 insns = get_insns ();
3527 end_sequence ();
3529 if (tem)
3531 emit_insn (insns);
3532 return extract_high_half (mode, tem);
3536 /* Try widening multiplication of opposite signedness, and adjust. */
3537 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3538 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3539 && size - 1 < BITS_PER_WORD
3540 && (mul_widen_cost (speed, wider_mode)
3541 + 2 * shift_cost (speed, mode, size-1)
3542 + 4 * add_cost (speed, mode) < max_cost))
3544 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3545 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3546 if (tem != 0)
3548 tem = extract_high_half (mode, tem);
3549 /* We used the wrong signedness. Adjust the result. */
3550 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3551 target, unsignedp);
3555 return 0;
3558 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3559 putting the high half of the result in TARGET if that is convenient,
3560 and return where the result is. If the operation can not be performed,
3561 0 is returned.
3563 MODE is the mode of operation and result.
3565 UNSIGNEDP nonzero means unsigned multiply.
3567 MAX_COST is the total allowed cost for the expanded RTL. */
3569 static rtx
3570 expmed_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3571 rtx target, int unsignedp, int max_cost)
3573 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3574 unsigned HOST_WIDE_INT cnst1;
3575 int extra_cost;
3576 bool sign_adjust = false;
3577 enum mult_variant variant;
3578 struct algorithm alg;
3579 rtx tem;
3580 bool speed = optimize_insn_for_speed_p ();
3582 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3583 /* We can't support modes wider than HOST_BITS_PER_INT. */
3584 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3586 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3588 /* We can't optimize modes wider than BITS_PER_WORD.
3589 ??? We might be able to perform double-word arithmetic if
3590 mode == word_mode, however all the cost calculations in
3591 synth_mult etc. assume single-word operations. */
3592 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3593 return expmed_mult_highpart_optab (mode, op0, op1, target,
3594 unsignedp, max_cost);
3596 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3598 /* Check whether we try to multiply by a negative constant. */
3599 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3601 sign_adjust = true;
3602 extra_cost += add_cost (speed, mode);
3605 /* See whether shift/add multiplication is cheap enough. */
3606 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3607 max_cost - extra_cost))
3609 /* See whether the specialized multiplication optabs are
3610 cheaper than the shift/add version. */
3611 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3612 alg.cost.cost + extra_cost);
3613 if (tem)
3614 return tem;
3616 tem = convert_to_mode (wider_mode, op0, unsignedp);
3617 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3618 tem = extract_high_half (mode, tem);
3620 /* Adjust result for signedness. */
3621 if (sign_adjust)
3622 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3624 return tem;
3626 return expmed_mult_highpart_optab (mode, op0, op1, target,
3627 unsignedp, max_cost);
3631 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3633 static rtx
3634 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3636 unsigned HOST_WIDE_INT masklow, maskhigh;
3637 rtx result, temp, shift, label;
3638 int logd;
3640 logd = floor_log2 (d);
3641 result = gen_reg_rtx (mode);
3643 /* Avoid conditional branches when they're expensive. */
3644 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3645 && optimize_insn_for_speed_p ())
3647 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3648 mode, 0, -1);
3649 if (signmask)
3651 signmask = force_reg (mode, signmask);
3652 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3653 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3655 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3656 which instruction sequence to use. If logical right shifts
3657 are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3658 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
3660 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3661 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3662 || (set_src_cost (temp, optimize_insn_for_speed_p ())
3663 > COSTS_N_INSNS (2)))
3665 temp = expand_binop (mode, xor_optab, op0, signmask,
3666 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3667 temp = expand_binop (mode, sub_optab, temp, signmask,
3668 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3669 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3670 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3671 temp = expand_binop (mode, xor_optab, temp, signmask,
3672 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3673 temp = expand_binop (mode, sub_optab, temp, signmask,
3674 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3676 else
3678 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3679 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3680 signmask = force_reg (mode, signmask);
3682 temp = expand_binop (mode, add_optab, op0, signmask,
3683 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3684 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3685 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3686 temp = expand_binop (mode, sub_optab, temp, signmask,
3687 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3689 return temp;
3693 /* Mask contains the mode's signbit and the significant bits of the
3694 modulus. By including the signbit in the operation, many targets
3695 can avoid an explicit compare operation in the following comparison
3696 against zero. */
3698 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3699 if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3701 masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3702 maskhigh = -1;
3704 else
3705 maskhigh = (HOST_WIDE_INT) -1
3706 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3708 temp = expand_binop (mode, and_optab, op0,
3709 immed_double_const (masklow, maskhigh, mode),
3710 result, 1, OPTAB_LIB_WIDEN);
3711 if (temp != result)
3712 emit_move_insn (result, temp);
3714 label = gen_label_rtx ();
3715 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3717 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3718 0, OPTAB_LIB_WIDEN);
3719 masklow = (HOST_WIDE_INT) -1 << logd;
3720 maskhigh = -1;
3721 temp = expand_binop (mode, ior_optab, temp,
3722 immed_double_const (masklow, maskhigh, mode),
3723 result, 1, OPTAB_LIB_WIDEN);
3724 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3725 0, OPTAB_LIB_WIDEN);
3726 if (temp != result)
3727 emit_move_insn (result, temp);
3728 emit_label (label);
3729 return result;
3732 /* Expand signed division of OP0 by a power of two D in mode MODE.
3733 This routine is only called for positive values of D. */
3735 static rtx
3736 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3738 rtx temp, label;
3739 int logd;
3741 logd = floor_log2 (d);
3743 if (d == 2
3744 && BRANCH_COST (optimize_insn_for_speed_p (),
3745 false) >= 1)
3747 temp = gen_reg_rtx (mode);
3748 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3749 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3750 0, OPTAB_LIB_WIDEN);
3751 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3754 #ifdef HAVE_conditional_move
3755 if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3756 >= 2)
3758 rtx temp2;
3760 /* ??? emit_conditional_move forces a stack adjustment via
3761 compare_from_rtx so, if the sequence is discarded, it will
3762 be lost. Do it now instead. */
3763 do_pending_stack_adjust ();
3765 start_sequence ();
3766 temp2 = copy_to_mode_reg (mode, op0);
3767 temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3768 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3769 temp = force_reg (mode, temp);
3771 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3772 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3773 mode, temp, temp2, mode, 0);
3774 if (temp2)
3776 rtx seq = get_insns ();
3777 end_sequence ();
3778 emit_insn (seq);
3779 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3781 end_sequence ();
3783 #endif
3785 if (BRANCH_COST (optimize_insn_for_speed_p (),
3786 false) >= 2)
3788 int ushift = GET_MODE_BITSIZE (mode) - logd;
3790 temp = gen_reg_rtx (mode);
3791 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3792 if (shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3793 > COSTS_N_INSNS (1))
3794 temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3795 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3796 else
3797 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3798 ushift, NULL_RTX, 1);
3799 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3800 0, OPTAB_LIB_WIDEN);
3801 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3804 label = gen_label_rtx ();
3805 temp = copy_to_mode_reg (mode, op0);
3806 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3807 expand_inc (temp, GEN_INT (d - 1));
3808 emit_label (label);
3809 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3812 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3813 if that is convenient, and returning where the result is.
3814 You may request either the quotient or the remainder as the result;
3815 specify REM_FLAG nonzero to get the remainder.
3817 CODE is the expression code for which kind of division this is;
3818 it controls how rounding is done. MODE is the machine mode to use.
3819 UNSIGNEDP nonzero means do unsigned division. */
3821 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3822 and then correct it by or'ing in missing high bits
3823 if result of ANDI is nonzero.
3824 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3825 This could optimize to a bfexts instruction.
3826 But C doesn't use these operations, so their optimizations are
3827 left for later. */
3828 /* ??? For modulo, we don't actually need the highpart of the first product,
3829 the low part will do nicely. And for small divisors, the second multiply
3830 can also be a low-part only multiply or even be completely left out.
3831 E.g. to calculate the remainder of a division by 3 with a 32 bit
3832 multiply, multiply with 0x55555556 and extract the upper two bits;
3833 the result is exact for inputs up to 0x1fffffff.
3834 The input range can be reduced by using cross-sum rules.
3835 For odd divisors >= 3, the following table gives right shift counts
3836 so that if a number is shifted by an integer multiple of the given
3837 amount, the remainder stays the same:
3838 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3839 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3840 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3841 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3842 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3844 Cross-sum rules for even numbers can be derived by leaving as many bits
3845 to the right alone as the divisor has zeros to the right.
3846 E.g. if x is an unsigned 32 bit number:
3847 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3851 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3852 rtx op0, rtx op1, rtx target, int unsignedp)
3854 enum machine_mode compute_mode;
3855 rtx tquotient;
3856 rtx quotient = 0, remainder = 0;
3857 rtx last;
3858 int size;
3859 rtx insn;
3860 optab optab1, optab2;
3861 int op1_is_constant, op1_is_pow2 = 0;
3862 int max_cost, extra_cost;
3863 static HOST_WIDE_INT last_div_const = 0;
3864 static HOST_WIDE_INT ext_op1;
3865 bool speed = optimize_insn_for_speed_p ();
3867 op1_is_constant = CONST_INT_P (op1);
3868 if (op1_is_constant)
3870 ext_op1 = INTVAL (op1);
3871 if (unsignedp)
3872 ext_op1 &= GET_MODE_MASK (mode);
3873 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3874 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3878 This is the structure of expand_divmod:
3880 First comes code to fix up the operands so we can perform the operations
3881 correctly and efficiently.
3883 Second comes a switch statement with code specific for each rounding mode.
3884 For some special operands this code emits all RTL for the desired
3885 operation, for other cases, it generates only a quotient and stores it in
3886 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3887 to indicate that it has not done anything.
3889 Last comes code that finishes the operation. If QUOTIENT is set and
3890 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3891 QUOTIENT is not set, it is computed using trunc rounding.
3893 We try to generate special code for division and remainder when OP1 is a
3894 constant. If |OP1| = 2**n we can use shifts and some other fast
3895 operations. For other values of OP1, we compute a carefully selected
3896 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3897 by m.
3899 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3900 half of the product. Different strategies for generating the product are
3901 implemented in expmed_mult_highpart.
3903 If what we actually want is the remainder, we generate that by another
3904 by-constant multiplication and a subtraction. */
3906 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3907 code below will malfunction if we are, so check here and handle
3908 the special case if so. */
3909 if (op1 == const1_rtx)
3910 return rem_flag ? const0_rtx : op0;
3912 /* When dividing by -1, we could get an overflow.
3913 negv_optab can handle overflows. */
3914 if (! unsignedp && op1 == constm1_rtx)
3916 if (rem_flag)
3917 return const0_rtx;
3918 return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3919 ? negv_optab : neg_optab, op0, target, 0);
3922 if (target
3923 /* Don't use the function value register as a target
3924 since we have to read it as well as write it,
3925 and function-inlining gets confused by this. */
3926 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3927 /* Don't clobber an operand while doing a multi-step calculation. */
3928 || ((rem_flag || op1_is_constant)
3929 && (reg_mentioned_p (target, op0)
3930 || (MEM_P (op0) && MEM_P (target))))
3931 || reg_mentioned_p (target, op1)
3932 || (MEM_P (op1) && MEM_P (target))))
3933 target = 0;
3935 /* Get the mode in which to perform this computation. Normally it will
3936 be MODE, but sometimes we can't do the desired operation in MODE.
3937 If so, pick a wider mode in which we can do the operation. Convert
3938 to that mode at the start to avoid repeated conversions.
3940 First see what operations we need. These depend on the expression
3941 we are evaluating. (We assume that divxx3 insns exist under the
3942 same conditions that modxx3 insns and that these insns don't normally
3943 fail. If these assumptions are not correct, we may generate less
3944 efficient code in some cases.)
3946 Then see if we find a mode in which we can open-code that operation
3947 (either a division, modulus, or shift). Finally, check for the smallest
3948 mode for which we can do the operation with a library call. */
3950 /* We might want to refine this now that we have division-by-constant
3951 optimization. Since expmed_mult_highpart tries so many variants, it is
3952 not straightforward to generalize this. Maybe we should make an array
3953 of possible modes in init_expmed? Save this for GCC 2.7. */
3955 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3956 ? (unsignedp ? lshr_optab : ashr_optab)
3957 : (unsignedp ? udiv_optab : sdiv_optab));
3958 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3959 ? optab1
3960 : (unsignedp ? udivmod_optab : sdivmod_optab));
3962 for (compute_mode = mode; compute_mode != VOIDmode;
3963 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3964 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
3965 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
3966 break;
3968 if (compute_mode == VOIDmode)
3969 for (compute_mode = mode; compute_mode != VOIDmode;
3970 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3971 if (optab_libfunc (optab1, compute_mode)
3972 || optab_libfunc (optab2, compute_mode))
3973 break;
3975 /* If we still couldn't find a mode, use MODE, but expand_binop will
3976 probably die. */
3977 if (compute_mode == VOIDmode)
3978 compute_mode = mode;
3980 if (target && GET_MODE (target) == compute_mode)
3981 tquotient = target;
3982 else
3983 tquotient = gen_reg_rtx (compute_mode);
3985 size = GET_MODE_BITSIZE (compute_mode);
3986 #if 0
3987 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3988 (mode), and thereby get better code when OP1 is a constant. Do that
3989 later. It will require going over all usages of SIZE below. */
3990 size = GET_MODE_BITSIZE (mode);
3991 #endif
3993 /* Only deduct something for a REM if the last divide done was
3994 for a different constant. Then set the constant of the last
3995 divide. */
3996 max_cost = (unsignedp
3997 ? udiv_cost (speed, compute_mode)
3998 : sdiv_cost (speed, compute_mode));
3999 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4000 && INTVAL (op1) == last_div_const))
4001 max_cost -= (mul_cost (speed, compute_mode)
4002 + add_cost (speed, compute_mode));
4004 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4006 /* Now convert to the best mode to use. */
4007 if (compute_mode != mode)
4009 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4010 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4012 /* convert_modes may have placed op1 into a register, so we
4013 must recompute the following. */
4014 op1_is_constant = CONST_INT_P (op1);
4015 op1_is_pow2 = (op1_is_constant
4016 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4017 || (! unsignedp
4018 && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
4021 /* If one of the operands is a volatile MEM, copy it into a register. */
4023 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4024 op0 = force_reg (compute_mode, op0);
4025 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4026 op1 = force_reg (compute_mode, op1);
4028 /* If we need the remainder or if OP1 is constant, we need to
4029 put OP0 in a register in case it has any queued subexpressions. */
4030 if (rem_flag || op1_is_constant)
4031 op0 = force_reg (compute_mode, op0);
4033 last = get_last_insn ();
4035 /* Promote floor rounding to trunc rounding for unsigned operations. */
4036 if (unsignedp)
4038 if (code == FLOOR_DIV_EXPR)
4039 code = TRUNC_DIV_EXPR;
4040 if (code == FLOOR_MOD_EXPR)
4041 code = TRUNC_MOD_EXPR;
4042 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4043 code = TRUNC_DIV_EXPR;
4046 if (op1 != const0_rtx)
4047 switch (code)
4049 case TRUNC_MOD_EXPR:
4050 case TRUNC_DIV_EXPR:
4051 if (op1_is_constant)
4053 if (unsignedp)
4055 unsigned HOST_WIDE_INT mh, ml;
4056 int pre_shift, post_shift;
4057 int dummy;
4058 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4059 & GET_MODE_MASK (compute_mode));
4061 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4063 pre_shift = floor_log2 (d);
4064 if (rem_flag)
4066 remainder
4067 = expand_binop (compute_mode, and_optab, op0,
4068 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4069 remainder, 1,
4070 OPTAB_LIB_WIDEN);
4071 if (remainder)
4072 return gen_lowpart (mode, remainder);
4074 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4075 pre_shift, tquotient, 1);
4077 else if (size <= HOST_BITS_PER_WIDE_INT)
4079 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4081 /* Most significant bit of divisor is set; emit an scc
4082 insn. */
4083 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4084 compute_mode, 1, 1);
4086 else
4088 /* Find a suitable multiplier and right shift count
4089 instead of multiplying with D. */
4091 mh = choose_multiplier (d, size, size,
4092 &ml, &post_shift, &dummy);
4094 /* If the suggested multiplier is more than SIZE bits,
4095 we can do better for even divisors, using an
4096 initial right shift. */
4097 if (mh != 0 && (d & 1) == 0)
4099 pre_shift = floor_log2 (d & -d);
4100 mh = choose_multiplier (d >> pre_shift, size,
4101 size - pre_shift,
4102 &ml, &post_shift, &dummy);
4103 gcc_assert (!mh);
4105 else
4106 pre_shift = 0;
4108 if (mh != 0)
4110 rtx t1, t2, t3, t4;
4112 if (post_shift - 1 >= BITS_PER_WORD)
4113 goto fail1;
4115 extra_cost
4116 = (shift_cost (speed, compute_mode, post_shift - 1)
4117 + shift_cost (speed, compute_mode, 1)
4118 + 2 * add_cost (speed, compute_mode));
4119 t1 = expmed_mult_highpart (compute_mode, op0,
4120 GEN_INT (ml),
4121 NULL_RTX, 1,
4122 max_cost - extra_cost);
4123 if (t1 == 0)
4124 goto fail1;
4125 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4126 op0, t1),
4127 NULL_RTX);
4128 t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4129 t2, 1, NULL_RTX, 1);
4130 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4131 t1, t3),
4132 NULL_RTX);
4133 quotient = expand_shift
4134 (RSHIFT_EXPR, compute_mode, t4,
4135 post_shift - 1, tquotient, 1);
4137 else
4139 rtx t1, t2;
4141 if (pre_shift >= BITS_PER_WORD
4142 || post_shift >= BITS_PER_WORD)
4143 goto fail1;
4145 t1 = expand_shift
4146 (RSHIFT_EXPR, compute_mode, op0,
4147 pre_shift, NULL_RTX, 1);
4148 extra_cost
4149 = (shift_cost (speed, compute_mode, pre_shift)
4150 + shift_cost (speed, compute_mode, post_shift));
4151 t2 = expmed_mult_highpart (compute_mode, t1,
4152 GEN_INT (ml),
4153 NULL_RTX, 1,
4154 max_cost - extra_cost);
4155 if (t2 == 0)
4156 goto fail1;
4157 quotient = expand_shift
4158 (RSHIFT_EXPR, compute_mode, t2,
4159 post_shift, tquotient, 1);
4163 else /* Too wide mode to use tricky code */
4164 break;
4166 insn = get_last_insn ();
4167 if (insn != last)
4168 set_dst_reg_note (insn, REG_EQUAL,
4169 gen_rtx_UDIV (compute_mode, op0, op1),
4170 quotient);
4172 else /* TRUNC_DIV, signed */
4174 unsigned HOST_WIDE_INT ml;
4175 int lgup, post_shift;
4176 rtx mlr;
4177 HOST_WIDE_INT d = INTVAL (op1);
4178 unsigned HOST_WIDE_INT abs_d;
4180 /* Since d might be INT_MIN, we have to cast to
4181 unsigned HOST_WIDE_INT before negating to avoid
4182 undefined signed overflow. */
4183 abs_d = (d >= 0
4184 ? (unsigned HOST_WIDE_INT) d
4185 : - (unsigned HOST_WIDE_INT) d);
4187 /* n rem d = n rem -d */
4188 if (rem_flag && d < 0)
4190 d = abs_d;
4191 op1 = gen_int_mode (abs_d, compute_mode);
4194 if (d == 1)
4195 quotient = op0;
4196 else if (d == -1)
4197 quotient = expand_unop (compute_mode, neg_optab, op0,
4198 tquotient, 0);
4199 else if (HOST_BITS_PER_WIDE_INT >= size
4200 && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4202 /* This case is not handled correctly below. */
4203 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4204 compute_mode, 1, 1);
4205 if (quotient == 0)
4206 goto fail1;
4208 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4209 && (rem_flag
4210 ? smod_pow2_cheap (speed, compute_mode)
4211 : sdiv_pow2_cheap (speed, compute_mode))
4212 /* We assume that cheap metric is true if the
4213 optab has an expander for this mode. */
4214 && ((optab_handler ((rem_flag ? smod_optab
4215 : sdiv_optab),
4216 compute_mode)
4217 != CODE_FOR_nothing)
4218 || (optab_handler (sdivmod_optab,
4219 compute_mode)
4220 != CODE_FOR_nothing)))
4222 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4224 if (rem_flag)
4226 remainder = expand_smod_pow2 (compute_mode, op0, d);
4227 if (remainder)
4228 return gen_lowpart (mode, remainder);
4231 if (sdiv_pow2_cheap (speed, compute_mode)
4232 && ((optab_handler (sdiv_optab, compute_mode)
4233 != CODE_FOR_nothing)
4234 || (optab_handler (sdivmod_optab, compute_mode)
4235 != CODE_FOR_nothing)))
4236 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4237 compute_mode, op0,
4238 gen_int_mode (abs_d,
4239 compute_mode),
4240 NULL_RTX, 0);
4241 else
4242 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4244 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4245 negate the quotient. */
4246 if (d < 0)
4248 insn = get_last_insn ();
4249 if (insn != last
4250 && abs_d < ((unsigned HOST_WIDE_INT) 1
4251 << (HOST_BITS_PER_WIDE_INT - 1)))
4252 set_dst_reg_note (insn, REG_EQUAL,
4253 gen_rtx_DIV (compute_mode, op0,
4254 gen_int_mode
4255 (abs_d,
4256 compute_mode)),
4257 quotient);
4259 quotient = expand_unop (compute_mode, neg_optab,
4260 quotient, quotient, 0);
4263 else if (size <= HOST_BITS_PER_WIDE_INT)
4265 choose_multiplier (abs_d, size, size - 1,
4266 &ml, &post_shift, &lgup);
4267 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4269 rtx t1, t2, t3;
4271 if (post_shift >= BITS_PER_WORD
4272 || size - 1 >= BITS_PER_WORD)
4273 goto fail1;
4275 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4276 + shift_cost (speed, compute_mode, size - 1)
4277 + add_cost (speed, compute_mode));
4278 t1 = expmed_mult_highpart (compute_mode, op0,
4279 GEN_INT (ml), NULL_RTX, 0,
4280 max_cost - extra_cost);
4281 if (t1 == 0)
4282 goto fail1;
4283 t2 = expand_shift
4284 (RSHIFT_EXPR, compute_mode, t1,
4285 post_shift, NULL_RTX, 0);
4286 t3 = expand_shift
4287 (RSHIFT_EXPR, compute_mode, op0,
4288 size - 1, NULL_RTX, 0);
4289 if (d < 0)
4290 quotient
4291 = force_operand (gen_rtx_MINUS (compute_mode,
4292 t3, t2),
4293 tquotient);
4294 else
4295 quotient
4296 = force_operand (gen_rtx_MINUS (compute_mode,
4297 t2, t3),
4298 tquotient);
4300 else
4302 rtx t1, t2, t3, t4;
4304 if (post_shift >= BITS_PER_WORD
4305 || size - 1 >= BITS_PER_WORD)
4306 goto fail1;
4308 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4309 mlr = gen_int_mode (ml, compute_mode);
4310 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4311 + shift_cost (speed, compute_mode, size - 1)
4312 + 2 * add_cost (speed, compute_mode));
4313 t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4314 NULL_RTX, 0,
4315 max_cost - extra_cost);
4316 if (t1 == 0)
4317 goto fail1;
4318 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4319 t1, op0),
4320 NULL_RTX);
4321 t3 = expand_shift
4322 (RSHIFT_EXPR, compute_mode, t2,
4323 post_shift, NULL_RTX, 0);
4324 t4 = expand_shift
4325 (RSHIFT_EXPR, compute_mode, op0,
4326 size - 1, NULL_RTX, 0);
4327 if (d < 0)
4328 quotient
4329 = force_operand (gen_rtx_MINUS (compute_mode,
4330 t4, t3),
4331 tquotient);
4332 else
4333 quotient
4334 = force_operand (gen_rtx_MINUS (compute_mode,
4335 t3, t4),
4336 tquotient);
4339 else /* Too wide mode to use tricky code */
4340 break;
4342 insn = get_last_insn ();
4343 if (insn != last)
4344 set_dst_reg_note (insn, REG_EQUAL,
4345 gen_rtx_DIV (compute_mode, op0, op1),
4346 quotient);
4348 break;
4350 fail1:
4351 delete_insns_since (last);
4352 break;
4354 case FLOOR_DIV_EXPR:
4355 case FLOOR_MOD_EXPR:
4356 /* We will come here only for signed operations. */
4357 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4359 unsigned HOST_WIDE_INT mh, ml;
4360 int pre_shift, lgup, post_shift;
4361 HOST_WIDE_INT d = INTVAL (op1);
4363 if (d > 0)
4365 /* We could just as easily deal with negative constants here,
4366 but it does not seem worth the trouble for GCC 2.6. */
4367 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4369 pre_shift = floor_log2 (d);
4370 if (rem_flag)
4372 remainder = expand_binop (compute_mode, and_optab, op0,
4373 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4374 remainder, 0, OPTAB_LIB_WIDEN);
4375 if (remainder)
4376 return gen_lowpart (mode, remainder);
4378 quotient = expand_shift
4379 (RSHIFT_EXPR, compute_mode, op0,
4380 pre_shift, tquotient, 0);
4382 else
4384 rtx t1, t2, t3, t4;
4386 mh = choose_multiplier (d, size, size - 1,
4387 &ml, &post_shift, &lgup);
4388 gcc_assert (!mh);
4390 if (post_shift < BITS_PER_WORD
4391 && size - 1 < BITS_PER_WORD)
4393 t1 = expand_shift
4394 (RSHIFT_EXPR, compute_mode, op0,
4395 size - 1, NULL_RTX, 0);
4396 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4397 NULL_RTX, 0, OPTAB_WIDEN);
4398 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4399 + shift_cost (speed, compute_mode, size - 1)
4400 + 2 * add_cost (speed, compute_mode));
4401 t3 = expmed_mult_highpart (compute_mode, t2,
4402 GEN_INT (ml), NULL_RTX, 1,
4403 max_cost - extra_cost);
4404 if (t3 != 0)
4406 t4 = expand_shift
4407 (RSHIFT_EXPR, compute_mode, t3,
4408 post_shift, NULL_RTX, 1);
4409 quotient = expand_binop (compute_mode, xor_optab,
4410 t4, t1, tquotient, 0,
4411 OPTAB_WIDEN);
4416 else
4418 rtx nsign, t1, t2, t3, t4;
4419 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4420 op0, constm1_rtx), NULL_RTX);
4421 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4422 0, OPTAB_WIDEN);
4423 nsign = expand_shift
4424 (RSHIFT_EXPR, compute_mode, t2,
4425 size - 1, NULL_RTX, 0);
4426 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4427 NULL_RTX);
4428 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4429 NULL_RTX, 0);
4430 if (t4)
4432 rtx t5;
4433 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4434 NULL_RTX, 0);
4435 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4436 t4, t5),
4437 tquotient);
4442 if (quotient != 0)
4443 break;
4444 delete_insns_since (last);
4446 /* Try using an instruction that produces both the quotient and
4447 remainder, using truncation. We can easily compensate the quotient
4448 or remainder to get floor rounding, once we have the remainder.
4449 Notice that we compute also the final remainder value here,
4450 and return the result right away. */
4451 if (target == 0 || GET_MODE (target) != compute_mode)
4452 target = gen_reg_rtx (compute_mode);
4454 if (rem_flag)
4456 remainder
4457 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4458 quotient = gen_reg_rtx (compute_mode);
4460 else
4462 quotient
4463 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4464 remainder = gen_reg_rtx (compute_mode);
4467 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4468 quotient, remainder, 0))
4470 /* This could be computed with a branch-less sequence.
4471 Save that for later. */
4472 rtx tem;
4473 rtx label = gen_label_rtx ();
4474 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4475 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4476 NULL_RTX, 0, OPTAB_WIDEN);
4477 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4478 expand_dec (quotient, const1_rtx);
4479 expand_inc (remainder, op1);
4480 emit_label (label);
4481 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4484 /* No luck with division elimination or divmod. Have to do it
4485 by conditionally adjusting op0 *and* the result. */
4487 rtx label1, label2, label3, label4, label5;
4488 rtx adjusted_op0;
4489 rtx tem;
4491 quotient = gen_reg_rtx (compute_mode);
4492 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4493 label1 = gen_label_rtx ();
4494 label2 = gen_label_rtx ();
4495 label3 = gen_label_rtx ();
4496 label4 = gen_label_rtx ();
4497 label5 = gen_label_rtx ();
4498 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4499 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4500 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4501 quotient, 0, OPTAB_LIB_WIDEN);
4502 if (tem != quotient)
4503 emit_move_insn (quotient, tem);
4504 emit_jump_insn (gen_jump (label5));
4505 emit_barrier ();
4506 emit_label (label1);
4507 expand_inc (adjusted_op0, const1_rtx);
4508 emit_jump_insn (gen_jump (label4));
4509 emit_barrier ();
4510 emit_label (label2);
4511 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4512 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4513 quotient, 0, OPTAB_LIB_WIDEN);
4514 if (tem != quotient)
4515 emit_move_insn (quotient, tem);
4516 emit_jump_insn (gen_jump (label5));
4517 emit_barrier ();
4518 emit_label (label3);
4519 expand_dec (adjusted_op0, const1_rtx);
4520 emit_label (label4);
4521 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4522 quotient, 0, OPTAB_LIB_WIDEN);
4523 if (tem != quotient)
4524 emit_move_insn (quotient, tem);
4525 expand_dec (quotient, const1_rtx);
4526 emit_label (label5);
4528 break;
4530 case CEIL_DIV_EXPR:
4531 case CEIL_MOD_EXPR:
4532 if (unsignedp)
4534 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4536 rtx t1, t2, t3;
4537 unsigned HOST_WIDE_INT d = INTVAL (op1);
4538 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4539 floor_log2 (d), tquotient, 1);
4540 t2 = expand_binop (compute_mode, and_optab, op0,
4541 GEN_INT (d - 1),
4542 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4543 t3 = gen_reg_rtx (compute_mode);
4544 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4545 compute_mode, 1, 1);
4546 if (t3 == 0)
4548 rtx lab;
4549 lab = gen_label_rtx ();
4550 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4551 expand_inc (t1, const1_rtx);
4552 emit_label (lab);
4553 quotient = t1;
4555 else
4556 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4557 t1, t3),
4558 tquotient);
4559 break;
4562 /* Try using an instruction that produces both the quotient and
4563 remainder, using truncation. We can easily compensate the
4564 quotient or remainder to get ceiling rounding, once we have the
4565 remainder. Notice that we compute also the final remainder
4566 value here, and return the result right away. */
4567 if (target == 0 || GET_MODE (target) != compute_mode)
4568 target = gen_reg_rtx (compute_mode);
4570 if (rem_flag)
4572 remainder = (REG_P (target)
4573 ? target : gen_reg_rtx (compute_mode));
4574 quotient = gen_reg_rtx (compute_mode);
4576 else
4578 quotient = (REG_P (target)
4579 ? target : gen_reg_rtx (compute_mode));
4580 remainder = gen_reg_rtx (compute_mode);
4583 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4584 remainder, 1))
4586 /* This could be computed with a branch-less sequence.
4587 Save that for later. */
4588 rtx label = gen_label_rtx ();
4589 do_cmp_and_jump (remainder, const0_rtx, EQ,
4590 compute_mode, label);
4591 expand_inc (quotient, const1_rtx);
4592 expand_dec (remainder, op1);
4593 emit_label (label);
4594 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4597 /* No luck with division elimination or divmod. Have to do it
4598 by conditionally adjusting op0 *and* the result. */
4600 rtx label1, label2;
4601 rtx adjusted_op0, tem;
4603 quotient = gen_reg_rtx (compute_mode);
4604 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4605 label1 = gen_label_rtx ();
4606 label2 = gen_label_rtx ();
4607 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4608 compute_mode, label1);
4609 emit_move_insn (quotient, const0_rtx);
4610 emit_jump_insn (gen_jump (label2));
4611 emit_barrier ();
4612 emit_label (label1);
4613 expand_dec (adjusted_op0, const1_rtx);
4614 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4615 quotient, 1, OPTAB_LIB_WIDEN);
4616 if (tem != quotient)
4617 emit_move_insn (quotient, tem);
4618 expand_inc (quotient, const1_rtx);
4619 emit_label (label2);
4622 else /* signed */
4624 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4625 && INTVAL (op1) >= 0)
4627 /* This is extremely similar to the code for the unsigned case
4628 above. For 2.7 we should merge these variants, but for
4629 2.6.1 I don't want to touch the code for unsigned since that
4630 get used in C. The signed case will only be used by other
4631 languages (Ada). */
4633 rtx t1, t2, t3;
4634 unsigned HOST_WIDE_INT d = INTVAL (op1);
4635 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4636 floor_log2 (d), tquotient, 0);
4637 t2 = expand_binop (compute_mode, and_optab, op0,
4638 GEN_INT (d - 1),
4639 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4640 t3 = gen_reg_rtx (compute_mode);
4641 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4642 compute_mode, 1, 1);
4643 if (t3 == 0)
4645 rtx lab;
4646 lab = gen_label_rtx ();
4647 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4648 expand_inc (t1, const1_rtx);
4649 emit_label (lab);
4650 quotient = t1;
4652 else
4653 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4654 t1, t3),
4655 tquotient);
4656 break;
4659 /* Try using an instruction that produces both the quotient and
4660 remainder, using truncation. We can easily compensate the
4661 quotient or remainder to get ceiling rounding, once we have the
4662 remainder. Notice that we compute also the final remainder
4663 value here, and return the result right away. */
4664 if (target == 0 || GET_MODE (target) != compute_mode)
4665 target = gen_reg_rtx (compute_mode);
4666 if (rem_flag)
4668 remainder= (REG_P (target)
4669 ? target : gen_reg_rtx (compute_mode));
4670 quotient = gen_reg_rtx (compute_mode);
4672 else
4674 quotient = (REG_P (target)
4675 ? target : gen_reg_rtx (compute_mode));
4676 remainder = gen_reg_rtx (compute_mode);
4679 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4680 remainder, 0))
4682 /* This could be computed with a branch-less sequence.
4683 Save that for later. */
4684 rtx tem;
4685 rtx label = gen_label_rtx ();
4686 do_cmp_and_jump (remainder, const0_rtx, EQ,
4687 compute_mode, label);
4688 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4689 NULL_RTX, 0, OPTAB_WIDEN);
4690 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4691 expand_inc (quotient, const1_rtx);
4692 expand_dec (remainder, op1);
4693 emit_label (label);
4694 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4697 /* No luck with division elimination or divmod. Have to do it
4698 by conditionally adjusting op0 *and* the result. */
4700 rtx label1, label2, label3, label4, label5;
4701 rtx adjusted_op0;
4702 rtx tem;
4704 quotient = gen_reg_rtx (compute_mode);
4705 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4706 label1 = gen_label_rtx ();
4707 label2 = gen_label_rtx ();
4708 label3 = gen_label_rtx ();
4709 label4 = gen_label_rtx ();
4710 label5 = gen_label_rtx ();
4711 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4712 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4713 compute_mode, label1);
4714 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4715 quotient, 0, OPTAB_LIB_WIDEN);
4716 if (tem != quotient)
4717 emit_move_insn (quotient, tem);
4718 emit_jump_insn (gen_jump (label5));
4719 emit_barrier ();
4720 emit_label (label1);
4721 expand_dec (adjusted_op0, const1_rtx);
4722 emit_jump_insn (gen_jump (label4));
4723 emit_barrier ();
4724 emit_label (label2);
4725 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4726 compute_mode, label3);
4727 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4728 quotient, 0, OPTAB_LIB_WIDEN);
4729 if (tem != quotient)
4730 emit_move_insn (quotient, tem);
4731 emit_jump_insn (gen_jump (label5));
4732 emit_barrier ();
4733 emit_label (label3);
4734 expand_inc (adjusted_op0, const1_rtx);
4735 emit_label (label4);
4736 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4737 quotient, 0, OPTAB_LIB_WIDEN);
4738 if (tem != quotient)
4739 emit_move_insn (quotient, tem);
4740 expand_inc (quotient, const1_rtx);
4741 emit_label (label5);
4744 break;
4746 case EXACT_DIV_EXPR:
4747 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4749 HOST_WIDE_INT d = INTVAL (op1);
4750 unsigned HOST_WIDE_INT ml;
4751 int pre_shift;
4752 rtx t1;
4754 pre_shift = floor_log2 (d & -d);
4755 ml = invert_mod2n (d >> pre_shift, size);
4756 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4757 pre_shift, NULL_RTX, unsignedp);
4758 quotient = expand_mult (compute_mode, t1,
4759 gen_int_mode (ml, compute_mode),
4760 NULL_RTX, 1);
4762 insn = get_last_insn ();
4763 set_dst_reg_note (insn, REG_EQUAL,
4764 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4765 compute_mode, op0, op1),
4766 quotient);
4768 break;
4770 case ROUND_DIV_EXPR:
4771 case ROUND_MOD_EXPR:
4772 if (unsignedp)
4774 rtx tem;
4775 rtx label;
4776 label = gen_label_rtx ();
4777 quotient = gen_reg_rtx (compute_mode);
4778 remainder = gen_reg_rtx (compute_mode);
4779 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4781 rtx tem;
4782 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4783 quotient, 1, OPTAB_LIB_WIDEN);
4784 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4785 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4786 remainder, 1, OPTAB_LIB_WIDEN);
4788 tem = plus_constant (compute_mode, op1, -1);
4789 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4790 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4791 expand_inc (quotient, const1_rtx);
4792 expand_dec (remainder, op1);
4793 emit_label (label);
4795 else
4797 rtx abs_rem, abs_op1, tem, mask;
4798 rtx label;
4799 label = gen_label_rtx ();
4800 quotient = gen_reg_rtx (compute_mode);
4801 remainder = gen_reg_rtx (compute_mode);
4802 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4804 rtx tem;
4805 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4806 quotient, 0, OPTAB_LIB_WIDEN);
4807 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4808 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4809 remainder, 0, OPTAB_LIB_WIDEN);
4811 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4812 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4813 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4814 1, NULL_RTX, 1);
4815 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4816 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4817 NULL_RTX, 0, OPTAB_WIDEN);
4818 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4819 size - 1, NULL_RTX, 0);
4820 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4821 NULL_RTX, 0, OPTAB_WIDEN);
4822 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4823 NULL_RTX, 0, OPTAB_WIDEN);
4824 expand_inc (quotient, tem);
4825 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4826 NULL_RTX, 0, OPTAB_WIDEN);
4827 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4828 NULL_RTX, 0, OPTAB_WIDEN);
4829 expand_dec (remainder, tem);
4830 emit_label (label);
4832 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4834 default:
4835 gcc_unreachable ();
4838 if (quotient == 0)
4840 if (target && GET_MODE (target) != compute_mode)
4841 target = 0;
4843 if (rem_flag)
4845 /* Try to produce the remainder without producing the quotient.
4846 If we seem to have a divmod pattern that does not require widening,
4847 don't try widening here. We should really have a WIDEN argument
4848 to expand_twoval_binop, since what we'd really like to do here is
4849 1) try a mod insn in compute_mode
4850 2) try a divmod insn in compute_mode
4851 3) try a div insn in compute_mode and multiply-subtract to get
4852 remainder
4853 4) try the same things with widening allowed. */
4854 remainder
4855 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4856 op0, op1, target,
4857 unsignedp,
4858 ((optab_handler (optab2, compute_mode)
4859 != CODE_FOR_nothing)
4860 ? OPTAB_DIRECT : OPTAB_WIDEN));
4861 if (remainder == 0)
4863 /* No luck there. Can we do remainder and divide at once
4864 without a library call? */
4865 remainder = gen_reg_rtx (compute_mode);
4866 if (! expand_twoval_binop ((unsignedp
4867 ? udivmod_optab
4868 : sdivmod_optab),
4869 op0, op1,
4870 NULL_RTX, remainder, unsignedp))
4871 remainder = 0;
4874 if (remainder)
4875 return gen_lowpart (mode, remainder);
4878 /* Produce the quotient. Try a quotient insn, but not a library call.
4879 If we have a divmod in this mode, use it in preference to widening
4880 the div (for this test we assume it will not fail). Note that optab2
4881 is set to the one of the two optabs that the call below will use. */
4882 quotient
4883 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4884 op0, op1, rem_flag ? NULL_RTX : target,
4885 unsignedp,
4886 ((optab_handler (optab2, compute_mode)
4887 != CODE_FOR_nothing)
4888 ? OPTAB_DIRECT : OPTAB_WIDEN));
4890 if (quotient == 0)
4892 /* No luck there. Try a quotient-and-remainder insn,
4893 keeping the quotient alone. */
4894 quotient = gen_reg_rtx (compute_mode);
4895 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4896 op0, op1,
4897 quotient, NULL_RTX, unsignedp))
4899 quotient = 0;
4900 if (! rem_flag)
4901 /* Still no luck. If we are not computing the remainder,
4902 use a library call for the quotient. */
4903 quotient = sign_expand_binop (compute_mode,
4904 udiv_optab, sdiv_optab,
4905 op0, op1, target,
4906 unsignedp, OPTAB_LIB_WIDEN);
4911 if (rem_flag)
4913 if (target && GET_MODE (target) != compute_mode)
4914 target = 0;
4916 if (quotient == 0)
4918 /* No divide instruction either. Use library for remainder. */
4919 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4920 op0, op1, target,
4921 unsignedp, OPTAB_LIB_WIDEN);
4922 /* No remainder function. Try a quotient-and-remainder
4923 function, keeping the remainder. */
4924 if (!remainder)
4926 remainder = gen_reg_rtx (compute_mode);
4927 if (!expand_twoval_binop_libfunc
4928 (unsignedp ? udivmod_optab : sdivmod_optab,
4929 op0, op1,
4930 NULL_RTX, remainder,
4931 unsignedp ? UMOD : MOD))
4932 remainder = NULL_RTX;
4935 else
4937 /* We divided. Now finish doing X - Y * (X / Y). */
4938 remainder = expand_mult (compute_mode, quotient, op1,
4939 NULL_RTX, unsignedp);
4940 remainder = expand_binop (compute_mode, sub_optab, op0,
4941 remainder, target, unsignedp,
4942 OPTAB_LIB_WIDEN);
4946 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4949 /* Return a tree node with data type TYPE, describing the value of X.
4950 Usually this is an VAR_DECL, if there is no obvious better choice.
4951 X may be an expression, however we only support those expressions
4952 generated by loop.c. */
4954 tree
4955 make_tree (tree type, rtx x)
4957 tree t;
4959 switch (GET_CODE (x))
4961 case CONST_INT:
4963 HOST_WIDE_INT hi = 0;
4965 if (INTVAL (x) < 0
4966 && !(TYPE_UNSIGNED (type)
4967 && (GET_MODE_BITSIZE (TYPE_MODE (type))
4968 < HOST_BITS_PER_WIDE_INT)))
4969 hi = -1;
4971 t = build_int_cst_wide (type, INTVAL (x), hi);
4973 return t;
4976 case CONST_DOUBLE:
4977 if (GET_MODE (x) == VOIDmode)
4978 t = build_int_cst_wide (type,
4979 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4980 else
4982 REAL_VALUE_TYPE d;
4984 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4985 t = build_real (type, d);
4988 return t;
4990 case CONST_VECTOR:
4992 int units = CONST_VECTOR_NUNITS (x);
4993 tree itype = TREE_TYPE (type);
4994 tree *elts;
4995 int i;
4997 /* Build a tree with vector elements. */
4998 elts = XALLOCAVEC (tree, units);
4999 for (i = units - 1; i >= 0; --i)
5001 rtx elt = CONST_VECTOR_ELT (x, i);
5002 elts[i] = make_tree (itype, elt);
5005 return build_vector (type, elts);
5008 case PLUS:
5009 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5010 make_tree (type, XEXP (x, 1)));
5012 case MINUS:
5013 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5014 make_tree (type, XEXP (x, 1)));
5016 case NEG:
5017 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5019 case MULT:
5020 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5021 make_tree (type, XEXP (x, 1)));
5023 case ASHIFT:
5024 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5025 make_tree (type, XEXP (x, 1)));
5027 case LSHIFTRT:
5028 t = unsigned_type_for (type);
5029 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5030 make_tree (t, XEXP (x, 0)),
5031 make_tree (type, XEXP (x, 1))));
5033 case ASHIFTRT:
5034 t = signed_type_for (type);
5035 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5036 make_tree (t, XEXP (x, 0)),
5037 make_tree (type, XEXP (x, 1))));
5039 case DIV:
5040 if (TREE_CODE (type) != REAL_TYPE)
5041 t = signed_type_for (type);
5042 else
5043 t = type;
5045 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5046 make_tree (t, XEXP (x, 0)),
5047 make_tree (t, XEXP (x, 1))));
5048 case UDIV:
5049 t = unsigned_type_for (type);
5050 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5051 make_tree (t, XEXP (x, 0)),
5052 make_tree (t, XEXP (x, 1))));
5054 case SIGN_EXTEND:
5055 case ZERO_EXTEND:
5056 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5057 GET_CODE (x) == ZERO_EXTEND);
5058 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5060 case CONST:
5061 return make_tree (type, XEXP (x, 0));
5063 case SYMBOL_REF:
5064 t = SYMBOL_REF_DECL (x);
5065 if (t)
5066 return fold_convert (type, build_fold_addr_expr (t));
5067 /* else fall through. */
5069 default:
5070 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5072 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5073 address mode to pointer mode. */
5074 if (POINTER_TYPE_P (type))
5075 x = convert_memory_address_addr_space
5076 (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5078 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5079 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5080 t->decl_with_rtl.rtl = x;
5082 return t;
5086 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5087 and returning TARGET.
5089 If TARGET is 0, a pseudo-register or constant is returned. */
5092 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5094 rtx tem = 0;
5096 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5097 tem = simplify_binary_operation (AND, mode, op0, op1);
5098 if (tem == 0)
5099 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5101 if (target == 0)
5102 target = tem;
5103 else if (tem != target)
5104 emit_move_insn (target, tem);
5105 return target;
5108 /* Helper function for emit_store_flag. */
5109 static rtx
5110 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5111 enum machine_mode mode, enum machine_mode compare_mode,
5112 int unsignedp, rtx x, rtx y, int normalizep,
5113 enum machine_mode target_mode)
5115 struct expand_operand ops[4];
5116 rtx op0, last, comparison, subtarget;
5117 enum machine_mode result_mode = insn_data[(int) icode].operand[0].mode;
5119 last = get_last_insn ();
5120 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5121 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5122 if (!x || !y)
5124 delete_insns_since (last);
5125 return NULL_RTX;
5128 if (target_mode == VOIDmode)
5129 target_mode = result_mode;
5130 if (!target)
5131 target = gen_reg_rtx (target_mode);
5133 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5135 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5136 create_fixed_operand (&ops[1], comparison);
5137 create_fixed_operand (&ops[2], x);
5138 create_fixed_operand (&ops[3], y);
5139 if (!maybe_expand_insn (icode, 4, ops))
5141 delete_insns_since (last);
5142 return NULL_RTX;
5144 subtarget = ops[0].value;
5146 /* If we are converting to a wider mode, first convert to
5147 TARGET_MODE, then normalize. This produces better combining
5148 opportunities on machines that have a SIGN_EXTRACT when we are
5149 testing a single bit. This mostly benefits the 68k.
5151 If STORE_FLAG_VALUE does not have the sign bit set when
5152 interpreted in MODE, we can do this conversion as unsigned, which
5153 is usually more efficient. */
5154 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5156 convert_move (target, subtarget,
5157 val_signbit_known_clear_p (result_mode,
5158 STORE_FLAG_VALUE));
5159 op0 = target;
5160 result_mode = target_mode;
5162 else
5163 op0 = subtarget;
5165 /* If we want to keep subexpressions around, don't reuse our last
5166 target. */
5167 if (optimize)
5168 subtarget = 0;
5170 /* Now normalize to the proper value in MODE. Sometimes we don't
5171 have to do anything. */
5172 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5174 /* STORE_FLAG_VALUE might be the most negative number, so write
5175 the comparison this way to avoid a compiler-time warning. */
5176 else if (- normalizep == STORE_FLAG_VALUE)
5177 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5179 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5180 it hard to use a value of just the sign bit due to ANSI integer
5181 constant typing rules. */
5182 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5183 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5184 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5185 normalizep == 1);
5186 else
5188 gcc_assert (STORE_FLAG_VALUE & 1);
5190 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5191 if (normalizep == -1)
5192 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5195 /* If we were converting to a smaller mode, do the conversion now. */
5196 if (target_mode != result_mode)
5198 convert_move (target, op0, 0);
5199 return target;
5201 else
5202 return op0;
5206 /* A subroutine of emit_store_flag only including "tricks" that do not
5207 need a recursive call. These are kept separate to avoid infinite
5208 loops. */
5210 static rtx
5211 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5212 enum machine_mode mode, int unsignedp, int normalizep,
5213 enum machine_mode target_mode)
5215 rtx subtarget;
5216 enum insn_code icode;
5217 enum machine_mode compare_mode;
5218 enum mode_class mclass;
5219 enum rtx_code scode;
5220 rtx tem;
5222 if (unsignedp)
5223 code = unsigned_condition (code);
5224 scode = swap_condition (code);
5226 /* If one operand is constant, make it the second one. Only do this
5227 if the other operand is not constant as well. */
5229 if (swap_commutative_operands_p (op0, op1))
5231 tem = op0;
5232 op0 = op1;
5233 op1 = tem;
5234 code = swap_condition (code);
5237 if (mode == VOIDmode)
5238 mode = GET_MODE (op0);
5240 /* For some comparisons with 1 and -1, we can convert this to
5241 comparisons with zero. This will often produce more opportunities for
5242 store-flag insns. */
5244 switch (code)
5246 case LT:
5247 if (op1 == const1_rtx)
5248 op1 = const0_rtx, code = LE;
5249 break;
5250 case LE:
5251 if (op1 == constm1_rtx)
5252 op1 = const0_rtx, code = LT;
5253 break;
5254 case GE:
5255 if (op1 == const1_rtx)
5256 op1 = const0_rtx, code = GT;
5257 break;
5258 case GT:
5259 if (op1 == constm1_rtx)
5260 op1 = const0_rtx, code = GE;
5261 break;
5262 case GEU:
5263 if (op1 == const1_rtx)
5264 op1 = const0_rtx, code = NE;
5265 break;
5266 case LTU:
5267 if (op1 == const1_rtx)
5268 op1 = const0_rtx, code = EQ;
5269 break;
5270 default:
5271 break;
5274 /* If we are comparing a double-word integer with zero or -1, we can
5275 convert the comparison into one involving a single word. */
5276 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5277 && GET_MODE_CLASS (mode) == MODE_INT
5278 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5280 if ((code == EQ || code == NE)
5281 && (op1 == const0_rtx || op1 == constm1_rtx))
5283 rtx op00, op01;
5285 /* Do a logical OR or AND of the two words and compare the
5286 result. */
5287 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5288 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5289 tem = expand_binop (word_mode,
5290 op1 == const0_rtx ? ior_optab : and_optab,
5291 op00, op01, NULL_RTX, unsignedp,
5292 OPTAB_DIRECT);
5294 if (tem != 0)
5295 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5296 unsignedp, normalizep);
5298 else if ((code == LT || code == GE) && op1 == const0_rtx)
5300 rtx op0h;
5302 /* If testing the sign bit, can just test on high word. */
5303 op0h = simplify_gen_subreg (word_mode, op0, mode,
5304 subreg_highpart_offset (word_mode,
5305 mode));
5306 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5307 unsignedp, normalizep);
5309 else
5310 tem = NULL_RTX;
5312 if (tem)
5314 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5315 return tem;
5316 if (!target)
5317 target = gen_reg_rtx (target_mode);
5319 convert_move (target, tem,
5320 !val_signbit_known_set_p (word_mode,
5321 (normalizep ? normalizep
5322 : STORE_FLAG_VALUE)));
5323 return target;
5327 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5328 complement of A (for GE) and shifting the sign bit to the low bit. */
5329 if (op1 == const0_rtx && (code == LT || code == GE)
5330 && GET_MODE_CLASS (mode) == MODE_INT
5331 && (normalizep || STORE_FLAG_VALUE == 1
5332 || val_signbit_p (mode, STORE_FLAG_VALUE)))
5334 subtarget = target;
5336 if (!target)
5337 target_mode = mode;
5339 /* If the result is to be wider than OP0, it is best to convert it
5340 first. If it is to be narrower, it is *incorrect* to convert it
5341 first. */
5342 else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5344 op0 = convert_modes (target_mode, mode, op0, 0);
5345 mode = target_mode;
5348 if (target_mode != mode)
5349 subtarget = 0;
5351 if (code == GE)
5352 op0 = expand_unop (mode, one_cmpl_optab, op0,
5353 ((STORE_FLAG_VALUE == 1 || normalizep)
5354 ? 0 : subtarget), 0);
5356 if (STORE_FLAG_VALUE == 1 || normalizep)
5357 /* If we are supposed to produce a 0/1 value, we want to do
5358 a logical shift from the sign bit to the low-order bit; for
5359 a -1/0 value, we do an arithmetic shift. */
5360 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5361 GET_MODE_BITSIZE (mode) - 1,
5362 subtarget, normalizep != -1);
5364 if (mode != target_mode)
5365 op0 = convert_modes (target_mode, mode, op0, 0);
5367 return op0;
5370 mclass = GET_MODE_CLASS (mode);
5371 for (compare_mode = mode; compare_mode != VOIDmode;
5372 compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5374 enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5375 icode = optab_handler (cstore_optab, optab_mode);
5376 if (icode != CODE_FOR_nothing)
5378 do_pending_stack_adjust ();
5379 tem = emit_cstore (target, icode, code, mode, compare_mode,
5380 unsignedp, op0, op1, normalizep, target_mode);
5381 if (tem)
5382 return tem;
5384 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5386 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5387 unsignedp, op1, op0, normalizep, target_mode);
5388 if (tem)
5389 return tem;
5391 break;
5395 return 0;
5398 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5399 and storing in TARGET. Normally return TARGET.
5400 Return 0 if that cannot be done.
5402 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5403 it is VOIDmode, they cannot both be CONST_INT.
5405 UNSIGNEDP is for the case where we have to widen the operands
5406 to perform the operation. It says to use zero-extension.
5408 NORMALIZEP is 1 if we should convert the result to be either zero
5409 or one. Normalize is -1 if we should convert the result to be
5410 either zero or -1. If NORMALIZEP is zero, the result will be left
5411 "raw" out of the scc insn. */
5414 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5415 enum machine_mode mode, int unsignedp, int normalizep)
5417 enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5418 enum rtx_code rcode;
5419 rtx subtarget;
5420 rtx tem, last, trueval;
5422 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5423 target_mode);
5424 if (tem)
5425 return tem;
5427 /* If we reached here, we can't do this with a scc insn, however there
5428 are some comparisons that can be done in other ways. Don't do any
5429 of these cases if branches are very cheap. */
5430 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5431 return 0;
5433 /* See what we need to return. We can only return a 1, -1, or the
5434 sign bit. */
5436 if (normalizep == 0)
5438 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5439 normalizep = STORE_FLAG_VALUE;
5441 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5443 else
5444 return 0;
5447 last = get_last_insn ();
5449 /* If optimizing, use different pseudo registers for each insn, instead
5450 of reusing the same pseudo. This leads to better CSE, but slows
5451 down the compiler, since there are more pseudos */
5452 subtarget = (!optimize
5453 && (target_mode == mode)) ? target : NULL_RTX;
5454 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5456 /* For floating-point comparisons, try the reverse comparison or try
5457 changing the "orderedness" of the comparison. */
5458 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5460 enum rtx_code first_code;
5461 bool and_them;
5463 rcode = reverse_condition_maybe_unordered (code);
5464 if (can_compare_p (rcode, mode, ccp_store_flag)
5465 && (code == ORDERED || code == UNORDERED
5466 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5467 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5469 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5470 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5472 /* For the reverse comparison, use either an addition or a XOR. */
5473 if (want_add
5474 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5475 optimize_insn_for_speed_p ()) == 0)
5477 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5478 STORE_FLAG_VALUE, target_mode);
5479 if (tem)
5480 return expand_binop (target_mode, add_optab, tem,
5481 GEN_INT (normalizep),
5482 target, 0, OPTAB_WIDEN);
5484 else if (!want_add
5485 && rtx_cost (trueval, XOR, 1,
5486 optimize_insn_for_speed_p ()) == 0)
5488 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5489 normalizep, target_mode);
5490 if (tem)
5491 return expand_binop (target_mode, xor_optab, tem, trueval,
5492 target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5496 delete_insns_since (last);
5498 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5499 if (code == ORDERED || code == UNORDERED)
5500 return 0;
5502 and_them = split_comparison (code, mode, &first_code, &code);
5504 /* If there are no NaNs, the first comparison should always fall through.
5505 Effectively change the comparison to the other one. */
5506 if (!HONOR_NANS (mode))
5508 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5509 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5510 target_mode);
5513 #ifdef HAVE_conditional_move
5514 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5515 conditional move. */
5516 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5517 normalizep, target_mode);
5518 if (tem == 0)
5519 return 0;
5521 if (and_them)
5522 tem = emit_conditional_move (target, code, op0, op1, mode,
5523 tem, const0_rtx, GET_MODE (tem), 0);
5524 else
5525 tem = emit_conditional_move (target, code, op0, op1, mode,
5526 trueval, tem, GET_MODE (tem), 0);
5528 if (tem == 0)
5529 delete_insns_since (last);
5530 return tem;
5531 #else
5532 return 0;
5533 #endif
5536 /* The remaining tricks only apply to integer comparisons. */
5538 if (GET_MODE_CLASS (mode) != MODE_INT)
5539 return 0;
5541 /* If this is an equality comparison of integers, we can try to exclusive-or
5542 (or subtract) the two operands and use a recursive call to try the
5543 comparison with zero. Don't do any of these cases if branches are
5544 very cheap. */
5546 if ((code == EQ || code == NE) && op1 != const0_rtx)
5548 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5549 OPTAB_WIDEN);
5551 if (tem == 0)
5552 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5553 OPTAB_WIDEN);
5554 if (tem != 0)
5555 tem = emit_store_flag (target, code, tem, const0_rtx,
5556 mode, unsignedp, normalizep);
5557 if (tem != 0)
5558 return tem;
5560 delete_insns_since (last);
5563 /* For integer comparisons, try the reverse comparison. However, for
5564 small X and if we'd have anyway to extend, implementing "X != 0"
5565 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5566 rcode = reverse_condition (code);
5567 if (can_compare_p (rcode, mode, ccp_store_flag)
5568 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5569 && code == NE
5570 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5571 && op1 == const0_rtx))
5573 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5574 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5576 /* Again, for the reverse comparison, use either an addition or a XOR. */
5577 if (want_add
5578 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5579 optimize_insn_for_speed_p ()) == 0)
5581 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5582 STORE_FLAG_VALUE, target_mode);
5583 if (tem != 0)
5584 tem = expand_binop (target_mode, add_optab, tem,
5585 GEN_INT (normalizep), target, 0, OPTAB_WIDEN);
5587 else if (!want_add
5588 && rtx_cost (trueval, XOR, 1,
5589 optimize_insn_for_speed_p ()) == 0)
5591 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5592 normalizep, target_mode);
5593 if (tem != 0)
5594 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5595 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5598 if (tem != 0)
5599 return tem;
5600 delete_insns_since (last);
5603 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5604 the constant zero. Reject all other comparisons at this point. Only
5605 do LE and GT if branches are expensive since they are expensive on
5606 2-operand machines. */
5608 if (op1 != const0_rtx
5609 || (code != EQ && code != NE
5610 && (BRANCH_COST (optimize_insn_for_speed_p (),
5611 false) <= 1 || (code != LE && code != GT))))
5612 return 0;
5614 /* Try to put the result of the comparison in the sign bit. Assume we can't
5615 do the necessary operation below. */
5617 tem = 0;
5619 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5620 the sign bit set. */
5622 if (code == LE)
5624 /* This is destructive, so SUBTARGET can't be OP0. */
5625 if (rtx_equal_p (subtarget, op0))
5626 subtarget = 0;
5628 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5629 OPTAB_WIDEN);
5630 if (tem)
5631 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5632 OPTAB_WIDEN);
5635 /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5636 number of bits in the mode of OP0, minus one. */
5638 if (code == GT)
5640 if (rtx_equal_p (subtarget, op0))
5641 subtarget = 0;
5643 tem = expand_shift (RSHIFT_EXPR, mode, op0,
5644 GET_MODE_BITSIZE (mode) - 1,
5645 subtarget, 0);
5646 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5647 OPTAB_WIDEN);
5650 if (code == EQ || code == NE)
5652 /* For EQ or NE, one way to do the comparison is to apply an operation
5653 that converts the operand into a positive number if it is nonzero
5654 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5655 for NE we negate. This puts the result in the sign bit. Then we
5656 normalize with a shift, if needed.
5658 Two operations that can do the above actions are ABS and FFS, so try
5659 them. If that doesn't work, and MODE is smaller than a full word,
5660 we can use zero-extension to the wider mode (an unsigned conversion)
5661 as the operation. */
5663 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5664 that is compensated by the subsequent overflow when subtracting
5665 one / negating. */
5667 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5668 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5669 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5670 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5671 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5673 tem = convert_modes (word_mode, mode, op0, 1);
5674 mode = word_mode;
5677 if (tem != 0)
5679 if (code == EQ)
5680 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5681 0, OPTAB_WIDEN);
5682 else
5683 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5686 /* If we couldn't do it that way, for NE we can "or" the two's complement
5687 of the value with itself. For EQ, we take the one's complement of
5688 that "or", which is an extra insn, so we only handle EQ if branches
5689 are expensive. */
5691 if (tem == 0
5692 && (code == NE
5693 || BRANCH_COST (optimize_insn_for_speed_p (),
5694 false) > 1))
5696 if (rtx_equal_p (subtarget, op0))
5697 subtarget = 0;
5699 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5700 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5701 OPTAB_WIDEN);
5703 if (tem && code == EQ)
5704 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5708 if (tem && normalizep)
5709 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5710 GET_MODE_BITSIZE (mode) - 1,
5711 subtarget, normalizep == 1);
5713 if (tem)
5715 if (!target)
5717 else if (GET_MODE (tem) != target_mode)
5719 convert_move (target, tem, 0);
5720 tem = target;
5722 else if (!subtarget)
5724 emit_move_insn (target, tem);
5725 tem = target;
5728 else
5729 delete_insns_since (last);
5731 return tem;
5734 /* Like emit_store_flag, but always succeeds. */
5737 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5738 enum machine_mode mode, int unsignedp, int normalizep)
5740 rtx tem, label;
5741 rtx trueval, falseval;
5743 /* First see if emit_store_flag can do the job. */
5744 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5745 if (tem != 0)
5746 return tem;
5748 if (!target)
5749 target = gen_reg_rtx (word_mode);
5751 /* If this failed, we have to do this with set/compare/jump/set code.
5752 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
5753 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5754 if (code == NE
5755 && GET_MODE_CLASS (mode) == MODE_INT
5756 && REG_P (target)
5757 && op0 == target
5758 && op1 == const0_rtx)
5760 label = gen_label_rtx ();
5761 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5762 mode, NULL_RTX, NULL_RTX, label, -1);
5763 emit_move_insn (target, trueval);
5764 emit_label (label);
5765 return target;
5768 if (!REG_P (target)
5769 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5770 target = gen_reg_rtx (GET_MODE (target));
5772 /* Jump in the right direction if the target cannot implement CODE
5773 but can jump on its reverse condition. */
5774 falseval = const0_rtx;
5775 if (! can_compare_p (code, mode, ccp_jump)
5776 && (! FLOAT_MODE_P (mode)
5777 || code == ORDERED || code == UNORDERED
5778 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5779 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5781 enum rtx_code rcode;
5782 if (FLOAT_MODE_P (mode))
5783 rcode = reverse_condition_maybe_unordered (code);
5784 else
5785 rcode = reverse_condition (code);
5787 /* Canonicalize to UNORDERED for the libcall. */
5788 if (can_compare_p (rcode, mode, ccp_jump)
5789 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5791 falseval = trueval;
5792 trueval = const0_rtx;
5793 code = rcode;
5797 emit_move_insn (target, trueval);
5798 label = gen_label_rtx ();
5799 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5800 NULL_RTX, label, -1);
5802 emit_move_insn (target, falseval);
5803 emit_label (label);
5805 return target;
5808 /* Perform possibly multi-word comparison and conditional jump to LABEL
5809 if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE. This is
5810 now a thin wrapper around do_compare_rtx_and_jump. */
5812 static void
5813 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5814 rtx label)
5816 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5817 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5818 NULL_RTX, NULL_RTX, label, -1);