Linaro GCC 4.9-2015.06 snapshot
official-gcc.git / gcc-4_9-branch / gcc / expmed.c
blob 4d269fefd516564c7dec72ea81a80dbd0cf4812d
1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987-2014 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "diagnostic-core.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "stor-layout.h"
30 #include "tm_p.h"
31 #include "flags.h"
32 #include "insn-config.h"
33 #include "expr.h"
34 #include "optabs.h"
35 #include "recog.h"
36 #include "langhooks.h"
37 #include "df.h"
38 #include "target.h"
39 #include "expmed.h"
41 struct target_expmed default_target_expmed;
42 #if SWITCHABLE_TARGET
43 struct target_expmed *this_target_expmed = &default_target_expmed;
44 #endif
46 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
47 unsigned HOST_WIDE_INT,
48 unsigned HOST_WIDE_INT,
49 unsigned HOST_WIDE_INT,
50 rtx);
51 static void store_fixed_bit_field_1 (rtx, unsigned HOST_WIDE_INT,
52 unsigned HOST_WIDE_INT,
53 rtx);
54 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
55 unsigned HOST_WIDE_INT,
56 unsigned HOST_WIDE_INT,
57 unsigned HOST_WIDE_INT,
58 rtx);
59 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
60 unsigned HOST_WIDE_INT,
61 unsigned HOST_WIDE_INT, rtx, int);
62 static rtx extract_fixed_bit_field_1 (enum machine_mode, rtx,
63 unsigned HOST_WIDE_INT,
64 unsigned HOST_WIDE_INT, rtx, int);
65 static rtx mask_rtx (enum machine_mode, int, int, int);
66 static rtx lshift_value (enum machine_mode, unsigned HOST_WIDE_INT, int);
67 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
68 unsigned HOST_WIDE_INT, int);
69 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
70 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
71 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
 73 /* Test whether a value is zero or a power of two. */
74 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
75 (((x) & ((x) - (unsigned HOST_WIDE_INT) 1)) == 0)
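   /* For example, x == 8 gives 8 & 7 == 0 (a power of two), x == 6 gives
      6 & 5 == 4 (not a power of two), and x == 0 gives 0 & (unsigned) -1
      == 0, which is why zero also passes the test.  */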
77 struct init_expmed_rtl
79 struct rtx_def reg;
80 struct rtx_def plus;
81 struct rtx_def neg;
82 struct rtx_def mult;
83 struct rtx_def sdiv;
84 struct rtx_def udiv;
85 struct rtx_def sdiv_32;
86 struct rtx_def smod_32;
87 struct rtx_def wide_mult;
88 struct rtx_def wide_lshr;
89 struct rtx_def wide_trunc;
90 struct rtx_def shift;
91 struct rtx_def shift_mult;
92 struct rtx_def shift_add;
93 struct rtx_def shift_sub0;
94 struct rtx_def shift_sub1;
95 struct rtx_def zext;
96 struct rtx_def trunc;
98 rtx pow2[MAX_BITS_PER_WORD];
99 rtx cint[MAX_BITS_PER_WORD];
102 static void
103 init_expmed_one_conv (struct init_expmed_rtl *all, enum machine_mode to_mode,
104 enum machine_mode from_mode, bool speed)
106 int to_size, from_size;
107 rtx which;
109 /* We're given no information about the true size of a partial integer,
110 only the size of the "full" integer it requires for storage. For
111 comparison purposes here, reduce the bit size by one in that case. */
112 to_size = (GET_MODE_BITSIZE (to_mode)
113 - (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT));
114 from_size = (GET_MODE_BITSIZE (from_mode)
115 - (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT));
117 /* Assume cost of zero-extend and sign-extend is the same. */
118 which = (to_size < from_size ? &all->trunc : &all->zext);
120 PUT_MODE (&all->reg, from_mode);
121 set_convert_cost (to_mode, from_mode, speed, set_src_cost (which, speed));
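   /* Example of the PARTIAL_INT adjustment above: if PSImode occupies the
      same 32 bits of storage as SImode, to_size for PSImode is 31 while
      from_size for SImode is 32, so an SImode-to-PSImode conversion is
      costed using the TRUNCATE rtx and a PSImode-to-SImode conversion is
      costed using the ZERO_EXTEND rtx.  */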
124 static void
125 init_expmed_one_mode (struct init_expmed_rtl *all,
126 enum machine_mode mode, int speed)
128 int m, n, mode_bitsize;
129 enum machine_mode mode_from;
131 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
133 PUT_MODE (&all->reg, mode);
134 PUT_MODE (&all->plus, mode);
135 PUT_MODE (&all->neg, mode);
136 PUT_MODE (&all->mult, mode);
137 PUT_MODE (&all->sdiv, mode);
138 PUT_MODE (&all->udiv, mode);
139 PUT_MODE (&all->sdiv_32, mode);
140 PUT_MODE (&all->smod_32, mode);
141 PUT_MODE (&all->wide_trunc, mode);
142 PUT_MODE (&all->shift, mode);
143 PUT_MODE (&all->shift_mult, mode);
144 PUT_MODE (&all->shift_add, mode);
145 PUT_MODE (&all->shift_sub0, mode);
146 PUT_MODE (&all->shift_sub1, mode);
147 PUT_MODE (&all->zext, mode);
148 PUT_MODE (&all->trunc, mode);
150 set_add_cost (speed, mode, set_src_cost (&all->plus, speed));
151 set_neg_cost (speed, mode, set_src_cost (&all->neg, speed));
152 set_mul_cost (speed, mode, set_src_cost (&all->mult, speed));
153 set_sdiv_cost (speed, mode, set_src_cost (&all->sdiv, speed));
154 set_udiv_cost (speed, mode, set_src_cost (&all->udiv, speed));
156 set_sdiv_pow2_cheap (speed, mode, (set_src_cost (&all->sdiv_32, speed)
157 <= 2 * add_cost (speed, mode)));
158 set_smod_pow2_cheap (speed, mode, (set_src_cost (&all->smod_32, speed)
159 <= 4 * add_cost (speed, mode)));
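   /* The thresholds above mean division by a power of two is considered
      "cheap" when the target's cost for a division by the constant 32 is
      at most two additions (four for modulus); these flags are consulted
      later when expanding division and modulus by powers of two, to
      decide whether a real division instruction beats the usual
      shift/mask expansion.  */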
161 set_shift_cost (speed, mode, 0, 0);
163 int cost = add_cost (speed, mode);
164 set_shiftadd_cost (speed, mode, 0, cost);
165 set_shiftsub0_cost (speed, mode, 0, cost);
166 set_shiftsub1_cost (speed, mode, 0, cost);
169 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
170 for (m = 1; m < n; m++)
172 XEXP (&all->shift, 1) = all->cint[m];
173 XEXP (&all->shift_mult, 1) = all->pow2[m];
175 set_shift_cost (speed, mode, m, set_src_cost (&all->shift, speed));
176 set_shiftadd_cost (speed, mode, m, set_src_cost (&all->shift_add, speed));
177 set_shiftsub0_cost (speed, mode, m, set_src_cost (&all->shift_sub0, speed));
178 set_shiftsub1_cost (speed, mode, m, set_src_cost (&all->shift_sub1, speed));
181 if (SCALAR_INT_MODE_P (mode))
183 for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
184 mode_from = (enum machine_mode)(mode_from + 1))
185 init_expmed_one_conv (all, mode, mode_from, speed);
187 if (GET_MODE_CLASS (mode) == MODE_INT)
189 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
190 if (wider_mode != VOIDmode)
192 PUT_MODE (&all->zext, wider_mode);
193 PUT_MODE (&all->wide_mult, wider_mode);
194 PUT_MODE (&all->wide_lshr, wider_mode);
195 XEXP (&all->wide_lshr, 1) = GEN_INT (mode_bitsize);
197 set_mul_widen_cost (speed, wider_mode,
198 set_src_cost (&all->wide_mult, speed));
199 set_mul_highpart_cost (speed, mode,
200 set_src_cost (&all->wide_trunc, speed));
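   /* The high-part multiply cost set above is modelled on the RTL that
      would compute it, e.g. for a 32-bit MODE with a 64-bit WIDER_MODE:
        (truncate:SI (lshiftrt:DI (mult:DI (zero_extend:DI (reg:SI))
                                           (zero_extend:DI (reg:SI)))
                                  (const_int 32)))  */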
205 void
206 init_expmed (void)
208 struct init_expmed_rtl all;
209 enum machine_mode mode;
210 int m, speed;
212 memset (&all, 0, sizeof all);
213 for (m = 1; m < MAX_BITS_PER_WORD; m++)
215 all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
216 all.cint[m] = GEN_INT (m);
219 PUT_CODE (&all.reg, REG);
220 /* Avoid using hard regs in ways which may be unsupported. */
221 SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
223 PUT_CODE (&all.plus, PLUS);
224 XEXP (&all.plus, 0) = &all.reg;
225 XEXP (&all.plus, 1) = &all.reg;
227 PUT_CODE (&all.neg, NEG);
228 XEXP (&all.neg, 0) = &all.reg;
230 PUT_CODE (&all.mult, MULT);
231 XEXP (&all.mult, 0) = &all.reg;
232 XEXP (&all.mult, 1) = &all.reg;
234 PUT_CODE (&all.sdiv, DIV);
235 XEXP (&all.sdiv, 0) = &all.reg;
236 XEXP (&all.sdiv, 1) = &all.reg;
238 PUT_CODE (&all.udiv, UDIV);
239 XEXP (&all.udiv, 0) = &all.reg;
240 XEXP (&all.udiv, 1) = &all.reg;
242 PUT_CODE (&all.sdiv_32, DIV);
243 XEXP (&all.sdiv_32, 0) = &all.reg;
244 XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? all.cint[32] : GEN_INT (32);
246 PUT_CODE (&all.smod_32, MOD);
247 XEXP (&all.smod_32, 0) = &all.reg;
248 XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
250 PUT_CODE (&all.zext, ZERO_EXTEND);
251 XEXP (&all.zext, 0) = &all.reg;
253 PUT_CODE (&all.wide_mult, MULT);
254 XEXP (&all.wide_mult, 0) = &all.zext;
255 XEXP (&all.wide_mult, 1) = &all.zext;
257 PUT_CODE (&all.wide_lshr, LSHIFTRT);
258 XEXP (&all.wide_lshr, 0) = &all.wide_mult;
260 PUT_CODE (&all.wide_trunc, TRUNCATE);
261 XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
263 PUT_CODE (&all.shift, ASHIFT);
264 XEXP (&all.shift, 0) = &all.reg;
266 PUT_CODE (&all.shift_mult, MULT);
267 XEXP (&all.shift_mult, 0) = &all.reg;
269 PUT_CODE (&all.shift_add, PLUS);
270 XEXP (&all.shift_add, 0) = &all.shift_mult;
271 XEXP (&all.shift_add, 1) = &all.reg;
273 PUT_CODE (&all.shift_sub0, MINUS);
274 XEXP (&all.shift_sub0, 0) = &all.shift_mult;
275 XEXP (&all.shift_sub0, 1) = &all.reg;
277 PUT_CODE (&all.shift_sub1, MINUS);
278 XEXP (&all.shift_sub1, 0) = &all.reg;
279 XEXP (&all.shift_sub1, 1) = &all.shift_mult;
281 PUT_CODE (&all.trunc, TRUNCATE);
282 XEXP (&all.trunc, 0) = &all.reg;
284 for (speed = 0; speed < 2; speed++)
286 crtl->maybe_hot_insn_p = speed;
287 set_zero_cost (speed, set_src_cost (const0_rtx, speed));
289 for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
290 mode = (enum machine_mode)(mode + 1))
291 init_expmed_one_mode (&all, mode, speed);
293 if (MIN_MODE_PARTIAL_INT != VOIDmode)
294 for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
295 mode = (enum machine_mode)(mode + 1))
296 init_expmed_one_mode (&all, mode, speed);
298 if (MIN_MODE_VECTOR_INT != VOIDmode)
299 for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
300 mode = (enum machine_mode)(mode + 1))
301 init_expmed_one_mode (&all, mode, speed);
304 if (alg_hash_used_p ())
306 struct alg_hash_entry *p = alg_hash_entry_ptr (0);
307 memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
309 else
310 set_alg_hash_used_p (true);
311 default_rtl_profile ();
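   /* The speed loop above fills both cost tables: iteration 0 records the
      size-oriented costs and iteration 1 the speed-oriented costs, with
      crtl->maybe_hot_insn_p temporarily forced so that set_src_cost asks
      the backend the right question; expanders later select a table via
      optimize_insn_for_speed_p ().  */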
314 /* Return an rtx representing minus the value of X.
315 MODE is the intended mode of the result,
316 useful if X is a CONST_INT. */
 318 rtx
 319 negate_rtx (enum machine_mode mode, rtx x)
321 rtx result = simplify_unary_operation (NEG, mode, x, mode);
323 if (result == 0)
324 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
326 return result;
329 /* Adjust bitfield memory MEM so that it points to the first unit of mode
330 MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
331 If MODE is BLKmode, return a reference to every byte in the bitfield.
332 Set *NEW_BITNUM to the bit position of the field within the new memory. */
334 static rtx
335 narrow_bit_field_mem (rtx mem, enum machine_mode mode,
336 unsigned HOST_WIDE_INT bitsize,
337 unsigned HOST_WIDE_INT bitnum,
338 unsigned HOST_WIDE_INT *new_bitnum)
340 if (mode == BLKmode)
342 *new_bitnum = bitnum % BITS_PER_UNIT;
343 HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
344 HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
345 / BITS_PER_UNIT);
346 return adjust_bitfield_address_size (mem, mode, offset, size);
348 else
350 unsigned int unit = GET_MODE_BITSIZE (mode);
351 *new_bitnum = bitnum % unit;
352 HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
353 return adjust_bitfield_address (mem, mode, offset);
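   /* Worked example of the non-BLKmode case: with MODE == QImode (8 bits)
      and BITNUM == 19, *NEW_BITNUM becomes 3 and the returned MEM is the
      original address plus 2 bytes, so the field now starts 3 bits into
      the first byte of the new reference.  */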
357 /* The caller wants to perform insertion or extraction PATTERN on a
358 bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
359 BITREGION_START and BITREGION_END are as for store_bit_field
360 and FIELDMODE is the natural mode of the field.
362 Search for a mode that is compatible with the memory access
363 restrictions and (where applicable) with a register insertion or
364 extraction. Return the new memory on success, storing the adjusted
365 bit position in *NEW_BITNUM. Return null otherwise. */
367 static rtx
368 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
369 rtx op0, HOST_WIDE_INT bitsize,
370 HOST_WIDE_INT bitnum,
371 unsigned HOST_WIDE_INT bitregion_start,
372 unsigned HOST_WIDE_INT bitregion_end,
373 enum machine_mode fieldmode,
374 unsigned HOST_WIDE_INT *new_bitnum)
376 bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
377 bitregion_end, MEM_ALIGN (op0),
378 MEM_VOLATILE_P (op0));
379 enum machine_mode best_mode;
380 if (iter.next_mode (&best_mode))
382 /* We can use a memory in BEST_MODE. See whether this is true for
383 any wider modes. All other things being equal, we prefer to
384 use the widest mode possible because it tends to expose more
385 CSE opportunities. */
386 if (!iter.prefer_smaller_modes ())
388 /* Limit the search to the mode required by the corresponding
389 register insertion or extraction instruction, if any. */
390 enum machine_mode limit_mode = word_mode;
391 extraction_insn insn;
392 if (get_best_reg_extraction_insn (&insn, pattern,
393 GET_MODE_BITSIZE (best_mode),
394 fieldmode))
395 limit_mode = insn.field_mode;
397 enum machine_mode wider_mode;
398 while (iter.next_mode (&wider_mode)
399 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
400 best_mode = wider_mode;
402 return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
403 new_bitnum);
405 return NULL_RTX;
408 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
409 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
410 offset is then BITNUM / BITS_PER_UNIT. */
412 static bool
413 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
414 unsigned HOST_WIDE_INT bitsize,
415 enum machine_mode struct_mode)
417 if (BYTES_BIG_ENDIAN)
418 return (bitnum % BITS_PER_UNIT == 0
419 && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
420 || (bitnum + bitsize) % BITS_PER_WORD == 0));
421 else
422 return bitnum % BITS_PER_WORD == 0;
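   /* Example: for a DImode structure with BITS_PER_WORD == 32 and
      BITSIZE == 16, a little-endian target accepts BITNUM 0 or 32 (the
      field starts at a word's lsb), while a big-endian target accepts
      e.g. BITNUM 16 or 48, where the field ends on a word or structure
      boundary.  */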
425 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
426 containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
427 Return false if the access would touch memory outside the range
428 BITREGION_START to BITREGION_END for conformance to the C++ memory
429 model. */
431 static bool
432 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
433 unsigned HOST_WIDE_INT bitnum,
434 enum machine_mode fieldmode,
435 unsigned HOST_WIDE_INT bitregion_start,
436 unsigned HOST_WIDE_INT bitregion_end)
438 unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
440 /* -fstrict-volatile-bitfields must be enabled and we must have a
441 volatile MEM. */
442 if (!MEM_P (op0)
443 || !MEM_VOLATILE_P (op0)
444 || flag_strict_volatile_bitfields <= 0)
445 return false;
447 /* Non-integral modes likely only happen with packed structures.
448 Punt. */
449 if (!SCALAR_INT_MODE_P (fieldmode))
450 return false;
452 /* The bit size must not be larger than the field mode, and
453 the field mode must not be larger than a word. */
454 if (bitsize > modesize || modesize > BITS_PER_WORD)
455 return false;
457 /* Check for cases of unaligned fields that must be split. */
458 if (bitnum % BITS_PER_UNIT + bitsize > modesize
459 || (STRICT_ALIGNMENT
460 && bitnum % GET_MODE_ALIGNMENT (fieldmode) + bitsize > modesize))
461 return false;
463 /* Check for cases where the C++ memory model applies. */
464 if (bitregion_end != 0
465 && (bitnum - bitnum % modesize < bitregion_start
466 || bitnum - bitnum % modesize + modesize - 1 > bitregion_end))
467 return false;
469 return true;
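   /* When this returns true, the callers perform the access in FIELDMODE
      itself: e.g. for "volatile unsigned int x : 8;" inside an aligned
      int-sized region, the field is read or written with a full SImode
      access rather than a narrower one, which is what AAPCS-style
      volatile bit-field semantics require.  */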
 472 /* Return true if OP0 is a memory and if a bitfield of size BITSIZE at
473 bit number BITNUM can be treated as a simple value of mode MODE. */
475 static bool
476 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
477 unsigned HOST_WIDE_INT bitnum, enum machine_mode mode)
479 return (MEM_P (op0)
480 && bitnum % BITS_PER_UNIT == 0
481 && bitsize == GET_MODE_BITSIZE (mode)
482 && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
483 || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
484 && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
487 /* Try to use instruction INSV to store VALUE into a field of OP0.
488 BITSIZE and BITNUM are as for store_bit_field. */
490 static bool
491 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
492 unsigned HOST_WIDE_INT bitsize,
493 unsigned HOST_WIDE_INT bitnum,
494 rtx value)
496 struct expand_operand ops[4];
497 rtx value1;
498 rtx xop0 = op0;
499 rtx last = get_last_insn ();
500 bool copy_back = false;
502 enum machine_mode op_mode = insv->field_mode;
503 unsigned int unit = GET_MODE_BITSIZE (op_mode);
504 if (bitsize == 0 || bitsize > unit)
505 return false;
507 if (MEM_P (xop0))
508 /* Get a reference to the first byte of the field. */
509 xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
510 &bitnum);
511 else
513 /* Convert from counting within OP0 to counting in OP_MODE. */
514 if (BYTES_BIG_ENDIAN)
515 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
517 /* If xop0 is a register, we need it in OP_MODE
518 to make it acceptable to the format of insv. */
519 if (GET_CODE (xop0) == SUBREG)
520 /* We can't just change the mode, because this might clobber op0,
521 and we will need the original value of op0 if insv fails. */
522 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
523 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
524 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
527 /* If the destination is a paradoxical subreg such that we need a
528 truncate to the inner mode, perform the insertion on a temporary and
529 truncate the result to the original destination. Note that we can't
530 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
531 X) 0)) is (reg:N X). */
532 if (GET_CODE (xop0) == SUBREG
533 && REG_P (SUBREG_REG (xop0))
534 && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
535 op_mode))
537 rtx tem = gen_reg_rtx (op_mode);
538 emit_move_insn (tem, xop0);
539 xop0 = tem;
540 copy_back = true;
 543 /* There is a similar overflow check at the start of store_bit_field_1,
 544 but it only handles the case where the field lies completely
 545 outside the register. There are also cases where the field lies
 546 partially in the register, and we need to adjust BITSIZE for this
 547 partial overflow. Without this fix, pr48335-2.c would be broken on
 548 big-endian targets that have a bit-insert instruction, such as arm,
 549 aarch64 etc. */
550 if (bitsize + bitnum > unit && bitnum < unit)
552 warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
553 "destination object, data truncated into %wu-bit",
554 bitsize, unit - bitnum);
555 bitsize = unit - bitnum;
558 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
559 "backwards" from the size of the unit we are inserting into.
560 Otherwise, we count bits from the most significant on a
561 BYTES/BITS_BIG_ENDIAN machine. */
563 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
564 bitnum = unit - bitsize - bitnum;
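   /* E.g. with UNIT == 32, BITSIZE == 8 and BITNUM == 4, the field covers
      the same bits whether numbered 4 from one end or 32 - 8 - 4 == 20
      from the other; this flip keeps BITNUM in the convention that the
      insv pattern expects.  */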
566 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
567 value1 = value;
568 if (GET_MODE (value) != op_mode)
570 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
572 /* Optimization: Don't bother really extending VALUE
573 if it has all the bits we will actually use. However,
574 if we must narrow it, be sure we do it correctly. */
576 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
578 rtx tmp;
580 tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
581 if (! tmp)
582 tmp = simplify_gen_subreg (op_mode,
583 force_reg (GET_MODE (value),
584 value1),
585 GET_MODE (value), 0);
586 value1 = tmp;
588 else
589 value1 = gen_lowpart (op_mode, value1);
591 else if (CONST_INT_P (value))
592 value1 = gen_int_mode (INTVAL (value), op_mode);
593 else
594 /* Parse phase is supposed to make VALUE's data type
595 match that of the component reference, which is a type
596 at least as wide as the field; so VALUE should have
597 a mode that corresponds to that type. */
598 gcc_assert (CONSTANT_P (value));
601 create_fixed_operand (&ops[0], xop0);
602 create_integer_operand (&ops[1], bitsize);
603 create_integer_operand (&ops[2], bitnum);
604 create_input_operand (&ops[3], value1, op_mode);
605 if (maybe_expand_insn (insv->icode, 4, ops))
607 if (copy_back)
608 convert_move (op0, xop0, true);
609 return true;
611 delete_insns_since (last);
612 return false;
615 /* A subroutine of store_bit_field, with the same arguments. Return true
616 if the operation could be implemented.
618 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
619 no other way of implementing the operation. If FALLBACK_P is false,
620 return false instead. */
622 static bool
623 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
624 unsigned HOST_WIDE_INT bitnum,
625 unsigned HOST_WIDE_INT bitregion_start,
626 unsigned HOST_WIDE_INT bitregion_end,
627 enum machine_mode fieldmode,
628 rtx value, bool fallback_p)
630 rtx op0 = str_rtx;
631 rtx orig_value;
633 while (GET_CODE (op0) == SUBREG)
635 /* The following line once was done only if WORDS_BIG_ENDIAN,
636 but I think that is a mistake. WORDS_BIG_ENDIAN is
637 meaningful at a much higher level; when structures are copied
638 between memory and regs, the higher-numbered regs
639 always get higher addresses. */
640 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
641 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
642 int byte_offset = 0;
644 /* Paradoxical subregs need special handling on big endian machines. */
645 if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
647 int difference = inner_mode_size - outer_mode_size;
649 if (WORDS_BIG_ENDIAN)
650 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
651 if (BYTES_BIG_ENDIAN)
652 byte_offset += difference % UNITS_PER_WORD;
654 else
655 byte_offset = SUBREG_BYTE (op0);
657 bitnum += byte_offset * BITS_PER_UNIT;
658 op0 = SUBREG_REG (op0);
661 /* No action is needed if the target is a register and if the field
662 lies completely outside that register. This can occur if the source
663 code contains an out-of-bounds access to a small array. */
664 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
665 return true;
667 /* Use vec_set patterns for inserting parts of vectors whenever
668 available. */
669 if (VECTOR_MODE_P (GET_MODE (op0))
670 && !MEM_P (op0)
671 && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
672 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
673 && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
674 && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
676 struct expand_operand ops[3];
677 enum machine_mode outermode = GET_MODE (op0);
678 enum machine_mode innermode = GET_MODE_INNER (outermode);
679 enum insn_code icode = optab_handler (vec_set_optab, outermode);
680 int pos = bitnum / GET_MODE_BITSIZE (innermode);
682 create_fixed_operand (&ops[0], op0);
683 create_input_operand (&ops[1], value, innermode);
684 create_integer_operand (&ops[2], pos);
685 if (maybe_expand_insn (icode, 3, ops))
686 return true;
689 /* If the target is a register, overwriting the entire object, or storing
690 a full-word or multi-word field can be done with just a SUBREG. */
691 if (!MEM_P (op0)
692 && bitsize == GET_MODE_BITSIZE (fieldmode)
693 && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
694 || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
696 /* Use the subreg machinery either to narrow OP0 to the required
697 words or to cope with mode punning between equal-sized modes.
 698 In the latter case, use the subreg on the rhs, not the lhs. */
699 rtx sub;
701 if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
703 sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
704 if (sub)
706 emit_move_insn (op0, sub);
707 return true;
710 else
712 sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
713 bitnum / BITS_PER_UNIT);
714 if (sub)
716 emit_move_insn (sub, value);
717 return true;
722 /* If the target is memory, storing any naturally aligned field can be
723 done with a simple store. For targets that support fast unaligned
724 memory, any naturally sized, unit aligned field can be done directly. */
725 if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
727 op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
728 emit_move_insn (op0, value);
729 return true;
732 /* Make sure we are playing with integral modes. Pun with subregs
733 if we aren't. This must come after the entire register case above,
734 since that case is valid for any mode. The following cases are only
735 valid for integral modes. */
737 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
738 if (imode != GET_MODE (op0))
740 if (MEM_P (op0))
741 op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
742 else
744 gcc_assert (imode != BLKmode);
745 op0 = gen_lowpart (imode, op0);
750 /* Storing an lsb-aligned field in a register
751 can be done with a movstrict instruction. */
753 if (!MEM_P (op0)
754 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
755 && bitsize == GET_MODE_BITSIZE (fieldmode)
756 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
758 struct expand_operand ops[2];
759 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
760 rtx arg0 = op0;
761 unsigned HOST_WIDE_INT subreg_off;
763 if (GET_CODE (arg0) == SUBREG)
765 /* Else we've got some float mode source being extracted into
766 a different float mode destination -- this combination of
767 subregs results in Severe Tire Damage. */
768 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
769 || GET_MODE_CLASS (fieldmode) == MODE_INT
770 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
771 arg0 = SUBREG_REG (arg0);
774 subreg_off = bitnum / BITS_PER_UNIT;
775 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
777 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
779 create_fixed_operand (&ops[0], arg0);
780 /* Shrink the source operand to FIELDMODE. */
781 create_convert_operand_to (&ops[1], value, fieldmode, false);
782 if (maybe_expand_insn (icode, 2, ops))
783 return true;
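   /* movstrict_optab maps to patterns such as movstrictqi (present on
      m68k, for example), whose (strict_low_part ...) destination writes
      only the low FIELDMODE-sized part of the register and leaves the
      remaining bits untouched, so no explicit masking is needed here.  */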
787 /* Handle fields bigger than a word. */
789 if (bitsize > BITS_PER_WORD)
791 /* Here we transfer the words of the field
792 in the order least significant first.
793 This is because the most significant word is the one which may
794 be less than full.
795 However, only do that if the value is not BLKmode. */
797 unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
798 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
799 unsigned int i;
800 rtx last;
802 /* This is the mode we must force value to, so that there will be enough
803 subwords to extract. Note that fieldmode will often (always?) be
804 VOIDmode, because that is what store_field uses to indicate that this
805 is a bit field, but passing VOIDmode to operand_subword_force
806 is not allowed. */
807 fieldmode = GET_MODE (value);
808 if (fieldmode == VOIDmode)
809 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
811 last = get_last_insn ();
812 for (i = 0; i < nwords; i++)
814 /* If I is 0, use the low-order word in both field and target;
815 if I is 1, use the next to lowest word; and so on. */
816 unsigned int wordnum = (backwards
817 ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
818 - i - 1
819 : i);
820 unsigned int bit_offset = (backwards
821 ? MAX ((int) bitsize - ((int) i + 1)
822 * BITS_PER_WORD,
 823 0)
 824 : (int) i * BITS_PER_WORD);
825 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
826 unsigned HOST_WIDE_INT new_bitsize =
827 MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
829 /* If the remaining chunk doesn't have full wordsize we have
830 to make sure that for big endian machines the higher order
831 bits are used. */
832 if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
833 value_word = simplify_expand_binop (word_mode, lshr_optab,
834 value_word,
835 GEN_INT (BITS_PER_WORD
836 - new_bitsize),
837 NULL_RTX, true,
838 OPTAB_LIB_WIDEN);
840 if (!store_bit_field_1 (op0, new_bitsize,
841 bitnum + bit_offset,
842 bitregion_start, bitregion_end,
843 word_mode,
844 value_word, fallback_p))
846 delete_insns_since (last);
847 return false;
850 return true;
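   /* Example: storing a 96-bit field on a 32-bit-word target gives
      nwords == 3; with !WORDS_BIG_ENDIAN the loop stores bits [0,32) of
      VALUE first, then [32,64), then [64,96), so the possibly partial
      most significant word is handled last.  */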
853 /* If VALUE has a floating-point or complex mode, access it as an
854 integer of the corresponding size. This can occur on a machine
855 with 64 bit registers that uses SFmode for float. It can also
856 occur for unaligned float or complex fields. */
857 orig_value = value;
858 if (GET_MODE (value) != VOIDmode
859 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
860 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
862 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
863 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
866 /* If OP0 is a multi-word register, narrow it to the affected word.
867 If the region spans two words, defer to store_split_bit_field. */
868 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
870 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
871 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
872 gcc_assert (op0);
873 bitnum %= BITS_PER_WORD;
874 if (bitnum + bitsize > BITS_PER_WORD)
876 if (!fallback_p)
877 return false;
879 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
880 bitregion_end, value);
881 return true;
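   /* Example: for a DImode pseudo on a 32-bit-word target with
      BITNUM == 40 and BITSIZE == 8, OP0 is narrowed to the word that
      contains bit 40 (subreg byte offset 4) and BITNUM becomes 8; had
      the field been, say, 30 bits wide it would straddle the word
      boundary and be handled by store_split_bit_field instead.  */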
885 /* From here on we can assume that the field to be stored in fits
886 within a word. If the destination is a register, it too fits
887 in a word. */
889 extraction_insn insv;
890 if (!MEM_P (op0)
891 && get_best_reg_extraction_insn (&insv, EP_insv,
892 GET_MODE_BITSIZE (GET_MODE (op0)),
893 fieldmode)
894 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
895 return true;
897 /* If OP0 is a memory, try copying it to a register and seeing if a
898 cheap register alternative is available. */
899 if (MEM_P (op0))
901 if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
902 fieldmode)
903 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
904 return true;
906 rtx last = get_last_insn ();
908 /* Try loading part of OP0 into a register, inserting the bitfield
909 into that, and then copying the result back to OP0. */
910 unsigned HOST_WIDE_INT bitpos;
911 rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
912 bitregion_start, bitregion_end,
913 fieldmode, &bitpos);
914 if (xop0)
916 rtx tempreg = copy_to_reg (xop0);
917 if (store_bit_field_1 (tempreg, bitsize, bitpos,
918 bitregion_start, bitregion_end,
919 fieldmode, orig_value, false))
921 emit_move_insn (xop0, tempreg);
922 return true;
924 delete_insns_since (last);
928 if (!fallback_p)
929 return false;
931 store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
932 bitregion_end, value);
933 return true;
936 /* Generate code to store value from rtx VALUE
937 into a bit-field within structure STR_RTX
938 containing BITSIZE bits starting at bit BITNUM.
940 BITREGION_START is bitpos of the first bitfield in this region.
941 BITREGION_END is the bitpos of the ending bitfield in this region.
 942 These two fields are 0 if the C++ memory model does not apply,
 943 or if we are not interested in keeping track of bitfield regions.
945 FIELDMODE is the machine-mode of the FIELD_DECL node for this field. */
947 void
948 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
949 unsigned HOST_WIDE_INT bitnum,
950 unsigned HOST_WIDE_INT bitregion_start,
951 unsigned HOST_WIDE_INT bitregion_end,
952 enum machine_mode fieldmode,
953 rtx value)
955 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
956 if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode,
957 bitregion_start, bitregion_end))
959 /* Storing any naturally aligned field can be done with a simple
960 store. For targets that support fast unaligned memory, any
961 naturally sized, unit aligned field can be done directly. */
962 if (simple_mem_bitfield_p (str_rtx, bitsize, bitnum, fieldmode))
964 str_rtx = adjust_bitfield_address (str_rtx, fieldmode,
965 bitnum / BITS_PER_UNIT);
966 emit_move_insn (str_rtx, value);
968 else
970 str_rtx = narrow_bit_field_mem (str_rtx, fieldmode, bitsize, bitnum,
971 &bitnum);
972 /* Explicitly override the C/C++ memory model; ignore the
973 bit range so that we can do the access in the mode mandated
974 by -fstrict-volatile-bitfields instead. */
975 store_fixed_bit_field_1 (str_rtx, bitsize, bitnum, value);
978 return;
981 /* Under the C++0x memory model, we must not touch bits outside the
982 bit region. Adjust the address to start at the beginning of the
983 bit region. */
984 if (MEM_P (str_rtx) && bitregion_start > 0)
986 enum machine_mode bestmode;
987 HOST_WIDE_INT offset, size;
989 gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
991 offset = bitregion_start / BITS_PER_UNIT;
992 bitnum -= bitregion_start;
993 size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
994 bitregion_end -= bitregion_start;
995 bitregion_start = 0;
996 bestmode = get_best_mode (bitsize, bitnum,
997 bitregion_start, bitregion_end,
998 MEM_ALIGN (str_rtx), VOIDmode,
999 MEM_VOLATILE_P (str_rtx));
1000 str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
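   /* Example: with BITREGION_START == 32, BITREGION_END == 63,
      BITNUM == 40 and BITSIZE == 8, the MEM is advanced by 4 bytes,
      BITNUM becomes 8 and the region becomes [0, 31], so no access
      below the original bit 32 or above bit 63 can be generated.  */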
1003 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1004 bitregion_start, bitregion_end,
1005 fieldmode, value, true))
1006 gcc_unreachable ();
1009 /* Use shifts and boolean operations to store VALUE into a bit field of
1010 width BITSIZE in OP0, starting at bit BITNUM. */
1012 static void
1013 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1014 unsigned HOST_WIDE_INT bitnum,
1015 unsigned HOST_WIDE_INT bitregion_start,
1016 unsigned HOST_WIDE_INT bitregion_end,
1017 rtx value)
1019 /* There is a case not handled here:
1020 a structure with a known alignment of just a halfword
1021 and a field split across two aligned halfwords within the structure.
1022 Or likewise a structure with a known alignment of just a byte
1023 and a field split across two bytes.
1024 Such cases are not supposed to be able to occur. */
1026 if (MEM_P (op0))
1028 enum machine_mode mode = GET_MODE (op0);
1029 if (GET_MODE_BITSIZE (mode) == 0
1030 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
1031 mode = word_mode;
1032 mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1033 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
1035 if (mode == VOIDmode)
1037 /* The only way this should occur is if the field spans word
1038 boundaries. */
1039 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
1040 bitregion_end, value);
1041 return;
1044 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1047 store_fixed_bit_field_1 (op0, bitsize, bitnum, value);
 1050 /* Helper function for store_fixed_bit_field; it stores
1051 the bit field always using the MODE of OP0. */
1053 static void
1054 store_fixed_bit_field_1 (rtx op0, unsigned HOST_WIDE_INT bitsize,
1055 unsigned HOST_WIDE_INT bitnum,
1056 rtx value)
1058 enum machine_mode mode;
1059 rtx temp;
1060 int all_zero = 0;
1061 int all_one = 0;
1063 mode = GET_MODE (op0);
1064 gcc_assert (SCALAR_INT_MODE_P (mode));
1066 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1067 for invalid input, such as f5 from gcc.dg/pr48335-2.c. */
1069 if (BYTES_BIG_ENDIAN)
1070 /* BITNUM is the distance between our msb
1071 and that of the containing datum.
1072 Convert it to the distance from the lsb. */
1073 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1075 /* Now BITNUM is always the distance between our lsb
1076 and that of OP0. */
1078 /* Shift VALUE left by BITNUM bits. If VALUE is not constant,
1079 we must first convert its mode to MODE. */
1081 if (CONST_INT_P (value))
1083 HOST_WIDE_INT v = INTVAL (value);
1085 if (bitsize < HOST_BITS_PER_WIDE_INT)
1086 v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
1088 if (v == 0)
1089 all_zero = 1;
1090 else if ((bitsize < HOST_BITS_PER_WIDE_INT
1091 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
1092 || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
1093 all_one = 1;
1095 value = lshift_value (mode, v, bitnum);
1097 else
1099 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1100 && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1102 if (GET_MODE (value) != mode)
1103 value = convert_to_mode (mode, value, 1);
1105 if (must_and)
1106 value = expand_binop (mode, and_optab, value,
1107 mask_rtx (mode, 0, bitsize, 0),
1108 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1109 if (bitnum > 0)
1110 value = expand_shift (LSHIFT_EXPR, mode, value,
1111 bitnum, NULL_RTX, 1);
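   /* At this point VALUE holds the new field already shifted into place;
      the overall effect of the code below is
        op0 = (op0 & ~(MASK << BITNUM)) | VALUE
      where MASK == (1 << BITSIZE) - 1, with the AND skipped when the
      field is all ones and the IOR skipped when it is all zeros.  */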
1114 /* Now clear the chosen bits in OP0,
1115 except that if VALUE is -1 we need not bother. */
1116 /* We keep the intermediates in registers to allow CSE to combine
1117 consecutive bitfield assignments. */
1119 temp = force_reg (mode, op0);
1121 if (! all_one)
1123 temp = expand_binop (mode, and_optab, temp,
1124 mask_rtx (mode, bitnum, bitsize, 1),
1125 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1126 temp = force_reg (mode, temp);
1129 /* Now logical-or VALUE into OP0, unless it is zero. */
1131 if (! all_zero)
1133 temp = expand_binop (mode, ior_optab, temp, value,
1134 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1135 temp = force_reg (mode, temp);
1138 if (op0 != temp)
1140 op0 = copy_rtx (op0);
1141 emit_move_insn (op0, temp);
1145 /* Store a bit field that is split across multiple accessible memory objects.
1147 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1148 BITSIZE is the field width; BITPOS the position of its first bit
1149 (within the word).
1150 VALUE is the value to store.
1152 This does not yet handle fields wider than BITS_PER_WORD. */
1154 static void
1155 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1156 unsigned HOST_WIDE_INT bitpos,
1157 unsigned HOST_WIDE_INT bitregion_start,
1158 unsigned HOST_WIDE_INT bitregion_end,
1159 rtx value)
1161 unsigned int unit;
1162 unsigned int bitsdone = 0;
 1164 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1165 much at a time. */
1166 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1167 unit = BITS_PER_WORD;
1168 else
1169 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1171 /* If OP0 is a memory with a mode, then UNIT must not be larger than
1172 OP0's mode as well. Otherwise, store_fixed_bit_field will call us
1173 again, and we will mutually recurse forever. */
1174 if (MEM_P (op0) && GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1175 unit = MIN (unit, GET_MODE_BITSIZE (GET_MODE (op0)));
1177 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1178 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1179 that VALUE might be a floating-point constant. */
1180 if (CONSTANT_P (value) && !CONST_INT_P (value))
1182 rtx word = gen_lowpart_common (word_mode, value);
1184 if (word && (value != word))
1185 value = word;
1186 else
1187 value = gen_lowpart_common (word_mode,
1188 force_reg (GET_MODE (value) != VOIDmode
1189 ? GET_MODE (value)
1190 : word_mode, value));
1193 while (bitsdone < bitsize)
1195 unsigned HOST_WIDE_INT thissize;
1196 rtx part, word;
1197 unsigned HOST_WIDE_INT thispos;
1198 unsigned HOST_WIDE_INT offset;
1200 offset = (bitpos + bitsdone) / unit;
1201 thispos = (bitpos + bitsdone) % unit;
 1203 /* When the region of bytes we can touch is restricted, decrease
1204 UNIT close to the end of the region as needed. If op0 is a REG
1205 or SUBREG of REG, don't do this, as there can't be data races
1206 on a register and we can expand shorter code in some cases. */
1207 if (bitregion_end
1208 && unit > BITS_PER_UNIT
1209 && bitpos + bitsdone - thispos + unit > bitregion_end + 1
1210 && !REG_P (op0)
1211 && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1213 unit = unit / 2;
1214 continue;
1217 /* THISSIZE must not overrun a word boundary. Otherwise,
1218 store_fixed_bit_field will call us again, and we will mutually
1219 recurse forever. */
1220 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1221 thissize = MIN (thissize, unit - thispos);
1223 if (BYTES_BIG_ENDIAN)
1225 /* Fetch successively less significant portions. */
1226 if (CONST_INT_P (value))
1227 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1228 >> (bitsize - bitsdone - thissize))
1229 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1230 else
1232 int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1233 /* The args are chosen so that the last part includes the
1234 lsb. Give extract_bit_field the value it needs (with
1235 endianness compensation) to fetch the piece we want. */
1236 part = extract_fixed_bit_field (word_mode, value, thissize,
1237 total_bits - bitsize + bitsdone,
1238 NULL_RTX, 1);
1241 else
1243 /* Fetch successively more significant portions. */
1244 if (CONST_INT_P (value))
1245 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1246 >> bitsdone)
1247 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1248 else
1249 part = extract_fixed_bit_field (word_mode, value, thissize,
1250 bitsdone, NULL_RTX, 1);
1253 /* If OP0 is a register, then handle OFFSET here.
1255 When handling multiword bitfields, extract_bit_field may pass
1256 down a word_mode SUBREG of a larger REG for a bitfield that actually
1257 crosses a word boundary. Thus, for a SUBREG, we must find
1258 the current word starting from the base register. */
1259 if (GET_CODE (op0) == SUBREG)
1261 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD)
1262 + (offset * unit / BITS_PER_WORD);
1263 enum machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
1264 if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1265 word = word_offset ? const0_rtx : op0;
1266 else
1267 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1268 GET_MODE (SUBREG_REG (op0)));
1269 offset &= BITS_PER_WORD / unit - 1;
1271 else if (REG_P (op0))
1273 enum machine_mode op0_mode = GET_MODE (op0);
1274 if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1275 word = offset ? const0_rtx : op0;
1276 else
1277 word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
1278 GET_MODE (op0));
1279 offset &= BITS_PER_WORD / unit - 1;
1281 else
1282 word = op0;
1284 /* OFFSET is in UNITs, and UNIT is in bits. If WORD is const0_rtx,
1285 it is just an out-of-bounds access. Ignore it. */
1286 if (word != const0_rtx)
1287 store_fixed_bit_field (word, thissize, offset * unit + thispos,
1288 bitregion_start, bitregion_end, part);
1289 bitsdone += thissize;
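   /* Worked example (little-endian register destination, UNIT == 32):
      storing a 16-bit field at BITPOS == 24 takes two iterations; the
      first stores the low 8 bits of VALUE at bit 24 of word 0, the
      second stores the next 8 bits at bit 0 of word 1.  */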
1293 /* A subroutine of extract_bit_field_1 that converts return value X
1294 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1295 to extract_bit_field. */
1297 static rtx
1298 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1299 enum machine_mode tmode, bool unsignedp)
1301 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1302 return x;
1304 /* If the x mode is not a scalar integral, first convert to the
1305 integer mode of that size and then access it as a floating-point
1306 value via a SUBREG. */
1307 if (!SCALAR_INT_MODE_P (tmode))
1309 enum machine_mode smode;
1311 smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1312 x = convert_to_mode (smode, x, unsignedp);
1313 x = force_reg (smode, x);
1314 return gen_lowpart (tmode, x);
1317 return convert_to_mode (tmode, x, unsignedp);
1320 /* Try to use an ext(z)v pattern to extract a field from OP0.
1321 Return the extracted value on success, otherwise return null.
1322 EXT_MODE is the mode of the extraction and the other arguments
1323 are as for extract_bit_field. */
1325 static rtx
1326 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1327 unsigned HOST_WIDE_INT bitsize,
1328 unsigned HOST_WIDE_INT bitnum,
1329 int unsignedp, rtx target,
1330 enum machine_mode mode, enum machine_mode tmode)
1332 struct expand_operand ops[4];
1333 rtx spec_target = target;
1334 rtx spec_target_subreg = 0;
1335 enum machine_mode ext_mode = extv->field_mode;
1336 unsigned unit = GET_MODE_BITSIZE (ext_mode);
1338 if (bitsize == 0 || unit < bitsize)
1339 return NULL_RTX;
1341 if (MEM_P (op0))
1342 /* Get a reference to the first byte of the field. */
1343 op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1344 &bitnum);
1345 else
1347 /* Convert from counting within OP0 to counting in EXT_MODE. */
1348 if (BYTES_BIG_ENDIAN)
1349 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1351 /* If op0 is a register, we need it in EXT_MODE to make it
1352 acceptable to the format of ext(z)v. */
1353 if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1354 return NULL_RTX;
1355 if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1356 op0 = gen_lowpart_SUBREG (ext_mode, op0);
1359 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1360 "backwards" from the size of the unit we are extracting from.
1361 Otherwise, we count bits from the most significant on a
1362 BYTES/BITS_BIG_ENDIAN machine. */
1364 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1365 bitnum = unit - bitsize - bitnum;
1367 if (target == 0)
1368 target = spec_target = gen_reg_rtx (tmode);
1370 if (GET_MODE (target) != ext_mode)
1372 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1373 between the mode of the extraction (word_mode) and the target
1374 mode. Instead, create a temporary and use convert_move to set
1375 the target. */
1376 if (REG_P (target)
1377 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1379 target = gen_lowpart (ext_mode, target);
1380 if (GET_MODE_PRECISION (ext_mode)
1381 > GET_MODE_PRECISION (GET_MODE (spec_target)))
1382 spec_target_subreg = target;
1384 else
1385 target = gen_reg_rtx (ext_mode);
1388 create_output_operand (&ops[0], target, ext_mode);
1389 create_fixed_operand (&ops[1], op0);
1390 create_integer_operand (&ops[2], bitsize);
1391 create_integer_operand (&ops[3], bitnum);
1392 if (maybe_expand_insn (extv->icode, 4, ops))
1394 target = ops[0].value;
1395 if (target == spec_target)
1396 return target;
1397 if (target == spec_target_subreg)
1398 return spec_target;
1399 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1401 return NULL_RTX;
1404 /* A subroutine of extract_bit_field, with the same arguments.
1405 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1406 if we can find no other means of implementing the operation.
 1407 If FALLBACK_P is false, return NULL instead. */
1409 static rtx
1410 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1411 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1412 enum machine_mode mode, enum machine_mode tmode,
1413 bool fallback_p)
1415 rtx op0 = str_rtx;
1416 enum machine_mode int_mode;
1417 enum machine_mode mode1;
1419 if (tmode == VOIDmode)
1420 tmode = mode;
1422 while (GET_CODE (op0) == SUBREG)
1424 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1425 op0 = SUBREG_REG (op0);
1428 /* If we have an out-of-bounds access to a register, just return an
1429 uninitialized register of the required mode. This can occur if the
1430 source code contains an out-of-bounds access to a small array. */
1431 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1432 return gen_reg_rtx (tmode);
1434 if (REG_P (op0)
1435 && mode == GET_MODE (op0)
1436 && bitnum == 0
1437 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1439 /* We're trying to extract a full register from itself. */
1440 return op0;
1443 /* See if we can get a better vector mode before extracting. */
1444 if (VECTOR_MODE_P (GET_MODE (op0))
1445 && !MEM_P (op0)
1446 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1448 enum machine_mode new_mode;
1450 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1451 new_mode = MIN_MODE_VECTOR_FLOAT;
1452 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1453 new_mode = MIN_MODE_VECTOR_FRACT;
1454 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1455 new_mode = MIN_MODE_VECTOR_UFRACT;
1456 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1457 new_mode = MIN_MODE_VECTOR_ACCUM;
1458 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1459 new_mode = MIN_MODE_VECTOR_UACCUM;
1460 else
1461 new_mode = MIN_MODE_VECTOR_INT;
1463 for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1464 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1465 && targetm.vector_mode_supported_p (new_mode))
1466 break;
1467 if (new_mode != VOIDmode)
1468 op0 = gen_lowpart (new_mode, op0);
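   /* Example: extracting an SFmode element from a value held in a
      16-byte integer vector mode such as V4SImode starts the search at
      MIN_MODE_VECTOR_FLOAT and, on a target that supports it (x86 with
      SSE, say), settles on V4SFmode, so the vec_extract pattern below
      can be used directly.  */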
1471 /* Use vec_extract patterns for extracting parts of vectors whenever
1472 available. */
1473 if (VECTOR_MODE_P (GET_MODE (op0))
1474 && !MEM_P (op0)
1475 && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1476 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1477 == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1479 struct expand_operand ops[3];
1480 enum machine_mode outermode = GET_MODE (op0);
1481 enum machine_mode innermode = GET_MODE_INNER (outermode);
1482 enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1483 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1485 create_output_operand (&ops[0], target, innermode);
1486 create_input_operand (&ops[1], op0, outermode);
1487 create_integer_operand (&ops[2], pos);
1488 if (maybe_expand_insn (icode, 3, ops))
1490 target = ops[0].value;
1491 if (GET_MODE (target) != mode)
1492 return gen_lowpart (tmode, target);
1493 return target;
1497 /* Make sure we are playing with integral modes. Pun with subregs
1498 if we aren't. */
1500 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1501 if (imode != GET_MODE (op0))
1503 if (MEM_P (op0))
1504 op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
1505 else if (imode != BLKmode)
1507 op0 = gen_lowpart (imode, op0);
1509 /* If we got a SUBREG, force it into a register since we
1510 aren't going to be able to do another SUBREG on it. */
1511 if (GET_CODE (op0) == SUBREG)
1512 op0 = force_reg (imode, op0);
1514 else if (REG_P (op0))
1516 rtx reg, subreg;
1517 imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1518 MODE_INT);
1519 reg = gen_reg_rtx (imode);
1520 subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1521 emit_move_insn (subreg, op0);
1522 op0 = reg;
1523 bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1525 else
1527 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1528 rtx mem = assign_stack_temp (GET_MODE (op0), size);
1529 emit_move_insn (mem, op0);
1530 op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1535 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1536 If that's wrong, the solution is to test for it and set TARGET to 0
1537 if needed. */
1539 /* Get the mode of the field to use for atomic access or subreg
1540 conversion. */
1541 mode1 = mode;
1542 if (SCALAR_INT_MODE_P (tmode))
1544 enum machine_mode try_mode = mode_for_size (bitsize,
1545 GET_MODE_CLASS (tmode), 0);
1546 if (try_mode != BLKmode)
1547 mode1 = try_mode;
1549 gcc_assert (mode1 != BLKmode);
1551 /* Extraction of a full MODE1 value can be done with a subreg as long
1552 as the least significant bit of the value is the least significant
1553 bit of either OP0 or a word of OP0. */
1554 if (!MEM_P (op0)
1555 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1556 && bitsize == GET_MODE_BITSIZE (mode1)
1557 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1559 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1560 bitnum / BITS_PER_UNIT);
1561 if (sub)
1562 return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
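   /* Example: on a typical 32-bit little-endian target, extracting bits
      [32, 64) of a DImode pseudo as SImode satisfies the conditions
      above and simply yields (subreg:SI (reg:DI ...) 4), i.e. the high
      word, with no shifting or masking.  */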
1565 /* Extraction of a full MODE1 value can be done with a load as long as
1566 the field is on a byte boundary and is sufficiently aligned. */
1567 if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1569 op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1570 return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1573 /* Handle fields bigger than a word. */
1575 if (bitsize > BITS_PER_WORD)
1577 /* Here we transfer the words of the field
1578 in the order least significant first.
1579 This is because the most significant word is the one which may
1580 be less than full. */
1582 unsigned int backwards = WORDS_BIG_ENDIAN;
1583 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1584 unsigned int i;
1585 rtx last;
1587 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1588 target = gen_reg_rtx (mode);
1590 /* Indicate for flow that the entire target reg is being set. */
1591 emit_clobber (target);
1593 last = get_last_insn ();
1594 for (i = 0; i < nwords; i++)
1596 /* If I is 0, use the low-order word in both field and target;
1597 if I is 1, use the next to lowest word; and so on. */
1598 /* Word number in TARGET to use. */
1599 unsigned int wordnum
1600 = (backwards
1601 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1602 : i);
1603 /* Offset from start of field in OP0. */
1604 unsigned int bit_offset = (backwards
1605 ? MAX ((int) bitsize - ((int) i + 1)
1606 * BITS_PER_WORD,
 1607 0)
 1608 : (int) i * BITS_PER_WORD);
1609 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1610 rtx result_part
1611 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1612 bitsize - i * BITS_PER_WORD),
1613 bitnum + bit_offset, 1, target_part,
1614 mode, word_mode, fallback_p);
1616 gcc_assert (target_part);
1617 if (!result_part)
1619 delete_insns_since (last);
1620 return NULL;
1623 if (result_part != target_part)
1624 emit_move_insn (target_part, result_part);
1627 if (unsignedp)
1629 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1630 need to be zero'd out. */
1631 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1633 unsigned int i, total_words;
1635 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1636 for (i = nwords; i < total_words; i++)
1637 emit_move_insn
1638 (operand_subword (target,
1639 backwards ? total_words - i - 1 : i,
1640 1, VOIDmode),
1641 const0_rtx);
1643 return target;
1646 /* Signed bit field: sign-extend with two arithmetic shifts. */
1647 target = expand_shift (LSHIFT_EXPR, mode, target,
1648 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1649 return expand_shift (RSHIFT_EXPR, mode, target,
1650 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
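   /* E.g. a signed 40-bit field assembled into a DImode TARGET is
      sign-extended by shifting left 24 bits and then arithmetically
      right 24 bits.  */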
1653 /* If OP0 is a multi-word register, narrow it to the affected word.
1654 If the region spans two words, defer to extract_split_bit_field. */
1655 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1657 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1658 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1659 bitnum %= BITS_PER_WORD;
1660 if (bitnum + bitsize > BITS_PER_WORD)
1662 if (!fallback_p)
1663 return NULL_RTX;
1664 target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1665 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1669 /* From here on we know the desired field is smaller than a word.
1670 If OP0 is a register, it too fits within a word. */
1671 enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1672 extraction_insn extv;
1673 if (!MEM_P (op0)
1674 /* ??? We could limit the structure size to the part of OP0 that
1675 contains the field, with appropriate checks for endianness
1676 and TRULY_NOOP_TRUNCATION. */
1677 && get_best_reg_extraction_insn (&extv, pattern,
1678 GET_MODE_BITSIZE (GET_MODE (op0)),
1679 tmode))
1681 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1682 unsignedp, target, mode,
1683 tmode);
1684 if (result)
1685 return result;
1688 /* If OP0 is a memory, try copying it to a register and seeing if a
1689 cheap register alternative is available. */
1690 if (MEM_P (op0))
1692 if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1693 tmode))
1695 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1696 bitnum, unsignedp,
1697 target, mode,
1698 tmode);
1699 if (result)
1700 return result;
1703 rtx last = get_last_insn ();
1705 /* Try loading part of OP0 into a register and extracting the
1706 bitfield from that. */
1707 unsigned HOST_WIDE_INT bitpos;
1708 rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1709 0, 0, tmode, &bitpos);
1710 if (xop0)
1712 xop0 = copy_to_reg (xop0);
1713 rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1714 unsignedp, target,
1715 mode, tmode, false);
1716 if (result)
1717 return result;
1718 delete_insns_since (last);
1722 if (!fallback_p)
1723 return NULL;
1725 /* Find a correspondingly-sized integer field, so we can apply
1726 shifts and masks to it. */
1727 int_mode = int_mode_for_mode (tmode);
1728 if (int_mode == BLKmode)
1729 int_mode = int_mode_for_mode (mode);
1730 /* Should probably push op0 out to memory and then do a load. */
1731 gcc_assert (int_mode != BLKmode);
1733 target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum,
1734 target, unsignedp);
1735 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1738 /* Generate code to extract a bit-field from STR_RTX
1739 containing BITSIZE bits, starting at BITNUM,
1740 and put it in TARGET if possible (if TARGET is nonzero).
1741 Regardless of TARGET, we return the rtx for where the value is placed.
1743 STR_RTX is the structure containing the byte (a REG or MEM).
1744 UNSIGNEDP is nonzero if this is an unsigned bit field.
1745 MODE is the natural mode of the field value once extracted.
1746 TMODE is the mode the caller would like the value to have;
1747 but the value may be returned with type MODE instead.
1749 If a TARGET is specified and we can store in it at no extra cost,
1750 we do so, and return TARGET.
1751 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1752 if they are equally easy. */
1755 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1756 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1757 enum machine_mode mode, enum machine_mode tmode)
1759 enum machine_mode mode1;
1761 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1762 if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1763 mode1 = GET_MODE (str_rtx);
1764 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1765 mode1 = GET_MODE (target);
1766 else
1767 mode1 = tmode;
1769 if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, mode1, 0, 0))
1771 rtx result;
1773 /* Extraction of a full MODE1 value can be done with a load as long as
1774 the field is on a byte boundary and is sufficiently aligned. */
1775 if (simple_mem_bitfield_p (str_rtx, bitsize, bitnum, mode1))
1776 result = adjust_bitfield_address (str_rtx, mode1,
1777 bitnum / BITS_PER_UNIT);
1778 else
1780 str_rtx = narrow_bit_field_mem (str_rtx, mode1, bitsize, bitnum,
1781 &bitnum);
1782 result = extract_fixed_bit_field_1 (mode, str_rtx, bitsize, bitnum,
1783 target, unsignedp);
1786 return convert_extracted_bit_field (result, mode, tmode, unsignedp);
1789 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1790 target, mode, tmode, true);
1793 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1794 from bit BITNUM of OP0.
1796 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1797 If TARGET is nonzero, attempts to store the value there
1798 and return TARGET, but this is not guaranteed.
1799 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
1801 static rtx
1802 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1803 unsigned HOST_WIDE_INT bitsize,
1804 unsigned HOST_WIDE_INT bitnum, rtx target,
1805 int unsignedp)
1807 if (MEM_P (op0))
1809 enum machine_mode mode
1810 = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), word_mode,
1811 MEM_VOLATILE_P (op0));
1813 if (mode == VOIDmode)
1814 /* The only way this should occur is if the field spans word
1815 boundaries. */
1816 return extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1818 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1821 return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
1822 target, unsignedp);
1825 /* Helper function for extract_fixed_bit_field, extracts
1826 the bit field always using the MODE of OP0. */
1828 static rtx
1829 extract_fixed_bit_field_1 (enum machine_mode tmode, rtx op0,
1830 unsigned HOST_WIDE_INT bitsize,
1831 unsigned HOST_WIDE_INT bitnum, rtx target,
1832 int unsignedp)
1834 enum machine_mode mode = GET_MODE (op0);
1835 gcc_assert (SCALAR_INT_MODE_P (mode));
1837 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1838 for invalid input, such as extracting the equivalent of f5 from
1839 gcc.dg/pr48335-2.c. */
1841 if (BYTES_BIG_ENDIAN)
1842 /* BITNUM is the distance between our msb and that of OP0.
1843 Convert it to the distance from the lsb. */
1844 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1846 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
1847 We have reduced the big-endian case to the little-endian case. */
1849 if (unsignedp)
1851 if (bitnum)
1853 /* If the field does not already start at the lsb,
1854 shift it so it does. */
1855 /* Maybe propagate the target for the shift. */
1856 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1857 if (tmode != mode)
1858 subtarget = 0;
1859 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
1861 /* Convert the value to the desired mode. */
1862 if (mode != tmode)
1863 op0 = convert_to_mode (tmode, op0, 1);
1865 /* Unless the msb of the field used to be the msb when we shifted,
1866 mask out the upper bits. */
1868 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
1869 return expand_binop (GET_MODE (op0), and_optab, op0,
1870 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1871 target, 1, OPTAB_LIB_WIDEN);
1872 return op0;
1875 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1876 then arithmetic-shift its lsb to the lsb of the word. */
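/* Concretely, once the narrowest integer mode M covering BITSIZE + BITNUM
   bits has been chosen below, the extracted value is
   ((signed) OP0 << (M_BITS - BITSIZE - BITNUM)) >> (M_BITS - BITSIZE)
   with an arithmetic right shift; e.g. a 13-bit field at BITNUM 6 needs
   19 bits, so SImode is used and the shifts are by 13 and 19.  */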
1877 op0 = force_reg (mode, op0);
1879 /* Find the narrowest integer mode that contains the field. */
1881 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1882 mode = GET_MODE_WIDER_MODE (mode))
1883 if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
1885 op0 = convert_to_mode (mode, op0, 0);
1886 break;
1889 if (mode != tmode)
1890 target = 0;
1892 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
1894 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
1895 /* Maybe propagate the target for the shift. */
1896 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1897 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1900 return expand_shift (RSHIFT_EXPR, mode, op0,
1901 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1904 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1905 of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1906 complement of that if COMPLEMENT. The mask is truncated if
1907 necessary to the width of mode MODE. The mask is zero-extended if
1908 BITSIZE+BITPOS is too small for MODE. */
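/* For instance, mask_rtx (SImode, 4, 8, 0) is the constant 0xff0
   (eight ones starting at bit 4), and mask_rtx (SImode, 4, 8, 1) is its
   SImode complement 0xfffff00f.  */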
1910 static rtx
1911 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1913 double_int mask;
1915 mask = double_int::mask (bitsize);
1916 mask = mask.llshift (bitpos, HOST_BITS_PER_DOUBLE_INT);
1918 if (complement)
1919 mask = ~mask;
1921 return immed_double_int_const (mask, mode);
1924 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1925 VALUE << BITPOS. */
1927 static rtx
1928 lshift_value (enum machine_mode mode, unsigned HOST_WIDE_INT value,
1929 int bitpos)
1931 double_int val;
1933 val = double_int::from_uhwi (value);
1934 val = val.llshift (bitpos, HOST_BITS_PER_DOUBLE_INT);
1936 return immed_double_int_const (val, mode);
1939 /* Extract a bit field that is split across two words
1940 and return an RTX for the result.
1942 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1943 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1944 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend. */
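/* As an illustration, on a 32-bit-word little-endian target a 20-bit field
   at BITPOS 24 is assembled from two pieces: the top 8 bits of the first
   word become bits 0..7 of the result, and the low 12 bits of the second
   word, shifted left by 8, become bits 8..19; the pieces are IORed.  */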
1946 static rtx
1947 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1948 unsigned HOST_WIDE_INT bitpos, int unsignedp)
1950 unsigned int unit;
1951 unsigned int bitsdone = 0;
1952 rtx result = NULL_RTX;
1953 int first = 1;
1955 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1956 much at a time. */
1957 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1958 unit = BITS_PER_WORD;
1959 else
1960 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1962 while (bitsdone < bitsize)
1964 unsigned HOST_WIDE_INT thissize;
1965 rtx part, word;
1966 unsigned HOST_WIDE_INT thispos;
1967 unsigned HOST_WIDE_INT offset;
1969 offset = (bitpos + bitsdone) / unit;
1970 thispos = (bitpos + bitsdone) % unit;
1972 /* THISSIZE must not overrun a word boundary. Otherwise,
1973 extract_fixed_bit_field will call us again, and we will mutually
1974 recurse forever. */
1975 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1976 thissize = MIN (thissize, unit - thispos);
1978 /* If OP0 is a register, then handle OFFSET here.
1980 When handling multiword bitfields, extract_bit_field may pass
1981 down a word_mode SUBREG of a larger REG for a bitfield that actually
1982 crosses a word boundary. Thus, for a SUBREG, we must find
1983 the current word starting from the base register. */
1984 if (GET_CODE (op0) == SUBREG)
1986 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1987 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1988 GET_MODE (SUBREG_REG (op0)));
1989 offset = 0;
1991 else if (REG_P (op0))
1993 word = operand_subword_force (op0, offset, GET_MODE (op0));
1994 offset = 0;
1996 else
1997 word = op0;
1999 /* Extract the parts in bit-counting order,
2000 whose meaning is determined by BYTES_PER_UNIT.
2001 OFFSET is in UNITs, and UNIT is in bits. */
2002 part = extract_fixed_bit_field (word_mode, word, thissize,
2003 offset * unit + thispos, 0, 1);
2004 bitsdone += thissize;
2006 /* Shift this part into place for the result. */
2007 if (BYTES_BIG_ENDIAN)
2009 if (bitsize != bitsdone)
2010 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2011 bitsize - bitsdone, 0, 1);
2013 else
2015 if (bitsdone != thissize)
2016 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2017 bitsdone - thissize, 0, 1);
2020 if (first)
2021 result = part;
2022 else
2023 /* Combine the parts with bitwise or. This works
2024 because we extracted each part as an unsigned bit field. */
2025 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2026 OPTAB_LIB_WIDEN);
2028 first = 0;
2031 /* Unsigned bit field: we are done. */
2032 if (unsignedp)
2033 return result;
2034 /* Signed bit field: sign-extend with two arithmetic shifts. */
2035 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2036 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2037 return expand_shift (RSHIFT_EXPR, word_mode, result,
2038 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2041 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2042 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2043 MODE, fill the upper bits with zeros. Fail if the layout of either
2044 mode is unknown (as for CC modes) or if the extraction would involve
2045 unprofitable mode punning. Return the value on success, otherwise
2046 return null.
2048 This is different from gen_lowpart* in these respects:
2050 - the returned value must always be considered an rvalue
2052 - when MODE is wider than SRC_MODE, the extraction involves
2053 a zero extension
2055 - when MODE is smaller than SRC_MODE, the extraction involves
2056 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2058 In other words, this routine performs a computation, whereas the
2059 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2060 operations. */
2063 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
2065 enum machine_mode int_mode, src_int_mode;
2067 if (mode == src_mode)
2068 return src;
2070 if (CONSTANT_P (src))
2072 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2073 fails, it will happily create (subreg (symbol_ref)) or similar
2074 invalid SUBREGs. */
2075 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2076 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2077 if (ret)
2078 return ret;
2080 if (GET_MODE (src) == VOIDmode
2081 || !validate_subreg (mode, src_mode, src, byte))
2082 return NULL_RTX;
2084 src = force_reg (GET_MODE (src), src);
2085 return gen_rtx_SUBREG (mode, src, byte);
2088 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2089 return NULL_RTX;
2091 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2092 && MODES_TIEABLE_P (mode, src_mode))
2094 rtx x = gen_lowpart_common (mode, src);
2095 if (x)
2096 return x;
2099 src_int_mode = int_mode_for_mode (src_mode);
2100 int_mode = int_mode_for_mode (mode);
2101 if (src_int_mode == BLKmode || int_mode == BLKmode)
2102 return NULL_RTX;
2104 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2105 return NULL_RTX;
2106 if (!MODES_TIEABLE_P (int_mode, mode))
2107 return NULL_RTX;
2109 src = gen_lowpart (src_int_mode, src);
2110 src = convert_modes (int_mode, src_int_mode, src, true);
2111 src = gen_lowpart (mode, src);
2112 return src;
2115 /* Add INC into TARGET. */
2117 void
2118 expand_inc (rtx target, rtx inc)
2120 rtx value = expand_binop (GET_MODE (target), add_optab,
2121 target, inc,
2122 target, 0, OPTAB_LIB_WIDEN);
2123 if (value != target)
2124 emit_move_insn (target, value);
2127 /* Subtract DEC from TARGET. */
2129 void
2130 expand_dec (rtx target, rtx dec)
2132 rtx value = expand_binop (GET_MODE (target), sub_optab,
2133 target, dec,
2134 target, 0, OPTAB_LIB_WIDEN);
2135 if (value != target)
2136 emit_move_insn (target, value);
2139 /* Output a shift instruction for expression code CODE,
2140 with SHIFTED being the rtx for the value to shift,
2141 and AMOUNT the rtx for the amount to shift by.
2142 Store the result in the rtx TARGET, if that is convenient.
2143 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2144 Return the rtx for where the value is. */
2146 static rtx
2147 expand_shift_1 (enum tree_code code, enum machine_mode mode, rtx shifted,
2148 rtx amount, rtx target, int unsignedp)
2150 rtx op1, temp = 0;
2151 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2152 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2153 optab lshift_optab = ashl_optab;
2154 optab rshift_arith_optab = ashr_optab;
2155 optab rshift_uns_optab = lshr_optab;
2156 optab lrotate_optab = rotl_optab;
2157 optab rrotate_optab = rotr_optab;
2158 enum machine_mode op1_mode;
2159 enum machine_mode scalar_mode = mode;
2160 int attempt;
2161 bool speed = optimize_insn_for_speed_p ();
2163 if (VECTOR_MODE_P (mode))
2164 scalar_mode = GET_MODE_INNER (mode);
2165 op1 = amount;
2166 op1_mode = GET_MODE (op1);
2168 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2169 shift amount is a vector, use the vector/vector shift patterns. */
2170 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2172 lshift_optab = vashl_optab;
2173 rshift_arith_optab = vashr_optab;
2174 rshift_uns_optab = vlshr_optab;
2175 lrotate_optab = vrotl_optab;
2176 rrotate_optab = vrotr_optab;
2179 /* Previously detected shift-counts computed by NEGATE_EXPR
2180 and shifted in the other direction; but that does not work
2181 on all machines. */
2183 if (SHIFT_COUNT_TRUNCATED)
2185 if (CONST_INT_P (op1)
2186 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2187 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2188 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2189 % GET_MODE_BITSIZE (scalar_mode));
2190 else if (GET_CODE (op1) == SUBREG
2191 && subreg_lowpart_p (op1)
2192 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2193 && SCALAR_INT_MODE_P (GET_MODE (op1)))
2194 op1 = SUBREG_REG (op1);
2197 /* Canonicalize rotates by constant amount. If op1 is bitsize / 2,
2198 prefer left rotation, if op1 is from bitsize / 2 + 1 to
2199 bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2200 amount instead. */
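/* For example, in SImode a rotate left by 31 is rewritten as a rotate
   right by 1, and a rotate right by 20 as a rotate left by 12.  */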
2201 if (rotate
2202 && CONST_INT_P (op1)
2203 && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2204 GET_MODE_BITSIZE (scalar_mode) - 1))
2206 op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2207 left = !left;
2208 code = left ? LROTATE_EXPR : RROTATE_EXPR;
2211 if (op1 == const0_rtx)
2212 return shifted;
2214 /* Check whether it's cheaper to implement a left shift by a constant
2215 bit count by a sequence of additions. */
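/* Each addition in the loop below doubles the value, so a shift by K
   becomes K doublings; e.g. X << 3 is computed as T1 = X + X;
   T2 = T1 + T1; T3 = T2 + T2.  This only pays off when K * add_cost
   is below the cost of the shift itself.  */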
2216 if (code == LSHIFT_EXPR
2217 && CONST_INT_P (op1)
2218 && INTVAL (op1) > 0
2219 && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2220 && INTVAL (op1) < MAX_BITS_PER_WORD
2221 && (shift_cost (speed, mode, INTVAL (op1))
2222 > INTVAL (op1) * add_cost (speed, mode))
2223 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2225 int i;
2226 for (i = 0; i < INTVAL (op1); i++)
2228 temp = force_reg (mode, shifted);
2229 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2230 unsignedp, OPTAB_LIB_WIDEN);
2232 return shifted;
2235 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2237 enum optab_methods methods;
2239 if (attempt == 0)
2240 methods = OPTAB_DIRECT;
2241 else if (attempt == 1)
2242 methods = OPTAB_WIDEN;
2243 else
2244 methods = OPTAB_LIB_WIDEN;
2246 if (rotate)
2248 /* Widening does not work for rotation. */
2249 if (methods == OPTAB_WIDEN)
2250 continue;
2251 else if (methods == OPTAB_LIB_WIDEN)
2253 /* If we have been unable to open-code this by a rotation,
2254 do it as the IOR of two shifts. I.e., to rotate A
2255 by N bits, compute
2256 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2257 where C is the bitsize of A.
2259 It is theoretically possible that the target machine might
2260 not be able to perform either shift and hence we would
2261 be making two libcalls rather than just the one for the
2262 shift (similarly if IOR could not be done). We will allow
2263 this extremely unlikely lossage to avoid complicating the
2264 code below. */
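/* For instance, with C == 32 and N == 3 the expression becomes
   (A << 3) | ((unsigned) A >> 29), i.e. a 32-bit left rotation by 3;
   the (-N) & (C - 1) form keeps the right-shift count in the range
   0 .. C - 1 even when N is not a compile-time constant.  */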
2266 rtx subtarget = target == shifted ? 0 : target;
2267 rtx new_amount, other_amount;
2268 rtx temp1;
2270 new_amount = op1;
2271 if (op1 == const0_rtx)
2272 return shifted;
2273 else if (CONST_INT_P (op1))
2274 other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode)
2275 - INTVAL (op1));
2276 else
2278 other_amount
2279 = simplify_gen_unary (NEG, GET_MODE (op1),
2280 op1, GET_MODE (op1));
2281 HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2282 other_amount
2283 = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2284 gen_int_mode (mask, GET_MODE (op1)));
2287 shifted = force_reg (mode, shifted);
2289 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2290 mode, shifted, new_amount, 0, 1);
2291 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2292 mode, shifted, other_amount,
2293 subtarget, 1);
2294 return expand_binop (mode, ior_optab, temp, temp1, target,
2295 unsignedp, methods);
2298 temp = expand_binop (mode,
2299 left ? lrotate_optab : rrotate_optab,
2300 shifted, op1, target, unsignedp, methods);
2302 else if (unsignedp)
2303 temp = expand_binop (mode,
2304 left ? lshift_optab : rshift_uns_optab,
2305 shifted, op1, target, unsignedp, methods);
2307 /* Do arithmetic shifts.
2308 Also, if we are going to widen the operand, we can just as well
2309 use an arithmetic right-shift instead of a logical one. */
2310 if (temp == 0 && ! rotate
2311 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2313 enum optab_methods methods1 = methods;
2315 /* If trying to widen a log shift to an arithmetic shift,
2316 don't accept an arithmetic shift of the same size. */
2317 if (unsignedp)
2318 methods1 = OPTAB_MUST_WIDEN;
2320 /* Arithmetic shift */
2322 temp = expand_binop (mode,
2323 left ? lshift_optab : rshift_arith_optab,
2324 shifted, op1, target, unsignedp, methods1);
2327 /* We used to try extzv here for logical right shifts, but that was
2328 only useful for one machine, the VAX, and caused poor code
2329 generation there for lshrdi3, so the code was deleted and a
2330 define_expand for lshrsi3 was added to vax.md. */
2333 gcc_assert (temp);
2334 return temp;
2337 /* Output a shift instruction for expression code CODE,
2338 with SHIFTED being the rtx for the value to shift,
2339 and AMOUNT the amount to shift by.
2340 Store the result in the rtx TARGET, if that is convenient.
2341 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2342 Return the rtx for where the value is. */
2345 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2346 int amount, rtx target, int unsignedp)
2348 return expand_shift_1 (code, mode,
2349 shifted, GEN_INT (amount), target, unsignedp);
2352 /* Output a shift instruction for expression code CODE,
2353 with SHIFTED being the rtx for the value to shift,
2354 and AMOUNT the tree for the amount to shift by.
2355 Store the result in the rtx TARGET, if that is convenient.
2356 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2357 Return the rtx for where the value is. */
2360 expand_variable_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2361 tree amount, rtx target, int unsignedp)
2363 return expand_shift_1 (code, mode,
2364 shifted, expand_normal (amount), target, unsignedp);
2368 /* Indicates the type of fixup needed after a constant multiplication.
2369 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2370 the result should be negated, and ADD_VARIANT means that the
2371 multiplicand should be added to the result. */
2372 enum mult_variant {basic_variant, negate_variant, add_variant};
2374 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2375 const struct mult_cost *, enum machine_mode mode);
2376 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2377 struct algorithm *, enum mult_variant *, int);
2378 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2379 const struct algorithm *, enum mult_variant);
2380 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2381 static rtx extract_high_half (enum machine_mode, rtx);
2382 static rtx expmed_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2383 static rtx expmed_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2384 int, int);
2385 /* Compute and return the best algorithm for multiplying by T.
2386 The algorithm must cost less than COST_LIMIT.
2387 If retval.cost >= COST_LIMIT, no algorithm was found and all
2388 other fields of the returned struct are undefined.
2389 MODE is the machine mode of the multiplication. */
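/* For example, for T == 10 one sequence this search can return is
   "multiply by 5, then shift": accum = (x << 2) + x; accum <<= 1;
   i.e. one shift-and-add and one shift instead of a multiply.  Whether
   it is actually chosen depends on the target's add, shift and
   shift-and-add costs.  */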
2391 static void
2392 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2393 const struct mult_cost *cost_limit, enum machine_mode mode)
2395 int m;
2396 struct algorithm *alg_in, *best_alg;
2397 struct mult_cost best_cost;
2398 struct mult_cost new_limit;
2399 int op_cost, op_latency;
2400 unsigned HOST_WIDE_INT orig_t = t;
2401 unsigned HOST_WIDE_INT q;
2402 int maxm, hash_index;
2403 bool cache_hit = false;
2404 enum alg_code cache_alg = alg_zero;
2405 bool speed = optimize_insn_for_speed_p ();
2406 enum machine_mode imode;
2407 struct alg_hash_entry *entry_ptr;
2409 /* Indicate that no algorithm is yet found. If no algorithm
2410 is found, this value will be returned and indicate failure. */
2411 alg_out->cost.cost = cost_limit->cost + 1;
2412 alg_out->cost.latency = cost_limit->latency + 1;
2414 if (cost_limit->cost < 0
2415 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2416 return;
2418 /* Be prepared for vector modes. */
2419 imode = GET_MODE_INNER (mode);
2420 if (imode == VOIDmode)
2421 imode = mode;
2423 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2425 /* Restrict the bits of "t" to the multiplication's mode. */
2426 t &= GET_MODE_MASK (imode);
2428 /* t == 1 can be done in zero cost. */
2429 if (t == 1)
2431 alg_out->ops = 1;
2432 alg_out->cost.cost = 0;
2433 alg_out->cost.latency = 0;
2434 alg_out->op[0] = alg_m;
2435 return;
2438 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2439 fail now. */
2440 if (t == 0)
2442 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2443 return;
2444 else
2446 alg_out->ops = 1;
2447 alg_out->cost.cost = zero_cost (speed);
2448 alg_out->cost.latency = zero_cost (speed);
2449 alg_out->op[0] = alg_zero;
2450 return;
2454 /* We'll be needing a couple extra algorithm structures now. */
2456 alg_in = XALLOCA (struct algorithm);
2457 best_alg = XALLOCA (struct algorithm);
2458 best_cost = *cost_limit;
2460 /* Compute the hash index. */
2461 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2463 /* See if we already know what to do for T. */
2464 entry_ptr = alg_hash_entry_ptr (hash_index);
2465 if (entry_ptr->t == t
2466 && entry_ptr->mode == mode
2468 && entry_ptr->speed == speed
2469 && entry_ptr->alg != alg_unknown)
2471 cache_alg = entry_ptr->alg;
2473 if (cache_alg == alg_impossible)
2475 /* The cache tells us that it's impossible to synthesize
2476 multiplication by T within entry_ptr->cost. */
2477 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2478 /* COST_LIMIT is at least as restrictive as the one
2479 recorded in the hash table, in which case we have no
2480 hope of synthesizing a multiplication. Just
2481 return. */
2482 return;
2484 /* If we get here, COST_LIMIT is less restrictive than the
2485 one recorded in the hash table, so we may be able to
2486 synthesize a multiplication. Proceed as if we didn't
2487 have the cache entry. */
2489 else
2491 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2492 /* The cached algorithm shows that this multiplication
2493 requires more cost than COST_LIMIT. Just return. This
2494 way, we don't clobber this cache entry with
2495 alg_impossible but retain useful information. */
2496 return;
2498 cache_hit = true;
2500 switch (cache_alg)
2502 case alg_shift:
2503 goto do_alg_shift;
2505 case alg_add_t_m2:
2506 case alg_sub_t_m2:
2507 goto do_alg_addsub_t_m2;
2509 case alg_add_factor:
2510 case alg_sub_factor:
2511 goto do_alg_addsub_factor;
2513 case alg_add_t2_m:
2514 goto do_alg_add_t2_m;
2516 case alg_sub_t2_m:
2517 goto do_alg_sub_t2_m;
2519 default:
2520 gcc_unreachable ();
2525 /* If we have a group of zero bits at the low-order part of T, try
2526 multiplying by the remaining bits and then doing a shift. */
2528 if ((t & 1) == 0)
2530 do_alg_shift:
2531 m = floor_log2 (t & -t); /* m = number of low zero bits */
2532 if (m < maxm)
2534 q = t >> m;
2535 /* The function expand_shift will choose between a shift and
2536 a sequence of additions, so the observed cost is given as
2537 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2538 op_cost = m * add_cost (speed, mode);
2539 if (shift_cost (speed, mode, m) < op_cost)
2540 op_cost = shift_cost (speed, mode, m);
2541 new_limit.cost = best_cost.cost - op_cost;
2542 new_limit.latency = best_cost.latency - op_cost;
2543 synth_mult (alg_in, q, &new_limit, mode);
2545 alg_in->cost.cost += op_cost;
2546 alg_in->cost.latency += op_cost;
2547 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2549 struct algorithm *x;
2550 best_cost = alg_in->cost;
2551 x = alg_in, alg_in = best_alg, best_alg = x;
2552 best_alg->log[best_alg->ops] = m;
2553 best_alg->op[best_alg->ops] = alg_shift;
2556 /* See if treating ORIG_T as a signed number yields a better
2557 sequence. Try this sequence only for a negative ORIG_T
2558 as it would be useless for a non-negative ORIG_T. */
2559 if ((HOST_WIDE_INT) orig_t < 0)
2561 /* Shift ORIG_T as follows because a right shift of a
2562 negative-valued signed type is implementation
2563 defined. */
2564 q = ~(~orig_t >> m);
2565 /* The function expand_shift will choose between a shift
2566 and a sequence of additions, so the observed cost is
2567 given as MIN (m * add_cost(speed, mode),
2568 shift_cost(speed, mode, m)). */
2569 op_cost = m * add_cost (speed, mode);
2570 if (shift_cost (speed, mode, m) < op_cost)
2571 op_cost = shift_cost (speed, mode, m);
2572 new_limit.cost = best_cost.cost - op_cost;
2573 new_limit.latency = best_cost.latency - op_cost;
2574 synth_mult (alg_in, q, &new_limit, mode);
2576 alg_in->cost.cost += op_cost;
2577 alg_in->cost.latency += op_cost;
2578 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2580 struct algorithm *x;
2581 best_cost = alg_in->cost;
2582 x = alg_in, alg_in = best_alg, best_alg = x;
2583 best_alg->log[best_alg->ops] = m;
2584 best_alg->op[best_alg->ops] = alg_shift;
2588 if (cache_hit)
2589 goto done;
2592 /* If we have an odd number, add or subtract one. */
2593 if ((t & 1) != 0)
2595 unsigned HOST_WIDE_INT w;
2597 do_alg_addsub_t_m2:
2598 for (w = 1; (w & t) != 0; w <<= 1)
2600 /* If T was -1, then W will be zero after the loop. This is another
2601 case where T ends with ...111. Handling it as (T + 1) minus 1
2602 produces slightly better code and makes algorithm selection
2603 much faster than treating it like the ...0111 case
2604 below. */
2605 if (w == 0
2606 || (w > 2
2607 /* Reject the case where t is 3.
2608 Thus we prefer addition in that case. */
2609 && t != 3))
2611 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2613 op_cost = add_cost (speed, mode);
2614 new_limit.cost = best_cost.cost - op_cost;
2615 new_limit.latency = best_cost.latency - op_cost;
2616 synth_mult (alg_in, t + 1, &new_limit, mode);
2618 alg_in->cost.cost += op_cost;
2619 alg_in->cost.latency += op_cost;
2620 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2622 struct algorithm *x;
2623 best_cost = alg_in->cost;
2624 x = alg_in, alg_in = best_alg, best_alg = x;
2625 best_alg->log[best_alg->ops] = 0;
2626 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2629 else
2631 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2633 op_cost = add_cost (speed, mode);
2634 new_limit.cost = best_cost.cost - op_cost;
2635 new_limit.latency = best_cost.latency - op_cost;
2636 synth_mult (alg_in, t - 1, &new_limit, mode);
2638 alg_in->cost.cost += op_cost;
2639 alg_in->cost.latency += op_cost;
2640 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2642 struct algorithm *x;
2643 best_cost = alg_in->cost;
2644 x = alg_in, alg_in = best_alg, best_alg = x;
2645 best_alg->log[best_alg->ops] = 0;
2646 best_alg->op[best_alg->ops] = alg_add_t_m2;
2650 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2651 quickly with a - a * n for some appropriate constant n. */
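/* E.g. for ORIG_T == -7 we get m == 3 and the product is formed as
   a - (a << 3).  */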
2652 m = exact_log2 (-orig_t + 1);
2653 if (m >= 0 && m < maxm)
2655 op_cost = shiftsub1_cost (speed, mode, m);
2656 new_limit.cost = best_cost.cost - op_cost;
2657 new_limit.latency = best_cost.latency - op_cost;
2658 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2659 &new_limit, mode);
2661 alg_in->cost.cost += op_cost;
2662 alg_in->cost.latency += op_cost;
2663 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2665 struct algorithm *x;
2666 best_cost = alg_in->cost;
2667 x = alg_in, alg_in = best_alg, best_alg = x;
2668 best_alg->log[best_alg->ops] = m;
2669 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2673 if (cache_hit)
2674 goto done;
2677 /* Look for factors of t of the form
2678 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2679 If we find such a factor, we can multiply by t using an algorithm that
2680 multiplies by q, shift the result by m and add/subtract it to itself.
2682 We search for large factors first and loop down, even if large factors
2683 are less probable than small; if we find a large factor we will find a
2684 good sequence quickly, and therefore be able to prune (by decreasing
2685 COST_LIMIT) the search. */
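/* For example, T == 45 factors as 3 * (2**4 - 1), so one candidate is
   t = (x << 1) + x;  result = (t << 4) - t;  i.e. multiply by 3 first,
   then apply the (2**4 - 1) factor with a single shift-and-subtract.  */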
2687 do_alg_addsub_factor:
2688 for (m = floor_log2 (t - 1); m >= 2; m--)
2690 unsigned HOST_WIDE_INT d;
2692 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2693 if (t % d == 0 && t > d && m < maxm
2694 && (!cache_hit || cache_alg == alg_add_factor))
2696 /* If the target has a cheap shift-and-add instruction use
2697 that in preference to a shift insn followed by an add insn.
2698 Assume that the shift-and-add is "atomic" with a latency
2699 equal to its cost, otherwise assume that on superscalar
2700 hardware the shift may be executed concurrently with the
2701 earlier steps in the algorithm. */
2702 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2703 if (shiftadd_cost (speed, mode, m) < op_cost)
2705 op_cost = shiftadd_cost (speed, mode, m);
2706 op_latency = op_cost;
2708 else
2709 op_latency = add_cost (speed, mode);
2711 new_limit.cost = best_cost.cost - op_cost;
2712 new_limit.latency = best_cost.latency - op_latency;
2713 synth_mult (alg_in, t / d, &new_limit, mode);
2715 alg_in->cost.cost += op_cost;
2716 alg_in->cost.latency += op_latency;
2717 if (alg_in->cost.latency < op_cost)
2718 alg_in->cost.latency = op_cost;
2719 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2721 struct algorithm *x;
2722 best_cost = alg_in->cost;
2723 x = alg_in, alg_in = best_alg, best_alg = x;
2724 best_alg->log[best_alg->ops] = m;
2725 best_alg->op[best_alg->ops] = alg_add_factor;
2727 /* Other factors will have been taken care of in the recursion. */
2728 break;
2731 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2732 if (t % d == 0 && t > d && m < maxm
2733 && (!cache_hit || cache_alg == alg_sub_factor))
2735 /* If the target has a cheap shift-and-subtract insn use
2736 that in preference to a shift insn followed by a sub insn.
2737 Assume that the shift-and-sub is "atomic" with a latency
2738 equal to its cost, otherwise assume that on superscalar
2739 hardware the shift may be executed concurrently with the
2740 earlier steps in the algorithm. */
2741 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2742 if (shiftsub0_cost (speed, mode, m) < op_cost)
2744 op_cost = shiftsub0_cost (speed, mode, m);
2745 op_latency = op_cost;
2747 else
2748 op_latency = add_cost (speed, mode);
2750 new_limit.cost = best_cost.cost - op_cost;
2751 new_limit.latency = best_cost.latency - op_latency;
2752 synth_mult (alg_in, t / d, &new_limit, mode);
2754 alg_in->cost.cost += op_cost;
2755 alg_in->cost.latency += op_latency;
2756 if (alg_in->cost.latency < op_cost)
2757 alg_in->cost.latency = op_cost;
2758 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2760 struct algorithm *x;
2761 best_cost = alg_in->cost;
2762 x = alg_in, alg_in = best_alg, best_alg = x;
2763 best_alg->log[best_alg->ops] = m;
2764 best_alg->op[best_alg->ops] = alg_sub_factor;
2766 break;
2769 if (cache_hit)
2770 goto done;
2772 /* Try shift-and-add (load effective address) instructions,
2773 i.e. do a*3, a*5, a*9. */
2774 if ((t & 1) != 0)
2776 do_alg_add_t2_m:
2777 q = t - 1;
2778 q = q & -q;
2779 m = exact_log2 (q);
2780 if (m >= 0 && m < maxm)
2782 op_cost = shiftadd_cost (speed, mode, m);
2783 new_limit.cost = best_cost.cost - op_cost;
2784 new_limit.latency = best_cost.latency - op_cost;
2785 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2787 alg_in->cost.cost += op_cost;
2788 alg_in->cost.latency += op_cost;
2789 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2791 struct algorithm *x;
2792 best_cost = alg_in->cost;
2793 x = alg_in, alg_in = best_alg, best_alg = x;
2794 best_alg->log[best_alg->ops] = m;
2795 best_alg->op[best_alg->ops] = alg_add_t2_m;
2798 if (cache_hit)
2799 goto done;
2801 do_alg_sub_t2_m:
2802 q = t + 1;
2803 q = q & -q;
2804 m = exact_log2 (q);
2805 if (m >= 0 && m < maxm)
2807 op_cost = shiftsub0_cost (speed, mode, m);
2808 new_limit.cost = best_cost.cost - op_cost;
2809 new_limit.latency = best_cost.latency - op_cost;
2810 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2812 alg_in->cost.cost += op_cost;
2813 alg_in->cost.latency += op_cost;
2814 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2816 struct algorithm *x;
2817 best_cost = alg_in->cost;
2818 x = alg_in, alg_in = best_alg, best_alg = x;
2819 best_alg->log[best_alg->ops] = m;
2820 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2823 if (cache_hit)
2824 goto done;
2827 done:
2828 /* If best_cost has not decreased, we have not found any algorithm. */
2829 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2831 /* We failed to find an algorithm. Record alg_impossible for
2832 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2833 we are asked to find an algorithm for T within the same or
2834 lower COST_LIMIT, we can immediately return to the
2835 caller. */
2836 entry_ptr->t = t;
2837 entry_ptr->mode = mode;
2838 entry_ptr->speed = speed;
2839 entry_ptr->alg = alg_impossible;
2840 entry_ptr->cost = *cost_limit;
2841 return;
2844 /* Cache the result. */
2845 if (!cache_hit)
2847 entry_ptr->t = t;
2848 entry_ptr->mode = mode;
2849 entry_ptr->speed = speed;
2850 entry_ptr->alg = best_alg->op[best_alg->ops];
2851 entry_ptr->cost.cost = best_cost.cost;
2852 entry_ptr->cost.latency = best_cost.latency;
2855 /* If the sequence is getting too long for `struct algorithm'
2856 to record, make this search fail. */
2857 if (best_alg->ops == MAX_BITS_PER_WORD)
2858 return;
2860 /* Copy the algorithm from temporary space to the space at alg_out.
2861 We avoid using structure assignment because the majority of
2862 best_alg is normally undefined, and this is a critical function. */
2863 alg_out->ops = best_alg->ops + 1;
2864 alg_out->cost = best_cost;
2865 memcpy (alg_out->op, best_alg->op,
2866 alg_out->ops * sizeof *alg_out->op);
2867 memcpy (alg_out->log, best_alg->log,
2868 alg_out->ops * sizeof *alg_out->log);
2871 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2872 Try three variations:
2874 - a shift/add sequence based on VAL itself
2875 - a shift/add sequence based on -VAL, followed by a negation
2876 - a shift/add sequence based on VAL - 1, followed by an addition.
2878 Return true if the cheapest of these cost less than MULT_COST,
2879 describing the algorithm in *ALG and final fixup in *VARIANT. */
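/* For instance, for VAL == 7 the three candidates are roughly
   (x << 3) - x (basic_variant), negating the sequence for x * -7
   (negate_variant), and the sequence for x * 6 plus x (add_variant);
   the cheapest one under MULT_COST wins.  */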
2881 static bool
2882 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2883 struct algorithm *alg, enum mult_variant *variant,
2884 int mult_cost)
2886 struct algorithm alg2;
2887 struct mult_cost limit;
2888 int op_cost;
2889 bool speed = optimize_insn_for_speed_p ();
2891 /* Fail quickly for impossible bounds. */
2892 if (mult_cost < 0)
2893 return false;
2895 /* Ensure that mult_cost provides a reasonable upper bound.
2896 Any constant multiplication can be performed with less
2897 than 2 * bits additions. */
2898 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
2899 if (mult_cost > op_cost)
2900 mult_cost = op_cost;
2902 *variant = basic_variant;
2903 limit.cost = mult_cost;
2904 limit.latency = mult_cost;
2905 synth_mult (alg, val, &limit, mode);
2907 /* This works only if the inverted value actually fits in an
2908 `unsigned int' */
2909 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
2911 op_cost = neg_cost (speed, mode);
2912 if (MULT_COST_LESS (&alg->cost, mult_cost))
2914 limit.cost = alg->cost.cost - op_cost;
2915 limit.latency = alg->cost.latency - op_cost;
2917 else
2919 limit.cost = mult_cost - op_cost;
2920 limit.latency = mult_cost - op_cost;
2923 synth_mult (&alg2, -val, &limit, mode);
2924 alg2.cost.cost += op_cost;
2925 alg2.cost.latency += op_cost;
2926 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2927 *alg = alg2, *variant = negate_variant;
2930 /* This proves very useful for division-by-constant. */
2931 op_cost = add_cost (speed, mode);
2932 if (MULT_COST_LESS (&alg->cost, mult_cost))
2934 limit.cost = alg->cost.cost - op_cost;
2935 limit.latency = alg->cost.latency - op_cost;
2937 else
2939 limit.cost = mult_cost - op_cost;
2940 limit.latency = mult_cost - op_cost;
2943 synth_mult (&alg2, val - 1, &limit, mode);
2944 alg2.cost.cost += op_cost;
2945 alg2.cost.latency += op_cost;
2946 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2947 *alg = alg2, *variant = add_variant;
2949 return MULT_COST_LESS (&alg->cost, mult_cost);
2952 /* A subroutine of expand_mult, used for constant multiplications.
2953 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2954 convenient. Use the shift/add sequence described by ALG and apply
2955 the final fixup specified by VARIANT. */
2957 static rtx
2958 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2959 rtx target, const struct algorithm *alg,
2960 enum mult_variant variant)
2962 HOST_WIDE_INT val_so_far;
2963 rtx insn, accum, tem;
2964 int opno;
2965 enum machine_mode nmode;
2967 /* Avoid referencing memory over and over and invalid sharing
2968 on SUBREGs. */
2969 op0 = force_reg (mode, op0);
2971 /* ACCUM starts out either as OP0 or as a zero, depending on
2972 the first operation. */
2974 if (alg->op[0] == alg_zero)
2976 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
2977 val_so_far = 0;
2979 else if (alg->op[0] == alg_m)
2981 accum = copy_to_mode_reg (mode, op0);
2982 val_so_far = 1;
2984 else
2985 gcc_unreachable ();
2987 for (opno = 1; opno < alg->ops; opno++)
2989 int log = alg->log[opno];
2990 rtx shift_subtarget = optimize ? 0 : accum;
2991 rtx add_target
2992 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2993 && !optimize)
2994 ? target : 0;
2995 rtx accum_target = optimize ? 0 : accum;
2996 rtx accum_inner;
2998 switch (alg->op[opno])
3000 case alg_shift:
3001 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3002 /* REG_EQUAL note will be attached to the following insn. */
3003 emit_move_insn (accum, tem);
3004 val_so_far <<= log;
3005 break;
3007 case alg_add_t_m2:
3008 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3009 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3010 add_target ? add_target : accum_target);
3011 val_so_far += (HOST_WIDE_INT) 1 << log;
3012 break;
3014 case alg_sub_t_m2:
3015 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3016 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3017 add_target ? add_target : accum_target);
3018 val_so_far -= (HOST_WIDE_INT) 1 << log;
3019 break;
3021 case alg_add_t2_m:
3022 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3023 log, shift_subtarget, 0);
3024 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3025 add_target ? add_target : accum_target);
3026 val_so_far = (val_so_far << log) + 1;
3027 break;
3029 case alg_sub_t2_m:
3030 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3031 log, shift_subtarget, 0);
3032 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3033 add_target ? add_target : accum_target);
3034 val_so_far = (val_so_far << log) - 1;
3035 break;
3037 case alg_add_factor:
3038 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3039 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3040 add_target ? add_target : accum_target);
3041 val_so_far += val_so_far << log;
3042 break;
3044 case alg_sub_factor:
3045 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3046 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3047 (add_target
3048 ? add_target : (optimize ? 0 : tem)));
3049 val_so_far = (val_so_far << log) - val_so_far;
3050 break;
3052 default:
3053 gcc_unreachable ();
3056 if (SCALAR_INT_MODE_P (mode))
3058 /* Write a REG_EQUAL note on the last insn so that we can cse
3059 multiplication sequences. Note that if ACCUM is a SUBREG,
3060 we've set the inner register and must properly indicate that. */
3061 tem = op0, nmode = mode;
3062 accum_inner = accum;
3063 if (GET_CODE (accum) == SUBREG)
3065 accum_inner = SUBREG_REG (accum);
3066 nmode = GET_MODE (accum_inner);
3067 tem = gen_lowpart (nmode, op0);
3070 insn = get_last_insn ();
3071 set_dst_reg_note (insn, REG_EQUAL,
3072 gen_rtx_MULT (nmode, tem,
3073 gen_int_mode (val_so_far, nmode)),
3074 accum_inner);
3078 if (variant == negate_variant)
3080 val_so_far = -val_so_far;
3081 accum = expand_unop (mode, neg_optab, accum, target, 0);
3083 else if (variant == add_variant)
3085 val_so_far = val_so_far + 1;
3086 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3089 /* Compare only the bits of val and val_so_far that are significant
3090 in the result mode, to avoid sign-/zero-extension confusion. */
3091 nmode = GET_MODE_INNER (mode);
3092 if (nmode == VOIDmode)
3093 nmode = mode;
3094 val &= GET_MODE_MASK (nmode);
3095 val_so_far &= GET_MODE_MASK (nmode);
3096 gcc_assert (val == val_so_far);
3098 return accum;
3101 /* Perform a multiplication and return an rtx for the result.
3102 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3103 TARGET is a suggestion for where to store the result (an rtx).
3105 We check specially for a constant integer as OP1.
3106 If you want this check for OP0 as well, then before calling
3107 you should swap the two operands if OP0 would be constant. */
3110 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3111 int unsignedp)
3113 enum mult_variant variant;
3114 struct algorithm algorithm;
3115 rtx scalar_op1;
3116 int max_cost;
3117 bool speed = optimize_insn_for_speed_p ();
3118 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3120 if (CONSTANT_P (op0))
3122 rtx temp = op0;
3123 op0 = op1;
3124 op1 = temp;
3127 /* For vectors, there are several simplifications that can be made if
3128 all elements of the vector constant are identical. */
3129 scalar_op1 = op1;
3130 if (GET_CODE (op1) == CONST_VECTOR)
3132 int i, n = CONST_VECTOR_NUNITS (op1);
3133 scalar_op1 = CONST_VECTOR_ELT (op1, 0);
3134 for (i = 1; i < n; ++i)
3135 if (!rtx_equal_p (scalar_op1, CONST_VECTOR_ELT (op1, i)))
3136 goto skip_scalar;
3139 if (INTEGRAL_MODE_P (mode))
3141 rtx fake_reg;
3142 HOST_WIDE_INT coeff;
3143 bool is_neg;
3144 int mode_bitsize;
3146 if (op1 == CONST0_RTX (mode))
3147 return op1;
3148 if (op1 == CONST1_RTX (mode))
3149 return op0;
3150 if (op1 == CONSTM1_RTX (mode))
3151 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3152 op0, target, 0);
3154 if (do_trapv)
3155 goto skip_synth;
3157 /* If mode is integer vector mode, check if the backend supports
3158 vector lshift (by scalar or vector) at all. If not, we can't use
3159 synthesized multiply. */
3160 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3161 && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3162 && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3163 goto skip_synth;
3165 /* These are the operations that are potentially turned into
3166 a sequence of shifts and additions. */
3167 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3169 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3170 less than or equal in size to `unsigned int' this doesn't matter.
3171 If the mode is larger than `unsigned int', then synth_mult works
3172 only if the constant value exactly fits in an `unsigned int' without
3173 any truncation. This means that multiplying by negative values does
3174 not work; results are off by 2^32 on a 32 bit machine. */
3176 if (CONST_INT_P (scalar_op1))
3178 coeff = INTVAL (scalar_op1);
3179 is_neg = coeff < 0;
3181 else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3183 /* If we are multiplying in DImode, it may still be a win
3184 to try to work with shifts and adds. */
3185 if (CONST_DOUBLE_HIGH (scalar_op1) == 0
3186 && (CONST_DOUBLE_LOW (scalar_op1) > 0
3187 || (CONST_DOUBLE_LOW (scalar_op1) < 0
3188 && EXACT_POWER_OF_2_OR_ZERO_P
3189 (CONST_DOUBLE_LOW (scalar_op1)))))
3191 coeff = CONST_DOUBLE_LOW (scalar_op1);
3192 is_neg = false;
3194 else if (CONST_DOUBLE_LOW (scalar_op1) == 0)
3196 coeff = CONST_DOUBLE_HIGH (scalar_op1);
3197 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3199 int shift = floor_log2 (coeff) + HOST_BITS_PER_WIDE_INT;
3200 if (shift < HOST_BITS_PER_DOUBLE_INT - 1
3201 || mode_bitsize <= HOST_BITS_PER_DOUBLE_INT)
3202 return expand_shift (LSHIFT_EXPR, mode, op0,
3203 shift, target, unsignedp);
3205 goto skip_synth;
3207 else
3208 goto skip_synth;
3210 else
3211 goto skip_synth;
3213 /* We used to test optimize here, on the grounds that it's better to
3214 produce a smaller program when -O is not used. But this causes
3215 such a terrible slowdown sometimes that it seems better to always
3216 use synth_mult. */
3218 /* Special case powers of two. */
3219 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3220 && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3221 return expand_shift (LSHIFT_EXPR, mode, op0,
3222 floor_log2 (coeff), target, unsignedp);
3224 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3226 /* Attempt to handle multiplication of DImode values by negative
3227 coefficients, by performing the multiplication by a positive
3228 multiplier and then inverting the result. */
3229 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3231 /* It's safe to use -coeff even for INT_MIN, as the
3232 result is interpreted as an unsigned coefficient.
3233 Exclude cost of op0 from max_cost to match the cost
3234 calculation of the synth_mult. */
3235 coeff = -(unsigned HOST_WIDE_INT) coeff;
3236 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed)
3237 - neg_cost (speed, mode));
3238 if (max_cost <= 0)
3239 goto skip_synth;
3241 /* Special case powers of two. */
3242 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3244 rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3245 floor_log2 (coeff), target, unsignedp);
3246 return expand_unop (mode, neg_optab, temp, target, 0);
3249 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3250 max_cost))
3252 rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3253 &algorithm, variant);
3254 return expand_unop (mode, neg_optab, temp, target, 0);
3256 goto skip_synth;
3259 /* Exclude cost of op0 from max_cost to match the cost
3260 calculation of the synth_mult. */
3261 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
3262 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3263 return expand_mult_const (mode, op0, coeff, target,
3264 &algorithm, variant);
3266 skip_synth:
3268 /* Expand x*2.0 as x+x. */
3269 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1))
3271 REAL_VALUE_TYPE d;
3272 REAL_VALUE_FROM_CONST_DOUBLE (d, scalar_op1);
3274 if (REAL_VALUES_EQUAL (d, dconst2))
3276 op0 = force_reg (GET_MODE (op0), op0);
3277 return expand_binop (mode, add_optab, op0, op0,
3278 target, unsignedp, OPTAB_LIB_WIDEN);
3281 skip_scalar:
3283 /* This used to use umul_optab if unsigned, but for non-widening multiply
3284 there is no difference between signed and unsigned. */
3285 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3286 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3287 gcc_assert (op0);
3288 return op0;
3291 /* Return a cost estimate for multiplying a register by the given
3292 COEFFicient in the given MODE and SPEED. */
3295 mult_by_coeff_cost (HOST_WIDE_INT coeff, enum machine_mode mode, bool speed)
3297 int max_cost;
3298 struct algorithm algorithm;
3299 enum mult_variant variant;
3301 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3302 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg), speed);
3303 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3304 return algorithm.cost.cost;
3305 else
3306 return max_cost;
3309 /* Perform a widening multiplication and return an rtx for the result.
3310 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3311 TARGET is a suggestion for where to store the result (an rtx).
3312 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3313 or smul_widen_optab.
3315 We check specially for a constant integer as OP1, comparing the
3316 cost of a widening multiply against the cost of a sequence of shifts
3317 and adds. */
3320 expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3321 int unsignedp, optab this_optab)
3323 bool speed = optimize_insn_for_speed_p ();
3324 rtx cop1;
3326 if (CONST_INT_P (op1)
3327 && GET_MODE (op0) != VOIDmode
3328 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3329 this_optab == umul_widen_optab))
3330 && CONST_INT_P (cop1)
3331 && (INTVAL (cop1) >= 0
3332 || HWI_COMPUTABLE_MODE_P (mode)))
3334 HOST_WIDE_INT coeff = INTVAL (cop1);
3335 int max_cost;
3336 enum mult_variant variant;
3337 struct algorithm algorithm;
3339 if (coeff == 0)
3340 return CONST0_RTX (mode);
3342 /* Special case powers of two. */
3343 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3345 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3346 return expand_shift (LSHIFT_EXPR, mode, op0,
3347 floor_log2 (coeff), target, unsignedp);
3350 /* Exclude cost of op0 from max_cost to match the cost
3351 calculation of the synth_mult. */
3352 max_cost = mul_widen_cost (speed, mode);
3353 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3354 max_cost))
3356 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3357 return expand_mult_const (mode, op0, coeff, target,
3358 &algorithm, variant);
3361 return expand_binop (mode, this_optab, op0, op1, target,
3362 unsignedp, OPTAB_LIB_WIDEN);
3365 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3366 replace division by D, and put the least significant N bits of the result
3367 in *MULTIPLIER_PTR and return the most significant bit.
3369 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3370 needed precision is in PRECISION (should be <= N).
3372 PRECISION should be as small as possible so this function can choose
3373 multiplier more freely.
3375 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3376 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3378 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3379 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
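/* For example, with D == 5 and N == PRECISION == 32 this returns
   *MULTIPLIER_PTR == 0xcccccccd, *POST_SHIFT_PTR == 2 and *LGUP_PTR == 3,
   so an unsigned 32-bit x / 5 becomes the high half of x * 0xcccccccd
   shifted right by 2.  */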
3381 unsigned HOST_WIDE_INT
3382 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3383 unsigned HOST_WIDE_INT *multiplier_ptr,
3384 int *post_shift_ptr, int *lgup_ptr)
3386 double_int mhigh, mlow;
3387 int lgup, post_shift;
3388 int pow, pow2;
3390 /* lgup = ceil(log2(divisor)); */
3391 lgup = ceil_log2 (d);
3393 gcc_assert (lgup <= n);
3395 pow = n + lgup;
3396 pow2 = n + lgup - precision;
3398 /* We could handle this with some effort, but this case is much
3399 better handled directly with a scc insn, so rely on caller using
3400 that. */
3401 gcc_assert (pow != HOST_BITS_PER_DOUBLE_INT);
3403 /* mlow = 2^(N + lgup)/d */
3404 double_int val = double_int_zero.set_bit (pow);
3405 mlow = val.div (double_int::from_uhwi (d), true, TRUNC_DIV_EXPR);
3407 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3408 val |= double_int_zero.set_bit (pow2);
3409 mhigh = val.div (double_int::from_uhwi (d), true, TRUNC_DIV_EXPR);
3411 gcc_assert (!mhigh.high || val.high - d < d);
3412 gcc_assert (mhigh.high <= 1 && mlow.high <= 1);
3413 /* Assert that mlow < mhigh. */
3414 gcc_assert (mlow.ult (mhigh));
3416 /* If precision == N, then mlow, mhigh exceed 2^N
3417 (but they do not exceed 2^(N+1)). */
3419 /* Reduce to lowest terms. */
3420 for (post_shift = lgup; post_shift > 0; post_shift--)
3422 int shft = HOST_BITS_PER_WIDE_INT - 1;
3423 unsigned HOST_WIDE_INT ml_lo = (mlow.high << shft) | (mlow.low >> 1);
3424 unsigned HOST_WIDE_INT mh_lo = (mhigh.high << shft) | (mhigh.low >> 1);
3425 if (ml_lo >= mh_lo)
3426 break;
3428 mlow = double_int::from_uhwi (ml_lo);
3429 mhigh = double_int::from_uhwi (mh_lo);
3432 *post_shift_ptr = post_shift;
3433 *lgup_ptr = lgup;
3434 if (n < HOST_BITS_PER_WIDE_INT)
3436 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3437 *multiplier_ptr = mhigh.low & mask;
3438 return mhigh.low >= mask;
3440 else
3442 *multiplier_ptr = mhigh.low;
3443 return mhigh.high;
3447 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3448 congruent to 1 (mod 2**N). */
3450 static unsigned HOST_WIDE_INT
3451 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3453 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3455 /* The algorithm notes that the choice y = x satisfies
3456 x*y == 1 mod 2^3, since x is assumed odd.
3457 Each iteration doubles the number of bits of significance in y. */
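/* For example, with x == 3 and n == 8 the loop below runs twice and
   yields y == 171; indeed 3 * 171 == 513 == 2 * 256 + 1.  */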
3459 unsigned HOST_WIDE_INT mask;
3460 unsigned HOST_WIDE_INT y = x;
3461 int nbit = 3;
3463 mask = (n == HOST_BITS_PER_WIDE_INT
3464 ? ~(unsigned HOST_WIDE_INT) 0
3465 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3467 while (nbit < n)
3469 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3470 nbit *= 2;
3472 return y;
3475 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3476 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3477 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3478 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3479 become signed.
3481 The result is put in TARGET if that is convenient.
3483 MODE is the mode of operation. */
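/* The adjustment relies on the identity, for N-bit operands a and b
   interpreted as signed values:
   high_signed (a, b) = high_unsigned (a, b)
                        - (a < 0 ? b : 0) - (b < 0 ? a : 0),
   computed in N-bit arithmetic.  The arithmetic shifts below produce an
   all-ones mask exactly when an operand is negative, and the AND then
   selects the other operand.  */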
3486 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3487 rtx op1, rtx target, int unsignedp)
3489 rtx tem;
3490 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3492 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3493 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3494 tem = expand_and (mode, tem, op1, NULL_RTX);
3495 adj_operand
3496 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3497 adj_operand);
3499 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3500 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3501 tem = expand_and (mode, tem, op0, NULL_RTX);
3502 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3503 target);
3505 return target;
3508 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3510 static rtx
3511 extract_high_half (enum machine_mode mode, rtx op)
3513 enum machine_mode wider_mode;
3515 if (mode == word_mode)
3516 return gen_highpart (mode, op);
3518 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3520 wider_mode = GET_MODE_WIDER_MODE (mode);
3521 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3522 GET_MODE_BITSIZE (mode), 0, 1);
3523 return convert_modes (mode, wider_mode, op, 0);
3526 /* Like expmed_mult_highpart, but only consider using a multiplication
3527 optab. OP1 is an rtx for the constant operand. */
3529 static rtx
3530 expmed_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3531 rtx target, int unsignedp, int max_cost)
3533 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3534 enum machine_mode wider_mode;
3535 optab moptab;
3536 rtx tem;
3537 int size;
3538 bool speed = optimize_insn_for_speed_p ();
3540 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3542 wider_mode = GET_MODE_WIDER_MODE (mode);
3543 size = GET_MODE_BITSIZE (mode);
3545 /* Firstly, try using a multiplication insn that only generates the needed
3546 high part of the product, and in the sign flavor of unsignedp. */
3547 if (mul_highpart_cost (speed, mode) < max_cost)
3549 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3550 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3551 unsignedp, OPTAB_DIRECT);
3552 if (tem)
3553 return tem;
3556 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3557 Need to adjust the result after the multiplication. */
3558 if (size - 1 < BITS_PER_WORD
3559 && (mul_highpart_cost (speed, mode)
3560 + 2 * shift_cost (speed, mode, size-1)
3561 + 4 * add_cost (speed, mode) < max_cost))
3563 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3564 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3565 unsignedp, OPTAB_DIRECT);
3566 if (tem)
3567 /* We used the wrong signedness. Adjust the result. */
3568 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3569 tem, unsignedp);
3572 /* Try widening multiplication. */
3573 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3574 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3575 && mul_widen_cost (speed, wider_mode) < max_cost)
3577 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3578 unsignedp, OPTAB_WIDEN);
3579 if (tem)
3580 return extract_high_half (mode, tem);
3583 /* Try widening the mode and perform a non-widening multiplication. */
3584 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3585 && size - 1 < BITS_PER_WORD
3586 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3587 < max_cost))
3589 rtx insns, wop0, wop1;
3591 /* We need to widen the operands, for example to ensure the
3592 constant multiplier is correctly sign or zero extended.
3593 Use a sequence to clean up any instructions emitted by
3594 the conversions if things don't work out. */
3595 start_sequence ();
3596 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3597 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3598 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3599 unsignedp, OPTAB_WIDEN);
3600 insns = get_insns ();
3601 end_sequence ();
3603 if (tem)
3605 emit_insn (insns);
3606 return extract_high_half (mode, tem);
3610 /* Try widening multiplication of opposite signedness, and adjust. */
3611 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3612 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3613 && size - 1 < BITS_PER_WORD
3614 && (mul_widen_cost (speed, wider_mode)
3615 + 2 * shift_cost (speed, mode, size-1)
3616 + 4 * add_cost (speed, mode) < max_cost))
3618 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3619 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3620 if (tem != 0)
3622 tem = extract_high_half (mode, tem);
3623 /* We used the wrong signedness. Adjust the result. */
3624 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3625 target, unsignedp);
3629 return 0;
3632 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3633 putting the high half of the result in TARGET if that is convenient,
3634 and return where the result is. If the operation can not be performed,
3635 0 is returned.
3637 MODE is the mode of operation and result.
3639 UNSIGNEDP nonzero means unsigned multiply.
3641 MAX_COST is the total allowed cost for the expanded RTL. */
3643 static rtx
3644 expmed_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3645 rtx target, int unsignedp, int max_cost)
3647 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3648 unsigned HOST_WIDE_INT cnst1;
3649 int extra_cost;
3650 bool sign_adjust = false;
3651 enum mult_variant variant;
3652 struct algorithm alg;
3653 rtx tem;
3654 bool speed = optimize_insn_for_speed_p ();
3656 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3657 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3658 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3660 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3662 /* We can't optimize modes wider than BITS_PER_WORD.
3663 ??? We might be able to perform double-word arithmetic if
3664 mode == word_mode, however all the cost calculations in
3665 synth_mult etc. assume single-word operations. */
3666 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3667 return expmed_mult_highpart_optab (mode, op0, op1, target,
3668 unsignedp, max_cost);
3670 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3672 /* Check whether we try to multiply by a negative constant. */
3673 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3675 sign_adjust = true;
3676 extra_cost += add_cost (speed, mode);
3679 /* See whether shift/add multiplication is cheap enough. */
3680 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3681 max_cost - extra_cost))
3683 /* See whether the specialized multiplication optabs are
3684 cheaper than the shift/add version. */
3685 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3686 alg.cost.cost + extra_cost);
3687 if (tem)
3688 return tem;
3690 tem = convert_to_mode (wider_mode, op0, unsignedp);
3691 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3692 tem = extract_high_half (mode, tem);
3694 /* Adjust result for signedness. */
3695 if (sign_adjust)
3696 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3698 return tem;
3700 return expmed_mult_highpart_optab (mode, op0, op1, target,
3701 unsignedp, max_cost);
3705 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3707 static rtx
3708 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3710 unsigned HOST_WIDE_INT masklow, maskhigh;
3711 rtx result, temp, shift, label;
3712 int logd;
3714 logd = floor_log2 (d);
3715 result = gen_reg_rtx (mode);
3717 /* Avoid conditional branches when they're expensive. */
3718 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3719 && optimize_insn_for_speed_p ())
3721 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3722 mode, 0, -1);
3723 if (signmask)
3725 signmask = force_reg (mode, signmask);
3726 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3727 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3729 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3730 which instruction sequence to use. If logical right shifts
3731 are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3732 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
3734 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3735 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3736 || (set_src_cost (temp, optimize_insn_for_speed_p ())
3737 > COSTS_N_INSNS (2)))
3739 temp = expand_binop (mode, xor_optab, op0, signmask,
3740 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3741 temp = expand_binop (mode, sub_optab, temp, signmask,
3742 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3743 temp = expand_binop (mode, and_optab, temp,
3744 gen_int_mode (masklow, mode),
3745 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3746 temp = expand_binop (mode, xor_optab, temp, signmask,
3747 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3748 temp = expand_binop (mode, sub_optab, temp, signmask,
3749 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3751 else
3753 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3754 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3755 signmask = force_reg (mode, signmask);
3757 temp = expand_binop (mode, add_optab, op0, signmask,
3758 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3759 temp = expand_binop (mode, and_optab, temp,
3760 gen_int_mode (masklow, mode),
3761 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3762 temp = expand_binop (mode, sub_optab, temp, signmask,
3763 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3765 return temp;
3769 /* Mask contains the mode's signbit and the significant bits of the
3770 modulus. By including the signbit in the operation, many targets
3771 can avoid an explicit compare operation in the following comparison
3772 against zero. */
3774 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3775 if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3777 masklow |= HOST_WIDE_INT_M1U << (GET_MODE_BITSIZE (mode) - 1);
3778 maskhigh = -1;
3780 else
3781 maskhigh = HOST_WIDE_INT_M1U
3782 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3784 temp = expand_binop (mode, and_optab, op0,
3785 immed_double_const (masklow, maskhigh, mode),
3786 result, 1, OPTAB_LIB_WIDEN);
3787 if (temp != result)
3788 emit_move_insn (result, temp);
3790 label = gen_label_rtx ();
3791 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3793 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3794 0, OPTAB_LIB_WIDEN);
3795 masklow = HOST_WIDE_INT_M1U << logd;
3796 maskhigh = -1;
3797 temp = expand_binop (mode, ior_optab, temp,
3798 immed_double_const (masklow, maskhigh, mode),
3799 result, 1, OPTAB_LIB_WIDEN);
3800 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3801 0, OPTAB_LIB_WIDEN);
3802 if (temp != result)
3803 emit_move_insn (result, temp);
3804 emit_label (label);
3805 return result;
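/* Editor's sketch (not part of the original file): the branch-free
   "2 XORs, 2 SUBs and an AND" sequence chosen above when logical
   right shifts are expensive, written for a 32-bit int and D = 2^LOGD.
   It assumes an arithmetic right shift for negative ints and ignores
   the INT_MIN corner case; SIGNMASK plays the role of the
   emit_store_flag result (-1 if OP0 is negative, else 0).  */
static int
smod_pow2_sketch (int op0, int logd)
{
  int signmask = op0 >> 31;                        /* -1 or 0 */
  int masklow = (1 << logd) - 1;
  int t = ((op0 ^ signmask) - signmask) & masklow; /* |op0| mod 2^logd */
  return (t ^ signmask) - signmask;                /* restore the sign */
}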
3808 /* Expand signed division of OP0 by a power of two D in mode MODE.
3809 This routine is only called for positive values of D. */
3811 static rtx
3812 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3814 rtx temp, label;
3815 int logd;
3817 logd = floor_log2 (d);
3819 if (d == 2
3820 && BRANCH_COST (optimize_insn_for_speed_p (),
3821 false) >= 1)
3823 temp = gen_reg_rtx (mode);
3824 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3825 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3826 0, OPTAB_LIB_WIDEN);
3827 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3830 #ifdef HAVE_conditional_move
3831 if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3832 >= 2)
3834 rtx temp2;
3836 start_sequence ();
3837 temp2 = copy_to_mode_reg (mode, op0);
3838 temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
3839 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3840 temp = force_reg (mode, temp);
3842 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3843 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3844 mode, temp, temp2, mode, 0);
3845 if (temp2)
3847 rtx seq = get_insns ();
3848 end_sequence ();
3849 emit_insn (seq);
3850 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3852 end_sequence ();
3854 #endif
3856 if (BRANCH_COST (optimize_insn_for_speed_p (),
3857 false) >= 2)
3859 int ushift = GET_MODE_BITSIZE (mode) - logd;
3861 temp = gen_reg_rtx (mode);
3862 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3863 if (shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3864 > COSTS_N_INSNS (1))
3865 temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
3866 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3867 else
3868 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3869 ushift, NULL_RTX, 1);
3870 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3871 0, OPTAB_LIB_WIDEN);
3872 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3875 label = gen_label_rtx ();
3876 temp = copy_to_mode_reg (mode, op0);
3877 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3878 expand_inc (temp, gen_int_mode (d - 1, mode));
3879 emit_label (label);
3880 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
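/* Editor's sketch (not part of the original file): the branchless
   shape emitted above when BRANCH_COST >= 2, for a 32-bit int and
   D = 2^LOGD, assuming an arithmetic right shift for negative ints.
   Adding D - 1 only when OP0 is negative turns the floor behaviour
   of the final arithmetic shift into truncation toward zero.  */
static int
sdiv_pow2_sketch (int op0, int logd)
{
  int sign = op0 >> 31;                  /* -1 if op0 < 0, else 0 */
  return (op0 + (sign & ((1 << logd) - 1))) >> logd;
}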
3883 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3884 if that is convenient, and returning where the result is.
3885 You may request either the quotient or the remainder as the result;
3886 specify REM_FLAG nonzero to get the remainder.
3888 CODE is the expression code for which kind of division this is;
3889 it controls how rounding is done. MODE is the machine mode to use.
3890 UNSIGNEDP nonzero means do unsigned division. */
3892 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3893 and then correct it by or'ing in missing high bits
3894 if result of ANDI is nonzero.
3895 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3896 This could optimize to a bfexts instruction.
3897 But C doesn't use these operations, so their optimizations are
3898 left for later. */
3899 /* ??? For modulo, we don't actually need the highpart of the first product,
3900 the low part will do nicely. And for small divisors, the second multiply
3901 can also be a low-part only multiply or even be completely left out.
3902 E.g. to calculate the remainder of a division by 3 with a 32 bit
3903 multiply, multiply with 0x55555556 and extract the upper two bits;
3904 the result is exact for inputs up to 0x1fffffff.
3905 The input range can be reduced by using cross-sum rules.
3906 For odd divisors >= 3, the following table gives right shift counts
3907 so that if a number is shifted by an integer multiple of the given
3908 amount, the remainder stays the same:
3909 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3910 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3911 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3912 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3913 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3915 Cross-sum rules for even numbers can be derived by leaving as many bits
3916 to the right alone as the divisor has zeros to the right.
3917 E.g. if x is an unsigned 32 bit number:
3918 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
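/* Editor's sketch (not part of the original file): the "remainder of a
   division by 3 with a 32 bit multiply" trick from the comment above.
   0x55555556 approximates 2^32 / 3, so the top two bits of the low
   32-bit product encode x mod 3; per the comment this is exact for
   x up to 0x1fffffff.  */
static unsigned int
rem3_sketch (unsigned int x)
{
  return (x * 0x55555556u) >> 30;
}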
3922 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3923 rtx op0, rtx op1, rtx target, int unsignedp)
3925 enum machine_mode compute_mode;
3926 rtx tquotient;
3927 rtx quotient = 0, remainder = 0;
3928 rtx last;
3929 int size;
3930 rtx insn;
3931 optab optab1, optab2;
3932 int op1_is_constant, op1_is_pow2 = 0;
3933 int max_cost, extra_cost;
3934 static HOST_WIDE_INT last_div_const = 0;
3935 bool speed = optimize_insn_for_speed_p ();
3937 op1_is_constant = CONST_INT_P (op1);
3938 if (op1_is_constant)
3940 unsigned HOST_WIDE_INT ext_op1 = UINTVAL (op1);
3941 if (unsignedp)
3942 ext_op1 &= GET_MODE_MASK (mode);
3943 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3944 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3948 This is the structure of expand_divmod:
3950 First comes code to fix up the operands so we can perform the operations
3951 correctly and efficiently.
3953 Second comes a switch statement with code specific for each rounding mode.
3954 For some special operands this code emits all RTL for the desired
3955 operation, for other cases, it generates only a quotient and stores it in
3956 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3957 to indicate that it has not done anything.
3959 Last comes code that finishes the operation. If QUOTIENT is set and
3960 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3961 QUOTIENT is not set, it is computed using trunc rounding.
3963 We try to generate special code for division and remainder when OP1 is a
3964 constant. If |OP1| = 2**n we can use shifts and some other fast
3965 operations. For other values of OP1, we compute a carefully selected
3966 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3967 by m.
3969 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3970 half of the product. Different strategies for generating the product are
3971 implemented in expmed_mult_highpart.
3973 If what we actually want is the remainder, we generate that by another
3974 by-constant multiplication and a subtraction. */
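/* Editor's sketch (not part of the original file): the shape of the
   signed TRUNC_DIV_EXPR expansion below when the multiplier M chosen
   by choose_multiplier (abs_d, 32, 31, ...) fits in 31 bits, written
   for a 32-bit mode on a 64-bit host and assuming arithmetic right
   shifts.  Subtracting x >> 31 adds 1 for negative x, giving
   truncation toward zero; for negative divisors the quotient is then
   negated, as the code below also does.  */
static int
sdiv_by_const_sketch (int x, int m, int post_shift, int d_is_negative)
{
  int hi = (int) (((long long) x * m) >> 32);  /* signed high part */
  int q = (hi >> post_shift) - (x >> 31);
  return d_is_negative ? -q : q;
}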
3976 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3977 code below will malfunction if we are, so check here and handle
3978 the special case if so. */
3979 if (op1 == const1_rtx)
3980 return rem_flag ? const0_rtx : op0;
3982 /* When dividing by -1, we could get an overflow.
3983 negv_optab can handle overflows. */
3984 if (! unsignedp && op1 == constm1_rtx)
3986 if (rem_flag)
3987 return const0_rtx;
3988 return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
3989 ? negv_optab : neg_optab, op0, target, 0);
3992 if (target
3993 /* Don't use the function value register as a target
3994 since we have to read it as well as write it,
3995 and function-inlining gets confused by this. */
3996 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3997 /* Don't clobber an operand while doing a multi-step calculation. */
3998 || ((rem_flag || op1_is_constant)
3999 && (reg_mentioned_p (target, op0)
4000 || (MEM_P (op0) && MEM_P (target))))
4001 || reg_mentioned_p (target, op1)
4002 || (MEM_P (op1) && MEM_P (target))))
4003 target = 0;
4005 /* Get the mode in which to perform this computation. Normally it will
4006 be MODE, but sometimes we can't do the desired operation in MODE.
4007 If so, pick a wider mode in which we can do the operation. Convert
4008 to that mode at the start to avoid repeated conversions.
4010 First see what operations we need. These depend on the expression
4011 we are evaluating. (We assume that divxx3 insns exist under the
4012 same conditions as modxx3 insns and that these insns don't normally
4013 fail. If these assumptions are not correct, we may generate less
4014 efficient code in some cases.)
4016 Then see if we find a mode in which we can open-code that operation
4017 (either a division, modulus, or shift). Finally, check for the smallest
4018 mode for which we can do the operation with a library call. */
4020 /* We might want to refine this now that we have division-by-constant
4021 optimization. Since expmed_mult_highpart tries so many variants, it is
4022 not straightforward to generalize this. Maybe we should make an array
4023 of possible modes in init_expmed? Save this for GCC 2.7. */
4025 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
4026 ? (unsignedp ? lshr_optab : ashr_optab)
4027 : (unsignedp ? udiv_optab : sdiv_optab));
4028 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
4029 ? optab1
4030 : (unsignedp ? udivmod_optab : sdivmod_optab));
4032 for (compute_mode = mode; compute_mode != VOIDmode;
4033 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4034 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4035 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4036 break;
4038 if (compute_mode == VOIDmode)
4039 for (compute_mode = mode; compute_mode != VOIDmode;
4040 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4041 if (optab_libfunc (optab1, compute_mode)
4042 || optab_libfunc (optab2, compute_mode))
4043 break;
4045 /* If we still couldn't find a mode, use MODE, but expand_binop will
4046 probably die. */
4047 if (compute_mode == VOIDmode)
4048 compute_mode = mode;
4050 if (target && GET_MODE (target) == compute_mode)
4051 tquotient = target;
4052 else
4053 tquotient = gen_reg_rtx (compute_mode);
4055 size = GET_MODE_BITSIZE (compute_mode);
4056 #if 0
4057 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4058 (mode), and thereby get better code when OP1 is a constant. Do that
4059 later. It will require going over all usages of SIZE below. */
4060 size = GET_MODE_BITSIZE (mode);
4061 #endif
4063 /* Only deduct something for a REM if the last divide done was
4064 for a different constant. Then set the constant of the last
4065 divide. */
4066 max_cost = (unsignedp
4067 ? udiv_cost (speed, compute_mode)
4068 : sdiv_cost (speed, compute_mode));
4069 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4070 && INTVAL (op1) == last_div_const))
4071 max_cost -= (mul_cost (speed, compute_mode)
4072 + add_cost (speed, compute_mode));
4074 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4076 /* Now convert to the best mode to use. */
4077 if (compute_mode != mode)
4079 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4080 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4082 /* convert_modes may have placed op1 into a register, so we
4083 must recompute the following. */
4084 op1_is_constant = CONST_INT_P (op1);
4085 op1_is_pow2 = (op1_is_constant
4086 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4087 || (! unsignedp
4088 && EXACT_POWER_OF_2_OR_ZERO_P (-UINTVAL (op1))))));
4091 /* If one of the operands is a volatile MEM, copy it into a register. */
4093 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4094 op0 = force_reg (compute_mode, op0);
4095 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4096 op1 = force_reg (compute_mode, op1);
4098 /* If we need the remainder or if OP1 is constant, we need to
4099 put OP0 in a register in case it has any queued subexpressions. */
4100 if (rem_flag || op1_is_constant)
4101 op0 = force_reg (compute_mode, op0);
4103 last = get_last_insn ();
4105 /* Promote floor rounding to trunc rounding for unsigned operations. */
4106 if (unsignedp)
4108 if (code == FLOOR_DIV_EXPR)
4109 code = TRUNC_DIV_EXPR;
4110 if (code == FLOOR_MOD_EXPR)
4111 code = TRUNC_MOD_EXPR;
4112 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4113 code = TRUNC_DIV_EXPR;
4116 if (op1 != const0_rtx)
4117 switch (code)
4119 case TRUNC_MOD_EXPR:
4120 case TRUNC_DIV_EXPR:
4121 if (op1_is_constant)
4123 if (unsignedp)
4125 unsigned HOST_WIDE_INT mh, ml;
4126 int pre_shift, post_shift;
4127 int dummy;
4128 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4129 & GET_MODE_MASK (compute_mode));
4131 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4133 pre_shift = floor_log2 (d);
4134 if (rem_flag)
4136 unsigned HOST_WIDE_INT mask
4137 = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4138 remainder
4139 = expand_binop (compute_mode, and_optab, op0,
4140 gen_int_mode (mask, compute_mode),
4141 remainder, 1,
4142 OPTAB_LIB_WIDEN);
4143 if (remainder)
4144 return gen_lowpart (mode, remainder);
4146 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4147 pre_shift, tquotient, 1);
4149 else if (size <= HOST_BITS_PER_WIDE_INT)
4151 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4153 /* Most significant bit of divisor is set; emit an scc
4154 insn. */
4155 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4156 compute_mode, 1, 1);
4158 else
4160 /* Find a suitable multiplier and right shift count
4161 instead of multiplying with D. */
4163 mh = choose_multiplier (d, size, size,
4164 &ml, &post_shift, &dummy);
4166 /* If the suggested multiplier is more than SIZE bits,
4167 we can do better for even divisors, using an
4168 initial right shift. */
4169 if (mh != 0 && (d & 1) == 0)
4171 pre_shift = floor_log2 (d & -d);
4172 mh = choose_multiplier (d >> pre_shift, size,
4173 size - pre_shift,
4174 &ml, &post_shift, &dummy);
4175 gcc_assert (!mh);
4177 else
4178 pre_shift = 0;
4180 if (mh != 0)
4182 rtx t1, t2, t3, t4;
4184 if (post_shift - 1 >= BITS_PER_WORD)
4185 goto fail1;
4187 extra_cost
4188 = (shift_cost (speed, compute_mode, post_shift - 1)
4189 + shift_cost (speed, compute_mode, 1)
4190 + 2 * add_cost (speed, compute_mode));
4191 t1 = expmed_mult_highpart
4192 (compute_mode, op0,
4193 gen_int_mode (ml, compute_mode),
4194 NULL_RTX, 1, max_cost - extra_cost);
4195 if (t1 == 0)
4196 goto fail1;
4197 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4198 op0, t1),
4199 NULL_RTX);
4200 t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4201 t2, 1, NULL_RTX, 1);
4202 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4203 t1, t3),
4204 NULL_RTX);
4205 quotient = expand_shift
4206 (RSHIFT_EXPR, compute_mode, t4,
4207 post_shift - 1, tquotient, 1);
4209 else
4211 rtx t1, t2;
4213 if (pre_shift >= BITS_PER_WORD
4214 || post_shift >= BITS_PER_WORD)
4215 goto fail1;
4217 t1 = expand_shift
4218 (RSHIFT_EXPR, compute_mode, op0,
4219 pre_shift, NULL_RTX, 1);
4220 extra_cost
4221 = (shift_cost (speed, compute_mode, pre_shift)
4222 + shift_cost (speed, compute_mode, post_shift));
4223 t2 = expmed_mult_highpart
4224 (compute_mode, t1,
4225 gen_int_mode (ml, compute_mode),
4226 NULL_RTX, 1, max_cost - extra_cost);
4227 if (t2 == 0)
4228 goto fail1;
4229 quotient = expand_shift
4230 (RSHIFT_EXPR, compute_mode, t2,
4231 post_shift, tquotient, 1);
4235 else /* Mode too wide to use tricky code */
4236 break;
4238 insn = get_last_insn ();
4239 if (insn != last)
4240 set_dst_reg_note (insn, REG_EQUAL,
4241 gen_rtx_UDIV (compute_mode, op0, op1),
4242 quotient);
4244 else /* TRUNC_DIV, signed */
4246 unsigned HOST_WIDE_INT ml;
4247 int lgup, post_shift;
4248 rtx mlr;
4249 HOST_WIDE_INT d = INTVAL (op1);
4250 unsigned HOST_WIDE_INT abs_d;
4252 /* Since d might be INT_MIN, we have to cast to
4253 unsigned HOST_WIDE_INT before negating to avoid
4254 undefined signed overflow. */
4255 abs_d = (d >= 0
4256 ? (unsigned HOST_WIDE_INT) d
4257 : - (unsigned HOST_WIDE_INT) d);
4259 /* n rem d = n rem -d */
4260 if (rem_flag && d < 0)
4262 d = abs_d;
4263 op1 = gen_int_mode (abs_d, compute_mode);
4266 if (d == 1)
4267 quotient = op0;
4268 else if (d == -1)
4269 quotient = expand_unop (compute_mode, neg_optab, op0,
4270 tquotient, 0);
4271 else if (HOST_BITS_PER_WIDE_INT >= size
4272 && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4274 /* This case is not handled correctly below. */
4275 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4276 compute_mode, 1, 1);
4277 if (quotient == 0)
4278 goto fail1;
4280 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4281 && (rem_flag
4282 ? smod_pow2_cheap (speed, compute_mode)
4283 : sdiv_pow2_cheap (speed, compute_mode))
4284 /* We assume that the cheap metric is true if the
4285 optab has an expander for this mode. */
4286 && ((optab_handler ((rem_flag ? smod_optab
4287 : sdiv_optab),
4288 compute_mode)
4289 != CODE_FOR_nothing)
4290 || (optab_handler (sdivmod_optab,
4291 compute_mode)
4292 != CODE_FOR_nothing)))
4294 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4296 if (rem_flag)
4298 remainder = expand_smod_pow2 (compute_mode, op0, d);
4299 if (remainder)
4300 return gen_lowpart (mode, remainder);
4303 if (sdiv_pow2_cheap (speed, compute_mode)
4304 && ((optab_handler (sdiv_optab, compute_mode)
4305 != CODE_FOR_nothing)
4306 || (optab_handler (sdivmod_optab, compute_mode)
4307 != CODE_FOR_nothing)))
4308 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4309 compute_mode, op0,
4310 gen_int_mode (abs_d,
4311 compute_mode),
4312 NULL_RTX, 0);
4313 else
4314 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4316 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4317 negate the quotient. */
4318 if (d < 0)
4320 insn = get_last_insn ();
4321 if (insn != last
4322 && abs_d < ((unsigned HOST_WIDE_INT) 1
4323 << (HOST_BITS_PER_WIDE_INT - 1)))
4324 set_dst_reg_note (insn, REG_EQUAL,
4325 gen_rtx_DIV (compute_mode, op0,
4326 gen_int_mode
4327 (abs_d,
4328 compute_mode)),
4329 quotient);
4331 quotient = expand_unop (compute_mode, neg_optab,
4332 quotient, quotient, 0);
4335 else if (size <= HOST_BITS_PER_WIDE_INT)
4337 choose_multiplier (abs_d, size, size - 1,
4338 &ml, &post_shift, &lgup);
4339 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4341 rtx t1, t2, t3;
4343 if (post_shift >= BITS_PER_WORD
4344 || size - 1 >= BITS_PER_WORD)
4345 goto fail1;
4347 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4348 + shift_cost (speed, compute_mode, size - 1)
4349 + add_cost (speed, compute_mode));
4350 t1 = expmed_mult_highpart
4351 (compute_mode, op0, gen_int_mode (ml, compute_mode),
4352 NULL_RTX, 0, max_cost - extra_cost);
4353 if (t1 == 0)
4354 goto fail1;
4355 t2 = expand_shift
4356 (RSHIFT_EXPR, compute_mode, t1,
4357 post_shift, NULL_RTX, 0);
4358 t3 = expand_shift
4359 (RSHIFT_EXPR, compute_mode, op0,
4360 size - 1, NULL_RTX, 0);
4361 if (d < 0)
4362 quotient
4363 = force_operand (gen_rtx_MINUS (compute_mode,
4364 t3, t2),
4365 tquotient);
4366 else
4367 quotient
4368 = force_operand (gen_rtx_MINUS (compute_mode,
4369 t2, t3),
4370 tquotient);
4372 else
4374 rtx t1, t2, t3, t4;
4376 if (post_shift >= BITS_PER_WORD
4377 || size - 1 >= BITS_PER_WORD)
4378 goto fail1;
4380 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4381 mlr = gen_int_mode (ml, compute_mode);
4382 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4383 + shift_cost (speed, compute_mode, size - 1)
4384 + 2 * add_cost (speed, compute_mode));
4385 t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4386 NULL_RTX, 0,
4387 max_cost - extra_cost);
4388 if (t1 == 0)
4389 goto fail1;
4390 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4391 t1, op0),
4392 NULL_RTX);
4393 t3 = expand_shift
4394 (RSHIFT_EXPR, compute_mode, t2,
4395 post_shift, NULL_RTX, 0);
4396 t4 = expand_shift
4397 (RSHIFT_EXPR, compute_mode, op0,
4398 size - 1, NULL_RTX, 0);
4399 if (d < 0)
4400 quotient
4401 = force_operand (gen_rtx_MINUS (compute_mode,
4402 t4, t3),
4403 tquotient);
4404 else
4405 quotient
4406 = force_operand (gen_rtx_MINUS (compute_mode,
4407 t3, t4),
4408 tquotient);
4411 else /* Mode too wide to use tricky code */
4412 break;
4414 insn = get_last_insn ();
4415 if (insn != last)
4416 set_dst_reg_note (insn, REG_EQUAL,
4417 gen_rtx_DIV (compute_mode, op0, op1),
4418 quotient);
4420 break;
4422 fail1:
4423 delete_insns_since (last);
4424 break;
4426 case FLOOR_DIV_EXPR:
4427 case FLOOR_MOD_EXPR:
4428 /* We will come here only for signed operations. */
4429 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4431 unsigned HOST_WIDE_INT mh, ml;
4432 int pre_shift, lgup, post_shift;
4433 HOST_WIDE_INT d = INTVAL (op1);
4435 if (d > 0)
4437 /* We could just as easily deal with negative constants here,
4438 but it does not seem worth the trouble for GCC 2.6. */
4439 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4441 pre_shift = floor_log2 (d);
4442 if (rem_flag)
4444 unsigned HOST_WIDE_INT mask
4445 = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4446 remainder = expand_binop
4447 (compute_mode, and_optab, op0,
4448 gen_int_mode (mask, compute_mode),
4449 remainder, 0, OPTAB_LIB_WIDEN);
4450 if (remainder)
4451 return gen_lowpart (mode, remainder);
4453 quotient = expand_shift
4454 (RSHIFT_EXPR, compute_mode, op0,
4455 pre_shift, tquotient, 0);
4457 else
4459 rtx t1, t2, t3, t4;
4461 mh = choose_multiplier (d, size, size - 1,
4462 &ml, &post_shift, &lgup);
4463 gcc_assert (!mh);
4465 if (post_shift < BITS_PER_WORD
4466 && size - 1 < BITS_PER_WORD)
4468 t1 = expand_shift
4469 (RSHIFT_EXPR, compute_mode, op0,
4470 size - 1, NULL_RTX, 0);
4471 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4472 NULL_RTX, 0, OPTAB_WIDEN);
4473 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4474 + shift_cost (speed, compute_mode, size - 1)
4475 + 2 * add_cost (speed, compute_mode));
4476 t3 = expmed_mult_highpart
4477 (compute_mode, t2, gen_int_mode (ml, compute_mode),
4478 NULL_RTX, 1, max_cost - extra_cost);
4479 if (t3 != 0)
4481 t4 = expand_shift
4482 (RSHIFT_EXPR, compute_mode, t3,
4483 post_shift, NULL_RTX, 1);
4484 quotient = expand_binop (compute_mode, xor_optab,
4485 t4, t1, tquotient, 0,
4486 OPTAB_WIDEN);
4491 else
4493 rtx nsign, t1, t2, t3, t4;
4494 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4495 op0, constm1_rtx), NULL_RTX);
4496 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4497 0, OPTAB_WIDEN);
4498 nsign = expand_shift
4499 (RSHIFT_EXPR, compute_mode, t2,
4500 size - 1, NULL_RTX, 0);
4501 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4502 NULL_RTX);
4503 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4504 NULL_RTX, 0);
4505 if (t4)
4507 rtx t5;
4508 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4509 NULL_RTX, 0);
4510 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4511 t4, t5),
4512 tquotient);
4517 if (quotient != 0)
4518 break;
4519 delete_insns_since (last);
4521 /* Try using an instruction that produces both the quotient and
4522 remainder, using truncation. We can easily compensate the quotient
4523 or remainder to get floor rounding, once we have the remainder.
4524 Notice that we compute also the final remainder value here,
4525 and return the result right away. */
4526 if (target == 0 || GET_MODE (target) != compute_mode)
4527 target = gen_reg_rtx (compute_mode);
4529 if (rem_flag)
4531 remainder
4532 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4533 quotient = gen_reg_rtx (compute_mode);
4535 else
4537 quotient
4538 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4539 remainder = gen_reg_rtx (compute_mode);
4542 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4543 quotient, remainder, 0))
4545 /* This could be computed with a branch-less sequence.
4546 Save that for later. */
4547 rtx tem;
4548 rtx label = gen_label_rtx ();
4549 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4550 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4551 NULL_RTX, 0, OPTAB_WIDEN);
4552 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4553 expand_dec (quotient, const1_rtx);
4554 expand_inc (remainder, op1);
4555 emit_label (label);
4556 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4559 /* No luck with division elimination or divmod. Have to do it
4560 by conditionally adjusting op0 *and* the result. */
4562 rtx label1, label2, label3, label4, label5;
4563 rtx adjusted_op0;
4564 rtx tem;
4566 quotient = gen_reg_rtx (compute_mode);
4567 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4568 label1 = gen_label_rtx ();
4569 label2 = gen_label_rtx ();
4570 label3 = gen_label_rtx ();
4571 label4 = gen_label_rtx ();
4572 label5 = gen_label_rtx ();
4573 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4574 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4575 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4576 quotient, 0, OPTAB_LIB_WIDEN);
4577 if (tem != quotient)
4578 emit_move_insn (quotient, tem);
4579 emit_jump_insn (gen_jump (label5));
4580 emit_barrier ();
4581 emit_label (label1);
4582 expand_inc (adjusted_op0, const1_rtx);
4583 emit_jump_insn (gen_jump (label4));
4584 emit_barrier ();
4585 emit_label (label2);
4586 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4587 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4588 quotient, 0, OPTAB_LIB_WIDEN);
4589 if (tem != quotient)
4590 emit_move_insn (quotient, tem);
4591 emit_jump_insn (gen_jump (label5));
4592 emit_barrier ();
4593 emit_label (label3);
4594 expand_dec (adjusted_op0, const1_rtx);
4595 emit_label (label4);
4596 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4597 quotient, 0, OPTAB_LIB_WIDEN);
4598 if (tem != quotient)
4599 emit_move_insn (quotient, tem);
4600 expand_dec (quotient, const1_rtx);
4601 emit_label (label5);
4603 break;
4605 case CEIL_DIV_EXPR:
4606 case CEIL_MOD_EXPR:
4607 if (unsignedp)
4609 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4611 rtx t1, t2, t3;
4612 unsigned HOST_WIDE_INT d = INTVAL (op1);
4613 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4614 floor_log2 (d), tquotient, 1);
4615 t2 = expand_binop (compute_mode, and_optab, op0,
4616 gen_int_mode (d - 1, compute_mode),
4617 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4618 t3 = gen_reg_rtx (compute_mode);
4619 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4620 compute_mode, 1, 1);
4621 if (t3 == 0)
4623 rtx lab;
4624 lab = gen_label_rtx ();
4625 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4626 expand_inc (t1, const1_rtx);
4627 emit_label (lab);
4628 quotient = t1;
4630 else
4631 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4632 t1, t3),
4633 tquotient);
4634 break;
4637 /* Try using an instruction that produces both the quotient and
4638 remainder, using truncation. We can easily compensate the
4639 quotient or remainder to get ceiling rounding, once we have the
4640 remainder. Notice that we compute also the final remainder
4641 value here, and return the result right away. */
4642 if (target == 0 || GET_MODE (target) != compute_mode)
4643 target = gen_reg_rtx (compute_mode);
4645 if (rem_flag)
4647 remainder = (REG_P (target)
4648 ? target : gen_reg_rtx (compute_mode));
4649 quotient = gen_reg_rtx (compute_mode);
4651 else
4653 quotient = (REG_P (target)
4654 ? target : gen_reg_rtx (compute_mode));
4655 remainder = gen_reg_rtx (compute_mode);
4658 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4659 remainder, 1))
4661 /* This could be computed with a branch-less sequence.
4662 Save that for later. */
4663 rtx label = gen_label_rtx ();
4664 do_cmp_and_jump (remainder, const0_rtx, EQ,
4665 compute_mode, label);
4666 expand_inc (quotient, const1_rtx);
4667 expand_dec (remainder, op1);
4668 emit_label (label);
4669 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4672 /* No luck with division elimination or divmod. Have to do it
4673 by conditionally adjusting op0 *and* the result. */
4675 rtx label1, label2;
4676 rtx adjusted_op0, tem;
4678 quotient = gen_reg_rtx (compute_mode);
4679 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4680 label1 = gen_label_rtx ();
4681 label2 = gen_label_rtx ();
4682 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4683 compute_mode, label1);
4684 emit_move_insn (quotient, const0_rtx);
4685 emit_jump_insn (gen_jump (label2));
4686 emit_barrier ();
4687 emit_label (label1);
4688 expand_dec (adjusted_op0, const1_rtx);
4689 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4690 quotient, 1, OPTAB_LIB_WIDEN);
4691 if (tem != quotient)
4692 emit_move_insn (quotient, tem);
4693 expand_inc (quotient, const1_rtx);
4694 emit_label (label2);
4697 else /* signed */
4699 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4700 && INTVAL (op1) >= 0)
4702 /* This is extremely similar to the code for the unsigned case
4703 above. For 2.7 we should merge these variants, but for
4704 2.6.1 I don't want to touch the code for unsigned since that
4705 gets used in C. The signed case will only be used by other
4706 languages (Ada). */
4708 rtx t1, t2, t3;
4709 unsigned HOST_WIDE_INT d = INTVAL (op1);
4710 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4711 floor_log2 (d), tquotient, 0);
4712 t2 = expand_binop (compute_mode, and_optab, op0,
4713 gen_int_mode (d - 1, compute_mode),
4714 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4715 t3 = gen_reg_rtx (compute_mode);
4716 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4717 compute_mode, 1, 1);
4718 if (t3 == 0)
4720 rtx lab;
4721 lab = gen_label_rtx ();
4722 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4723 expand_inc (t1, const1_rtx);
4724 emit_label (lab);
4725 quotient = t1;
4727 else
4728 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4729 t1, t3),
4730 tquotient);
4731 break;
4734 /* Try using an instruction that produces both the quotient and
4735 remainder, using truncation. We can easily compensate the
4736 quotient or remainder to get ceiling rounding, once we have the
4737 remainder. Notice that we compute also the final remainder
4738 value here, and return the result right away. */
4739 if (target == 0 || GET_MODE (target) != compute_mode)
4740 target = gen_reg_rtx (compute_mode);
4741 if (rem_flag)
4743 remainder = (REG_P (target)
4744 ? target : gen_reg_rtx (compute_mode));
4745 quotient = gen_reg_rtx (compute_mode);
4747 else
4749 quotient = (REG_P (target)
4750 ? target : gen_reg_rtx (compute_mode));
4751 remainder = gen_reg_rtx (compute_mode);
4754 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4755 remainder, 0))
4757 /* This could be computed with a branch-less sequence.
4758 Save that for later. */
4759 rtx tem;
4760 rtx label = gen_label_rtx ();
4761 do_cmp_and_jump (remainder, const0_rtx, EQ,
4762 compute_mode, label);
4763 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4764 NULL_RTX, 0, OPTAB_WIDEN);
4765 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4766 expand_inc (quotient, const1_rtx);
4767 expand_dec (remainder, op1);
4768 emit_label (label);
4769 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4772 /* No luck with division elimination or divmod. Have to do it
4773 by conditionally adjusting op0 *and* the result. */
4775 rtx label1, label2, label3, label4, label5;
4776 rtx adjusted_op0;
4777 rtx tem;
4779 quotient = gen_reg_rtx (compute_mode);
4780 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4781 label1 = gen_label_rtx ();
4782 label2 = gen_label_rtx ();
4783 label3 = gen_label_rtx ();
4784 label4 = gen_label_rtx ();
4785 label5 = gen_label_rtx ();
4786 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4787 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4788 compute_mode, label1);
4789 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4790 quotient, 0, OPTAB_LIB_WIDEN);
4791 if (tem != quotient)
4792 emit_move_insn (quotient, tem);
4793 emit_jump_insn (gen_jump (label5));
4794 emit_barrier ();
4795 emit_label (label1);
4796 expand_dec (adjusted_op0, const1_rtx);
4797 emit_jump_insn (gen_jump (label4));
4798 emit_barrier ();
4799 emit_label (label2);
4800 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4801 compute_mode, label3);
4802 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4803 quotient, 0, OPTAB_LIB_WIDEN);
4804 if (tem != quotient)
4805 emit_move_insn (quotient, tem);
4806 emit_jump_insn (gen_jump (label5));
4807 emit_barrier ();
4808 emit_label (label3);
4809 expand_inc (adjusted_op0, const1_rtx);
4810 emit_label (label4);
4811 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4812 quotient, 0, OPTAB_LIB_WIDEN);
4813 if (tem != quotient)
4814 emit_move_insn (quotient, tem);
4815 expand_inc (quotient, const1_rtx);
4816 emit_label (label5);
4819 break;
4821 case EXACT_DIV_EXPR:
4822 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4824 HOST_WIDE_INT d = INTVAL (op1);
4825 unsigned HOST_WIDE_INT ml;
4826 int pre_shift;
4827 rtx t1;
4829 pre_shift = floor_log2 (d & -d);
4830 ml = invert_mod2n (d >> pre_shift, size);
4831 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4832 pre_shift, NULL_RTX, unsignedp);
4833 quotient = expand_mult (compute_mode, t1,
4834 gen_int_mode (ml, compute_mode),
4835 NULL_RTX, 1);
4837 insn = get_last_insn ();
4838 set_dst_reg_note (insn, REG_EQUAL,
4839 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4840 compute_mode, op0, op1),
4841 quotient);
4843 break;
4845 case ROUND_DIV_EXPR:
4846 case ROUND_MOD_EXPR:
4847 if (unsignedp)
4849 rtx tem;
4850 rtx label;
4851 label = gen_label_rtx ();
4852 quotient = gen_reg_rtx (compute_mode);
4853 remainder = gen_reg_rtx (compute_mode);
4854 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4856 rtx tem;
4857 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4858 quotient, 1, OPTAB_LIB_WIDEN);
4859 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4860 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4861 remainder, 1, OPTAB_LIB_WIDEN);
4863 tem = plus_constant (compute_mode, op1, -1);
4864 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4865 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4866 expand_inc (quotient, const1_rtx);
4867 expand_dec (remainder, op1);
4868 emit_label (label);
4870 else
4872 rtx abs_rem, abs_op1, tem, mask;
4873 rtx label;
4874 label = gen_label_rtx ();
4875 quotient = gen_reg_rtx (compute_mode);
4876 remainder = gen_reg_rtx (compute_mode);
4877 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4879 rtx tem;
4880 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4881 quotient, 0, OPTAB_LIB_WIDEN);
4882 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4883 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4884 remainder, 0, OPTAB_LIB_WIDEN);
4886 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4887 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4888 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4889 1, NULL_RTX, 1);
4890 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4891 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4892 NULL_RTX, 0, OPTAB_WIDEN);
4893 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4894 size - 1, NULL_RTX, 0);
4895 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4896 NULL_RTX, 0, OPTAB_WIDEN);
4897 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4898 NULL_RTX, 0, OPTAB_WIDEN);
4899 expand_inc (quotient, tem);
4900 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4901 NULL_RTX, 0, OPTAB_WIDEN);
4902 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4903 NULL_RTX, 0, OPTAB_WIDEN);
4904 expand_dec (remainder, tem);
4905 emit_label (label);
4907 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4909 default:
4910 gcc_unreachable ();
4913 if (quotient == 0)
4915 if (target && GET_MODE (target) != compute_mode)
4916 target = 0;
4918 if (rem_flag)
4920 /* Try to produce the remainder without producing the quotient.
4921 If we seem to have a divmod pattern that does not require widening,
4922 don't try widening here. We should really have a WIDEN argument
4923 to expand_twoval_binop, since what we'd really like to do here is
4924 1) try a mod insn in compute_mode
4925 2) try a divmod insn in compute_mode
4926 3) try a div insn in compute_mode and multiply-subtract to get
4927 remainder
4928 4) try the same things with widening allowed. */
4929 remainder
4930 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4931 op0, op1, target,
4932 unsignedp,
4933 ((optab_handler (optab2, compute_mode)
4934 != CODE_FOR_nothing)
4935 ? OPTAB_DIRECT : OPTAB_WIDEN));
4936 if (remainder == 0)
4938 /* No luck there. Can we do remainder and divide at once
4939 without a library call? */
4940 remainder = gen_reg_rtx (compute_mode);
4941 if (! expand_twoval_binop ((unsignedp
4942 ? udivmod_optab
4943 : sdivmod_optab),
4944 op0, op1,
4945 NULL_RTX, remainder, unsignedp))
4946 remainder = 0;
4949 if (remainder)
4950 return gen_lowpart (mode, remainder);
4953 /* Produce the quotient. Try a quotient insn, but not a library call.
4954 If we have a divmod in this mode, use it in preference to widening
4955 the div (for this test we assume it will not fail). Note that optab2
4956 is set to the one of the two optabs that the call below will use. */
4957 quotient
4958 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4959 op0, op1, rem_flag ? NULL_RTX : target,
4960 unsignedp,
4961 ((optab_handler (optab2, compute_mode)
4962 != CODE_FOR_nothing)
4963 ? OPTAB_DIRECT : OPTAB_WIDEN));
4965 if (quotient == 0)
4967 /* No luck there. Try a quotient-and-remainder insn,
4968 keeping the quotient alone. */
4969 quotient = gen_reg_rtx (compute_mode);
4970 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4971 op0, op1,
4972 quotient, NULL_RTX, unsignedp))
4974 quotient = 0;
4975 if (! rem_flag)
4976 /* Still no luck. If we are not computing the remainder,
4977 use a library call for the quotient. */
4978 quotient = sign_expand_binop (compute_mode,
4979 udiv_optab, sdiv_optab,
4980 op0, op1, target,
4981 unsignedp, OPTAB_LIB_WIDEN);
4986 if (rem_flag)
4988 if (target && GET_MODE (target) != compute_mode)
4989 target = 0;
4991 if (quotient == 0)
4993 /* No divide instruction either. Use library for remainder. */
4994 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4995 op0, op1, target,
4996 unsignedp, OPTAB_LIB_WIDEN);
4997 /* No remainder function. Try a quotient-and-remainder
4998 function, keeping the remainder. */
4999 if (!remainder)
5001 remainder = gen_reg_rtx (compute_mode);
5002 if (!expand_twoval_binop_libfunc
5003 (unsignedp ? udivmod_optab : sdivmod_optab,
5004 op0, op1,
5005 NULL_RTX, remainder,
5006 unsignedp ? UMOD : MOD))
5007 remainder = NULL_RTX;
5010 else
5012 /* We divided. Now finish doing X - Y * (X / Y). */
5013 remainder = expand_mult (compute_mode, quotient, op1,
5014 NULL_RTX, unsignedp);
5015 remainder = expand_binop (compute_mode, sub_optab, op0,
5016 remainder, target, unsignedp,
5017 OPTAB_LIB_WIDEN);
5021 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5024 /* Return a tree node with data type TYPE, describing the value of X.
5025 Usually this is a VAR_DECL, if there is no obvious better choice.
5026 X may be an expression, however we only support those expressions
5027 generated by loop.c. */
5029 tree
5030 make_tree (tree type, rtx x)
5032 tree t;
5034 switch (GET_CODE (x))
5036 case CONST_INT:
5038 HOST_WIDE_INT hi = 0;
5040 if (INTVAL (x) < 0
5041 && !(TYPE_UNSIGNED (type)
5042 && (GET_MODE_BITSIZE (TYPE_MODE (type))
5043 < HOST_BITS_PER_WIDE_INT)))
5044 hi = -1;
5046 t = build_int_cst_wide (type, INTVAL (x), hi);
5048 return t;
5051 case CONST_DOUBLE:
5052 if (GET_MODE (x) == VOIDmode)
5053 t = build_int_cst_wide (type,
5054 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
5055 else
5057 REAL_VALUE_TYPE d;
5059 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
5060 t = build_real (type, d);
5063 return t;
5065 case CONST_VECTOR:
5067 int units = CONST_VECTOR_NUNITS (x);
5068 tree itype = TREE_TYPE (type);
5069 tree *elts;
5070 int i;
5072 /* Build a tree with vector elements. */
5073 elts = XALLOCAVEC (tree, units);
5074 for (i = units - 1; i >= 0; --i)
5076 rtx elt = CONST_VECTOR_ELT (x, i);
5077 elts[i] = make_tree (itype, elt);
5080 return build_vector (type, elts);
5083 case PLUS:
5084 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5085 make_tree (type, XEXP (x, 1)));
5087 case MINUS:
5088 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5089 make_tree (type, XEXP (x, 1)));
5091 case NEG:
5092 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5094 case MULT:
5095 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5096 make_tree (type, XEXP (x, 1)));
5098 case ASHIFT:
5099 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5100 make_tree (type, XEXP (x, 1)));
5102 case LSHIFTRT:
5103 t = unsigned_type_for (type);
5104 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5105 make_tree (t, XEXP (x, 0)),
5106 make_tree (type, XEXP (x, 1))));
5108 case ASHIFTRT:
5109 t = signed_type_for (type);
5110 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5111 make_tree (t, XEXP (x, 0)),
5112 make_tree (type, XEXP (x, 1))));
5114 case DIV:
5115 if (TREE_CODE (type) != REAL_TYPE)
5116 t = signed_type_for (type);
5117 else
5118 t = type;
5120 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5121 make_tree (t, XEXP (x, 0)),
5122 make_tree (t, XEXP (x, 1))));
5123 case UDIV:
5124 t = unsigned_type_for (type);
5125 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5126 make_tree (t, XEXP (x, 0)),
5127 make_tree (t, XEXP (x, 1))));
5129 case SIGN_EXTEND:
5130 case ZERO_EXTEND:
5131 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5132 GET_CODE (x) == ZERO_EXTEND);
5133 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5135 case CONST:
5136 return make_tree (type, XEXP (x, 0));
5138 case SYMBOL_REF:
5139 t = SYMBOL_REF_DECL (x);
5140 if (t)
5141 return fold_convert (type, build_fold_addr_expr (t));
5142 /* else fall through. */
5144 default:
5145 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5147 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5148 address mode to pointer mode. */
5149 if (POINTER_TYPE_P (type))
5150 x = convert_memory_address_addr_space
5151 (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5153 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5154 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5155 t->decl_with_rtl.rtl = x;
5157 return t;
5161 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5162 and returning TARGET.
5164 If TARGET is 0, a pseudo-register or constant is returned. */
5167 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5169 rtx tem = 0;
5171 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5172 tem = simplify_binary_operation (AND, mode, op0, op1);
5173 if (tem == 0)
5174 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5176 if (target == 0)
5177 target = tem;
5178 else if (tem != target)
5179 emit_move_insn (target, tem);
5180 return target;
5183 /* Helper function for emit_store_flag. */
5184 static rtx
5185 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5186 enum machine_mode mode, enum machine_mode compare_mode,
5187 int unsignedp, rtx x, rtx y, int normalizep,
5188 enum machine_mode target_mode)
5190 struct expand_operand ops[4];
5191 rtx op0, last, comparison, subtarget;
5192 enum machine_mode result_mode = targetm.cstore_mode (icode);
5194 last = get_last_insn ();
5195 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5196 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5197 if (!x || !y)
5199 delete_insns_since (last);
5200 return NULL_RTX;
5203 if (target_mode == VOIDmode)
5204 target_mode = result_mode;
5205 if (!target)
5206 target = gen_reg_rtx (target_mode);
5208 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5210 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5211 create_fixed_operand (&ops[1], comparison);
5212 create_fixed_operand (&ops[2], x);
5213 create_fixed_operand (&ops[3], y);
5214 if (!maybe_expand_insn (icode, 4, ops))
5216 delete_insns_since (last);
5217 return NULL_RTX;
5219 subtarget = ops[0].value;
5221 /* If we are converting to a wider mode, first convert to
5222 TARGET_MODE, then normalize. This produces better combining
5223 opportunities on machines that have a SIGN_EXTRACT when we are
5224 testing a single bit. This mostly benefits the 68k.
5226 If STORE_FLAG_VALUE does not have the sign bit set when
5227 interpreted in MODE, we can do this conversion as unsigned, which
5228 is usually more efficient. */
5229 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5231 convert_move (target, subtarget,
5232 val_signbit_known_clear_p (result_mode,
5233 STORE_FLAG_VALUE));
5234 op0 = target;
5235 result_mode = target_mode;
5237 else
5238 op0 = subtarget;
5240 /* If we want to keep subexpressions around, don't reuse our last
5241 target. */
5242 if (optimize)
5243 subtarget = 0;
5245 /* Now normalize to the proper value in MODE. Sometimes we don't
5246 have to do anything. */
5247 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5249 /* STORE_FLAG_VALUE might be the most negative number, so write
5250 the comparison this way to avoid a compile-time warning. */
5251 else if (- normalizep == STORE_FLAG_VALUE)
5252 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5254 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5255 it hard to use a value of just the sign bit due to ANSI integer
5256 constant typing rules. */
5257 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5258 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5259 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5260 normalizep == 1);
5261 else
5263 gcc_assert (STORE_FLAG_VALUE & 1);
5265 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5266 if (normalizep == -1)
5267 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5270 /* If we were converting to a smaller mode, do the conversion now. */
5271 if (target_mode != result_mode)
5273 convert_move (target, op0, 0);
5274 return target;
5276 else
5277 return op0;
5281 /* A subroutine of emit_store_flag only including "tricks" that do not
5282 need a recursive call. These are kept separate to avoid infinite
5283 loops. */
5285 static rtx
5286 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5287 enum machine_mode mode, int unsignedp, int normalizep,
5288 enum machine_mode target_mode)
5290 rtx subtarget;
5291 enum insn_code icode;
5292 enum machine_mode compare_mode;
5293 enum mode_class mclass;
5294 enum rtx_code scode;
5295 rtx tem;
5297 if (unsignedp)
5298 code = unsigned_condition (code);
5299 scode = swap_condition (code);
5301 /* If one operand is constant, make it the second one. Only do this
5302 if the other operand is not constant as well. */
5304 if (swap_commutative_operands_p (op0, op1))
5305 {
5306 tem = op0;
5307 op0 = op1;
5308 op1 = tem;
5309 code = swap_condition (code);
5310 }
5312 if (mode == VOIDmode)
5313 mode = GET_MODE (op0);
5315 /* For some comparisons with 1 and -1, we can convert this to
5316 comparisons with zero. This will often produce more opportunities for
5317 store-flag insns. */
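/* For example, the signed test "x < 1" becomes "x <= 0" and "x > -1"
   becomes "x >= 0", while the unsigned tests "x >= 1" and "x < 1"
   collapse to "x != 0" and "x == 0", as in the switch below.  */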
5319 switch (code)
5320 {
5321 case LT:
5322 if (op1 == const1_rtx)
5323 op1 = const0_rtx, code = LE;
5324 break;
5325 case LE:
5326 if (op1 == constm1_rtx)
5327 op1 = const0_rtx, code = LT;
5328 break;
5329 case GE:
5330 if (op1 == const1_rtx)
5331 op1 = const0_rtx, code = GT;
5332 break;
5333 case GT:
5334 if (op1 == constm1_rtx)
5335 op1 = const0_rtx, code = GE;
5336 break;
5337 case GEU:
5338 if (op1 == const1_rtx)
5339 op1 = const0_rtx, code = NE;
5340 break;
5341 case LTU:
5342 if (op1 == const1_rtx)
5343 op1 = const0_rtx, code = EQ;
5344 break;
5345 default:
5346 break;
5347 }
5349 /* If we are comparing a double-word integer with zero or -1, we can
5350 convert the comparison into one involving a single word. */
5351 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5352 && GET_MODE_CLASS (mode) == MODE_INT
5353 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5354 {
5355 if ((code == EQ || code == NE)
5356 && (op1 == const0_rtx || op1 == constm1_rtx))
5357 {
5358 rtx op00, op01;
5360 /* Do a logical OR or AND of the two words and compare the
5361 result. */
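/* A double-word value is zero exactly when the inclusive OR of its two
   words is zero, and it is all ones (-1) exactly when the AND of its
   two words is all ones, so a single word-mode store-flag suffices.  */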
5362 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5363 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5364 tem = expand_binop (word_mode,
5365 op1 == const0_rtx ? ior_optab : and_optab,
5366 op00, op01, NULL_RTX, unsignedp,
5367 OPTAB_DIRECT);
5369 if (tem != 0)
5370 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5371 unsignedp, normalizep);
5372 }
5373 else if ((code == LT || code == GE) && op1 == const0_rtx)
5374 {
5375 rtx op0h;
5377 /* If testing the sign bit, can just test on high word. */
5378 op0h = simplify_gen_subreg (word_mode, op0, mode,
5379 subreg_highpart_offset (word_mode,
5380 mode));
5381 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5382 unsignedp, normalizep);
5383 }
5384 else
5385 tem = NULL_RTX;
5387 if (tem)
5388 {
5389 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5390 return tem;
5391 if (!target)
5392 target = gen_reg_rtx (target_mode);
5394 convert_move (target, tem,
5395 !val_signbit_known_set_p (word_mode,
5396 (normalizep ? normalizep
5397 : STORE_FLAG_VALUE)));
5398 return target;
5399 }
5400 }
5402 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5403 complement of A (for GE) and shifting the sign bit to the low bit. */
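/* Illustration for a 32-bit A: with a 0/1 result this computes
   "(unsigned) A >> 31" for A < 0 and "(unsigned) ~A >> 31" for A >= 0;
   with an arithmetic shift the same idea yields a -1/0 result.  */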
5404 if (op1 == const0_rtx && (code == LT || code == GE)
5405 && GET_MODE_CLASS (mode) == MODE_INT
5406 && (normalizep || STORE_FLAG_VALUE == 1
5407 || val_signbit_p (mode, STORE_FLAG_VALUE)))
5408 {
5409 subtarget = target;
5411 if (!target)
5412 target_mode = mode;
5414 /* If the result is to be wider than OP0, it is best to convert it
5415 first. If it is to be narrower, it is *incorrect* to convert it
5416 first. */
5417 else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5418 {
5419 op0 = convert_modes (target_mode, mode, op0, 0);
5420 mode = target_mode;
5421 }
5423 if (target_mode != mode)
5424 subtarget = 0;
5426 if (code == GE)
5427 op0 = expand_unop (mode, one_cmpl_optab, op0,
5428 ((STORE_FLAG_VALUE == 1 || normalizep)
5429 ? 0 : subtarget), 0);
5431 if (STORE_FLAG_VALUE == 1 || normalizep)
5432 /* If we are supposed to produce a 0/1 value, we want to do
5433 a logical shift from the sign bit to the low-order bit; for
5434 a -1/0 value, we do an arithmetic shift. */
5435 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5436 GET_MODE_BITSIZE (mode) - 1,
5437 subtarget, normalizep != -1);
5439 if (mode != target_mode)
5440 op0 = convert_modes (target_mode, mode, op0, 0);
5442 return op0;
5443 }
5445 mclass = GET_MODE_CLASS (mode);
5446 for (compare_mode = mode; compare_mode != VOIDmode;
5447 compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5448 {
5449 enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5450 icode = optab_handler (cstore_optab, optab_mode);
5451 if (icode != CODE_FOR_nothing)
5452 {
5453 do_pending_stack_adjust ();
5454 tem = emit_cstore (target, icode, code, mode, compare_mode,
5455 unsignedp, op0, op1, normalizep, target_mode);
5456 if (tem)
5457 return tem;
5459 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5460 {
5461 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5462 unsignedp, op1, op0, normalizep, target_mode);
5463 if (tem)
5464 return tem;
5465 }
5466 break;
5467 }
5468 }
5470 return 0;
5471 }
5473 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5474 and storing in TARGET. Normally return TARGET.
5475 Return 0 if that cannot be done.
5477 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5478 it is VOIDmode, they cannot both be CONST_INT.
5480 UNSIGNEDP is for the case where we have to widen the operands
5481 to perform the operation. It says to use zero-extension.
5483 NORMALIZEP is 1 if we should convert the result to be either zero
5484 or one. NORMALIZEP is -1 if we should convert the result to be
5485 either zero or -1. If NORMALIZEP is zero, the result will be left
5486 "raw" out of the scc insn. */
5488 rtx
5489 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5490 enum machine_mode mode, int unsignedp, int normalizep)
5491 {
5492 enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5493 enum rtx_code rcode;
5494 rtx subtarget;
5495 rtx tem, last, trueval;
5497 /* If we compare constants, we shouldn't use a store-flag operation,
5498 but a constant load. We can get there via the vanilla route that
5499 usually generates a compare-branch sequence, but will in this case
5500 fold the comparison to a constant, and thus elide the branch. */
5501 if (CONSTANT_P (op0) && CONSTANT_P (op1))
5502 return NULL_RTX;
5504 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5505 target_mode);
5506 if (tem)
5507 return tem;
5509 /* If we reached here, we can't do this with a scc insn, however there
5510 are some comparisons that can be done in other ways. Don't do any
5511 of these cases if branches are very cheap. */
5512 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5513 return 0;
5515 /* See what we need to return. We can only return a 1, -1, or the
5516 sign bit. */
5518 if (normalizep == 0)
5519 {
5520 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5521 normalizep = STORE_FLAG_VALUE;
5523 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5524 ;
5525 else
5526 return 0;
5527 }
5529 last = get_last_insn ();
5531 /* If optimizing, use different pseudo registers for each insn, instead
5532 of reusing the same pseudo. This leads to better CSE, but slows
5533 down the compiler, since there are more pseudos. */
5534 subtarget = (!optimize
5535 && (target_mode == mode)) ? target : NULL_RTX;
5536 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5538 /* For floating-point comparisons, try the reverse comparison or try
5539 changing the "orderedness" of the comparison. */
5540 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5541 {
5542 enum rtx_code first_code;
5543 bool and_them;
5545 rcode = reverse_condition_maybe_unordered (code);
5546 if (can_compare_p (rcode, mode, ccp_store_flag)
5547 && (code == ORDERED || code == UNORDERED
5548 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5549 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5550 {
5551 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5552 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5554 /* For the reverse comparison, use either an addition or a XOR. */
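/* E.g. if STORE_FLAG_VALUE is 1 and a -1/0 result is wanted, the
   reversed comparison yields 1 when the original condition is false
   and 0 when it is true; adding NORMALIZEP (-1) then gives 0 and -1
   respectively.  The XOR variant flips a normalized result between
   0 and TRUEVAL in the same way.  */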
5555 if (want_add
5556 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5557 optimize_insn_for_speed_p ()) == 0)
5558 {
5559 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5560 STORE_FLAG_VALUE, target_mode);
5561 if (tem)
5562 return expand_binop (target_mode, add_optab, tem,
5563 gen_int_mode (normalizep, target_mode),
5564 target, 0, OPTAB_WIDEN);
5565 }
5566 else if (!want_add
5567 && rtx_cost (trueval, XOR, 1,
5568 optimize_insn_for_speed_p ()) == 0)
5569 {
5570 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5571 normalizep, target_mode);
5572 if (tem)
5573 return expand_binop (target_mode, xor_optab, tem, trueval,
5574 target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5575 }
5576 }
5578 delete_insns_since (last);
5580 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5581 if (code == ORDERED || code == UNORDERED)
5582 return 0;
5584 and_them = split_comparison (code, mode, &first_code, &code);
5586 /* If there are no NaNs, the first comparison should always fall through.
5587 Effectively change the comparison to the other one. */
5588 if (!HONOR_NANS (mode))
5589 {
5590 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5591 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5592 target_mode);
5593 }
5595 #ifdef HAVE_conditional_move
5596 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5597 conditional move. */
5598 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5599 normalizep, target_mode);
5600 if (tem == 0)
5601 return 0;
5603 if (and_them)
5604 tem = emit_conditional_move (target, code, op0, op1, mode,
5605 tem, const0_rtx, GET_MODE (tem), 0);
5606 else
5607 tem = emit_conditional_move (target, code, op0, op1, mode,
5608 trueval, tem, GET_MODE (tem), 0);
5610 if (tem == 0)
5611 delete_insns_since (last);
5612 return tem;
5613 #else
5614 return 0;
5615 #endif
5616 }
5618 /* The remaining tricks only apply to integer comparisons. */
5620 if (GET_MODE_CLASS (mode) != MODE_INT)
5621 return 0;
5623 /* If this is an equality comparison of integers, we can try to exclusive-or
5624 (or subtract) the two operands and use a recursive call to try the
5625 comparison with zero. Don't do any of these cases if branches are
5626 very cheap. */
5628 if ((code == EQ || code == NE) && op1 != const0_rtx)
5629 {
5630 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5631 OPTAB_WIDEN);
5633 if (tem == 0)
5634 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5635 OPTAB_WIDEN);
5636 if (tem != 0)
5637 tem = emit_store_flag (target, code, tem, const0_rtx,
5638 mode, unsignedp, normalizep);
5639 if (tem != 0)
5640 return tem;
5642 delete_insns_since (last);
5643 }
5645 /* For integer comparisons, try the reverse comparison. However, for
5646 small X, if we'd have to extend it anyway, implementing "X != 0"
5647 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5648 rcode = reverse_condition (code);
5649 if (can_compare_p (rcode, mode, ccp_store_flag)
5650 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5651 && code == NE
5652 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5653 && op1 == const0_rtx))
5654 {
5655 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5656 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5658 /* Again, for the reverse comparison, use either an addition or a XOR. */
5659 if (want_add
5660 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5661 optimize_insn_for_speed_p ()) == 0)
5662 {
5663 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5664 STORE_FLAG_VALUE, target_mode);
5665 if (tem != 0)
5666 tem = expand_binop (target_mode, add_optab, tem,
5667 gen_int_mode (normalizep, target_mode),
5668 target, 0, OPTAB_WIDEN);
5669 }
5670 else if (!want_add
5671 && rtx_cost (trueval, XOR, 1,
5672 optimize_insn_for_speed_p ()) == 0)
5673 {
5674 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5675 normalizep, target_mode);
5676 if (tem != 0)
5677 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5678 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5679 }
5681 if (tem != 0)
5682 return tem;
5683 delete_insns_since (last);
5684 }
5686 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5687 the constant zero. Reject all other comparisons at this point. Only
5688 do LE and GT if branches are expensive since they are expensive on
5689 2-operand machines. */
5691 if (op1 != const0_rtx
5692 || (code != EQ && code != NE
5693 && (BRANCH_COST (optimize_insn_for_speed_p (),
5694 false) <= 1 || (code != LE && code != GT))))
5695 return 0;
5697 /* Try to put the result of the comparison in the sign bit. Assume we can't
5698 do the necessary operation below. */
5700 tem = 0;
5702 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5703 the sign bit set. */
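/* Worked example for a 32-bit A: A == 0 gives 0 | -1 == -1 (sign bit
   set); A == 5 gives 5 | 4 == 5 (sign bit clear); for negative A the
   sign bit is already set and the OR keeps it set.  */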
5705 if (code == LE)
5706 {
5707 /* This is destructive, so SUBTARGET can't be OP0. */
5708 if (rtx_equal_p (subtarget, op0))
5709 subtarget = 0;
5711 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5712 OPTAB_WIDEN);
5713 if (tem)
5714 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5715 OPTAB_WIDEN);
5716 }
5718 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5719 number of bits in the mode of OP0, minus one. */
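/* Worked example for a 32-bit A: A == 5 gives (5 >> 31) - 5 == -5
   (sign bit set); A == 0 gives 0; A == -5 gives -1 - (-5) == 4 (sign
   bit clear).  Only A > 0 leaves the sign bit set.  */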
5721 if (code == GT)
5722 {
5723 if (rtx_equal_p (subtarget, op0))
5724 subtarget = 0;
5726 tem = expand_shift (RSHIFT_EXPR, mode, op0,
5727 GET_MODE_BITSIZE (mode) - 1,
5728 subtarget, 0);
5729 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5730 OPTAB_WIDEN);
5731 }
5733 if (code == EQ || code == NE)
5734 {
5735 /* For EQ or NE, one way to do the comparison is to apply an operation
5736 that converts the operand into a positive number if it is nonzero
5737 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5738 for NE we negate. This puts the result in the sign bit. Then we
5739 normalize with a shift, if needed.
5741 Two operations that can do the above actions are ABS and FFS, so try
5742 them. If that doesn't work, and MODE is smaller than a full word,
5743 we can use zero-extension to the wider mode (an unsigned conversion)
5744 as the operation. */
5746 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5747 that is compensated by the subsequent overflow when subtracting
5748 one / negating. */
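/* E.g. using ABS on a 32-bit A: A == 7 gives abs 7, minus one is 6
   (sign clear, so EQ is false) and negated is -7 (sign set, so NE is
   true); A == 0 gives abs 0, minus one is -1 (sign set, EQ true) and
   negated is 0 (sign clear, NE false).  */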
5750 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5751 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5752 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5753 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5754 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5755 {
5756 tem = convert_modes (word_mode, mode, op0, 1);
5757 mode = word_mode;
5758 }
5760 if (tem != 0)
5761 {
5762 if (code == EQ)
5763 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5764 0, OPTAB_WIDEN);
5765 else
5766 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5767 }
5769 /* If we couldn't do it that way, for NE we can "or" the two's complement
5770 of the value with itself. For EQ, we take the one's complement of
5771 that "or", which is an extra insn, so we only handle EQ if branches
5772 are expensive. */
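/* E.g. on 32 bits: A == 5 gives -5 | 5, which has the sign bit set
   (NE true); A == 0 gives 0 | 0 == 0 (NE false).  Taking the one's
   complement of that OR flips the sign bit, giving the EQ result.  */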
5774 if (tem == 0
5775 && (code == NE
5776 || BRANCH_COST (optimize_insn_for_speed_p (),
5777 false) > 1))
5778 {
5779 if (rtx_equal_p (subtarget, op0))
5780 subtarget = 0;
5782 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5783 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5784 OPTAB_WIDEN);
5786 if (tem && code == EQ)
5787 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5788 }
5789 }
5791 if (tem && normalizep)
5792 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5793 GET_MODE_BITSIZE (mode) - 1,
5794 subtarget, normalizep == 1);
5796 if (tem)
5797 {
5798 if (!target)
5799 ;
5800 else if (GET_MODE (tem) != target_mode)
5801 {
5802 convert_move (target, tem, 0);
5803 tem = target;
5804 }
5805 else if (!subtarget)
5806 {
5807 emit_move_insn (target, tem);
5808 tem = target;
5809 }
5810 }
5811 else
5812 delete_insns_since (last);
5814 return tem;
5815 }
5817 /* Like emit_store_flag, but always succeeds. */
5819 rtx
5820 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5821 enum machine_mode mode, int unsignedp, int normalizep)
5822 {
5823 rtx tem, label;
5824 rtx trueval, falseval;
5826 /* First see if emit_store_flag can do the job. */
5827 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5828 if (tem != 0)
5829 return tem;
5831 if (!target)
5832 target = gen_reg_rtx (word_mode);
5834 /* If this failed, we have to do this with set/compare/jump/set code.
5835 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
5836 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5837 if (code == NE
5838 && GET_MODE_CLASS (mode) == MODE_INT
5839 && REG_P (target)
5840 && op0 == target
5841 && op1 == const0_rtx)
5842 {
5843 label = gen_label_rtx ();
5844 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5845 mode, NULL_RTX, NULL_RTX, label, -1);
5846 emit_move_insn (target, trueval);
5847 emit_label (label);
5848 return target;
5849 }
5851 if (!REG_P (target)
5852 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5853 target = gen_reg_rtx (GET_MODE (target));
5855 /* Jump in the right direction if the target cannot implement CODE
5856 but can jump on its reverse condition. */
5857 falseval = const0_rtx;
5858 if (! can_compare_p (code, mode, ccp_jump)
5859 && (! FLOAT_MODE_P (mode)
5860 || code == ORDERED || code == UNORDERED
5861 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5862 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5863 {
5864 enum rtx_code rcode;
5865 if (FLOAT_MODE_P (mode))
5866 rcode = reverse_condition_maybe_unordered (code);
5867 else
5868 rcode = reverse_condition (code);
5870 /* Canonicalize to UNORDERED for the libcall. */
5871 if (can_compare_p (rcode, mode, ccp_jump)
5872 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5873 {
5874 falseval = trueval;
5875 trueval = const0_rtx;
5876 code = rcode;
5877 }
5878 }
5880 emit_move_insn (target, trueval);
5881 label = gen_label_rtx ();
5882 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5883 NULL_RTX, label, -1);
5885 emit_move_insn (target, falseval);
5886 emit_label (label);
5888 return target;
5889 }
5891 /* Perform possibly multi-word comparison and conditional jump to LABEL
5892 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
5893 now a thin wrapper around do_compare_rtx_and_jump. */
5895 static void
5896 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5897 rtx label)
5898 {
5899 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5900 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5901 NULL_RTX, NULL_RTX, label, -1);
5902 }