Enabled Usage of _Cilk_spawn and _Cilk_sync in Cilk Runtime (libcilkrts).
[official-gcc.git] / gcc / expmed.c
blob3a6c919ce22267f064538a4b922230d100aa11a5
1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987-2013 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "diagnostic-core.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "stor-layout.h"
30 #include "tm_p.h"
31 #include "flags.h"
32 #include "insn-config.h"
33 #include "expr.h"
34 #include "optabs.h"
35 #include "recog.h"
36 #include "langhooks.h"
37 #include "df.h"
38 #include "target.h"
39 #include "expmed.h"
41 struct target_expmed default_target_expmed;
42 #if SWITCHABLE_TARGET
43 struct target_expmed *this_target_expmed = &default_target_expmed;
44 #endif
46 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
47 unsigned HOST_WIDE_INT,
48 unsigned HOST_WIDE_INT,
49 unsigned HOST_WIDE_INT,
50 rtx);
51 static void store_fixed_bit_field_1 (rtx, unsigned HOST_WIDE_INT,
52 unsigned HOST_WIDE_INT,
53 rtx);
54 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
55 unsigned HOST_WIDE_INT,
56 unsigned HOST_WIDE_INT,
57 unsigned HOST_WIDE_INT,
58 rtx);
59 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
60 unsigned HOST_WIDE_INT,
61 unsigned HOST_WIDE_INT, rtx, int);
62 static rtx extract_fixed_bit_field_1 (enum machine_mode, rtx,
63 unsigned HOST_WIDE_INT,
64 unsigned HOST_WIDE_INT, rtx, int);
65 static rtx mask_rtx (enum machine_mode, int, int, int);
66 static rtx lshift_value (enum machine_mode, unsigned HOST_WIDE_INT, int);
67 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
68 unsigned HOST_WIDE_INT, int);
69 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
70 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
71 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
73 /* Test whether a value is zero of a power of two. */
74 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
75 (((x) & ((x) - (unsigned HOST_WIDE_INT) 1)) == 0)
77 struct init_expmed_rtl
79 struct rtx_def reg;
80 struct rtx_def plus;
81 struct rtx_def neg;
82 struct rtx_def mult;
83 struct rtx_def sdiv;
84 struct rtx_def udiv;
85 struct rtx_def sdiv_32;
86 struct rtx_def smod_32;
87 struct rtx_def wide_mult;
88 struct rtx_def wide_lshr;
89 struct rtx_def wide_trunc;
90 struct rtx_def shift;
91 struct rtx_def shift_mult;
92 struct rtx_def shift_add;
93 struct rtx_def shift_sub0;
94 struct rtx_def shift_sub1;
95 struct rtx_def zext;
96 struct rtx_def trunc;
98 rtx pow2[MAX_BITS_PER_WORD];
99 rtx cint[MAX_BITS_PER_WORD];
102 static void
103 init_expmed_one_conv (struct init_expmed_rtl *all, enum machine_mode to_mode,
104 enum machine_mode from_mode, bool speed)
106 int to_size, from_size;
107 rtx which;
109 /* We're given no information about the true size of a partial integer,
110 only the size of the "full" integer it requires for storage. For
111 comparison purposes here, reduce the bit size by one in that case. */
112 to_size = (GET_MODE_BITSIZE (to_mode)
113 - (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT));
114 from_size = (GET_MODE_BITSIZE (from_mode)
115 - (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT));
117 /* Assume cost of zero-extend and sign-extend is the same. */
118 which = (to_size < from_size ? &all->trunc : &all->zext);
120 PUT_MODE (&all->reg, from_mode);
121 set_convert_cost (to_mode, from_mode, speed, set_src_cost (which, speed));
124 static void
125 init_expmed_one_mode (struct init_expmed_rtl *all,
126 enum machine_mode mode, int speed)
128 int m, n, mode_bitsize;
129 enum machine_mode mode_from;
131 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
133 PUT_MODE (&all->reg, mode);
134 PUT_MODE (&all->plus, mode);
135 PUT_MODE (&all->neg, mode);
136 PUT_MODE (&all->mult, mode);
137 PUT_MODE (&all->sdiv, mode);
138 PUT_MODE (&all->udiv, mode);
139 PUT_MODE (&all->sdiv_32, mode);
140 PUT_MODE (&all->smod_32, mode);
141 PUT_MODE (&all->wide_trunc, mode);
142 PUT_MODE (&all->shift, mode);
143 PUT_MODE (&all->shift_mult, mode);
144 PUT_MODE (&all->shift_add, mode);
145 PUT_MODE (&all->shift_sub0, mode);
146 PUT_MODE (&all->shift_sub1, mode);
147 PUT_MODE (&all->zext, mode);
148 PUT_MODE (&all->trunc, mode);
150 set_add_cost (speed, mode, set_src_cost (&all->plus, speed));
151 set_neg_cost (speed, mode, set_src_cost (&all->neg, speed));
152 set_mul_cost (speed, mode, set_src_cost (&all->mult, speed));
153 set_sdiv_cost (speed, mode, set_src_cost (&all->sdiv, speed));
154 set_udiv_cost (speed, mode, set_src_cost (&all->udiv, speed));
156 set_sdiv_pow2_cheap (speed, mode, (set_src_cost (&all->sdiv_32, speed)
157 <= 2 * add_cost (speed, mode)));
158 set_smod_pow2_cheap (speed, mode, (set_src_cost (&all->smod_32, speed)
159 <= 4 * add_cost (speed, mode)));
161 set_shift_cost (speed, mode, 0, 0);
163 int cost = add_cost (speed, mode);
164 set_shiftadd_cost (speed, mode, 0, cost);
165 set_shiftsub0_cost (speed, mode, 0, cost);
166 set_shiftsub1_cost (speed, mode, 0, cost);
169 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
170 for (m = 1; m < n; m++)
172 XEXP (&all->shift, 1) = all->cint[m];
173 XEXP (&all->shift_mult, 1) = all->pow2[m];
175 set_shift_cost (speed, mode, m, set_src_cost (&all->shift, speed));
176 set_shiftadd_cost (speed, mode, m, set_src_cost (&all->shift_add, speed));
177 set_shiftsub0_cost (speed, mode, m, set_src_cost (&all->shift_sub0, speed));
178 set_shiftsub1_cost (speed, mode, m, set_src_cost (&all->shift_sub1, speed));
181 if (SCALAR_INT_MODE_P (mode))
183 for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
184 mode_from = (enum machine_mode)(mode_from + 1))
185 init_expmed_one_conv (all, mode, mode_from, speed);
187 if (GET_MODE_CLASS (mode) == MODE_INT)
189 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
190 if (wider_mode != VOIDmode)
192 PUT_MODE (&all->zext, wider_mode);
193 PUT_MODE (&all->wide_mult, wider_mode);
194 PUT_MODE (&all->wide_lshr, wider_mode);
195 XEXP (&all->wide_lshr, 1) = GEN_INT (mode_bitsize);
197 set_mul_widen_cost (speed, wider_mode,
198 set_src_cost (&all->wide_mult, speed));
199 set_mul_highpart_cost (speed, mode,
200 set_src_cost (&all->wide_trunc, speed));
205 void
206 init_expmed (void)
208 struct init_expmed_rtl all;
209 enum machine_mode mode;
210 int m, speed;
212 memset (&all, 0, sizeof all);
213 for (m = 1; m < MAX_BITS_PER_WORD; m++)
215 all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
216 all.cint[m] = GEN_INT (m);
219 PUT_CODE (&all.reg, REG);
220 /* Avoid using hard regs in ways which may be unsupported. */
221 SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
223 PUT_CODE (&all.plus, PLUS);
224 XEXP (&all.plus, 0) = &all.reg;
225 XEXP (&all.plus, 1) = &all.reg;
227 PUT_CODE (&all.neg, NEG);
228 XEXP (&all.neg, 0) = &all.reg;
230 PUT_CODE (&all.mult, MULT);
231 XEXP (&all.mult, 0) = &all.reg;
232 XEXP (&all.mult, 1) = &all.reg;
234 PUT_CODE (&all.sdiv, DIV);
235 XEXP (&all.sdiv, 0) = &all.reg;
236 XEXP (&all.sdiv, 1) = &all.reg;
238 PUT_CODE (&all.udiv, UDIV);
239 XEXP (&all.udiv, 0) = &all.reg;
240 XEXP (&all.udiv, 1) = &all.reg;
242 PUT_CODE (&all.sdiv_32, DIV);
243 XEXP (&all.sdiv_32, 0) = &all.reg;
244 XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? all.cint[32] : GEN_INT (32);
246 PUT_CODE (&all.smod_32, MOD);
247 XEXP (&all.smod_32, 0) = &all.reg;
248 XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
250 PUT_CODE (&all.zext, ZERO_EXTEND);
251 XEXP (&all.zext, 0) = &all.reg;
253 PUT_CODE (&all.wide_mult, MULT);
254 XEXP (&all.wide_mult, 0) = &all.zext;
255 XEXP (&all.wide_mult, 1) = &all.zext;
257 PUT_CODE (&all.wide_lshr, LSHIFTRT);
258 XEXP (&all.wide_lshr, 0) = &all.wide_mult;
260 PUT_CODE (&all.wide_trunc, TRUNCATE);
261 XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
263 PUT_CODE (&all.shift, ASHIFT);
264 XEXP (&all.shift, 0) = &all.reg;
266 PUT_CODE (&all.shift_mult, MULT);
267 XEXP (&all.shift_mult, 0) = &all.reg;
269 PUT_CODE (&all.shift_add, PLUS);
270 XEXP (&all.shift_add, 0) = &all.shift_mult;
271 XEXP (&all.shift_add, 1) = &all.reg;
273 PUT_CODE (&all.shift_sub0, MINUS);
274 XEXP (&all.shift_sub0, 0) = &all.shift_mult;
275 XEXP (&all.shift_sub0, 1) = &all.reg;
277 PUT_CODE (&all.shift_sub1, MINUS);
278 XEXP (&all.shift_sub1, 0) = &all.reg;
279 XEXP (&all.shift_sub1, 1) = &all.shift_mult;
281 PUT_CODE (&all.trunc, TRUNCATE);
282 XEXP (&all.trunc, 0) = &all.reg;
284 for (speed = 0; speed < 2; speed++)
286 crtl->maybe_hot_insn_p = speed;
287 set_zero_cost (speed, set_src_cost (const0_rtx, speed));
289 for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
290 mode = (enum machine_mode)(mode + 1))
291 init_expmed_one_mode (&all, mode, speed);
293 if (MIN_MODE_PARTIAL_INT != VOIDmode)
294 for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
295 mode = (enum machine_mode)(mode + 1))
296 init_expmed_one_mode (&all, mode, speed);
298 if (MIN_MODE_VECTOR_INT != VOIDmode)
299 for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
300 mode = (enum machine_mode)(mode + 1))
301 init_expmed_one_mode (&all, mode, speed);
304 if (alg_hash_used_p ())
306 struct alg_hash_entry *p = alg_hash_entry_ptr (0);
307 memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
309 else
310 set_alg_hash_used_p (true);
311 default_rtl_profile ();
314 /* Return an rtx representing minus the value of X.
315 MODE is the intended mode of the result,
316 useful if X is a CONST_INT. */
319 negate_rtx (enum machine_mode mode, rtx x)
321 rtx result = simplify_unary_operation (NEG, mode, x, mode);
323 if (result == 0)
324 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
326 return result;
329 /* Adjust bitfield memory MEM so that it points to the first unit of mode
330 MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
331 If MODE is BLKmode, return a reference to every byte in the bitfield.
332 Set *NEW_BITNUM to the bit position of the field within the new memory. */
334 static rtx
335 narrow_bit_field_mem (rtx mem, enum machine_mode mode,
336 unsigned HOST_WIDE_INT bitsize,
337 unsigned HOST_WIDE_INT bitnum,
338 unsigned HOST_WIDE_INT *new_bitnum)
340 if (mode == BLKmode)
342 *new_bitnum = bitnum % BITS_PER_UNIT;
343 HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
344 HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
345 / BITS_PER_UNIT);
346 return adjust_bitfield_address_size (mem, mode, offset, size);
348 else
350 unsigned int unit = GET_MODE_BITSIZE (mode);
351 *new_bitnum = bitnum % unit;
352 HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
353 return adjust_bitfield_address (mem, mode, offset);
357 /* The caller wants to perform insertion or extraction PATTERN on a
358 bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
359 BITREGION_START and BITREGION_END are as for store_bit_field
360 and FIELDMODE is the natural mode of the field.
362 Search for a mode that is compatible with the memory access
363 restrictions and (where applicable) with a register insertion or
364 extraction. Return the new memory on success, storing the adjusted
365 bit position in *NEW_BITNUM. Return null otherwise. */
367 static rtx
368 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
369 rtx op0, HOST_WIDE_INT bitsize,
370 HOST_WIDE_INT bitnum,
371 unsigned HOST_WIDE_INT bitregion_start,
372 unsigned HOST_WIDE_INT bitregion_end,
373 enum machine_mode fieldmode,
374 unsigned HOST_WIDE_INT *new_bitnum)
376 bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
377 bitregion_end, MEM_ALIGN (op0),
378 MEM_VOLATILE_P (op0));
379 enum machine_mode best_mode;
380 if (iter.next_mode (&best_mode))
382 /* We can use a memory in BEST_MODE. See whether this is true for
383 any wider modes. All other things being equal, we prefer to
384 use the widest mode possible because it tends to expose more
385 CSE opportunities. */
386 if (!iter.prefer_smaller_modes ())
388 /* Limit the search to the mode required by the corresponding
389 register insertion or extraction instruction, if any. */
390 enum machine_mode limit_mode = word_mode;
391 extraction_insn insn;
392 if (get_best_reg_extraction_insn (&insn, pattern,
393 GET_MODE_BITSIZE (best_mode),
394 fieldmode))
395 limit_mode = insn.field_mode;
397 enum machine_mode wider_mode;
398 while (iter.next_mode (&wider_mode)
399 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
400 best_mode = wider_mode;
402 return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
403 new_bitnum);
405 return NULL_RTX;
408 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
409 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
410 offset is then BITNUM / BITS_PER_UNIT. */
412 static bool
413 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
414 unsigned HOST_WIDE_INT bitsize,
415 enum machine_mode struct_mode)
417 if (BYTES_BIG_ENDIAN)
418 return (bitnum % BITS_PER_UNIT == 0
419 && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
420 || (bitnum + bitsize) % BITS_PER_WORD == 0));
421 else
422 return bitnum % BITS_PER_WORD == 0;
425 /* Return true if -fstrict-volatile-bitfields applies an access of OP0
426 containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
427 Return false if the access would touch memory outside the range
428 BITREGION_START to BITREGION_END for conformance to the C++ memory
429 model. */
431 static bool
432 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
433 unsigned HOST_WIDE_INT bitnum,
434 enum machine_mode fieldmode,
435 unsigned HOST_WIDE_INT bitregion_start,
436 unsigned HOST_WIDE_INT bitregion_end)
438 unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
440 /* -fstrict-volatile-bitfields must be enabled and we must have a
441 volatile MEM. */
442 if (!MEM_P (op0)
443 || !MEM_VOLATILE_P (op0)
444 || flag_strict_volatile_bitfields <= 0)
445 return false;
447 /* Non-integral modes likely only happen with packed structures.
448 Punt. */
449 if (!SCALAR_INT_MODE_P (fieldmode))
450 return false;
452 /* The bit size must not be larger than the field mode, and
453 the field mode must not be larger than a word. */
454 if (bitsize > modesize || modesize > BITS_PER_WORD)
455 return false;
457 /* Check for cases of unaligned fields that must be split. */
458 if (bitnum % BITS_PER_UNIT + bitsize > modesize
459 || (STRICT_ALIGNMENT
460 && bitnum % GET_MODE_ALIGNMENT (fieldmode) + bitsize > modesize))
461 return false;
463 /* Check for cases where the C++ memory model applies. */
464 if (bitregion_end != 0
465 && (bitnum - bitnum % modesize < bitregion_start
466 || bitnum - bitnum % modesize + modesize > bitregion_end))
467 return false;
469 return true;
472 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
473 bit number BITNUM can be treated as a simple value of mode MODE. */
475 static bool
476 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
477 unsigned HOST_WIDE_INT bitnum, enum machine_mode mode)
479 return (MEM_P (op0)
480 && bitnum % BITS_PER_UNIT == 0
481 && bitsize == GET_MODE_BITSIZE (mode)
482 && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
483 || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
484 && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
487 /* Try to use instruction INSV to store VALUE into a field of OP0.
488 BITSIZE and BITNUM are as for store_bit_field. */
490 static bool
491 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
492 unsigned HOST_WIDE_INT bitsize,
493 unsigned HOST_WIDE_INT bitnum, rtx value)
495 struct expand_operand ops[4];
496 rtx value1;
497 rtx xop0 = op0;
498 rtx last = get_last_insn ();
499 bool copy_back = false;
501 enum machine_mode op_mode = insv->field_mode;
502 unsigned int unit = GET_MODE_BITSIZE (op_mode);
503 if (bitsize == 0 || bitsize > unit)
504 return false;
506 if (MEM_P (xop0))
507 /* Get a reference to the first byte of the field. */
508 xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
509 &bitnum);
510 else
512 /* Convert from counting within OP0 to counting in OP_MODE. */
513 if (BYTES_BIG_ENDIAN)
514 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
516 /* If xop0 is a register, we need it in OP_MODE
517 to make it acceptable to the format of insv. */
518 if (GET_CODE (xop0) == SUBREG)
519 /* We can't just change the mode, because this might clobber op0,
520 and we will need the original value of op0 if insv fails. */
521 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
522 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
523 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
526 /* If the destination is a paradoxical subreg such that we need a
527 truncate to the inner mode, perform the insertion on a temporary and
528 truncate the result to the original destination. Note that we can't
529 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
530 X) 0)) is (reg:N X). */
531 if (GET_CODE (xop0) == SUBREG
532 && REG_P (SUBREG_REG (xop0))
533 && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
534 op_mode))
536 rtx tem = gen_reg_rtx (op_mode);
537 emit_move_insn (tem, xop0);
538 xop0 = tem;
539 copy_back = true;
542 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
543 "backwards" from the size of the unit we are inserting into.
544 Otherwise, we count bits from the most significant on a
545 BYTES/BITS_BIG_ENDIAN machine. */
547 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
548 bitnum = unit - bitsize - bitnum;
550 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
551 value1 = value;
552 if (GET_MODE (value) != op_mode)
554 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
556 /* Optimization: Don't bother really extending VALUE
557 if it has all the bits we will actually use. However,
558 if we must narrow it, be sure we do it correctly. */
560 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
562 rtx tmp;
564 tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
565 if (! tmp)
566 tmp = simplify_gen_subreg (op_mode,
567 force_reg (GET_MODE (value),
568 value1),
569 GET_MODE (value), 0);
570 value1 = tmp;
572 else
573 value1 = gen_lowpart (op_mode, value1);
575 else if (CONST_INT_P (value))
576 value1 = gen_int_mode (INTVAL (value), op_mode);
577 else
578 /* Parse phase is supposed to make VALUE's data type
579 match that of the component reference, which is a type
580 at least as wide as the field; so VALUE should have
581 a mode that corresponds to that type. */
582 gcc_assert (CONSTANT_P (value));
585 create_fixed_operand (&ops[0], xop0);
586 create_integer_operand (&ops[1], bitsize);
587 create_integer_operand (&ops[2], bitnum);
588 create_input_operand (&ops[3], value1, op_mode);
589 if (maybe_expand_insn (insv->icode, 4, ops))
591 if (copy_back)
592 convert_move (op0, xop0, true);
593 return true;
595 delete_insns_since (last);
596 return false;
599 /* A subroutine of store_bit_field, with the same arguments. Return true
600 if the operation could be implemented.
602 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
603 no other way of implementing the operation. If FALLBACK_P is false,
604 return false instead. */
606 static bool
607 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
608 unsigned HOST_WIDE_INT bitnum,
609 unsigned HOST_WIDE_INT bitregion_start,
610 unsigned HOST_WIDE_INT bitregion_end,
611 enum machine_mode fieldmode,
612 rtx value, bool fallback_p)
614 rtx op0 = str_rtx;
615 rtx orig_value;
617 while (GET_CODE (op0) == SUBREG)
619 /* The following line once was done only if WORDS_BIG_ENDIAN,
620 but I think that is a mistake. WORDS_BIG_ENDIAN is
621 meaningful at a much higher level; when structures are copied
622 between memory and regs, the higher-numbered regs
623 always get higher addresses. */
624 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
625 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
626 int byte_offset = 0;
628 /* Paradoxical subregs need special handling on big endian machines. */
629 if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
631 int difference = inner_mode_size - outer_mode_size;
633 if (WORDS_BIG_ENDIAN)
634 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
635 if (BYTES_BIG_ENDIAN)
636 byte_offset += difference % UNITS_PER_WORD;
638 else
639 byte_offset = SUBREG_BYTE (op0);
641 bitnum += byte_offset * BITS_PER_UNIT;
642 op0 = SUBREG_REG (op0);
645 /* No action is needed if the target is a register and if the field
646 lies completely outside that register. This can occur if the source
647 code contains an out-of-bounds access to a small array. */
648 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
649 return true;
651 /* Use vec_set patterns for inserting parts of vectors whenever
652 available. */
653 if (VECTOR_MODE_P (GET_MODE (op0))
654 && !MEM_P (op0)
655 && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
656 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
657 && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
658 && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
660 struct expand_operand ops[3];
661 enum machine_mode outermode = GET_MODE (op0);
662 enum machine_mode innermode = GET_MODE_INNER (outermode);
663 enum insn_code icode = optab_handler (vec_set_optab, outermode);
664 int pos = bitnum / GET_MODE_BITSIZE (innermode);
666 create_fixed_operand (&ops[0], op0);
667 create_input_operand (&ops[1], value, innermode);
668 create_integer_operand (&ops[2], pos);
669 if (maybe_expand_insn (icode, 3, ops))
670 return true;
673 /* If the target is a register, overwriting the entire object, or storing
674 a full-word or multi-word field can be done with just a SUBREG. */
675 if (!MEM_P (op0)
676 && bitsize == GET_MODE_BITSIZE (fieldmode)
677 && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
678 || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
680 /* Use the subreg machinery either to narrow OP0 to the required
681 words or to cope with mode punning between equal-sized modes.
682 In the latter case, use subreg on the rhs side, not lhs. */
683 rtx sub;
685 if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
687 sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
688 if (sub)
690 emit_move_insn (op0, sub);
691 return true;
694 else
696 sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
697 bitnum / BITS_PER_UNIT);
698 if (sub)
700 emit_move_insn (sub, value);
701 return true;
706 /* If the target is memory, storing any naturally aligned field can be
707 done with a simple store. For targets that support fast unaligned
708 memory, any naturally sized, unit aligned field can be done directly. */
709 if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
711 op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
712 emit_move_insn (op0, value);
713 return true;
716 /* Make sure we are playing with integral modes. Pun with subregs
717 if we aren't. This must come after the entire register case above,
718 since that case is valid for any mode. The following cases are only
719 valid for integral modes. */
721 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
722 if (imode != GET_MODE (op0))
724 if (MEM_P (op0))
725 op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
726 else
728 gcc_assert (imode != BLKmode);
729 op0 = gen_lowpart (imode, op0);
734 /* Storing an lsb-aligned field in a register
735 can be done with a movstrict instruction. */
737 if (!MEM_P (op0)
738 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
739 && bitsize == GET_MODE_BITSIZE (fieldmode)
740 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
742 struct expand_operand ops[2];
743 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
744 rtx arg0 = op0;
745 unsigned HOST_WIDE_INT subreg_off;
747 if (GET_CODE (arg0) == SUBREG)
749 /* Else we've got some float mode source being extracted into
750 a different float mode destination -- this combination of
751 subregs results in Severe Tire Damage. */
752 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
753 || GET_MODE_CLASS (fieldmode) == MODE_INT
754 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
755 arg0 = SUBREG_REG (arg0);
758 subreg_off = bitnum / BITS_PER_UNIT;
759 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
761 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
763 create_fixed_operand (&ops[0], arg0);
764 /* Shrink the source operand to FIELDMODE. */
765 create_convert_operand_to (&ops[1], value, fieldmode, false);
766 if (maybe_expand_insn (icode, 2, ops))
767 return true;
771 /* Handle fields bigger than a word. */
773 if (bitsize > BITS_PER_WORD)
775 /* Here we transfer the words of the field
776 in the order least significant first.
777 This is because the most significant word is the one which may
778 be less than full.
779 However, only do that if the value is not BLKmode. */
781 unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
782 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
783 unsigned int i;
784 rtx last;
786 /* This is the mode we must force value to, so that there will be enough
787 subwords to extract. Note that fieldmode will often (always?) be
788 VOIDmode, because that is what store_field uses to indicate that this
789 is a bit field, but passing VOIDmode to operand_subword_force
790 is not allowed. */
791 fieldmode = GET_MODE (value);
792 if (fieldmode == VOIDmode)
793 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
795 last = get_last_insn ();
796 for (i = 0; i < nwords; i++)
798 /* If I is 0, use the low-order word in both field and target;
799 if I is 1, use the next to lowest word; and so on. */
800 unsigned int wordnum = (backwards
801 ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
802 - i - 1
803 : i);
804 unsigned int bit_offset = (backwards
805 ? MAX ((int) bitsize - ((int) i + 1)
806 * BITS_PER_WORD,
808 : (int) i * BITS_PER_WORD);
809 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
810 unsigned HOST_WIDE_INT new_bitsize =
811 MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
813 /* If the remaining chunk doesn't have full wordsize we have
814 to make sure that for big endian machines the higher order
815 bits are used. */
816 if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
817 value_word = simplify_expand_binop (word_mode, lshr_optab,
818 value_word,
819 GEN_INT (BITS_PER_WORD
820 - new_bitsize),
821 NULL_RTX, true,
822 OPTAB_LIB_WIDEN);
824 if (!store_bit_field_1 (op0, new_bitsize,
825 bitnum + bit_offset,
826 bitregion_start, bitregion_end,
827 word_mode,
828 value_word, fallback_p))
830 delete_insns_since (last);
831 return false;
834 return true;
837 /* If VALUE has a floating-point or complex mode, access it as an
838 integer of the corresponding size. This can occur on a machine
839 with 64 bit registers that uses SFmode for float. It can also
840 occur for unaligned float or complex fields. */
841 orig_value = value;
842 if (GET_MODE (value) != VOIDmode
843 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
844 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
846 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
847 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
850 /* If OP0 is a multi-word register, narrow it to the affected word.
851 If the region spans two words, defer to store_split_bit_field. */
852 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
854 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
855 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
856 gcc_assert (op0);
857 bitnum %= BITS_PER_WORD;
858 if (bitnum + bitsize > BITS_PER_WORD)
860 if (!fallback_p)
861 return false;
863 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
864 bitregion_end, value);
865 return true;
869 /* From here on we can assume that the field to be stored in fits
870 within a word. If the destination is a register, it too fits
871 in a word. */
873 extraction_insn insv;
874 if (!MEM_P (op0)
875 && get_best_reg_extraction_insn (&insv, EP_insv,
876 GET_MODE_BITSIZE (GET_MODE (op0)),
877 fieldmode)
878 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
879 return true;
881 /* If OP0 is a memory, try copying it to a register and seeing if a
882 cheap register alternative is available. */
883 if (MEM_P (op0))
885 if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
886 fieldmode)
887 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
888 return true;
890 rtx last = get_last_insn ();
892 /* Try loading part of OP0 into a register, inserting the bitfield
893 into that, and then copying the result back to OP0. */
894 unsigned HOST_WIDE_INT bitpos;
895 rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
896 bitregion_start, bitregion_end,
897 fieldmode, &bitpos);
898 if (xop0)
900 rtx tempreg = copy_to_reg (xop0);
901 if (store_bit_field_1 (tempreg, bitsize, bitpos,
902 bitregion_start, bitregion_end,
903 fieldmode, orig_value, false))
905 emit_move_insn (xop0, tempreg);
906 return true;
908 delete_insns_since (last);
912 if (!fallback_p)
913 return false;
915 store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
916 bitregion_end, value);
917 return true;
920 /* Generate code to store value from rtx VALUE
921 into a bit-field within structure STR_RTX
922 containing BITSIZE bits starting at bit BITNUM.
924 BITREGION_START is bitpos of the first bitfield in this region.
925 BITREGION_END is the bitpos of the ending bitfield in this region.
926 These two fields are 0, if the C++ memory model does not apply,
927 or we are not interested in keeping track of bitfield regions.
929 FIELDMODE is the machine-mode of the FIELD_DECL node for this field. */
931 void
932 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
933 unsigned HOST_WIDE_INT bitnum,
934 unsigned HOST_WIDE_INT bitregion_start,
935 unsigned HOST_WIDE_INT bitregion_end,
936 enum machine_mode fieldmode,
937 rtx value)
939 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
940 if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode,
941 bitregion_start, bitregion_end))
944 /* Storing any naturally aligned field can be done with a simple
945 store. For targets that support fast unaligned memory, any
946 naturally sized, unit aligned field can be done directly. */
947 if (simple_mem_bitfield_p (str_rtx, bitsize, bitnum, fieldmode))
949 str_rtx = adjust_bitfield_address (str_rtx, fieldmode,
950 bitnum / BITS_PER_UNIT);
951 emit_move_insn (str_rtx, value);
953 else
955 str_rtx = narrow_bit_field_mem (str_rtx, fieldmode, bitsize, bitnum,
956 &bitnum);
957 /* Explicitly override the C/C++ memory model; ignore the
958 bit range so that we can do the access in the mode mandated
959 by -fstrict-volatile-bitfields instead. */
960 store_fixed_bit_field_1 (str_rtx, bitsize, bitnum,
961 value);
964 return;
967 /* Under the C++0x memory model, we must not touch bits outside the
968 bit region. Adjust the address to start at the beginning of the
969 bit region. */
970 if (MEM_P (str_rtx) && bitregion_start > 0)
972 enum machine_mode bestmode;
973 HOST_WIDE_INT offset, size;
975 gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
977 offset = bitregion_start / BITS_PER_UNIT;
978 bitnum -= bitregion_start;
979 size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
980 bitregion_end -= bitregion_start;
981 bitregion_start = 0;
982 bestmode = get_best_mode (bitsize, bitnum,
983 bitregion_start, bitregion_end,
984 MEM_ALIGN (str_rtx), VOIDmode,
985 MEM_VOLATILE_P (str_rtx));
986 str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
989 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
990 bitregion_start, bitregion_end,
991 fieldmode, value, true))
992 gcc_unreachable ();
995 /* Use shifts and boolean operations to store VALUE into a bit field of
996 width BITSIZE in OP0, starting at bit BITNUM. */
998 static void
999 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1000 unsigned HOST_WIDE_INT bitnum,
1001 unsigned HOST_WIDE_INT bitregion_start,
1002 unsigned HOST_WIDE_INT bitregion_end,
1003 rtx value)
1005 enum machine_mode mode;
1007 /* There is a case not handled here:
1008 a structure with a known alignment of just a halfword
1009 and a field split across two aligned halfwords within the structure.
1010 Or likewise a structure with a known alignment of just a byte
1011 and a field split across two bytes.
1012 Such cases are not supposed to be able to occur. */
1014 if (MEM_P (op0))
1016 mode = GET_MODE (op0);
1017 if (GET_MODE_BITSIZE (mode) == 0
1018 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
1019 mode = word_mode;
1020 mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1021 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
1023 if (mode == VOIDmode)
1025 /* The only way this should occur is if the field spans word
1026 boundaries. */
1027 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
1028 bitregion_end, value);
1029 return;
1032 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1035 store_fixed_bit_field_1 (op0, bitsize, bitnum, value);
1036 return;
1039 /* Helper function for store_fixed_bit_field, stores
1040 the bit field always using the MODE of OP0. */
1042 static void
1043 store_fixed_bit_field_1 (rtx op0, unsigned HOST_WIDE_INT bitsize,
1044 unsigned HOST_WIDE_INT bitnum,
1045 rtx value)
1047 enum machine_mode mode;
1048 rtx temp;
1049 int all_zero = 0;
1050 int all_one = 0;
1052 mode = GET_MODE (op0);
1053 gcc_assert (SCALAR_INT_MODE_P (mode));
1055 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1056 for invalid input, such as f5 from gcc.dg/pr48335-2.c. */
1058 if (BYTES_BIG_ENDIAN)
1059 /* BITNUM is the distance between our msb
1060 and that of the containing datum.
1061 Convert it to the distance from the lsb. */
1062 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1064 /* Now BITNUM is always the distance between our lsb
1065 and that of OP0. */
1067 /* Shift VALUE left by BITNUM bits. If VALUE is not constant,
1068 we must first convert its mode to MODE. */
1070 if (CONST_INT_P (value))
1072 HOST_WIDE_INT v = INTVAL (value);
1074 if (bitsize < HOST_BITS_PER_WIDE_INT)
1075 v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
1077 if (v == 0)
1078 all_zero = 1;
1079 else if ((bitsize < HOST_BITS_PER_WIDE_INT
1080 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
1081 || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
1082 all_one = 1;
1084 value = lshift_value (mode, v, bitnum);
1086 else
1088 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1089 && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1091 if (GET_MODE (value) != mode)
1092 value = convert_to_mode (mode, value, 1);
1094 if (must_and)
1095 value = expand_binop (mode, and_optab, value,
1096 mask_rtx (mode, 0, bitsize, 0),
1097 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1098 if (bitnum > 0)
1099 value = expand_shift (LSHIFT_EXPR, mode, value,
1100 bitnum, NULL_RTX, 1);
1103 /* Now clear the chosen bits in OP0,
1104 except that if VALUE is -1 we need not bother. */
1105 /* We keep the intermediates in registers to allow CSE to combine
1106 consecutive bitfield assignments. */
1108 temp = force_reg (mode, op0);
1110 if (! all_one)
1112 temp = expand_binop (mode, and_optab, temp,
1113 mask_rtx (mode, bitnum, bitsize, 1),
1114 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1115 temp = force_reg (mode, temp);
1118 /* Now logical-or VALUE into OP0, unless it is zero. */
1120 if (! all_zero)
1122 temp = expand_binop (mode, ior_optab, temp, value,
1123 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1124 temp = force_reg (mode, temp);
1127 if (op0 != temp)
1129 op0 = copy_rtx (op0);
1130 emit_move_insn (op0, temp);
1134 /* Store a bit field that is split across multiple accessible memory objects.
1136 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1137 BITSIZE is the field width; BITPOS the position of its first bit
1138 (within the word).
1139 VALUE is the value to store.
1141 This does not yet handle fields wider than BITS_PER_WORD. */
1143 static void
1144 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1145 unsigned HOST_WIDE_INT bitpos,
1146 unsigned HOST_WIDE_INT bitregion_start,
1147 unsigned HOST_WIDE_INT bitregion_end,
1148 rtx value)
1150 unsigned int unit;
1151 unsigned int bitsdone = 0;
1153 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1154 much at a time. */
1155 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1156 unit = BITS_PER_WORD;
1157 else
1158 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1160 /* If OP0 is a memory with a mode, then UNIT must not be larger than
1161 OP0's mode as well. Otherwise, store_fixed_bit_field will call us
1162 again, and we will mutually recurse forever. */
1163 if (MEM_P (op0) && GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1164 unit = MIN (unit, GET_MODE_BITSIZE (GET_MODE (op0)));
1166 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1167 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1168 that VALUE might be a floating-point constant. */
1169 if (CONSTANT_P (value) && !CONST_INT_P (value))
1171 rtx word = gen_lowpart_common (word_mode, value);
1173 if (word && (value != word))
1174 value = word;
1175 else
1176 value = gen_lowpart_common (word_mode,
1177 force_reg (GET_MODE (value) != VOIDmode
1178 ? GET_MODE (value)
1179 : word_mode, value));
1182 while (bitsdone < bitsize)
1184 unsigned HOST_WIDE_INT thissize;
1185 rtx part, word;
1186 unsigned HOST_WIDE_INT thispos;
1187 unsigned HOST_WIDE_INT offset;
1189 offset = (bitpos + bitsdone) / unit;
1190 thispos = (bitpos + bitsdone) % unit;
1192 /* When region of bytes we can touch is restricted, decrease
1193 UNIT close to the end of the region as needed. If op0 is a REG
1194 or SUBREG of REG, don't do this, as there can't be data races
1195 on a register and we can expand shorter code in some cases. */
1196 if (bitregion_end
1197 && unit > BITS_PER_UNIT
1198 && bitpos + bitsdone - thispos + unit > bitregion_end + 1
1199 && !REG_P (op0)
1200 && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1202 unit = unit / 2;
1203 continue;
1206 /* THISSIZE must not overrun a word boundary. Otherwise,
1207 store_fixed_bit_field will call us again, and we will mutually
1208 recurse forever. */
1209 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1210 thissize = MIN (thissize, unit - thispos);
1212 if (BYTES_BIG_ENDIAN)
1214 /* Fetch successively less significant portions. */
1215 if (CONST_INT_P (value))
1216 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1217 >> (bitsize - bitsdone - thissize))
1218 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1219 else
1221 int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1222 /* The args are chosen so that the last part includes the
1223 lsb. Give extract_bit_field the value it needs (with
1224 endianness compensation) to fetch the piece we want. */
1225 part = extract_fixed_bit_field (word_mode, value, thissize,
1226 total_bits - bitsize + bitsdone,
1227 NULL_RTX, 1);
1230 else
1232 /* Fetch successively more significant portions. */
1233 if (CONST_INT_P (value))
1234 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1235 >> bitsdone)
1236 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1237 else
1238 part = extract_fixed_bit_field (word_mode, value, thissize,
1239 bitsdone, NULL_RTX, 1);
1242 /* If OP0 is a register, then handle OFFSET here.
1244 When handling multiword bitfields, extract_bit_field may pass
1245 down a word_mode SUBREG of a larger REG for a bitfield that actually
1246 crosses a word boundary. Thus, for a SUBREG, we must find
1247 the current word starting from the base register. */
1248 if (GET_CODE (op0) == SUBREG)
1250 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD)
1251 + (offset * unit / BITS_PER_WORD);
1252 enum machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
1253 if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1254 word = word_offset ? const0_rtx : op0;
1255 else
1256 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1257 GET_MODE (SUBREG_REG (op0)));
1258 offset &= BITS_PER_WORD / unit - 1;
1260 else if (REG_P (op0))
1262 enum machine_mode op0_mode = GET_MODE (op0);
1263 if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1264 word = offset ? const0_rtx : op0;
1265 else
1266 word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
1267 GET_MODE (op0));
1268 offset &= BITS_PER_WORD / unit - 1;
1270 else
1271 word = op0;
1273 /* OFFSET is in UNITs, and UNIT is in bits. If WORD is const0_rtx,
1274 it is just an out-of-bounds access. Ignore it. */
1275 if (word != const0_rtx)
1276 store_fixed_bit_field (word, thissize, offset * unit + thispos,
1277 bitregion_start, bitregion_end, part);
1278 bitsdone += thissize;
1282 /* A subroutine of extract_bit_field_1 that converts return value X
1283 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1284 to extract_bit_field. */
1286 static rtx
1287 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1288 enum machine_mode tmode, bool unsignedp)
1290 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1291 return x;
1293 /* If the x mode is not a scalar integral, first convert to the
1294 integer mode of that size and then access it as a floating-point
1295 value via a SUBREG. */
1296 if (!SCALAR_INT_MODE_P (tmode))
1298 enum machine_mode smode;
1300 smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1301 x = convert_to_mode (smode, x, unsignedp);
1302 x = force_reg (smode, x);
1303 return gen_lowpart (tmode, x);
1306 return convert_to_mode (tmode, x, unsignedp);
1309 /* Try to use an ext(z)v pattern to extract a field from OP0.
1310 Return the extracted value on success, otherwise return null.
1311 EXT_MODE is the mode of the extraction and the other arguments
1312 are as for extract_bit_field. */
1314 static rtx
1315 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1316 unsigned HOST_WIDE_INT bitsize,
1317 unsigned HOST_WIDE_INT bitnum,
1318 int unsignedp, rtx target,
1319 enum machine_mode mode, enum machine_mode tmode)
1321 struct expand_operand ops[4];
1322 rtx spec_target = target;
1323 rtx spec_target_subreg = 0;
1324 enum machine_mode ext_mode = extv->field_mode;
1325 unsigned unit = GET_MODE_BITSIZE (ext_mode);
1327 if (bitsize == 0 || unit < bitsize)
1328 return NULL_RTX;
1330 if (MEM_P (op0))
1331 /* Get a reference to the first byte of the field. */
1332 op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1333 &bitnum);
1334 else
1336 /* Convert from counting within OP0 to counting in EXT_MODE. */
1337 if (BYTES_BIG_ENDIAN)
1338 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1340 /* If op0 is a register, we need it in EXT_MODE to make it
1341 acceptable to the format of ext(z)v. */
1342 if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1343 return NULL_RTX;
1344 if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1345 op0 = gen_lowpart_SUBREG (ext_mode, op0);
1348 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1349 "backwards" from the size of the unit we are extracting from.
1350 Otherwise, we count bits from the most significant on a
1351 BYTES/BITS_BIG_ENDIAN machine. */
1353 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1354 bitnum = unit - bitsize - bitnum;
1356 if (target == 0)
1357 target = spec_target = gen_reg_rtx (tmode);
1359 if (GET_MODE (target) != ext_mode)
1361 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1362 between the mode of the extraction (word_mode) and the target
1363 mode. Instead, create a temporary and use convert_move to set
1364 the target. */
1365 if (REG_P (target)
1366 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1368 target = gen_lowpart (ext_mode, target);
1369 if (GET_MODE_PRECISION (ext_mode)
1370 > GET_MODE_PRECISION (GET_MODE (spec_target)))
1371 spec_target_subreg = target;
1373 else
1374 target = gen_reg_rtx (ext_mode);
1377 create_output_operand (&ops[0], target, ext_mode);
1378 create_fixed_operand (&ops[1], op0);
1379 create_integer_operand (&ops[2], bitsize);
1380 create_integer_operand (&ops[3], bitnum);
1381 if (maybe_expand_insn (extv->icode, 4, ops))
1383 target = ops[0].value;
1384 if (target == spec_target)
1385 return target;
1386 if (target == spec_target_subreg)
1387 return spec_target;
1388 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1390 return NULL_RTX;
1393 /* A subroutine of extract_bit_field, with the same arguments.
1394 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1395 if we can find no other means of implementing the operation.
1396 if FALLBACK_P is false, return NULL instead. */
1398 static rtx
1399 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1400 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1401 enum machine_mode mode, enum machine_mode tmode,
1402 bool fallback_p)
1404 rtx op0 = str_rtx;
1405 enum machine_mode int_mode;
1406 enum machine_mode mode1;
1408 if (tmode == VOIDmode)
1409 tmode = mode;
1411 while (GET_CODE (op0) == SUBREG)
1413 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1414 op0 = SUBREG_REG (op0);
1417 /* If we have an out-of-bounds access to a register, just return an
1418 uninitialized register of the required mode. This can occur if the
1419 source code contains an out-of-bounds access to a small array. */
1420 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1421 return gen_reg_rtx (tmode);
1423 if (REG_P (op0)
1424 && mode == GET_MODE (op0)
1425 && bitnum == 0
1426 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1428 /* We're trying to extract a full register from itself. */
1429 return op0;
1432 /* See if we can get a better vector mode before extracting. */
1433 if (VECTOR_MODE_P (GET_MODE (op0))
1434 && !MEM_P (op0)
1435 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1437 enum machine_mode new_mode;
1439 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1440 new_mode = MIN_MODE_VECTOR_FLOAT;
1441 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1442 new_mode = MIN_MODE_VECTOR_FRACT;
1443 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1444 new_mode = MIN_MODE_VECTOR_UFRACT;
1445 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1446 new_mode = MIN_MODE_VECTOR_ACCUM;
1447 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1448 new_mode = MIN_MODE_VECTOR_UACCUM;
1449 else
1450 new_mode = MIN_MODE_VECTOR_INT;
1452 for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1453 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1454 && targetm.vector_mode_supported_p (new_mode))
1455 break;
1456 if (new_mode != VOIDmode)
1457 op0 = gen_lowpart (new_mode, op0);
1460 /* Use vec_extract patterns for extracting parts of vectors whenever
1461 available. */
1462 if (VECTOR_MODE_P (GET_MODE (op0))
1463 && !MEM_P (op0)
1464 && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1465 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1466 == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1468 struct expand_operand ops[3];
1469 enum machine_mode outermode = GET_MODE (op0);
1470 enum machine_mode innermode = GET_MODE_INNER (outermode);
1471 enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1472 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1474 create_output_operand (&ops[0], target, innermode);
1475 create_input_operand (&ops[1], op0, outermode);
1476 create_integer_operand (&ops[2], pos);
1477 if (maybe_expand_insn (icode, 3, ops))
1479 target = ops[0].value;
1480 if (GET_MODE (target) != mode)
1481 return gen_lowpart (tmode, target);
1482 return target;
1486 /* Make sure we are playing with integral modes. Pun with subregs
1487 if we aren't. */
1489 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1490 if (imode != GET_MODE (op0))
1492 if (MEM_P (op0))
1493 op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
1494 else if (imode != BLKmode)
1496 op0 = gen_lowpart (imode, op0);
1498 /* If we got a SUBREG, force it into a register since we
1499 aren't going to be able to do another SUBREG on it. */
1500 if (GET_CODE (op0) == SUBREG)
1501 op0 = force_reg (imode, op0);
1503 else if (REG_P (op0))
1505 rtx reg, subreg;
1506 imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1507 MODE_INT);
1508 reg = gen_reg_rtx (imode);
1509 subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1510 emit_move_insn (subreg, op0);
1511 op0 = reg;
1512 bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1514 else
1516 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1517 rtx mem = assign_stack_temp (GET_MODE (op0), size);
1518 emit_move_insn (mem, op0);
1519 op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1524 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1525 If that's wrong, the solution is to test for it and set TARGET to 0
1526 if needed. */
1528 /* Get the mode of the field to use for atomic access or subreg
1529 conversion. */
1530 mode1 = mode;
1531 if (SCALAR_INT_MODE_P (tmode))
1533 enum machine_mode try_mode = mode_for_size (bitsize,
1534 GET_MODE_CLASS (tmode), 0);
1535 if (try_mode != BLKmode)
1536 mode1 = try_mode;
1538 gcc_assert (mode1 != BLKmode);
1540 /* Extraction of a full MODE1 value can be done with a subreg as long
1541 as the least significant bit of the value is the least significant
1542 bit of either OP0 or a word of OP0. */
1543 if (!MEM_P (op0)
1544 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1545 && bitsize == GET_MODE_BITSIZE (mode1)
1546 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1548 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1549 bitnum / BITS_PER_UNIT);
1550 if (sub)
1551 return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1554 /* Extraction of a full MODE1 value can be done with a load as long as
1555 the field is on a byte boundary and is sufficiently aligned. */
1556 if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1558 op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1559 return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1562 /* Handle fields bigger than a word. */
1564 if (bitsize > BITS_PER_WORD)
1566 /* Here we transfer the words of the field
1567 in the order least significant first.
1568 This is because the most significant word is the one which may
1569 be less than full. */
1571 unsigned int backwards = WORDS_BIG_ENDIAN;
1572 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1573 unsigned int i;
1574 rtx last;
1576 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1577 target = gen_reg_rtx (mode);
1579 /* Indicate for flow that the entire target reg is being set. */
1580 emit_clobber (target);
1582 last = get_last_insn ();
1583 for (i = 0; i < nwords; i++)
1585 /* If I is 0, use the low-order word in both field and target;
1586 if I is 1, use the next to lowest word; and so on. */
1587 /* Word number in TARGET to use. */
1588 unsigned int wordnum
1589 = (backwards
1590 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1591 : i);
1592 /* Offset from start of field in OP0. */
1593 unsigned int bit_offset = (backwards
1594 ? MAX ((int) bitsize - ((int) i + 1)
1595 * BITS_PER_WORD,
1597 : (int) i * BITS_PER_WORD);
1598 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1599 rtx result_part
1600 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1601 bitsize - i * BITS_PER_WORD),
1602 bitnum + bit_offset, 1, target_part,
1603 mode, word_mode, fallback_p);
1605 gcc_assert (target_part);
1606 if (!result_part)
1608 delete_insns_since (last);
1609 return NULL;
1612 if (result_part != target_part)
1613 emit_move_insn (target_part, result_part);
1616 if (unsignedp)
1618 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1619 need to be zero'd out. */
1620 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1622 unsigned int i, total_words;
1624 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1625 for (i = nwords; i < total_words; i++)
1626 emit_move_insn
1627 (operand_subword (target,
1628 backwards ? total_words - i - 1 : i,
1629 1, VOIDmode),
1630 const0_rtx);
1632 return target;
1635 /* Signed bit field: sign-extend with two arithmetic shifts. */
1636 target = expand_shift (LSHIFT_EXPR, mode, target,
1637 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1638 return expand_shift (RSHIFT_EXPR, mode, target,
1639 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1642 /* If OP0 is a multi-word register, narrow it to the affected word.
1643 If the region spans two words, defer to extract_split_bit_field. */
1644 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1646 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1647 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1648 bitnum %= BITS_PER_WORD;
1649 if (bitnum + bitsize > BITS_PER_WORD)
1651 if (!fallback_p)
1652 return NULL_RTX;
1653 target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1654 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1658 /* From here on we know the desired field is smaller than a word.
1659 If OP0 is a register, it too fits within a word. */
1660 enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1661 extraction_insn extv;
1662 if (!MEM_P (op0)
1663 /* ??? We could limit the structure size to the part of OP0 that
1664 contains the field, with appropriate checks for endianness
1665 and TRULY_NOOP_TRUNCATION. */
1666 && get_best_reg_extraction_insn (&extv, pattern,
1667 GET_MODE_BITSIZE (GET_MODE (op0)),
1668 tmode))
1670 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1671 unsignedp, target, mode,
1672 tmode);
1673 if (result)
1674 return result;
1677 /* If OP0 is a memory, try copying it to a register and seeing if a
1678 cheap register alternative is available. */
1679 if (MEM_P (op0))
1681 if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1682 tmode))
1684 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1685 bitnum, unsignedp,
1686 target, mode,
1687 tmode);
1688 if (result)
1689 return result;
1692 rtx last = get_last_insn ();
1694 /* Try loading part of OP0 into a register and extracting the
1695 bitfield from that. */
1696 unsigned HOST_WIDE_INT bitpos;
1697 rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1698 0, 0, tmode, &bitpos);
1699 if (xop0)
1701 xop0 = copy_to_reg (xop0);
1702 rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1703 unsignedp, target,
1704 mode, tmode, false);
1705 if (result)
1706 return result;
1707 delete_insns_since (last);
1711 if (!fallback_p)
1712 return NULL;
1714 /* Find a correspondingly-sized integer field, so we can apply
1715 shifts and masks to it. */
1716 int_mode = int_mode_for_mode (tmode);
1717 if (int_mode == BLKmode)
1718 int_mode = int_mode_for_mode (mode);
1719 /* Should probably push op0 out to memory and then do a load. */
1720 gcc_assert (int_mode != BLKmode);
1722 target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum,
1723 target, unsignedp);
1724 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1727 /* Generate code to extract a byte-field from STR_RTX
1728 containing BITSIZE bits, starting at BITNUM,
1729 and put it in TARGET if possible (if TARGET is nonzero).
1730 Regardless of TARGET, we return the rtx for where the value is placed.
1732 STR_RTX is the structure containing the byte (a REG or MEM).
1733 UNSIGNEDP is nonzero if this is an unsigned bit field.
1734 MODE is the natural mode of the field value once extracted.
1735 TMODE is the mode the caller would like the value to have;
1736 but the value may be returned with type MODE instead.
1738 If a TARGET is specified and we can store in it at no extra cost,
1739 we do so, and return TARGET.
1740 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1741 if they are equally easy. */
1744 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1745 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1746 enum machine_mode mode, enum machine_mode tmode)
1748 enum machine_mode mode1;
1750 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1751 if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1752 mode1 = GET_MODE (str_rtx);
1753 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1754 mode1 = GET_MODE (target);
1755 else
1756 mode1 = tmode;
1758 if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, mode1, 0, 0))
1760 rtx result;
1762 /* Extraction of a full MODE1 value can be done with a load as long as
1763 the field is on a byte boundary and is sufficiently aligned. */
1764 if (simple_mem_bitfield_p (str_rtx, bitsize, bitnum, mode1))
1765 result = adjust_bitfield_address (str_rtx, mode1,
1766 bitnum / BITS_PER_UNIT);
1767 else
1769 str_rtx = narrow_bit_field_mem (str_rtx, mode1, bitsize, bitnum,
1770 &bitnum);
1771 result = extract_fixed_bit_field_1 (mode, str_rtx, bitsize, bitnum,
1772 target, unsignedp);
1775 return convert_extracted_bit_field (result, mode, tmode, unsignedp);
1778 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1779 target, mode, tmode, true);
1782 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1783 from bit BITNUM of OP0.
1785 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1786 If TARGET is nonzero, attempts to store the value there
1787 and return TARGET, but this is not guaranteed.
1788 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
1790 static rtx
1791 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1792 unsigned HOST_WIDE_INT bitsize,
1793 unsigned HOST_WIDE_INT bitnum, rtx target,
1794 int unsignedp)
1796 enum machine_mode mode;
1798 if (MEM_P (op0))
1800 mode = get_best_mode (bitsize, bitnum, 0, 0,
1801 MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1803 if (mode == VOIDmode)
1804 /* The only way this should occur is if the field spans word
1805 boundaries. */
1806 return extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1808 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1811 return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
1812 target, unsignedp);
1815 /* Helper function for extract_fixed_bit_field, extracts
1816 the bit field always using the MODE of OP0. */
1818 static rtx
1819 extract_fixed_bit_field_1 (enum machine_mode tmode, rtx op0,
1820 unsigned HOST_WIDE_INT bitsize,
1821 unsigned HOST_WIDE_INT bitnum, rtx target,
1822 int unsignedp)
1824 enum machine_mode mode;
1826 mode = GET_MODE (op0);
1827 gcc_assert (SCALAR_INT_MODE_P (mode));
1829 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1830 for invalid input, such as extract equivalent of f5 from
1831 gcc.dg/pr48335-2.c. */
1833 if (BYTES_BIG_ENDIAN)
1834 /* BITNUM is the distance between our msb and that of OP0.
1835 Convert it to the distance from the lsb. */
1836 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1838 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
1839 We have reduced the big-endian case to the little-endian case. */
1841 if (unsignedp)
1843 if (bitnum)
1845 /* If the field does not already start at the lsb,
1846 shift it so it does. */
1847 /* Maybe propagate the target for the shift. */
1848 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1849 if (tmode != mode)
1850 subtarget = 0;
1851 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
1853 /* Convert the value to the desired mode. */
1854 if (mode != tmode)
1855 op0 = convert_to_mode (tmode, op0, 1);
1857 /* Unless the msb of the field used to be the msb when we shifted,
1858 mask out the upper bits. */
1860 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
1861 return expand_binop (GET_MODE (op0), and_optab, op0,
1862 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1863 target, 1, OPTAB_LIB_WIDEN);
1864 return op0;
1867 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1868 then arithmetic-shift its lsb to the lsb of the word. */
1869 op0 = force_reg (mode, op0);
1871 /* Find the narrowest integer mode that contains the field. */
1873 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1874 mode = GET_MODE_WIDER_MODE (mode))
1875 if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
1877 op0 = convert_to_mode (mode, op0, 0);
1878 break;
1881 if (mode != tmode)
1882 target = 0;
1884 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
1886 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
1887 /* Maybe propagate the target for the shift. */
1888 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1889 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1892 return expand_shift (RSHIFT_EXPR, mode, op0,
1893 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1896 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1897 of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1898 complement of that if COMPLEMENT. The mask is truncated if
1899 necessary to the width of mode MODE. The mask is zero-extended if
1900 BITSIZE+BITPOS is too small for MODE. */
1902 static rtx
1903 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1905 double_int mask;
1907 mask = double_int::mask (bitsize);
1908 mask = mask.llshift (bitpos, HOST_BITS_PER_DOUBLE_INT);
1910 if (complement)
1911 mask = ~mask;
1913 return immed_double_int_const (mask, mode);
1916 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1917 VALUE << BITPOS. */
1919 static rtx
1920 lshift_value (enum machine_mode mode, unsigned HOST_WIDE_INT value,
1921 int bitpos)
1923 double_int val;
1925 val = double_int::from_uhwi (value);
1926 val = val.llshift (bitpos, HOST_BITS_PER_DOUBLE_INT);
1928 return immed_double_int_const (val, mode);
1931 /* Extract a bit field that is split across two words
1932 and return an RTX for the result.
1934 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1935 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1936 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend. */
1938 static rtx
1939 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1940 unsigned HOST_WIDE_INT bitpos, int unsignedp)
1942 unsigned int unit;
1943 unsigned int bitsdone = 0;
1944 rtx result = NULL_RTX;
1945 int first = 1;
1947 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1948 much at a time. */
1949 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1950 unit = BITS_PER_WORD;
1951 else
1952 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1954 while (bitsdone < bitsize)
1956 unsigned HOST_WIDE_INT thissize;
1957 rtx part, word;
1958 unsigned HOST_WIDE_INT thispos;
1959 unsigned HOST_WIDE_INT offset;
1961 offset = (bitpos + bitsdone) / unit;
1962 thispos = (bitpos + bitsdone) % unit;
1964 /* THISSIZE must not overrun a word boundary. Otherwise,
1965 extract_fixed_bit_field will call us again, and we will mutually
1966 recurse forever. */
1967 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1968 thissize = MIN (thissize, unit - thispos);
1970 /* If OP0 is a register, then handle OFFSET here.
1972 When handling multiword bitfields, extract_bit_field may pass
1973 down a word_mode SUBREG of a larger REG for a bitfield that actually
1974 crosses a word boundary. Thus, for a SUBREG, we must find
1975 the current word starting from the base register. */
1976 if (GET_CODE (op0) == SUBREG)
1978 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1979 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1980 GET_MODE (SUBREG_REG (op0)));
1981 offset = 0;
1983 else if (REG_P (op0))
1985 word = operand_subword_force (op0, offset, GET_MODE (op0));
1986 offset = 0;
1988 else
1989 word = op0;
1991 /* Extract the parts in bit-counting order,
1992 whose meaning is determined by BYTES_PER_UNIT.
1993 OFFSET is in UNITs, and UNIT is in bits. */
1994 part = extract_fixed_bit_field (word_mode, word, thissize,
1995 offset * unit + thispos, 0, 1);
1996 bitsdone += thissize;
1998 /* Shift this part into place for the result. */
1999 if (BYTES_BIG_ENDIAN)
2001 if (bitsize != bitsdone)
2002 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2003 bitsize - bitsdone, 0, 1);
2005 else
2007 if (bitsdone != thissize)
2008 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2009 bitsdone - thissize, 0, 1);
2012 if (first)
2013 result = part;
2014 else
2015 /* Combine the parts with bitwise or. This works
2016 because we extracted each part as an unsigned bit field. */
2017 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2018 OPTAB_LIB_WIDEN);
2020 first = 0;
2023 /* Unsigned bit field: we are done. */
2024 if (unsignedp)
2025 return result;
2026 /* Signed bit field: sign-extend with two arithmetic shifts. */
2027 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2028 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2029 return expand_shift (RSHIFT_EXPR, word_mode, result,
2030 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2033 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2034 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2035 MODE, fill the upper bits with zeros. Fail if the layout of either
2036 mode is unknown (as for CC modes) or if the extraction would involve
2037 unprofitable mode punning. Return the value on success, otherwise
2038 return null.
2040 This is different from gen_lowpart* in these respects:
2042 - the returned value must always be considered an rvalue
2044 - when MODE is wider than SRC_MODE, the extraction involves
2045 a zero extension
2047 - when MODE is smaller than SRC_MODE, the extraction involves
2048 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2050 In other words, this routine performs a computation, whereas the
2051 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2052 operations. */
2055 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
2057 enum machine_mode int_mode, src_int_mode;
2059 if (mode == src_mode)
2060 return src;
2062 if (CONSTANT_P (src))
2064 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2065 fails, it will happily create (subreg (symbol_ref)) or similar
2066 invalid SUBREGs. */
2067 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2068 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2069 if (ret)
2070 return ret;
2072 if (GET_MODE (src) == VOIDmode
2073 || !validate_subreg (mode, src_mode, src, byte))
2074 return NULL_RTX;
2076 src = force_reg (GET_MODE (src), src);
2077 return gen_rtx_SUBREG (mode, src, byte);
2080 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2081 return NULL_RTX;
2083 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2084 && MODES_TIEABLE_P (mode, src_mode))
2086 rtx x = gen_lowpart_common (mode, src);
2087 if (x)
2088 return x;
2091 src_int_mode = int_mode_for_mode (src_mode);
2092 int_mode = int_mode_for_mode (mode);
2093 if (src_int_mode == BLKmode || int_mode == BLKmode)
2094 return NULL_RTX;
2096 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2097 return NULL_RTX;
2098 if (!MODES_TIEABLE_P (int_mode, mode))
2099 return NULL_RTX;
2101 src = gen_lowpart (src_int_mode, src);
2102 src = convert_modes (int_mode, src_int_mode, src, true);
2103 src = gen_lowpart (mode, src);
2104 return src;
2107 /* Add INC into TARGET. */
2109 void
2110 expand_inc (rtx target, rtx inc)
2112 rtx value = expand_binop (GET_MODE (target), add_optab,
2113 target, inc,
2114 target, 0, OPTAB_LIB_WIDEN);
2115 if (value != target)
2116 emit_move_insn (target, value);
2119 /* Subtract DEC from TARGET. */
2121 void
2122 expand_dec (rtx target, rtx dec)
2124 rtx value = expand_binop (GET_MODE (target), sub_optab,
2125 target, dec,
2126 target, 0, OPTAB_LIB_WIDEN);
2127 if (value != target)
2128 emit_move_insn (target, value);
2131 /* Output a shift instruction for expression code CODE,
2132 with SHIFTED being the rtx for the value to shift,
2133 and AMOUNT the rtx for the amount to shift by.
2134 Store the result in the rtx TARGET, if that is convenient.
2135 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2136 Return the rtx for where the value is. */
2138 static rtx
2139 expand_shift_1 (enum tree_code code, enum machine_mode mode, rtx shifted,
2140 rtx amount, rtx target, int unsignedp)
2142 rtx op1, temp = 0;
2143 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2144 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2145 optab lshift_optab = ashl_optab;
2146 optab rshift_arith_optab = ashr_optab;
2147 optab rshift_uns_optab = lshr_optab;
2148 optab lrotate_optab = rotl_optab;
2149 optab rrotate_optab = rotr_optab;
2150 enum machine_mode op1_mode;
2151 int attempt;
2152 bool speed = optimize_insn_for_speed_p ();
2154 op1 = amount;
2155 op1_mode = GET_MODE (op1);
2157 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2158 shift amount is a vector, use the vector/vector shift patterns. */
2159 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2161 lshift_optab = vashl_optab;
2162 rshift_arith_optab = vashr_optab;
2163 rshift_uns_optab = vlshr_optab;
2164 lrotate_optab = vrotl_optab;
2165 rrotate_optab = vrotr_optab;
2168 /* Previously detected shift-counts computed by NEGATE_EXPR
2169 and shifted in the other direction; but that does not work
2170 on all machines. */
2172 if (SHIFT_COUNT_TRUNCATED)
2174 if (CONST_INT_P (op1)
2175 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2176 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2177 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2178 % GET_MODE_BITSIZE (mode));
2179 else if (GET_CODE (op1) == SUBREG
2180 && subreg_lowpart_p (op1)
2181 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2182 && SCALAR_INT_MODE_P (GET_MODE (op1)))
2183 op1 = SUBREG_REG (op1);
2186 /* Canonicalize rotates by constant amount. If op1 is bitsize / 2,
2187 prefer left rotation, if op1 is from bitsize / 2 + 1 to
2188 bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2189 amount instead. */
2190 if (rotate
2191 && CONST_INT_P (op1)
2192 && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (mode) / 2 + left,
2193 GET_MODE_BITSIZE (mode) - 1))
2195 op1 = GEN_INT (GET_MODE_BITSIZE (mode) - INTVAL (op1));
2196 left = !left;
2197 code = left ? LROTATE_EXPR : RROTATE_EXPR;
2200 if (op1 == const0_rtx)
2201 return shifted;
2203 /* Check whether its cheaper to implement a left shift by a constant
2204 bit count by a sequence of additions. */
2205 if (code == LSHIFT_EXPR
2206 && CONST_INT_P (op1)
2207 && INTVAL (op1) > 0
2208 && INTVAL (op1) < GET_MODE_PRECISION (mode)
2209 && INTVAL (op1) < MAX_BITS_PER_WORD
2210 && (shift_cost (speed, mode, INTVAL (op1))
2211 > INTVAL (op1) * add_cost (speed, mode))
2212 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2214 int i;
2215 for (i = 0; i < INTVAL (op1); i++)
2217 temp = force_reg (mode, shifted);
2218 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2219 unsignedp, OPTAB_LIB_WIDEN);
2221 return shifted;
2224 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2226 enum optab_methods methods;
2228 if (attempt == 0)
2229 methods = OPTAB_DIRECT;
2230 else if (attempt == 1)
2231 methods = OPTAB_WIDEN;
2232 else
2233 methods = OPTAB_LIB_WIDEN;
2235 if (rotate)
2237 /* Widening does not work for rotation. */
2238 if (methods == OPTAB_WIDEN)
2239 continue;
2240 else if (methods == OPTAB_LIB_WIDEN)
2242 /* If we have been unable to open-code this by a rotation,
2243 do it as the IOR of two shifts. I.e., to rotate A
2244 by N bits, compute
2245 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2246 where C is the bitsize of A.
2248 It is theoretically possible that the target machine might
2249 not be able to perform either shift and hence we would
2250 be making two libcalls rather than just the one for the
2251 shift (similarly if IOR could not be done). We will allow
2252 this extremely unlikely lossage to avoid complicating the
2253 code below. */
2255 rtx subtarget = target == shifted ? 0 : target;
2256 rtx new_amount, other_amount;
2257 rtx temp1;
2259 new_amount = op1;
2260 if (op1 == const0_rtx)
2261 return shifted;
2262 else if (CONST_INT_P (op1))
2263 other_amount = GEN_INT (GET_MODE_BITSIZE (mode)
2264 - INTVAL (op1));
2265 else
2267 other_amount
2268 = simplify_gen_unary (NEG, GET_MODE (op1),
2269 op1, GET_MODE (op1));
2270 HOST_WIDE_INT mask = GET_MODE_PRECISION (mode) - 1;
2271 other_amount
2272 = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2273 gen_int_mode (mask, GET_MODE (op1)));
2276 shifted = force_reg (mode, shifted);
2278 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2279 mode, shifted, new_amount, 0, 1);
2280 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2281 mode, shifted, other_amount,
2282 subtarget, 1);
2283 return expand_binop (mode, ior_optab, temp, temp1, target,
2284 unsignedp, methods);
2287 temp = expand_binop (mode,
2288 left ? lrotate_optab : rrotate_optab,
2289 shifted, op1, target, unsignedp, methods);
2291 else if (unsignedp)
2292 temp = expand_binop (mode,
2293 left ? lshift_optab : rshift_uns_optab,
2294 shifted, op1, target, unsignedp, methods);
2296 /* Do arithmetic shifts.
2297 Also, if we are going to widen the operand, we can just as well
2298 use an arithmetic right-shift instead of a logical one. */
2299 if (temp == 0 && ! rotate
2300 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2302 enum optab_methods methods1 = methods;
2304 /* If trying to widen a log shift to an arithmetic shift,
2305 don't accept an arithmetic shift of the same size. */
2306 if (unsignedp)
2307 methods1 = OPTAB_MUST_WIDEN;
2309 /* Arithmetic shift */
2311 temp = expand_binop (mode,
2312 left ? lshift_optab : rshift_arith_optab,
2313 shifted, op1, target, unsignedp, methods1);
2316 /* We used to try extzv here for logical right shifts, but that was
2317 only useful for one machine, the VAX, and caused poor code
2318 generation there for lshrdi3, so the code was deleted and a
2319 define_expand for lshrsi3 was added to vax.md. */
2322 gcc_assert (temp);
2323 return temp;
2326 /* Output a shift instruction for expression code CODE,
2327 with SHIFTED being the rtx for the value to shift,
2328 and AMOUNT the amount to shift by.
2329 Store the result in the rtx TARGET, if that is convenient.
2330 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2331 Return the rtx for where the value is. */
2334 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2335 int amount, rtx target, int unsignedp)
2337 return expand_shift_1 (code, mode,
2338 shifted, GEN_INT (amount), target, unsignedp);
2341 /* Output a shift instruction for expression code CODE,
2342 with SHIFTED being the rtx for the value to shift,
2343 and AMOUNT the tree for the amount to shift by.
2344 Store the result in the rtx TARGET, if that is convenient.
2345 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2346 Return the rtx for where the value is. */
2349 expand_variable_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2350 tree amount, rtx target, int unsignedp)
2352 return expand_shift_1 (code, mode,
2353 shifted, expand_normal (amount), target, unsignedp);
2357 /* Indicates the type of fixup needed after a constant multiplication.
2358 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2359 the result should be negated, and ADD_VARIANT means that the
2360 multiplicand should be added to the result. */
2361 enum mult_variant {basic_variant, negate_variant, add_variant};
2363 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2364 const struct mult_cost *, enum machine_mode mode);
2365 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2366 struct algorithm *, enum mult_variant *, int);
2367 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2368 const struct algorithm *, enum mult_variant);
2369 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2370 static rtx extract_high_half (enum machine_mode, rtx);
2371 static rtx expmed_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2372 static rtx expmed_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2373 int, int);
2374 /* Compute and return the best algorithm for multiplying by T.
2375 The algorithm must cost less than cost_limit
2376 If retval.cost >= COST_LIMIT, no algorithm was found and all
2377 other field of the returned struct are undefined.
2378 MODE is the machine mode of the multiplication. */
2380 static void
2381 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2382 const struct mult_cost *cost_limit, enum machine_mode mode)
2384 int m;
2385 struct algorithm *alg_in, *best_alg;
2386 struct mult_cost best_cost;
2387 struct mult_cost new_limit;
2388 int op_cost, op_latency;
2389 unsigned HOST_WIDE_INT orig_t = t;
2390 unsigned HOST_WIDE_INT q;
2391 int maxm, hash_index;
2392 bool cache_hit = false;
2393 enum alg_code cache_alg = alg_zero;
2394 bool speed = optimize_insn_for_speed_p ();
2395 enum machine_mode imode;
2396 struct alg_hash_entry *entry_ptr;
2398 /* Indicate that no algorithm is yet found. If no algorithm
2399 is found, this value will be returned and indicate failure. */
2400 alg_out->cost.cost = cost_limit->cost + 1;
2401 alg_out->cost.latency = cost_limit->latency + 1;
2403 if (cost_limit->cost < 0
2404 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2405 return;
2407 /* Be prepared for vector modes. */
2408 imode = GET_MODE_INNER (mode);
2409 if (imode == VOIDmode)
2410 imode = mode;
2412 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2414 /* Restrict the bits of "t" to the multiplication's mode. */
2415 t &= GET_MODE_MASK (imode);
2417 /* t == 1 can be done in zero cost. */
2418 if (t == 1)
2420 alg_out->ops = 1;
2421 alg_out->cost.cost = 0;
2422 alg_out->cost.latency = 0;
2423 alg_out->op[0] = alg_m;
2424 return;
2427 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2428 fail now. */
2429 if (t == 0)
2431 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2432 return;
2433 else
2435 alg_out->ops = 1;
2436 alg_out->cost.cost = zero_cost (speed);
2437 alg_out->cost.latency = zero_cost (speed);
2438 alg_out->op[0] = alg_zero;
2439 return;
2443 /* We'll be needing a couple extra algorithm structures now. */
2445 alg_in = XALLOCA (struct algorithm);
2446 best_alg = XALLOCA (struct algorithm);
2447 best_cost = *cost_limit;
2449 /* Compute the hash index. */
2450 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2452 /* See if we already know what to do for T. */
2453 entry_ptr = alg_hash_entry_ptr (hash_index);
2454 if (entry_ptr->t == t
2455 && entry_ptr->mode == mode
2456 && entry_ptr->mode == mode
2457 && entry_ptr->speed == speed
2458 && entry_ptr->alg != alg_unknown)
2460 cache_alg = entry_ptr->alg;
2462 if (cache_alg == alg_impossible)
2464 /* The cache tells us that it's impossible to synthesize
2465 multiplication by T within entry_ptr->cost. */
2466 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2467 /* COST_LIMIT is at least as restrictive as the one
2468 recorded in the hash table, in which case we have no
2469 hope of synthesizing a multiplication. Just
2470 return. */
2471 return;
2473 /* If we get here, COST_LIMIT is less restrictive than the
2474 one recorded in the hash table, so we may be able to
2475 synthesize a multiplication. Proceed as if we didn't
2476 have the cache entry. */
2478 else
2480 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2481 /* The cached algorithm shows that this multiplication
2482 requires more cost than COST_LIMIT. Just return. This
2483 way, we don't clobber this cache entry with
2484 alg_impossible but retain useful information. */
2485 return;
2487 cache_hit = true;
2489 switch (cache_alg)
2491 case alg_shift:
2492 goto do_alg_shift;
2494 case alg_add_t_m2:
2495 case alg_sub_t_m2:
2496 goto do_alg_addsub_t_m2;
2498 case alg_add_factor:
2499 case alg_sub_factor:
2500 goto do_alg_addsub_factor;
2502 case alg_add_t2_m:
2503 goto do_alg_add_t2_m;
2505 case alg_sub_t2_m:
2506 goto do_alg_sub_t2_m;
2508 default:
2509 gcc_unreachable ();
2514 /* If we have a group of zero bits at the low-order part of T, try
2515 multiplying by the remaining bits and then doing a shift. */
2517 if ((t & 1) == 0)
2519 do_alg_shift:
2520 m = floor_log2 (t & -t); /* m = number of low zero bits */
2521 if (m < maxm)
2523 q = t >> m;
2524 /* The function expand_shift will choose between a shift and
2525 a sequence of additions, so the observed cost is given as
2526 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2527 op_cost = m * add_cost (speed, mode);
2528 if (shift_cost (speed, mode, m) < op_cost)
2529 op_cost = shift_cost (speed, mode, m);
2530 new_limit.cost = best_cost.cost - op_cost;
2531 new_limit.latency = best_cost.latency - op_cost;
2532 synth_mult (alg_in, q, &new_limit, mode);
2534 alg_in->cost.cost += op_cost;
2535 alg_in->cost.latency += op_cost;
2536 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2538 struct algorithm *x;
2539 best_cost = alg_in->cost;
2540 x = alg_in, alg_in = best_alg, best_alg = x;
2541 best_alg->log[best_alg->ops] = m;
2542 best_alg->op[best_alg->ops] = alg_shift;
2545 /* See if treating ORIG_T as a signed number yields a better
2546 sequence. Try this sequence only for a negative ORIG_T
2547 as it would be useless for a non-negative ORIG_T. */
2548 if ((HOST_WIDE_INT) orig_t < 0)
2550 /* Shift ORIG_T as follows because a right shift of a
2551 negative-valued signed type is implementation
2552 defined. */
2553 q = ~(~orig_t >> m);
2554 /* The function expand_shift will choose between a shift
2555 and a sequence of additions, so the observed cost is
2556 given as MIN (m * add_cost(speed, mode),
2557 shift_cost(speed, mode, m)). */
2558 op_cost = m * add_cost (speed, mode);
2559 if (shift_cost (speed, mode, m) < op_cost)
2560 op_cost = shift_cost (speed, mode, m);
2561 new_limit.cost = best_cost.cost - op_cost;
2562 new_limit.latency = best_cost.latency - op_cost;
2563 synth_mult (alg_in, q, &new_limit, mode);
2565 alg_in->cost.cost += op_cost;
2566 alg_in->cost.latency += op_cost;
2567 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2569 struct algorithm *x;
2570 best_cost = alg_in->cost;
2571 x = alg_in, alg_in = best_alg, best_alg = x;
2572 best_alg->log[best_alg->ops] = m;
2573 best_alg->op[best_alg->ops] = alg_shift;
2577 if (cache_hit)
2578 goto done;
2581 /* If we have an odd number, add or subtract one. */
2582 if ((t & 1) != 0)
2584 unsigned HOST_WIDE_INT w;
2586 do_alg_addsub_t_m2:
2587 for (w = 1; (w & t) != 0; w <<= 1)
2589 /* If T was -1, then W will be zero after the loop. This is another
2590 case where T ends with ...111. Handling this with (T + 1) and
2591 subtract 1 produces slightly better code and results in algorithm
2592 selection much faster than treating it like the ...0111 case
2593 below. */
2594 if (w == 0
2595 || (w > 2
2596 /* Reject the case where t is 3.
2597 Thus we prefer addition in that case. */
2598 && t != 3))
2600 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2602 op_cost = add_cost (speed, mode);
2603 new_limit.cost = best_cost.cost - op_cost;
2604 new_limit.latency = best_cost.latency - op_cost;
2605 synth_mult (alg_in, t + 1, &new_limit, mode);
2607 alg_in->cost.cost += op_cost;
2608 alg_in->cost.latency += op_cost;
2609 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2611 struct algorithm *x;
2612 best_cost = alg_in->cost;
2613 x = alg_in, alg_in = best_alg, best_alg = x;
2614 best_alg->log[best_alg->ops] = 0;
2615 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2618 else
2620 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2622 op_cost = add_cost (speed, mode);
2623 new_limit.cost = best_cost.cost - op_cost;
2624 new_limit.latency = best_cost.latency - op_cost;
2625 synth_mult (alg_in, t - 1, &new_limit, mode);
2627 alg_in->cost.cost += op_cost;
2628 alg_in->cost.latency += op_cost;
2629 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2631 struct algorithm *x;
2632 best_cost = alg_in->cost;
2633 x = alg_in, alg_in = best_alg, best_alg = x;
2634 best_alg->log[best_alg->ops] = 0;
2635 best_alg->op[best_alg->ops] = alg_add_t_m2;
2639 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2640 quickly with a - a * n for some appropriate constant n. */
2641 m = exact_log2 (-orig_t + 1);
2642 if (m >= 0 && m < maxm)
2644 op_cost = shiftsub1_cost (speed, mode, m);
2645 new_limit.cost = best_cost.cost - op_cost;
2646 new_limit.latency = best_cost.latency - op_cost;
2647 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2648 &new_limit, mode);
2650 alg_in->cost.cost += op_cost;
2651 alg_in->cost.latency += op_cost;
2652 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2654 struct algorithm *x;
2655 best_cost = alg_in->cost;
2656 x = alg_in, alg_in = best_alg, best_alg = x;
2657 best_alg->log[best_alg->ops] = m;
2658 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2662 if (cache_hit)
2663 goto done;
2666 /* Look for factors of t of the form
2667 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2668 If we find such a factor, we can multiply by t using an algorithm that
2669 multiplies by q, shift the result by m and add/subtract it to itself.
2671 We search for large factors first and loop down, even if large factors
2672 are less probable than small; if we find a large factor we will find a
2673 good sequence quickly, and therefore be able to prune (by decreasing
2674 COST_LIMIT) the search. */
2676 do_alg_addsub_factor:
2677 for (m = floor_log2 (t - 1); m >= 2; m--)
2679 unsigned HOST_WIDE_INT d;
2681 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2682 if (t % d == 0 && t > d && m < maxm
2683 && (!cache_hit || cache_alg == alg_add_factor))
2685 /* If the target has a cheap shift-and-add instruction use
2686 that in preference to a shift insn followed by an add insn.
2687 Assume that the shift-and-add is "atomic" with a latency
2688 equal to its cost, otherwise assume that on superscalar
2689 hardware the shift may be executed concurrently with the
2690 earlier steps in the algorithm. */
2691 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2692 if (shiftadd_cost (speed, mode, m) < op_cost)
2694 op_cost = shiftadd_cost (speed, mode, m);
2695 op_latency = op_cost;
2697 else
2698 op_latency = add_cost (speed, mode);
2700 new_limit.cost = best_cost.cost - op_cost;
2701 new_limit.latency = best_cost.latency - op_latency;
2702 synth_mult (alg_in, t / d, &new_limit, mode);
2704 alg_in->cost.cost += op_cost;
2705 alg_in->cost.latency += op_latency;
2706 if (alg_in->cost.latency < op_cost)
2707 alg_in->cost.latency = op_cost;
2708 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2710 struct algorithm *x;
2711 best_cost = alg_in->cost;
2712 x = alg_in, alg_in = best_alg, best_alg = x;
2713 best_alg->log[best_alg->ops] = m;
2714 best_alg->op[best_alg->ops] = alg_add_factor;
2716 /* Other factors will have been taken care of in the recursion. */
2717 break;
2720 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2721 if (t % d == 0 && t > d && m < maxm
2722 && (!cache_hit || cache_alg == alg_sub_factor))
2724 /* If the target has a cheap shift-and-subtract insn use
2725 that in preference to a shift insn followed by a sub insn.
2726 Assume that the shift-and-sub is "atomic" with a latency
2727 equal to it's cost, otherwise assume that on superscalar
2728 hardware the shift may be executed concurrently with the
2729 earlier steps in the algorithm. */
2730 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2731 if (shiftsub0_cost (speed, mode, m) < op_cost)
2733 op_cost = shiftsub0_cost (speed, mode, m);
2734 op_latency = op_cost;
2736 else
2737 op_latency = add_cost (speed, mode);
2739 new_limit.cost = best_cost.cost - op_cost;
2740 new_limit.latency = best_cost.latency - op_latency;
2741 synth_mult (alg_in, t / d, &new_limit, mode);
2743 alg_in->cost.cost += op_cost;
2744 alg_in->cost.latency += op_latency;
2745 if (alg_in->cost.latency < op_cost)
2746 alg_in->cost.latency = op_cost;
2747 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2749 struct algorithm *x;
2750 best_cost = alg_in->cost;
2751 x = alg_in, alg_in = best_alg, best_alg = x;
2752 best_alg->log[best_alg->ops] = m;
2753 best_alg->op[best_alg->ops] = alg_sub_factor;
2755 break;
2758 if (cache_hit)
2759 goto done;
2761 /* Try shift-and-add (load effective address) instructions,
2762 i.e. do a*3, a*5, a*9. */
2763 if ((t & 1) != 0)
2765 do_alg_add_t2_m:
2766 q = t - 1;
2767 q = q & -q;
2768 m = exact_log2 (q);
2769 if (m >= 0 && m < maxm)
2771 op_cost = shiftadd_cost (speed, mode, m);
2772 new_limit.cost = best_cost.cost - op_cost;
2773 new_limit.latency = best_cost.latency - op_cost;
2774 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2776 alg_in->cost.cost += op_cost;
2777 alg_in->cost.latency += op_cost;
2778 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2780 struct algorithm *x;
2781 best_cost = alg_in->cost;
2782 x = alg_in, alg_in = best_alg, best_alg = x;
2783 best_alg->log[best_alg->ops] = m;
2784 best_alg->op[best_alg->ops] = alg_add_t2_m;
2787 if (cache_hit)
2788 goto done;
2790 do_alg_sub_t2_m:
2791 q = t + 1;
2792 q = q & -q;
2793 m = exact_log2 (q);
2794 if (m >= 0 && m < maxm)
2796 op_cost = shiftsub0_cost (speed, mode, m);
2797 new_limit.cost = best_cost.cost - op_cost;
2798 new_limit.latency = best_cost.latency - op_cost;
2799 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2801 alg_in->cost.cost += op_cost;
2802 alg_in->cost.latency += op_cost;
2803 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2805 struct algorithm *x;
2806 best_cost = alg_in->cost;
2807 x = alg_in, alg_in = best_alg, best_alg = x;
2808 best_alg->log[best_alg->ops] = m;
2809 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2812 if (cache_hit)
2813 goto done;
2816 done:
2817 /* If best_cost has not decreased, we have not found any algorithm. */
2818 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2820 /* We failed to find an algorithm. Record alg_impossible for
2821 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2822 we are asked to find an algorithm for T within the same or
2823 lower COST_LIMIT, we can immediately return to the
2824 caller. */
2825 entry_ptr->t = t;
2826 entry_ptr->mode = mode;
2827 entry_ptr->speed = speed;
2828 entry_ptr->alg = alg_impossible;
2829 entry_ptr->cost = *cost_limit;
2830 return;
2833 /* Cache the result. */
2834 if (!cache_hit)
2836 entry_ptr->t = t;
2837 entry_ptr->mode = mode;
2838 entry_ptr->speed = speed;
2839 entry_ptr->alg = best_alg->op[best_alg->ops];
2840 entry_ptr->cost.cost = best_cost.cost;
2841 entry_ptr->cost.latency = best_cost.latency;
2844 /* If we are getting a too long sequence for `struct algorithm'
2845 to record, make this search fail. */
2846 if (best_alg->ops == MAX_BITS_PER_WORD)
2847 return;
2849 /* Copy the algorithm from temporary space to the space at alg_out.
2850 We avoid using structure assignment because the majority of
2851 best_alg is normally undefined, and this is a critical function. */
2852 alg_out->ops = best_alg->ops + 1;
2853 alg_out->cost = best_cost;
2854 memcpy (alg_out->op, best_alg->op,
2855 alg_out->ops * sizeof *alg_out->op);
2856 memcpy (alg_out->log, best_alg->log,
2857 alg_out->ops * sizeof *alg_out->log);
2860 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2861 Try three variations:
2863 - a shift/add sequence based on VAL itself
2864 - a shift/add sequence based on -VAL, followed by a negation
2865 - a shift/add sequence based on VAL - 1, followed by an addition.
2867 Return true if the cheapest of these cost less than MULT_COST,
2868 describing the algorithm in *ALG and final fixup in *VARIANT. */
2870 static bool
2871 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2872 struct algorithm *alg, enum mult_variant *variant,
2873 int mult_cost)
2875 struct algorithm alg2;
2876 struct mult_cost limit;
2877 int op_cost;
2878 bool speed = optimize_insn_for_speed_p ();
2880 /* Fail quickly for impossible bounds. */
2881 if (mult_cost < 0)
2882 return false;
2884 /* Ensure that mult_cost provides a reasonable upper bound.
2885 Any constant multiplication can be performed with less
2886 than 2 * bits additions. */
2887 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
2888 if (mult_cost > op_cost)
2889 mult_cost = op_cost;
2891 *variant = basic_variant;
2892 limit.cost = mult_cost;
2893 limit.latency = mult_cost;
2894 synth_mult (alg, val, &limit, mode);
2896 /* This works only if the inverted value actually fits in an
2897 `unsigned int' */
2898 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
2900 op_cost = neg_cost (speed, mode);
2901 if (MULT_COST_LESS (&alg->cost, mult_cost))
2903 limit.cost = alg->cost.cost - op_cost;
2904 limit.latency = alg->cost.latency - op_cost;
2906 else
2908 limit.cost = mult_cost - op_cost;
2909 limit.latency = mult_cost - op_cost;
2912 synth_mult (&alg2, -val, &limit, mode);
2913 alg2.cost.cost += op_cost;
2914 alg2.cost.latency += op_cost;
2915 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2916 *alg = alg2, *variant = negate_variant;
2919 /* This proves very useful for division-by-constant. */
2920 op_cost = add_cost (speed, mode);
2921 if (MULT_COST_LESS (&alg->cost, mult_cost))
2923 limit.cost = alg->cost.cost - op_cost;
2924 limit.latency = alg->cost.latency - op_cost;
2926 else
2928 limit.cost = mult_cost - op_cost;
2929 limit.latency = mult_cost - op_cost;
2932 synth_mult (&alg2, val - 1, &limit, mode);
2933 alg2.cost.cost += op_cost;
2934 alg2.cost.latency += op_cost;
2935 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2936 *alg = alg2, *variant = add_variant;
2938 return MULT_COST_LESS (&alg->cost, mult_cost);
2941 /* A subroutine of expand_mult, used for constant multiplications.
2942 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2943 convenient. Use the shift/add sequence described by ALG and apply
2944 the final fixup specified by VARIANT. */
2946 static rtx
2947 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2948 rtx target, const struct algorithm *alg,
2949 enum mult_variant variant)
2951 HOST_WIDE_INT val_so_far;
2952 rtx insn, accum, tem;
2953 int opno;
2954 enum machine_mode nmode;
2956 /* Avoid referencing memory over and over and invalid sharing
2957 on SUBREGs. */
2958 op0 = force_reg (mode, op0);
2960 /* ACCUM starts out either as OP0 or as a zero, depending on
2961 the first operation. */
2963 if (alg->op[0] == alg_zero)
2965 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
2966 val_so_far = 0;
2968 else if (alg->op[0] == alg_m)
2970 accum = copy_to_mode_reg (mode, op0);
2971 val_so_far = 1;
2973 else
2974 gcc_unreachable ();
2976 for (opno = 1; opno < alg->ops; opno++)
2978 int log = alg->log[opno];
2979 rtx shift_subtarget = optimize ? 0 : accum;
2980 rtx add_target
2981 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2982 && !optimize)
2983 ? target : 0;
2984 rtx accum_target = optimize ? 0 : accum;
2985 rtx accum_inner;
2987 switch (alg->op[opno])
2989 case alg_shift:
2990 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2991 /* REG_EQUAL note will be attached to the following insn. */
2992 emit_move_insn (accum, tem);
2993 val_so_far <<= log;
2994 break;
2996 case alg_add_t_m2:
2997 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2998 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2999 add_target ? add_target : accum_target);
3000 val_so_far += (HOST_WIDE_INT) 1 << log;
3001 break;
3003 case alg_sub_t_m2:
3004 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3005 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3006 add_target ? add_target : accum_target);
3007 val_so_far -= (HOST_WIDE_INT) 1 << log;
3008 break;
3010 case alg_add_t2_m:
3011 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3012 log, shift_subtarget, 0);
3013 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3014 add_target ? add_target : accum_target);
3015 val_so_far = (val_so_far << log) + 1;
3016 break;
3018 case alg_sub_t2_m:
3019 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3020 log, shift_subtarget, 0);
3021 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3022 add_target ? add_target : accum_target);
3023 val_so_far = (val_so_far << log) - 1;
3024 break;
3026 case alg_add_factor:
3027 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3028 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3029 add_target ? add_target : accum_target);
3030 val_so_far += val_so_far << log;
3031 break;
3033 case alg_sub_factor:
3034 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3035 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3036 (add_target
3037 ? add_target : (optimize ? 0 : tem)));
3038 val_so_far = (val_so_far << log) - val_so_far;
3039 break;
3041 default:
3042 gcc_unreachable ();
3045 if (SCALAR_INT_MODE_P (mode))
3047 /* Write a REG_EQUAL note on the last insn so that we can cse
3048 multiplication sequences. Note that if ACCUM is a SUBREG,
3049 we've set the inner register and must properly indicate that. */
3050 tem = op0, nmode = mode;
3051 accum_inner = accum;
3052 if (GET_CODE (accum) == SUBREG)
3054 accum_inner = SUBREG_REG (accum);
3055 nmode = GET_MODE (accum_inner);
3056 tem = gen_lowpart (nmode, op0);
3059 insn = get_last_insn ();
3060 set_dst_reg_note (insn, REG_EQUAL,
3061 gen_rtx_MULT (nmode, tem,
3062 gen_int_mode (val_so_far, nmode)),
3063 accum_inner);
3067 if (variant == negate_variant)
3069 val_so_far = -val_so_far;
3070 accum = expand_unop (mode, neg_optab, accum, target, 0);
3072 else if (variant == add_variant)
3074 val_so_far = val_so_far + 1;
3075 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3078 /* Compare only the bits of val and val_so_far that are significant
3079 in the result mode, to avoid sign-/zero-extension confusion. */
3080 nmode = GET_MODE_INNER (mode);
3081 if (nmode == VOIDmode)
3082 nmode = mode;
3083 val &= GET_MODE_MASK (nmode);
3084 val_so_far &= GET_MODE_MASK (nmode);
3085 gcc_assert (val == val_so_far);
3087 return accum;
3090 /* Perform a multiplication and return an rtx for the result.
3091 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3092 TARGET is a suggestion for where to store the result (an rtx).
3094 We check specially for a constant integer as OP1.
3095 If you want this check for OP0 as well, then before calling
3096 you should swap the two operands if OP0 would be constant. */
3099 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3100 int unsignedp)
3102 enum mult_variant variant;
3103 struct algorithm algorithm;
3104 rtx scalar_op1;
3105 int max_cost;
3106 bool speed = optimize_insn_for_speed_p ();
3107 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3109 if (CONSTANT_P (op0))
3111 rtx temp = op0;
3112 op0 = op1;
3113 op1 = temp;
3116 /* For vectors, there are several simplifications that can be made if
3117 all elements of the vector constant are identical. */
3118 scalar_op1 = op1;
3119 if (GET_CODE (op1) == CONST_VECTOR)
3121 int i, n = CONST_VECTOR_NUNITS (op1);
3122 scalar_op1 = CONST_VECTOR_ELT (op1, 0);
3123 for (i = 1; i < n; ++i)
3124 if (!rtx_equal_p (scalar_op1, CONST_VECTOR_ELT (op1, i)))
3125 goto skip_scalar;
3128 if (INTEGRAL_MODE_P (mode))
3130 rtx fake_reg;
3131 HOST_WIDE_INT coeff;
3132 bool is_neg;
3133 int mode_bitsize;
3135 if (op1 == CONST0_RTX (mode))
3136 return op1;
3137 if (op1 == CONST1_RTX (mode))
3138 return op0;
3139 if (op1 == CONSTM1_RTX (mode))
3140 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3141 op0, target, 0);
3143 if (do_trapv)
3144 goto skip_synth;
3146 /* These are the operations that are potentially turned into
3147 a sequence of shifts and additions. */
3148 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3150 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3151 less than or equal in size to `unsigned int' this doesn't matter.
3152 If the mode is larger than `unsigned int', then synth_mult works
3153 only if the constant value exactly fits in an `unsigned int' without
3154 any truncation. This means that multiplying by negative values does
3155 not work; results are off by 2^32 on a 32 bit machine. */
3157 if (CONST_INT_P (scalar_op1))
3159 coeff = INTVAL (scalar_op1);
3160 is_neg = coeff < 0;
3162 else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3164 /* If we are multiplying in DImode, it may still be a win
3165 to try to work with shifts and adds. */
3166 if (CONST_DOUBLE_HIGH (scalar_op1) == 0
3167 && (CONST_DOUBLE_LOW (scalar_op1) > 0
3168 || (CONST_DOUBLE_LOW (scalar_op1) < 0
3169 && EXACT_POWER_OF_2_OR_ZERO_P
3170 (CONST_DOUBLE_LOW (scalar_op1)))))
3172 coeff = CONST_DOUBLE_LOW (scalar_op1);
3173 is_neg = false;
3175 else if (CONST_DOUBLE_LOW (scalar_op1) == 0)
3177 coeff = CONST_DOUBLE_HIGH (scalar_op1);
3178 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3180 int shift = floor_log2 (coeff) + HOST_BITS_PER_WIDE_INT;
3181 if (shift < HOST_BITS_PER_DOUBLE_INT - 1
3182 || mode_bitsize <= HOST_BITS_PER_DOUBLE_INT)
3183 return expand_shift (LSHIFT_EXPR, mode, op0,
3184 shift, target, unsignedp);
3186 goto skip_synth;
3188 else
3189 goto skip_synth;
3191 else
3192 goto skip_synth;
3194 /* We used to test optimize here, on the grounds that it's better to
3195 produce a smaller program when -O is not used. But this causes
3196 such a terrible slowdown sometimes that it seems better to always
3197 use synth_mult. */
3199 /* Special case powers of two. */
3200 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3201 && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3202 return expand_shift (LSHIFT_EXPR, mode, op0,
3203 floor_log2 (coeff), target, unsignedp);
3205 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3207 /* Attempt to handle multiplication of DImode values by negative
3208 coefficients, by performing the multiplication by a positive
3209 multiplier and then inverting the result. */
3210 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3212 /* Its safe to use -coeff even for INT_MIN, as the
3213 result is interpreted as an unsigned coefficient.
3214 Exclude cost of op0 from max_cost to match the cost
3215 calculation of the synth_mult. */
3216 coeff = -(unsigned HOST_WIDE_INT) coeff;
3217 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed)
3218 - neg_cost (speed, mode));
3219 if (max_cost <= 0)
3220 goto skip_synth;
3222 /* Special case powers of two. */
3223 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3225 rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3226 floor_log2 (coeff), target, unsignedp);
3227 return expand_unop (mode, neg_optab, temp, target, 0);
3230 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3231 max_cost))
3233 rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3234 &algorithm, variant);
3235 return expand_unop (mode, neg_optab, temp, target, 0);
3237 goto skip_synth;
3240 /* Exclude cost of op0 from max_cost to match the cost
3241 calculation of the synth_mult. */
3242 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
3243 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3244 return expand_mult_const (mode, op0, coeff, target,
3245 &algorithm, variant);
3247 skip_synth:
3249 /* Expand x*2.0 as x+x. */
3250 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1))
3252 REAL_VALUE_TYPE d;
3253 REAL_VALUE_FROM_CONST_DOUBLE (d, scalar_op1);
3255 if (REAL_VALUES_EQUAL (d, dconst2))
3257 op0 = force_reg (GET_MODE (op0), op0);
3258 return expand_binop (mode, add_optab, op0, op0,
3259 target, unsignedp, OPTAB_LIB_WIDEN);
3262 skip_scalar:
3264 /* This used to use umul_optab if unsigned, but for non-widening multiply
3265 there is no difference between signed and unsigned. */
3266 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3267 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3268 gcc_assert (op0);
3269 return op0;
3272 /* Return a cost estimate for multiplying a register by the given
3273 COEFFicient in the given MODE and SPEED. */
3276 mult_by_coeff_cost (HOST_WIDE_INT coeff, enum machine_mode mode, bool speed)
3278 int max_cost;
3279 struct algorithm algorithm;
3280 enum mult_variant variant;
3282 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3283 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg), speed);
3284 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3285 return algorithm.cost.cost;
3286 else
3287 return max_cost;
3290 /* Perform a widening multiplication and return an rtx for the result.
3291 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3292 TARGET is a suggestion for where to store the result (an rtx).
3293 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3294 or smul_widen_optab.
3296 We check specially for a constant integer as OP1, comparing the
3297 cost of a widening multiply against the cost of a sequence of shifts
3298 and adds. */
3301 expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3302 int unsignedp, optab this_optab)
3304 bool speed = optimize_insn_for_speed_p ();
3305 rtx cop1;
3307 if (CONST_INT_P (op1)
3308 && GET_MODE (op0) != VOIDmode
3309 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3310 this_optab == umul_widen_optab))
3311 && CONST_INT_P (cop1)
3312 && (INTVAL (cop1) >= 0
3313 || HWI_COMPUTABLE_MODE_P (mode)))
3315 HOST_WIDE_INT coeff = INTVAL (cop1);
3316 int max_cost;
3317 enum mult_variant variant;
3318 struct algorithm algorithm;
3320 /* Special case powers of two. */
3321 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3323 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3324 return expand_shift (LSHIFT_EXPR, mode, op0,
3325 floor_log2 (coeff), target, unsignedp);
3328 /* Exclude cost of op0 from max_cost to match the cost
3329 calculation of the synth_mult. */
3330 max_cost = mul_widen_cost (speed, mode);
3331 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3332 max_cost))
3334 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3335 return expand_mult_const (mode, op0, coeff, target,
3336 &algorithm, variant);
3339 return expand_binop (mode, this_optab, op0, op1, target,
3340 unsignedp, OPTAB_LIB_WIDEN);
3343 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3344 replace division by D, and put the least significant N bits of the result
3345 in *MULTIPLIER_PTR and return the most significant bit.
3347 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3348 needed precision is in PRECISION (should be <= N).
3350 PRECISION should be as small as possible so this function can choose
3351 multiplier more freely.
3353 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3354 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3356 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3357 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
3359 unsigned HOST_WIDE_INT
3360 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3361 unsigned HOST_WIDE_INT *multiplier_ptr,
3362 int *post_shift_ptr, int *lgup_ptr)
3364 double_int mhigh, mlow;
3365 int lgup, post_shift;
3366 int pow, pow2;
3368 /* lgup = ceil(log2(divisor)); */
3369 lgup = ceil_log2 (d);
3371 gcc_assert (lgup <= n);
3373 pow = n + lgup;
3374 pow2 = n + lgup - precision;
3376 /* We could handle this with some effort, but this case is much
3377 better handled directly with a scc insn, so rely on caller using
3378 that. */
3379 gcc_assert (pow != HOST_BITS_PER_DOUBLE_INT);
3381 /* mlow = 2^(N + lgup)/d */
3382 double_int val = double_int_zero.set_bit (pow);
3383 mlow = val.div (double_int::from_uhwi (d), true, TRUNC_DIV_EXPR);
3385 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3386 val |= double_int_zero.set_bit (pow2);
3387 mhigh = val.div (double_int::from_uhwi (d), true, TRUNC_DIV_EXPR);
3389 gcc_assert (!mhigh.high || val.high - d < d);
3390 gcc_assert (mhigh.high <= 1 && mlow.high <= 1);
3391 /* Assert that mlow < mhigh. */
3392 gcc_assert (mlow.ult (mhigh));
3394 /* If precision == N, then mlow, mhigh exceed 2^N
3395 (but they do not exceed 2^(N+1)). */
3397 /* Reduce to lowest terms. */
3398 for (post_shift = lgup; post_shift > 0; post_shift--)
3400 int shft = HOST_BITS_PER_WIDE_INT - 1;
3401 unsigned HOST_WIDE_INT ml_lo = (mlow.high << shft) | (mlow.low >> 1);
3402 unsigned HOST_WIDE_INT mh_lo = (mhigh.high << shft) | (mhigh.low >> 1);
3403 if (ml_lo >= mh_lo)
3404 break;
3406 mlow = double_int::from_uhwi (ml_lo);
3407 mhigh = double_int::from_uhwi (mh_lo);
3410 *post_shift_ptr = post_shift;
3411 *lgup_ptr = lgup;
3412 if (n < HOST_BITS_PER_WIDE_INT)
3414 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3415 *multiplier_ptr = mhigh.low & mask;
3416 return mhigh.low >= mask;
3418 else
3420 *multiplier_ptr = mhigh.low;
3421 return mhigh.high;
3425 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3426 congruent to 1 (mod 2**N). */
3428 static unsigned HOST_WIDE_INT
3429 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3431 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3433 /* The algorithm notes that the choice y = x satisfies
3434 x*y == 1 mod 2^3, since x is assumed odd.
3435 Each iteration doubles the number of bits of significance in y. */
3437 unsigned HOST_WIDE_INT mask;
3438 unsigned HOST_WIDE_INT y = x;
3439 int nbit = 3;
3441 mask = (n == HOST_BITS_PER_WIDE_INT
3442 ? ~(unsigned HOST_WIDE_INT) 0
3443 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3445 while (nbit < n)
3447 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3448 nbit *= 2;
3450 return y;
3453 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3454 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3455 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3456 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3457 become signed.
3459 The result is put in TARGET if that is convenient.
3461 MODE is the mode of operation. */
3464 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3465 rtx op1, rtx target, int unsignedp)
3467 rtx tem;
3468 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3470 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3471 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3472 tem = expand_and (mode, tem, op1, NULL_RTX);
3473 adj_operand
3474 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3475 adj_operand);
3477 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3478 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3479 tem = expand_and (mode, tem, op0, NULL_RTX);
3480 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3481 target);
3483 return target;
3486 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3488 static rtx
3489 extract_high_half (enum machine_mode mode, rtx op)
3491 enum machine_mode wider_mode;
3493 if (mode == word_mode)
3494 return gen_highpart (mode, op);
3496 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3498 wider_mode = GET_MODE_WIDER_MODE (mode);
3499 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3500 GET_MODE_BITSIZE (mode), 0, 1);
3501 return convert_modes (mode, wider_mode, op, 0);
3504 /* Like expmed_mult_highpart, but only consider using a multiplication
3505 optab. OP1 is an rtx for the constant operand. */
3507 static rtx
3508 expmed_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3509 rtx target, int unsignedp, int max_cost)
3511 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3512 enum machine_mode wider_mode;
3513 optab moptab;
3514 rtx tem;
3515 int size;
3516 bool speed = optimize_insn_for_speed_p ();
3518 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3520 wider_mode = GET_MODE_WIDER_MODE (mode);
3521 size = GET_MODE_BITSIZE (mode);
3523 /* Firstly, try using a multiplication insn that only generates the needed
3524 high part of the product, and in the sign flavor of unsignedp. */
3525 if (mul_highpart_cost (speed, mode) < max_cost)
3527 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3528 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3529 unsignedp, OPTAB_DIRECT);
3530 if (tem)
3531 return tem;
3534 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3535 Need to adjust the result after the multiplication. */
3536 if (size - 1 < BITS_PER_WORD
3537 && (mul_highpart_cost (speed, mode)
3538 + 2 * shift_cost (speed, mode, size-1)
3539 + 4 * add_cost (speed, mode) < max_cost))
3541 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3542 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3543 unsignedp, OPTAB_DIRECT);
3544 if (tem)
3545 /* We used the wrong signedness. Adjust the result. */
3546 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3547 tem, unsignedp);
3550 /* Try widening multiplication. */
3551 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3552 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3553 && mul_widen_cost (speed, wider_mode) < max_cost)
3555 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3556 unsignedp, OPTAB_WIDEN);
3557 if (tem)
3558 return extract_high_half (mode, tem);
3561 /* Try widening the mode and perform a non-widening multiplication. */
3562 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3563 && size - 1 < BITS_PER_WORD
3564 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3565 < max_cost))
3567 rtx insns, wop0, wop1;
3569 /* We need to widen the operands, for example to ensure the
3570 constant multiplier is correctly sign or zero extended.
3571 Use a sequence to clean-up any instructions emitted by
3572 the conversions if things don't work out. */
3573 start_sequence ();
3574 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3575 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3576 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3577 unsignedp, OPTAB_WIDEN);
3578 insns = get_insns ();
3579 end_sequence ();
3581 if (tem)
3583 emit_insn (insns);
3584 return extract_high_half (mode, tem);
3588 /* Try widening multiplication of opposite signedness, and adjust. */
3589 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3590 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3591 && size - 1 < BITS_PER_WORD
3592 && (mul_widen_cost (speed, wider_mode)
3593 + 2 * shift_cost (speed, mode, size-1)
3594 + 4 * add_cost (speed, mode) < max_cost))
3596 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3597 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3598 if (tem != 0)
3600 tem = extract_high_half (mode, tem);
3601 /* We used the wrong signedness. Adjust the result. */
3602 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3603 target, unsignedp);
3607 return 0;
3610 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3611 putting the high half of the result in TARGET if that is convenient,
3612 and return where the result is. If the operation can not be performed,
3613 0 is returned.
3615 MODE is the mode of operation and result.
3617 UNSIGNEDP nonzero means unsigned multiply.
3619 MAX_COST is the total allowed cost for the expanded RTL. */
3621 static rtx
3622 expmed_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3623 rtx target, int unsignedp, int max_cost)
3625 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3626 unsigned HOST_WIDE_INT cnst1;
3627 int extra_cost;
3628 bool sign_adjust = false;
3629 enum mult_variant variant;
3630 struct algorithm alg;
3631 rtx tem;
3632 bool speed = optimize_insn_for_speed_p ();
3634 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3635 /* We can't support modes wider than HOST_BITS_PER_INT. */
3636 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3638 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3640 /* We can't optimize modes wider than BITS_PER_WORD.
3641 ??? We might be able to perform double-word arithmetic if
3642 mode == word_mode, however all the cost calculations in
3643 synth_mult etc. assume single-word operations. */
3644 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3645 return expmed_mult_highpart_optab (mode, op0, op1, target,
3646 unsignedp, max_cost);
3648 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3650 /* Check whether we try to multiply by a negative constant. */
3651 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3653 sign_adjust = true;
3654 extra_cost += add_cost (speed, mode);
3657 /* See whether shift/add multiplication is cheap enough. */
3658 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3659 max_cost - extra_cost))
3661 /* See whether the specialized multiplication optabs are
3662 cheaper than the shift/add version. */
3663 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3664 alg.cost.cost + extra_cost);
3665 if (tem)
3666 return tem;
3668 tem = convert_to_mode (wider_mode, op0, unsignedp);
3669 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3670 tem = extract_high_half (mode, tem);
3672 /* Adjust result for signedness. */
3673 if (sign_adjust)
3674 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3676 return tem;
3678 return expmed_mult_highpart_optab (mode, op0, op1, target,
3679 unsignedp, max_cost);
3683 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3685 static rtx
3686 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3688 unsigned HOST_WIDE_INT masklow, maskhigh;
3689 rtx result, temp, shift, label;
3690 int logd;
3692 logd = floor_log2 (d);
3693 result = gen_reg_rtx (mode);
3695 /* Avoid conditional branches when they're expensive. */
3696 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3697 && optimize_insn_for_speed_p ())
3699 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3700 mode, 0, -1);
3701 if (signmask)
3703 signmask = force_reg (mode, signmask);
3704 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3705 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3707 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3708 which instruction sequence to use. If logical right shifts
3709 are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3710 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
3712 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3713 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3714 || (set_src_cost (temp, optimize_insn_for_speed_p ())
3715 > COSTS_N_INSNS (2)))
3717 temp = expand_binop (mode, xor_optab, op0, signmask,
3718 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3719 temp = expand_binop (mode, sub_optab, temp, signmask,
3720 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3721 temp = expand_binop (mode, and_optab, temp,
3722 gen_int_mode (masklow, mode),
3723 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3724 temp = expand_binop (mode, xor_optab, temp, signmask,
3725 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3726 temp = expand_binop (mode, sub_optab, temp, signmask,
3727 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3729 else
3731 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3732 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3733 signmask = force_reg (mode, signmask);
3735 temp = expand_binop (mode, add_optab, op0, signmask,
3736 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3737 temp = expand_binop (mode, and_optab, temp,
3738 gen_int_mode (masklow, mode),
3739 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3740 temp = expand_binop (mode, sub_optab, temp, signmask,
3741 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3743 return temp;
3747 /* Mask contains the mode's signbit and the significant bits of the
3748 modulus. By including the signbit in the operation, many targets
3749 can avoid an explicit compare operation in the following comparison
3750 against zero. */
3752 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3753 if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3755 masklow |= HOST_WIDE_INT_M1U << (GET_MODE_BITSIZE (mode) - 1);
3756 maskhigh = -1;
3758 else
3759 maskhigh = HOST_WIDE_INT_M1U
3760 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3762 temp = expand_binop (mode, and_optab, op0,
3763 immed_double_const (masklow, maskhigh, mode),
3764 result, 1, OPTAB_LIB_WIDEN);
3765 if (temp != result)
3766 emit_move_insn (result, temp);
3768 label = gen_label_rtx ();
3769 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3771 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3772 0, OPTAB_LIB_WIDEN);
3773 masklow = HOST_WIDE_INT_M1U << logd;
3774 maskhigh = -1;
3775 temp = expand_binop (mode, ior_optab, temp,
3776 immed_double_const (masklow, maskhigh, mode),
3777 result, 1, OPTAB_LIB_WIDEN);
3778 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3779 0, OPTAB_LIB_WIDEN);
3780 if (temp != result)
3781 emit_move_insn (result, temp);
3782 emit_label (label);
3783 return result;
3786 /* Expand signed division of OP0 by a power of two D in mode MODE.
3787 This routine is only called for positive values of D. */
3789 static rtx
3790 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3792 rtx temp, label;
3793 int logd;
3795 logd = floor_log2 (d);
3797 if (d == 2
3798 && BRANCH_COST (optimize_insn_for_speed_p (),
3799 false) >= 1)
3801 temp = gen_reg_rtx (mode);
3802 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3803 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3804 0, OPTAB_LIB_WIDEN);
3805 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3808 #ifdef HAVE_conditional_move
3809 if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3810 >= 2)
3812 rtx temp2;
3814 start_sequence ();
3815 temp2 = copy_to_mode_reg (mode, op0);
3816 temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
3817 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3818 temp = force_reg (mode, temp);
3820 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3821 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3822 mode, temp, temp2, mode, 0);
3823 if (temp2)
3825 rtx seq = get_insns ();
3826 end_sequence ();
3827 emit_insn (seq);
3828 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3830 end_sequence ();
3832 #endif
3834 if (BRANCH_COST (optimize_insn_for_speed_p (),
3835 false) >= 2)
3837 int ushift = GET_MODE_BITSIZE (mode) - logd;
3839 temp = gen_reg_rtx (mode);
3840 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3841 if (shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3842 > COSTS_N_INSNS (1))
3843 temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
3844 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3845 else
3846 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3847 ushift, NULL_RTX, 1);
3848 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3849 0, OPTAB_LIB_WIDEN);
3850 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3853 label = gen_label_rtx ();
3854 temp = copy_to_mode_reg (mode, op0);
3855 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3856 expand_inc (temp, gen_int_mode (d - 1, mode));
3857 emit_label (label);
3858 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3861 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3862 if that is convenient, and returning where the result is.
3863 You may request either the quotient or the remainder as the result;
3864 specify REM_FLAG nonzero to get the remainder.
3866 CODE is the expression code for which kind of division this is;
3867 it controls how rounding is done. MODE is the machine mode to use.
3868 UNSIGNEDP nonzero means do unsigned division. */
3870 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3871 and then correct it by or'ing in missing high bits
3872 if result of ANDI is nonzero.
3873 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3874 This could optimize to a bfexts instruction.
3875 But C doesn't use these operations, so their optimizations are
3876 left for later. */
3877 /* ??? For modulo, we don't actually need the highpart of the first product,
3878 the low part will do nicely. And for small divisors, the second multiply
3879 can also be a low-part only multiply or even be completely left out.
3880 E.g. to calculate the remainder of a division by 3 with a 32 bit
3881 multiply, multiply with 0x55555556 and extract the upper two bits;
3882 the result is exact for inputs up to 0x1fffffff.
3883 The input range can be reduced by using cross-sum rules.
3884 For odd divisors >= 3, the following table gives right shift counts
3885 so that if a number is shifted by an integer multiple of the given
3886 amount, the remainder stays the same:
3887 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3888 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3889 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3890 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3891 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3893 Cross-sum rules for even numbers can be derived by leaving as many bits
3894 to the right alone as the divisor has zeros to the right.
3895 E.g. if x is an unsigned 32 bit number:
3896 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3900 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3901 rtx op0, rtx op1, rtx target, int unsignedp)
3903 enum machine_mode compute_mode;
3904 rtx tquotient;
3905 rtx quotient = 0, remainder = 0;
3906 rtx last;
3907 int size;
3908 rtx insn;
3909 optab optab1, optab2;
3910 int op1_is_constant, op1_is_pow2 = 0;
3911 int max_cost, extra_cost;
3912 static HOST_WIDE_INT last_div_const = 0;
3913 bool speed = optimize_insn_for_speed_p ();
3915 op1_is_constant = CONST_INT_P (op1);
3916 if (op1_is_constant)
3918 unsigned HOST_WIDE_INT ext_op1 = UINTVAL (op1);
3919 if (unsignedp)
3920 ext_op1 &= GET_MODE_MASK (mode);
3921 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3922 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3926 This is the structure of expand_divmod:
3928 First comes code to fix up the operands so we can perform the operations
3929 correctly and efficiently.
3931 Second comes a switch statement with code specific for each rounding mode.
3932 For some special operands this code emits all RTL for the desired
3933 operation, for other cases, it generates only a quotient and stores it in
3934 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3935 to indicate that it has not done anything.
3937 Last comes code that finishes the operation. If QUOTIENT is set and
3938 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3939 QUOTIENT is not set, it is computed using trunc rounding.
3941 We try to generate special code for division and remainder when OP1 is a
3942 constant. If |OP1| = 2**n we can use shifts and some other fast
3943 operations. For other values of OP1, we compute a carefully selected
3944 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3945 by m.
3947 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3948 half of the product. Different strategies for generating the product are
3949 implemented in expmed_mult_highpart.
3951 If what we actually want is the remainder, we generate that by another
3952 by-constant multiplication and a subtraction. */
3954 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3955 code below will malfunction if we are, so check here and handle
3956 the special case if so. */
3957 if (op1 == const1_rtx)
3958 return rem_flag ? const0_rtx : op0;
3960 /* When dividing by -1, we could get an overflow.
3961 negv_optab can handle overflows. */
3962 if (! unsignedp && op1 == constm1_rtx)
3964 if (rem_flag)
3965 return const0_rtx;
3966 return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
3967 ? negv_optab : neg_optab, op0, target, 0);
3970 if (target
3971 /* Don't use the function value register as a target
3972 since we have to read it as well as write it,
3973 and function-inlining gets confused by this. */
3974 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3975 /* Don't clobber an operand while doing a multi-step calculation. */
3976 || ((rem_flag || op1_is_constant)
3977 && (reg_mentioned_p (target, op0)
3978 || (MEM_P (op0) && MEM_P (target))))
3979 || reg_mentioned_p (target, op1)
3980 || (MEM_P (op1) && MEM_P (target))))
3981 target = 0;
3983 /* Get the mode in which to perform this computation. Normally it will
3984 be MODE, but sometimes we can't do the desired operation in MODE.
3985 If so, pick a wider mode in which we can do the operation. Convert
3986 to that mode at the start to avoid repeated conversions.
3988 First see what operations we need. These depend on the expression
3989 we are evaluating. (We assume that divxx3 insns exist under the
3990 same conditions that modxx3 insns and that these insns don't normally
3991 fail. If these assumptions are not correct, we may generate less
3992 efficient code in some cases.)
3994 Then see if we find a mode in which we can open-code that operation
3995 (either a division, modulus, or shift). Finally, check for the smallest
3996 mode for which we can do the operation with a library call. */
3998 /* We might want to refine this now that we have division-by-constant
3999 optimization. Since expmed_mult_highpart tries so many variants, it is
4000 not straightforward to generalize this. Maybe we should make an array
4001 of possible modes in init_expmed? Save this for GCC 2.7. */
4003 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
4004 ? (unsignedp ? lshr_optab : ashr_optab)
4005 : (unsignedp ? udiv_optab : sdiv_optab));
4006 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
4007 ? optab1
4008 : (unsignedp ? udivmod_optab : sdivmod_optab));
4010 for (compute_mode = mode; compute_mode != VOIDmode;
4011 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4012 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4013 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4014 break;
4016 if (compute_mode == VOIDmode)
4017 for (compute_mode = mode; compute_mode != VOIDmode;
4018 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4019 if (optab_libfunc (optab1, compute_mode)
4020 || optab_libfunc (optab2, compute_mode))
4021 break;
4023 /* If we still couldn't find a mode, use MODE, but expand_binop will
4024 probably die. */
4025 if (compute_mode == VOIDmode)
4026 compute_mode = mode;
4028 if (target && GET_MODE (target) == compute_mode)
4029 tquotient = target;
4030 else
4031 tquotient = gen_reg_rtx (compute_mode);
4033 size = GET_MODE_BITSIZE (compute_mode);
4034 #if 0
4035 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4036 (mode), and thereby get better code when OP1 is a constant. Do that
4037 later. It will require going over all usages of SIZE below. */
4038 size = GET_MODE_BITSIZE (mode);
4039 #endif
4041 /* Only deduct something for a REM if the last divide done was
4042 for a different constant. Then set the constant of the last
4043 divide. */
4044 max_cost = (unsignedp
4045 ? udiv_cost (speed, compute_mode)
4046 : sdiv_cost (speed, compute_mode));
4047 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4048 && INTVAL (op1) == last_div_const))
4049 max_cost -= (mul_cost (speed, compute_mode)
4050 + add_cost (speed, compute_mode));
4052 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4054 /* Now convert to the best mode to use. */
4055 if (compute_mode != mode)
4057 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4058 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4060 /* convert_modes may have placed op1 into a register, so we
4061 must recompute the following. */
4062 op1_is_constant = CONST_INT_P (op1);
4063 op1_is_pow2 = (op1_is_constant
4064 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4065 || (! unsignedp
4066 && EXACT_POWER_OF_2_OR_ZERO_P (-UINTVAL (op1))))));
4069 /* If one of the operands is a volatile MEM, copy it into a register. */
4071 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4072 op0 = force_reg (compute_mode, op0);
4073 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4074 op1 = force_reg (compute_mode, op1);
4076 /* If we need the remainder or if OP1 is constant, we need to
4077 put OP0 in a register in case it has any queued subexpressions. */
4078 if (rem_flag || op1_is_constant)
4079 op0 = force_reg (compute_mode, op0);
4081 last = get_last_insn ();
4083 /* Promote floor rounding to trunc rounding for unsigned operations. */
4084 if (unsignedp)
4086 if (code == FLOOR_DIV_EXPR)
4087 code = TRUNC_DIV_EXPR;
4088 if (code == FLOOR_MOD_EXPR)
4089 code = TRUNC_MOD_EXPR;
4090 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4091 code = TRUNC_DIV_EXPR;
4094 if (op1 != const0_rtx)
4095 switch (code)
4097 case TRUNC_MOD_EXPR:
4098 case TRUNC_DIV_EXPR:
4099 if (op1_is_constant)
4101 if (unsignedp)
4103 unsigned HOST_WIDE_INT mh, ml;
4104 int pre_shift, post_shift;
4105 int dummy;
4106 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4107 & GET_MODE_MASK (compute_mode));
4109 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4111 pre_shift = floor_log2 (d);
4112 if (rem_flag)
4114 unsigned HOST_WIDE_INT mask
4115 = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4116 remainder
4117 = expand_binop (compute_mode, and_optab, op0,
4118 gen_int_mode (mask, compute_mode),
4119 remainder, 1,
4120 OPTAB_LIB_WIDEN);
4121 if (remainder)
4122 return gen_lowpart (mode, remainder);
4124 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4125 pre_shift, tquotient, 1);
4127 else if (size <= HOST_BITS_PER_WIDE_INT)
4129 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4131 /* Most significant bit of divisor is set; emit an scc
4132 insn. */
4133 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4134 compute_mode, 1, 1);
4136 else
4138 /* Find a suitable multiplier and right shift count
4139 instead of multiplying with D. */
4141 mh = choose_multiplier (d, size, size,
4142 &ml, &post_shift, &dummy);
4144 /* If the suggested multiplier is more than SIZE bits,
4145 we can do better for even divisors, using an
4146 initial right shift. */
4147 if (mh != 0 && (d & 1) == 0)
4149 pre_shift = floor_log2 (d & -d);
4150 mh = choose_multiplier (d >> pre_shift, size,
4151 size - pre_shift,
4152 &ml, &post_shift, &dummy);
4153 gcc_assert (!mh);
4155 else
4156 pre_shift = 0;
4158 if (mh != 0)
4160 rtx t1, t2, t3, t4;
4162 if (post_shift - 1 >= BITS_PER_WORD)
4163 goto fail1;
4165 extra_cost
4166 = (shift_cost (speed, compute_mode, post_shift - 1)
4167 + shift_cost (speed, compute_mode, 1)
4168 + 2 * add_cost (speed, compute_mode));
4169 t1 = expmed_mult_highpart
4170 (compute_mode, op0,
4171 gen_int_mode (ml, compute_mode),
4172 NULL_RTX, 1, max_cost - extra_cost);
4173 if (t1 == 0)
4174 goto fail1;
4175 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4176 op0, t1),
4177 NULL_RTX);
4178 t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4179 t2, 1, NULL_RTX, 1);
4180 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4181 t1, t3),
4182 NULL_RTX);
4183 quotient = expand_shift
4184 (RSHIFT_EXPR, compute_mode, t4,
4185 post_shift - 1, tquotient, 1);
4187 else
4189 rtx t1, t2;
4191 if (pre_shift >= BITS_PER_WORD
4192 || post_shift >= BITS_PER_WORD)
4193 goto fail1;
4195 t1 = expand_shift
4196 (RSHIFT_EXPR, compute_mode, op0,
4197 pre_shift, NULL_RTX, 1);
4198 extra_cost
4199 = (shift_cost (speed, compute_mode, pre_shift)
4200 + shift_cost (speed, compute_mode, post_shift));
4201 t2 = expmed_mult_highpart
4202 (compute_mode, t1,
4203 gen_int_mode (ml, compute_mode),
4204 NULL_RTX, 1, max_cost - extra_cost);
4205 if (t2 == 0)
4206 goto fail1;
4207 quotient = expand_shift
4208 (RSHIFT_EXPR, compute_mode, t2,
4209 post_shift, tquotient, 1);
4213 else /* Too wide mode to use tricky code */
4214 break;
4216 insn = get_last_insn ();
4217 if (insn != last)
4218 set_dst_reg_note (insn, REG_EQUAL,
4219 gen_rtx_UDIV (compute_mode, op0, op1),
4220 quotient);
4222 else /* TRUNC_DIV, signed */
4224 unsigned HOST_WIDE_INT ml;
4225 int lgup, post_shift;
4226 rtx mlr;
4227 HOST_WIDE_INT d = INTVAL (op1);
4228 unsigned HOST_WIDE_INT abs_d;
4230 /* Since d might be INT_MIN, we have to cast to
4231 unsigned HOST_WIDE_INT before negating to avoid
4232 undefined signed overflow. */
4233 abs_d = (d >= 0
4234 ? (unsigned HOST_WIDE_INT) d
4235 : - (unsigned HOST_WIDE_INT) d);
4237 /* n rem d = n rem -d */
4238 if (rem_flag && d < 0)
4240 d = abs_d;
4241 op1 = gen_int_mode (abs_d, compute_mode);
4244 if (d == 1)
4245 quotient = op0;
4246 else if (d == -1)
4247 quotient = expand_unop (compute_mode, neg_optab, op0,
4248 tquotient, 0);
4249 else if (HOST_BITS_PER_WIDE_INT >= size
4250 && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4252 /* This case is not handled correctly below. */
4253 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4254 compute_mode, 1, 1);
4255 if (quotient == 0)
4256 goto fail1;
4258 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4259 && (rem_flag
4260 ? smod_pow2_cheap (speed, compute_mode)
4261 : sdiv_pow2_cheap (speed, compute_mode))
4262 /* We assume that cheap metric is true if the
4263 optab has an expander for this mode. */
4264 && ((optab_handler ((rem_flag ? smod_optab
4265 : sdiv_optab),
4266 compute_mode)
4267 != CODE_FOR_nothing)
4268 || (optab_handler (sdivmod_optab,
4269 compute_mode)
4270 != CODE_FOR_nothing)))
4272 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4274 if (rem_flag)
4276 remainder = expand_smod_pow2 (compute_mode, op0, d);
4277 if (remainder)
4278 return gen_lowpart (mode, remainder);
4281 if (sdiv_pow2_cheap (speed, compute_mode)
4282 && ((optab_handler (sdiv_optab, compute_mode)
4283 != CODE_FOR_nothing)
4284 || (optab_handler (sdivmod_optab, compute_mode)
4285 != CODE_FOR_nothing)))
4286 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4287 compute_mode, op0,
4288 gen_int_mode (abs_d,
4289 compute_mode),
4290 NULL_RTX, 0);
4291 else
4292 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4294 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4295 negate the quotient. */
4296 if (d < 0)
4298 insn = get_last_insn ();
4299 if (insn != last
4300 && abs_d < ((unsigned HOST_WIDE_INT) 1
4301 << (HOST_BITS_PER_WIDE_INT - 1)))
4302 set_dst_reg_note (insn, REG_EQUAL,
4303 gen_rtx_DIV (compute_mode, op0,
4304 gen_int_mode
4305 (abs_d,
4306 compute_mode)),
4307 quotient);
4309 quotient = expand_unop (compute_mode, neg_optab,
4310 quotient, quotient, 0);
4313 else if (size <= HOST_BITS_PER_WIDE_INT)
4315 choose_multiplier (abs_d, size, size - 1,
4316 &ml, &post_shift, &lgup);
4317 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4319 rtx t1, t2, t3;
4321 if (post_shift >= BITS_PER_WORD
4322 || size - 1 >= BITS_PER_WORD)
4323 goto fail1;
4325 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4326 + shift_cost (speed, compute_mode, size - 1)
4327 + add_cost (speed, compute_mode));
4328 t1 = expmed_mult_highpart
4329 (compute_mode, op0, gen_int_mode (ml, compute_mode),
4330 NULL_RTX, 0, max_cost - extra_cost);
4331 if (t1 == 0)
4332 goto fail1;
4333 t2 = expand_shift
4334 (RSHIFT_EXPR, compute_mode, t1,
4335 post_shift, NULL_RTX, 0);
4336 t3 = expand_shift
4337 (RSHIFT_EXPR, compute_mode, op0,
4338 size - 1, NULL_RTX, 0);
4339 if (d < 0)
4340 quotient
4341 = force_operand (gen_rtx_MINUS (compute_mode,
4342 t3, t2),
4343 tquotient);
4344 else
4345 quotient
4346 = force_operand (gen_rtx_MINUS (compute_mode,
4347 t2, t3),
4348 tquotient);
4350 else
4352 rtx t1, t2, t3, t4;
4354 if (post_shift >= BITS_PER_WORD
4355 || size - 1 >= BITS_PER_WORD)
4356 goto fail1;
4358 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4359 mlr = gen_int_mode (ml, compute_mode);
4360 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4361 + shift_cost (speed, compute_mode, size - 1)
4362 + 2 * add_cost (speed, compute_mode));
4363 t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4364 NULL_RTX, 0,
4365 max_cost - extra_cost);
4366 if (t1 == 0)
4367 goto fail1;
4368 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4369 t1, op0),
4370 NULL_RTX);
4371 t3 = expand_shift
4372 (RSHIFT_EXPR, compute_mode, t2,
4373 post_shift, NULL_RTX, 0);
4374 t4 = expand_shift
4375 (RSHIFT_EXPR, compute_mode, op0,
4376 size - 1, NULL_RTX, 0);
4377 if (d < 0)
4378 quotient
4379 = force_operand (gen_rtx_MINUS (compute_mode,
4380 t4, t3),
4381 tquotient);
4382 else
4383 quotient
4384 = force_operand (gen_rtx_MINUS (compute_mode,
4385 t3, t4),
4386 tquotient);
4389 else /* Too wide mode to use tricky code */
4390 break;
4392 insn = get_last_insn ();
4393 if (insn != last)
4394 set_dst_reg_note (insn, REG_EQUAL,
4395 gen_rtx_DIV (compute_mode, op0, op1),
4396 quotient);
4398 break;
4400 fail1:
4401 delete_insns_since (last);
4402 break;
4404 case FLOOR_DIV_EXPR:
4405 case FLOOR_MOD_EXPR:
4406 /* We will come here only for signed operations. */
4407 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4409 unsigned HOST_WIDE_INT mh, ml;
4410 int pre_shift, lgup, post_shift;
4411 HOST_WIDE_INT d = INTVAL (op1);
4413 if (d > 0)
4415 /* We could just as easily deal with negative constants here,
4416 but it does not seem worth the trouble for GCC 2.6. */
4417 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4419 pre_shift = floor_log2 (d);
4420 if (rem_flag)
4422 unsigned HOST_WIDE_INT mask
4423 = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4424 remainder = expand_binop
4425 (compute_mode, and_optab, op0,
4426 gen_int_mode (mask, compute_mode),
4427 remainder, 0, OPTAB_LIB_WIDEN);
4428 if (remainder)
4429 return gen_lowpart (mode, remainder);
4431 quotient = expand_shift
4432 (RSHIFT_EXPR, compute_mode, op0,
4433 pre_shift, tquotient, 0);
4435 else
4437 rtx t1, t2, t3, t4;
4439 mh = choose_multiplier (d, size, size - 1,
4440 &ml, &post_shift, &lgup);
4441 gcc_assert (!mh);
4443 if (post_shift < BITS_PER_WORD
4444 && size - 1 < BITS_PER_WORD)
4446 t1 = expand_shift
4447 (RSHIFT_EXPR, compute_mode, op0,
4448 size - 1, NULL_RTX, 0);
4449 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4450 NULL_RTX, 0, OPTAB_WIDEN);
4451 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4452 + shift_cost (speed, compute_mode, size - 1)
4453 + 2 * add_cost (speed, compute_mode));
4454 t3 = expmed_mult_highpart
4455 (compute_mode, t2, gen_int_mode (ml, compute_mode),
4456 NULL_RTX, 1, max_cost - extra_cost);
4457 if (t3 != 0)
4459 t4 = expand_shift
4460 (RSHIFT_EXPR, compute_mode, t3,
4461 post_shift, NULL_RTX, 1);
4462 quotient = expand_binop (compute_mode, xor_optab,
4463 t4, t1, tquotient, 0,
4464 OPTAB_WIDEN);
4469 else
4471 rtx nsign, t1, t2, t3, t4;
4472 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4473 op0, constm1_rtx), NULL_RTX);
4474 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4475 0, OPTAB_WIDEN);
4476 nsign = expand_shift
4477 (RSHIFT_EXPR, compute_mode, t2,
4478 size - 1, NULL_RTX, 0);
4479 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4480 NULL_RTX);
4481 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4482 NULL_RTX, 0);
4483 if (t4)
4485 rtx t5;
4486 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4487 NULL_RTX, 0);
4488 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4489 t4, t5),
4490 tquotient);
4495 if (quotient != 0)
4496 break;
4497 delete_insns_since (last);
4499 /* Try using an instruction that produces both the quotient and
4500 remainder, using truncation. We can easily compensate the quotient
4501 or remainder to get floor rounding, once we have the remainder.
4502 Notice that we compute also the final remainder value here,
4503 and return the result right away. */
4504 if (target == 0 || GET_MODE (target) != compute_mode)
4505 target = gen_reg_rtx (compute_mode);
4507 if (rem_flag)
4509 remainder
4510 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4511 quotient = gen_reg_rtx (compute_mode);
4513 else
4515 quotient
4516 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4517 remainder = gen_reg_rtx (compute_mode);
4520 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4521 quotient, remainder, 0))
4523 /* This could be computed with a branch-less sequence.
4524 Save that for later. */
4525 rtx tem;
4526 rtx label = gen_label_rtx ();
4527 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4528 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4529 NULL_RTX, 0, OPTAB_WIDEN);
4530 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4531 expand_dec (quotient, const1_rtx);
4532 expand_inc (remainder, op1);
4533 emit_label (label);
4534 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4537 /* No luck with division elimination or divmod. Have to do it
4538 by conditionally adjusting op0 *and* the result. */
4540 rtx label1, label2, label3, label4, label5;
4541 rtx adjusted_op0;
4542 rtx tem;
4544 quotient = gen_reg_rtx (compute_mode);
4545 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4546 label1 = gen_label_rtx ();
4547 label2 = gen_label_rtx ();
4548 label3 = gen_label_rtx ();
4549 label4 = gen_label_rtx ();
4550 label5 = gen_label_rtx ();
4551 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4552 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4553 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4554 quotient, 0, OPTAB_LIB_WIDEN);
4555 if (tem != quotient)
4556 emit_move_insn (quotient, tem);
4557 emit_jump_insn (gen_jump (label5));
4558 emit_barrier ();
4559 emit_label (label1);
4560 expand_inc (adjusted_op0, const1_rtx);
4561 emit_jump_insn (gen_jump (label4));
4562 emit_barrier ();
4563 emit_label (label2);
4564 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4565 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4566 quotient, 0, OPTAB_LIB_WIDEN);
4567 if (tem != quotient)
4568 emit_move_insn (quotient, tem);
4569 emit_jump_insn (gen_jump (label5));
4570 emit_barrier ();
4571 emit_label (label3);
4572 expand_dec (adjusted_op0, const1_rtx);
4573 emit_label (label4);
4574 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4575 quotient, 0, OPTAB_LIB_WIDEN);
4576 if (tem != quotient)
4577 emit_move_insn (quotient, tem);
4578 expand_dec (quotient, const1_rtx);
4579 emit_label (label5);
4581 break;
4583 case CEIL_DIV_EXPR:
4584 case CEIL_MOD_EXPR:
4585 if (unsignedp)
4587 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4589 rtx t1, t2, t3;
4590 unsigned HOST_WIDE_INT d = INTVAL (op1);
4591 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4592 floor_log2 (d), tquotient, 1);
4593 t2 = expand_binop (compute_mode, and_optab, op0,
4594 gen_int_mode (d - 1, compute_mode),
4595 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4596 t3 = gen_reg_rtx (compute_mode);
4597 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4598 compute_mode, 1, 1);
4599 if (t3 == 0)
4601 rtx lab;
4602 lab = gen_label_rtx ();
4603 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4604 expand_inc (t1, const1_rtx);
4605 emit_label (lab);
4606 quotient = t1;
4608 else
4609 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4610 t1, t3),
4611 tquotient);
4612 break;
4615 /* Try using an instruction that produces both the quotient and
4616 remainder, using truncation. We can easily compensate the
4617 quotient or remainder to get ceiling rounding, once we have the
4618 remainder. Notice that we compute also the final remainder
4619 value here, and return the result right away. */
4620 if (target == 0 || GET_MODE (target) != compute_mode)
4621 target = gen_reg_rtx (compute_mode);
4623 if (rem_flag)
4625 remainder = (REG_P (target)
4626 ? target : gen_reg_rtx (compute_mode));
4627 quotient = gen_reg_rtx (compute_mode);
4629 else
4631 quotient = (REG_P (target)
4632 ? target : gen_reg_rtx (compute_mode));
4633 remainder = gen_reg_rtx (compute_mode);
4636 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4637 remainder, 1))
4639 /* This could be computed with a branch-less sequence.
4640 Save that for later. */
4641 rtx label = gen_label_rtx ();
4642 do_cmp_and_jump (remainder, const0_rtx, EQ,
4643 compute_mode, label);
4644 expand_inc (quotient, const1_rtx);
4645 expand_dec (remainder, op1);
4646 emit_label (label);
4647 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4650 /* No luck with division elimination or divmod. Have to do it
4651 by conditionally adjusting op0 *and* the result. */
4653 rtx label1, label2;
4654 rtx adjusted_op0, tem;
4656 quotient = gen_reg_rtx (compute_mode);
4657 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4658 label1 = gen_label_rtx ();
4659 label2 = gen_label_rtx ();
4660 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4661 compute_mode, label1);
4662 emit_move_insn (quotient, const0_rtx);
4663 emit_jump_insn (gen_jump (label2));
4664 emit_barrier ();
4665 emit_label (label1);
4666 expand_dec (adjusted_op0, const1_rtx);
4667 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4668 quotient, 1, OPTAB_LIB_WIDEN);
4669 if (tem != quotient)
4670 emit_move_insn (quotient, tem);
4671 expand_inc (quotient, const1_rtx);
4672 emit_label (label2);
4675 else /* signed */
4677 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4678 && INTVAL (op1) >= 0)
4680 /* This is extremely similar to the code for the unsigned case
4681 above. For 2.7 we should merge these variants, but for
4682 2.6.1 I don't want to touch the code for unsigned since that
4683 get used in C. The signed case will only be used by other
4684 languages (Ada). */
4686 rtx t1, t2, t3;
4687 unsigned HOST_WIDE_INT d = INTVAL (op1);
4688 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4689 floor_log2 (d), tquotient, 0);
4690 t2 = expand_binop (compute_mode, and_optab, op0,
4691 gen_int_mode (d - 1, compute_mode),
4692 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4693 t3 = gen_reg_rtx (compute_mode);
4694 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4695 compute_mode, 1, 1);
4696 if (t3 == 0)
4698 rtx lab;
4699 lab = gen_label_rtx ();
4700 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4701 expand_inc (t1, const1_rtx);
4702 emit_label (lab);
4703 quotient = t1;
4705 else
4706 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4707 t1, t3),
4708 tquotient);
4709 break;
4712 /* Try using an instruction that produces both the quotient and
4713 remainder, using truncation. We can easily compensate the
4714 quotient or remainder to get ceiling rounding, once we have the
4715 remainder. Notice that we compute also the final remainder
4716 value here, and return the result right away. */
4717 if (target == 0 || GET_MODE (target) != compute_mode)
4718 target = gen_reg_rtx (compute_mode);
4719 if (rem_flag)
4721 remainder= (REG_P (target)
4722 ? target : gen_reg_rtx (compute_mode));
4723 quotient = gen_reg_rtx (compute_mode);
4725 else
4727 quotient = (REG_P (target)
4728 ? target : gen_reg_rtx (compute_mode));
4729 remainder = gen_reg_rtx (compute_mode);
4732 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4733 remainder, 0))
4735 /* This could be computed with a branch-less sequence.
4736 Save that for later. */
4737 rtx tem;
4738 rtx label = gen_label_rtx ();
4739 do_cmp_and_jump (remainder, const0_rtx, EQ,
4740 compute_mode, label);
4741 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4742 NULL_RTX, 0, OPTAB_WIDEN);
4743 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4744 expand_inc (quotient, const1_rtx);
4745 expand_dec (remainder, op1);
4746 emit_label (label);
4747 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4750 /* No luck with division elimination or divmod. Have to do it
4751 by conditionally adjusting op0 *and* the result. */
4753 rtx label1, label2, label3, label4, label5;
4754 rtx adjusted_op0;
4755 rtx tem;
4757 quotient = gen_reg_rtx (compute_mode);
4758 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4759 label1 = gen_label_rtx ();
4760 label2 = gen_label_rtx ();
4761 label3 = gen_label_rtx ();
4762 label4 = gen_label_rtx ();
4763 label5 = gen_label_rtx ();
4764 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4765 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4766 compute_mode, label1);
4767 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4768 quotient, 0, OPTAB_LIB_WIDEN);
4769 if (tem != quotient)
4770 emit_move_insn (quotient, tem);
4771 emit_jump_insn (gen_jump (label5));
4772 emit_barrier ();
4773 emit_label (label1);
4774 expand_dec (adjusted_op0, const1_rtx);
4775 emit_jump_insn (gen_jump (label4));
4776 emit_barrier ();
4777 emit_label (label2);
4778 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4779 compute_mode, label3);
4780 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4781 quotient, 0, OPTAB_LIB_WIDEN);
4782 if (tem != quotient)
4783 emit_move_insn (quotient, tem);
4784 emit_jump_insn (gen_jump (label5));
4785 emit_barrier ();
4786 emit_label (label3);
4787 expand_inc (adjusted_op0, const1_rtx);
4788 emit_label (label4);
4789 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4790 quotient, 0, OPTAB_LIB_WIDEN);
4791 if (tem != quotient)
4792 emit_move_insn (quotient, tem);
4793 expand_inc (quotient, const1_rtx);
4794 emit_label (label5);
4797 break;
4799 case EXACT_DIV_EXPR:
4800 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4802 HOST_WIDE_INT d = INTVAL (op1);
4803 unsigned HOST_WIDE_INT ml;
4804 int pre_shift;
4805 rtx t1;
4807 pre_shift = floor_log2 (d & -d);
4808 ml = invert_mod2n (d >> pre_shift, size);
4809 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4810 pre_shift, NULL_RTX, unsignedp);
4811 quotient = expand_mult (compute_mode, t1,
4812 gen_int_mode (ml, compute_mode),
4813 NULL_RTX, 1);
4815 insn = get_last_insn ();
4816 set_dst_reg_note (insn, REG_EQUAL,
4817 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4818 compute_mode, op0, op1),
4819 quotient);
4821 break;
4823 case ROUND_DIV_EXPR:
4824 case ROUND_MOD_EXPR:
4825 if (unsignedp)
4827 rtx tem;
4828 rtx label;
4829 label = gen_label_rtx ();
4830 quotient = gen_reg_rtx (compute_mode);
4831 remainder = gen_reg_rtx (compute_mode);
4832 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4834 rtx tem;
4835 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4836 quotient, 1, OPTAB_LIB_WIDEN);
4837 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4838 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4839 remainder, 1, OPTAB_LIB_WIDEN);
4841 tem = plus_constant (compute_mode, op1, -1);
4842 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4843 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4844 expand_inc (quotient, const1_rtx);
4845 expand_dec (remainder, op1);
4846 emit_label (label);
4848 else
4850 rtx abs_rem, abs_op1, tem, mask;
4851 rtx label;
4852 label = gen_label_rtx ();
4853 quotient = gen_reg_rtx (compute_mode);
4854 remainder = gen_reg_rtx (compute_mode);
4855 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4857 rtx tem;
4858 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4859 quotient, 0, OPTAB_LIB_WIDEN);
4860 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4861 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4862 remainder, 0, OPTAB_LIB_WIDEN);
4864 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4865 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4866 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4867 1, NULL_RTX, 1);
4868 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4869 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4870 NULL_RTX, 0, OPTAB_WIDEN);
4871 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4872 size - 1, NULL_RTX, 0);
4873 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4874 NULL_RTX, 0, OPTAB_WIDEN);
4875 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4876 NULL_RTX, 0, OPTAB_WIDEN);
4877 expand_inc (quotient, tem);
4878 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4879 NULL_RTX, 0, OPTAB_WIDEN);
4880 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4881 NULL_RTX, 0, OPTAB_WIDEN);
4882 expand_dec (remainder, tem);
4883 emit_label (label);
4885 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4887 default:
4888 gcc_unreachable ();
4891 if (quotient == 0)
4893 if (target && GET_MODE (target) != compute_mode)
4894 target = 0;
4896 if (rem_flag)
4898 /* Try to produce the remainder without producing the quotient.
4899 If we seem to have a divmod pattern that does not require widening,
4900 don't try widening here. We should really have a WIDEN argument
4901 to expand_twoval_binop, since what we'd really like to do here is
4902 1) try a mod insn in compute_mode
4903 2) try a divmod insn in compute_mode
4904 3) try a div insn in compute_mode and multiply-subtract to get
4905 remainder
4906 4) try the same things with widening allowed. */
4907 remainder
4908 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4909 op0, op1, target,
4910 unsignedp,
4911 ((optab_handler (optab2, compute_mode)
4912 != CODE_FOR_nothing)
4913 ? OPTAB_DIRECT : OPTAB_WIDEN));
4914 if (remainder == 0)
4916 /* No luck there. Can we do remainder and divide at once
4917 without a library call? */
4918 remainder = gen_reg_rtx (compute_mode);
4919 if (! expand_twoval_binop ((unsignedp
4920 ? udivmod_optab
4921 : sdivmod_optab),
4922 op0, op1,
4923 NULL_RTX, remainder, unsignedp))
4924 remainder = 0;
4927 if (remainder)
4928 return gen_lowpart (mode, remainder);
4931 /* Produce the quotient. Try a quotient insn, but not a library call.
4932 If we have a divmod in this mode, use it in preference to widening
4933 the div (for this test we assume it will not fail). Note that optab2
4934 is set to the one of the two optabs that the call below will use. */
4935 quotient
4936 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4937 op0, op1, rem_flag ? NULL_RTX : target,
4938 unsignedp,
4939 ((optab_handler (optab2, compute_mode)
4940 != CODE_FOR_nothing)
4941 ? OPTAB_DIRECT : OPTAB_WIDEN));
4943 if (quotient == 0)
4945 /* No luck there. Try a quotient-and-remainder insn,
4946 keeping the quotient alone. */
4947 quotient = gen_reg_rtx (compute_mode);
4948 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4949 op0, op1,
4950 quotient, NULL_RTX, unsignedp))
4952 quotient = 0;
4953 if (! rem_flag)
4954 /* Still no luck. If we are not computing the remainder,
4955 use a library call for the quotient. */
4956 quotient = sign_expand_binop (compute_mode,
4957 udiv_optab, sdiv_optab,
4958 op0, op1, target,
4959 unsignedp, OPTAB_LIB_WIDEN);
4964 if (rem_flag)
4966 if (target && GET_MODE (target) != compute_mode)
4967 target = 0;
4969 if (quotient == 0)
4971 /* No divide instruction either. Use library for remainder. */
4972 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4973 op0, op1, target,
4974 unsignedp, OPTAB_LIB_WIDEN);
4975 /* No remainder function. Try a quotient-and-remainder
4976 function, keeping the remainder. */
4977 if (!remainder)
4979 remainder = gen_reg_rtx (compute_mode);
4980 if (!expand_twoval_binop_libfunc
4981 (unsignedp ? udivmod_optab : sdivmod_optab,
4982 op0, op1,
4983 NULL_RTX, remainder,
4984 unsignedp ? UMOD : MOD))
4985 remainder = NULL_RTX;
4988 else
4990 /* We divided. Now finish doing X - Y * (X / Y). */
4991 remainder = expand_mult (compute_mode, quotient, op1,
4992 NULL_RTX, unsignedp);
4993 remainder = expand_binop (compute_mode, sub_optab, op0,
4994 remainder, target, unsignedp,
4995 OPTAB_LIB_WIDEN);
4999 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5002 /* Return a tree node with data type TYPE, describing the value of X.
5003 Usually this is an VAR_DECL, if there is no obvious better choice.
5004 X may be an expression, however we only support those expressions
5005 generated by loop.c. */
5007 tree
5008 make_tree (tree type, rtx x)
5010 tree t;
5012 switch (GET_CODE (x))
5014 case CONST_INT:
5016 HOST_WIDE_INT hi = 0;
5018 if (INTVAL (x) < 0
5019 && !(TYPE_UNSIGNED (type)
5020 && (GET_MODE_BITSIZE (TYPE_MODE (type))
5021 < HOST_BITS_PER_WIDE_INT)))
5022 hi = -1;
5024 t = build_int_cst_wide (type, INTVAL (x), hi);
5026 return t;
5029 case CONST_DOUBLE:
5030 if (GET_MODE (x) == VOIDmode)
5031 t = build_int_cst_wide (type,
5032 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
5033 else
5035 REAL_VALUE_TYPE d;
5037 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
5038 t = build_real (type, d);
5041 return t;
5043 case CONST_VECTOR:
5045 int units = CONST_VECTOR_NUNITS (x);
5046 tree itype = TREE_TYPE (type);
5047 tree *elts;
5048 int i;
5050 /* Build a tree with vector elements. */
5051 elts = XALLOCAVEC (tree, units);
5052 for (i = units - 1; i >= 0; --i)
5054 rtx elt = CONST_VECTOR_ELT (x, i);
5055 elts[i] = make_tree (itype, elt);
5058 return build_vector (type, elts);
5061 case PLUS:
5062 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5063 make_tree (type, XEXP (x, 1)));
5065 case MINUS:
5066 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5067 make_tree (type, XEXP (x, 1)));
5069 case NEG:
5070 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5072 case MULT:
5073 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5074 make_tree (type, XEXP (x, 1)));
5076 case ASHIFT:
5077 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5078 make_tree (type, XEXP (x, 1)));
5080 case LSHIFTRT:
5081 t = unsigned_type_for (type);
5082 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5083 make_tree (t, XEXP (x, 0)),
5084 make_tree (type, XEXP (x, 1))));
5086 case ASHIFTRT:
5087 t = signed_type_for (type);
5088 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5089 make_tree (t, XEXP (x, 0)),
5090 make_tree (type, XEXP (x, 1))));
5092 case DIV:
5093 if (TREE_CODE (type) != REAL_TYPE)
5094 t = signed_type_for (type);
5095 else
5096 t = type;
5098 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5099 make_tree (t, XEXP (x, 0)),
5100 make_tree (t, XEXP (x, 1))));
5101 case UDIV:
5102 t = unsigned_type_for (type);
5103 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5104 make_tree (t, XEXP (x, 0)),
5105 make_tree (t, XEXP (x, 1))));
5107 case SIGN_EXTEND:
5108 case ZERO_EXTEND:
5109 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5110 GET_CODE (x) == ZERO_EXTEND);
5111 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5113 case CONST:
5114 return make_tree (type, XEXP (x, 0));
5116 case SYMBOL_REF:
5117 t = SYMBOL_REF_DECL (x);
5118 if (t)
5119 return fold_convert (type, build_fold_addr_expr (t));
5120 /* else fall through. */
5122 default:
5123 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5125 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5126 address mode to pointer mode. */
5127 if (POINTER_TYPE_P (type))
5128 x = convert_memory_address_addr_space
5129 (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5131 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5132 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5133 t->decl_with_rtl.rtl = x;
5135 return t;
5139 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5140 and returning TARGET.
5142 If TARGET is 0, a pseudo-register or constant is returned. */
5145 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5147 rtx tem = 0;
5149 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5150 tem = simplify_binary_operation (AND, mode, op0, op1);
5151 if (tem == 0)
5152 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5154 if (target == 0)
5155 target = tem;
5156 else if (tem != target)
5157 emit_move_insn (target, tem);
5158 return target;
5161 /* Helper function for emit_store_flag. */
5162 static rtx
5163 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5164 enum machine_mode mode, enum machine_mode compare_mode,
5165 int unsignedp, rtx x, rtx y, int normalizep,
5166 enum machine_mode target_mode)
5168 struct expand_operand ops[4];
5169 rtx op0, last, comparison, subtarget;
5170 enum machine_mode result_mode = targetm.cstore_mode (icode);
5172 last = get_last_insn ();
5173 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5174 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5175 if (!x || !y)
5177 delete_insns_since (last);
5178 return NULL_RTX;
5181 if (target_mode == VOIDmode)
5182 target_mode = result_mode;
5183 if (!target)
5184 target = gen_reg_rtx (target_mode);
5186 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5188 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5189 create_fixed_operand (&ops[1], comparison);
5190 create_fixed_operand (&ops[2], x);
5191 create_fixed_operand (&ops[3], y);
5192 if (!maybe_expand_insn (icode, 4, ops))
5194 delete_insns_since (last);
5195 return NULL_RTX;
5197 subtarget = ops[0].value;
5199 /* If we are converting to a wider mode, first convert to
5200 TARGET_MODE, then normalize. This produces better combining
5201 opportunities on machines that have a SIGN_EXTRACT when we are
5202 testing a single bit. This mostly benefits the 68k.
5204 If STORE_FLAG_VALUE does not have the sign bit set when
5205 interpreted in MODE, we can do this conversion as unsigned, which
5206 is usually more efficient. */
5207 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5209 convert_move (target, subtarget,
5210 val_signbit_known_clear_p (result_mode,
5211 STORE_FLAG_VALUE));
5212 op0 = target;
5213 result_mode = target_mode;
5215 else
5216 op0 = subtarget;
5218 /* If we want to keep subexpressions around, don't reuse our last
5219 target. */
5220 if (optimize)
5221 subtarget = 0;
5223 /* Now normalize to the proper value in MODE. Sometimes we don't
5224 have to do anything. */
5225 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5227 /* STORE_FLAG_VALUE might be the most negative number, so write
5228 the comparison this way to avoid a compiler-time warning. */
5229 else if (- normalizep == STORE_FLAG_VALUE)
5230 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5232 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5233 it hard to use a value of just the sign bit due to ANSI integer
5234 constant typing rules. */
5235 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5236 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5237 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5238 normalizep == 1);
5239 else
5241 gcc_assert (STORE_FLAG_VALUE & 1);
5243 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5244 if (normalizep == -1)
5245 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5248 /* If we were converting to a smaller mode, do the conversion now. */
5249 if (target_mode != result_mode)
5251 convert_move (target, op0, 0);
5252 return target;
5254 else
5255 return op0;
5259 /* A subroutine of emit_store_flag only including "tricks" that do not
5260 need a recursive call. These are kept separate to avoid infinite
5261 loops. */
5263 static rtx
5264 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5265 enum machine_mode mode, int unsignedp, int normalizep,
5266 enum machine_mode target_mode)
5268 rtx subtarget;
5269 enum insn_code icode;
5270 enum machine_mode compare_mode;
5271 enum mode_class mclass;
5272 enum rtx_code scode;
5273 rtx tem;
5275 if (unsignedp)
5276 code = unsigned_condition (code);
5277 scode = swap_condition (code);
5279 /* If one operand is constant, make it the second one. Only do this
5280 if the other operand is not constant as well. */
5282 if (swap_commutative_operands_p (op0, op1))
5284 tem = op0;
5285 op0 = op1;
5286 op1 = tem;
5287 code = swap_condition (code);
5290 if (mode == VOIDmode)
5291 mode = GET_MODE (op0);
5293 /* For some comparisons with 1 and -1, we can convert this to
5294 comparisons with zero. This will often produce more opportunities for
5295 store-flag insns. */
5297 switch (code)
5299 case LT:
5300 if (op1 == const1_rtx)
5301 op1 = const0_rtx, code = LE;
5302 break;
5303 case LE:
5304 if (op1 == constm1_rtx)
5305 op1 = const0_rtx, code = LT;
5306 break;
5307 case GE:
5308 if (op1 == const1_rtx)
5309 op1 = const0_rtx, code = GT;
5310 break;
5311 case GT:
5312 if (op1 == constm1_rtx)
5313 op1 = const0_rtx, code = GE;
5314 break;
5315 case GEU:
5316 if (op1 == const1_rtx)
5317 op1 = const0_rtx, code = NE;
5318 break;
5319 case LTU:
5320 if (op1 == const1_rtx)
5321 op1 = const0_rtx, code = EQ;
5322 break;
5323 default:
5324 break;
5327 /* If we are comparing a double-word integer with zero or -1, we can
5328 convert the comparison into one involving a single word. */
5329 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5330 && GET_MODE_CLASS (mode) == MODE_INT
5331 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5333 if ((code == EQ || code == NE)
5334 && (op1 == const0_rtx || op1 == constm1_rtx))
5336 rtx op00, op01;
5338 /* Do a logical OR or AND of the two words and compare the
5339 result. */
5340 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5341 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5342 tem = expand_binop (word_mode,
5343 op1 == const0_rtx ? ior_optab : and_optab,
5344 op00, op01, NULL_RTX, unsignedp,
5345 OPTAB_DIRECT);
5347 if (tem != 0)
5348 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5349 unsignedp, normalizep);
5351 else if ((code == LT || code == GE) && op1 == const0_rtx)
5353 rtx op0h;
5355 /* If testing the sign bit, can just test on high word. */
5356 op0h = simplify_gen_subreg (word_mode, op0, mode,
5357 subreg_highpart_offset (word_mode,
5358 mode));
5359 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5360 unsignedp, normalizep);
5362 else
5363 tem = NULL_RTX;
5365 if (tem)
5367 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5368 return tem;
5369 if (!target)
5370 target = gen_reg_rtx (target_mode);
5372 convert_move (target, tem,
5373 !val_signbit_known_set_p (word_mode,
5374 (normalizep ? normalizep
5375 : STORE_FLAG_VALUE)));
5376 return target;
5380 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5381 complement of A (for GE) and shifting the sign bit to the low bit. */
5382 if (op1 == const0_rtx && (code == LT || code == GE)
5383 && GET_MODE_CLASS (mode) == MODE_INT
5384 && (normalizep || STORE_FLAG_VALUE == 1
5385 || val_signbit_p (mode, STORE_FLAG_VALUE)))
5387 subtarget = target;
5389 if (!target)
5390 target_mode = mode;
5392 /* If the result is to be wider than OP0, it is best to convert it
5393 first. If it is to be narrower, it is *incorrect* to convert it
5394 first. */
5395 else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5397 op0 = convert_modes (target_mode, mode, op0, 0);
5398 mode = target_mode;
5401 if (target_mode != mode)
5402 subtarget = 0;
5404 if (code == GE)
5405 op0 = expand_unop (mode, one_cmpl_optab, op0,
5406 ((STORE_FLAG_VALUE == 1 || normalizep)
5407 ? 0 : subtarget), 0);
5409 if (STORE_FLAG_VALUE == 1 || normalizep)
5410 /* If we are supposed to produce a 0/1 value, we want to do
5411 a logical shift from the sign bit to the low-order bit; for
5412 a -1/0 value, we do an arithmetic shift. */
5413 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5414 GET_MODE_BITSIZE (mode) - 1,
5415 subtarget, normalizep != -1);
5417 if (mode != target_mode)
5418 op0 = convert_modes (target_mode, mode, op0, 0);
5420 return op0;
5423 mclass = GET_MODE_CLASS (mode);
5424 for (compare_mode = mode; compare_mode != VOIDmode;
5425 compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5427 enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5428 icode = optab_handler (cstore_optab, optab_mode);
5429 if (icode != CODE_FOR_nothing)
5431 do_pending_stack_adjust ();
5432 tem = emit_cstore (target, icode, code, mode, compare_mode,
5433 unsignedp, op0, op1, normalizep, target_mode);
5434 if (tem)
5435 return tem;
5437 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5439 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5440 unsignedp, op1, op0, normalizep, target_mode);
5441 if (tem)
5442 return tem;
5444 break;
5448 return 0;
5451 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5452 and storing in TARGET. Normally return TARGET.
5453 Return 0 if that cannot be done.
5455 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5456 it is VOIDmode, they cannot both be CONST_INT.
5458 UNSIGNEDP is for the case where we have to widen the operands
5459 to perform the operation. It says to use zero-extension.
5461 NORMALIZEP is 1 if we should convert the result to be either zero
5462 or one. Normalize is -1 if we should convert the result to be
5463 either zero or -1. If NORMALIZEP is zero, the result will be left
5464 "raw" out of the scc insn. */
5467 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5468 enum machine_mode mode, int unsignedp, int normalizep)
5470 enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5471 enum rtx_code rcode;
5472 rtx subtarget;
5473 rtx tem, last, trueval;
5475 /* If we compare constants, we shouldn't use a store-flag operation,
5476 but a constant load. We can get there via the vanilla route that
5477 usually generates a compare-branch sequence, but will in this case
5478 fold the comparison to a constant, and thus elide the branch. */
5479 if (CONSTANT_P (op0) && CONSTANT_P (op1))
5480 return NULL_RTX;
5482 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5483 target_mode);
5484 if (tem)
5485 return tem;
5487 /* If we reached here, we can't do this with a scc insn, however there
5488 are some comparisons that can be done in other ways. Don't do any
5489 of these cases if branches are very cheap. */
5490 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5491 return 0;
5493 /* See what we need to return. We can only return a 1, -1, or the
5494 sign bit. */
5496 if (normalizep == 0)
5498 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5499 normalizep = STORE_FLAG_VALUE;
5501 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5503 else
5504 return 0;
5507 last = get_last_insn ();
5509 /* If optimizing, use different pseudo registers for each insn, instead
5510 of reusing the same pseudo. This leads to better CSE, but slows
5511 down the compiler, since there are more pseudos */
5512 subtarget = (!optimize
5513 && (target_mode == mode)) ? target : NULL_RTX;
5514 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5516 /* For floating-point comparisons, try the reverse comparison or try
5517 changing the "orderedness" of the comparison. */
5518 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5520 enum rtx_code first_code;
5521 bool and_them;
5523 rcode = reverse_condition_maybe_unordered (code);
5524 if (can_compare_p (rcode, mode, ccp_store_flag)
5525 && (code == ORDERED || code == UNORDERED
5526 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5527 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5529 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5530 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5532 /* For the reverse comparison, use either an addition or a XOR. */
5533 if (want_add
5534 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5535 optimize_insn_for_speed_p ()) == 0)
5537 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5538 STORE_FLAG_VALUE, target_mode);
5539 if (tem)
5540 return expand_binop (target_mode, add_optab, tem,
5541 gen_int_mode (normalizep, target_mode),
5542 target, 0, OPTAB_WIDEN);
5544 else if (!want_add
5545 && rtx_cost (trueval, XOR, 1,
5546 optimize_insn_for_speed_p ()) == 0)
5548 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5549 normalizep, target_mode);
5550 if (tem)
5551 return expand_binop (target_mode, xor_optab, tem, trueval,
5552 target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5556 delete_insns_since (last);
5558 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5559 if (code == ORDERED || code == UNORDERED)
5560 return 0;
5562 and_them = split_comparison (code, mode, &first_code, &code);
5564 /* If there are no NaNs, the first comparison should always fall through.
5565 Effectively change the comparison to the other one. */
5566 if (!HONOR_NANS (mode))
5568 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5569 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5570 target_mode);
5573 #ifdef HAVE_conditional_move
5574 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5575 conditional move. */
5576 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5577 normalizep, target_mode);
5578 if (tem == 0)
5579 return 0;
5581 if (and_them)
5582 tem = emit_conditional_move (target, code, op0, op1, mode,
5583 tem, const0_rtx, GET_MODE (tem), 0);
5584 else
5585 tem = emit_conditional_move (target, code, op0, op1, mode,
5586 trueval, tem, GET_MODE (tem), 0);
5588 if (tem == 0)
5589 delete_insns_since (last);
5590 return tem;
5591 #else
5592 return 0;
5593 #endif
5596 /* The remaining tricks only apply to integer comparisons. */
5598 if (GET_MODE_CLASS (mode) != MODE_INT)
5599 return 0;
5601 /* If this is an equality comparison of integers, we can try to exclusive-or
5602 (or subtract) the two operands and use a recursive call to try the
5603 comparison with zero. Don't do any of these cases if branches are
5604 very cheap. */
5606 if ((code == EQ || code == NE) && op1 != const0_rtx)
5608 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5609 OPTAB_WIDEN);
5611 if (tem == 0)
5612 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5613 OPTAB_WIDEN);
5614 if (tem != 0)
5615 tem = emit_store_flag (target, code, tem, const0_rtx,
5616 mode, unsignedp, normalizep);
5617 if (tem != 0)
5618 return tem;
5620 delete_insns_since (last);
5623 /* For integer comparisons, try the reverse comparison. However, for
5624 small X and if we'd have anyway to extend, implementing "X != 0"
5625 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5626 rcode = reverse_condition (code);
5627 if (can_compare_p (rcode, mode, ccp_store_flag)
5628 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5629 && code == NE
5630 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5631 && op1 == const0_rtx))
5633 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5634 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5636 /* Again, for the reverse comparison, use either an addition or a XOR. */
5637 if (want_add
5638 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5639 optimize_insn_for_speed_p ()) == 0)
5641 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5642 STORE_FLAG_VALUE, target_mode);
5643 if (tem != 0)
5644 tem = expand_binop (target_mode, add_optab, tem,
5645 gen_int_mode (normalizep, target_mode),
5646 target, 0, OPTAB_WIDEN);
5648 else if (!want_add
5649 && rtx_cost (trueval, XOR, 1,
5650 optimize_insn_for_speed_p ()) == 0)
5652 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5653 normalizep, target_mode);
5654 if (tem != 0)
5655 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5656 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5659 if (tem != 0)
5660 return tem;
5661 delete_insns_since (last);
5664 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5665 the constant zero. Reject all other comparisons at this point. Only
5666 do LE and GT if branches are expensive since they are expensive on
5667 2-operand machines. */
5669 if (op1 != const0_rtx
5670 || (code != EQ && code != NE
5671 && (BRANCH_COST (optimize_insn_for_speed_p (),
5672 false) <= 1 || (code != LE && code != GT))))
5673 return 0;
5675 /* Try to put the result of the comparison in the sign bit. Assume we can't
5676 do the necessary operation below. */
5678 tem = 0;
5680 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5681 the sign bit set. */
5683 if (code == LE)
5685 /* This is destructive, so SUBTARGET can't be OP0. */
5686 if (rtx_equal_p (subtarget, op0))
5687 subtarget = 0;
5689 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5690 OPTAB_WIDEN);
5691 if (tem)
5692 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5693 OPTAB_WIDEN);
5696 /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5697 number of bits in the mode of OP0, minus one. */
5699 if (code == GT)
5701 if (rtx_equal_p (subtarget, op0))
5702 subtarget = 0;
5704 tem = expand_shift (RSHIFT_EXPR, mode, op0,
5705 GET_MODE_BITSIZE (mode) - 1,
5706 subtarget, 0);
5707 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5708 OPTAB_WIDEN);
5711 if (code == EQ || code == NE)
5713 /* For EQ or NE, one way to do the comparison is to apply an operation
5714 that converts the operand into a positive number if it is nonzero
5715 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5716 for NE we negate. This puts the result in the sign bit. Then we
5717 normalize with a shift, if needed.
5719 Two operations that can do the above actions are ABS and FFS, so try
5720 them. If that doesn't work, and MODE is smaller than a full word,
5721 we can use zero-extension to the wider mode (an unsigned conversion)
5722 as the operation. */
5724 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5725 that is compensated by the subsequent overflow when subtracting
5726 one / negating. */
5728 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5729 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5730 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5731 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5732 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5734 tem = convert_modes (word_mode, mode, op0, 1);
5735 mode = word_mode;
5738 if (tem != 0)
5740 if (code == EQ)
5741 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5742 0, OPTAB_WIDEN);
5743 else
5744 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5747 /* If we couldn't do it that way, for NE we can "or" the two's complement
5748 of the value with itself. For EQ, we take the one's complement of
5749 that "or", which is an extra insn, so we only handle EQ if branches
5750 are expensive. */
5752 if (tem == 0
5753 && (code == NE
5754 || BRANCH_COST (optimize_insn_for_speed_p (),
5755 false) > 1))
5757 if (rtx_equal_p (subtarget, op0))
5758 subtarget = 0;
5760 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5761 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5762 OPTAB_WIDEN);
5764 if (tem && code == EQ)
5765 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5769 if (tem && normalizep)
5770 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5771 GET_MODE_BITSIZE (mode) - 1,
5772 subtarget, normalizep == 1);
5774 if (tem)
5776 if (!target)
5778 else if (GET_MODE (tem) != target_mode)
5780 convert_move (target, tem, 0);
5781 tem = target;
5783 else if (!subtarget)
5785 emit_move_insn (target, tem);
5786 tem = target;
5789 else
5790 delete_insns_since (last);
5792 return tem;
5795 /* Like emit_store_flag, but always succeeds. */
5798 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5799 enum machine_mode mode, int unsignedp, int normalizep)
5801 rtx tem, label;
5802 rtx trueval, falseval;
5804 /* First see if emit_store_flag can do the job. */
5805 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5806 if (tem != 0)
5807 return tem;
5809 if (!target)
5810 target = gen_reg_rtx (word_mode);
5812 /* If this failed, we have to do this with set/compare/jump/set code.
5813 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
5814 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5815 if (code == NE
5816 && GET_MODE_CLASS (mode) == MODE_INT
5817 && REG_P (target)
5818 && op0 == target
5819 && op1 == const0_rtx)
5821 label = gen_label_rtx ();
5822 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5823 mode, NULL_RTX, NULL_RTX, label, -1);
5824 emit_move_insn (target, trueval);
5825 emit_label (label);
5826 return target;
5829 if (!REG_P (target)
5830 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5831 target = gen_reg_rtx (GET_MODE (target));
5833 /* Jump in the right direction if the target cannot implement CODE
5834 but can jump on its reverse condition. */
5835 falseval = const0_rtx;
5836 if (! can_compare_p (code, mode, ccp_jump)
5837 && (! FLOAT_MODE_P (mode)
5838 || code == ORDERED || code == UNORDERED
5839 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5840 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5842 enum rtx_code rcode;
5843 if (FLOAT_MODE_P (mode))
5844 rcode = reverse_condition_maybe_unordered (code);
5845 else
5846 rcode = reverse_condition (code);
5848 /* Canonicalize to UNORDERED for the libcall. */
5849 if (can_compare_p (rcode, mode, ccp_jump)
5850 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5852 falseval = trueval;
5853 trueval = const0_rtx;
5854 code = rcode;
5858 emit_move_insn (target, trueval);
5859 label = gen_label_rtx ();
5860 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5861 NULL_RTX, label, -1);
5863 emit_move_insn (target, falseval);
5864 emit_label (label);
5866 return target;
5869 /* Perform possibly multi-word comparison and conditional jump to LABEL
5870 if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE. This is
5871 now a thin wrapper around do_compare_rtx_and_jump. */
5873 static void
5874 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5875 rtx label)
5877 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5878 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5879 NULL_RTX, NULL_RTX, label, -1);