Merge from mainline (160224:163495).
[official-gcc/graphite-test-results.git] / gcc / implicit-zee.c
bloba96dadd3035cf8418d9c489fb6cace79a3f37bb8
1 /* Redundant Zero-extension elimination for targets that implicitly
2 zero-extend writes to the lower 32-bit portion of 64-bit registers.
3 Copyright (C) 2010 Free Software Foundation, Inc.
4 Contributed by Sriraman Tallam (tmsriram@google.com) and
5 Silvius Rus (rus@google.com)
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
24 /* Problem Description :
25 --------------------
26 This pass is intended to be applicable only to targets that implicitly
27 zero-extend 64-bit registers after writing to their lower 32-bit half.
28 For instance, x86_64 zero-extends the upper bits of a register
29 implicitly whenever an instruction writes to its lower 32-bit half.
30 For example, the instruction *add edi,eax* also zero-extends the upper
31 32-bits of rax after doing the addition. These zero extensions come
32 for free and GCC does not always exploit this well. That is, it has
33 been observed that there are plenty of cases where GCC explicitly
34 zero-extends registers for x86_64 that are actually useless because
35 these registers were already implicitly zero-extended in a prior
36 instruction. This pass tries to eliminate such useless zero extension
37 instructions.
39 How does this pass work ?
40 --------------------------
42 This pass is run after register allocation. Hence, all registers that
43 this pass deals with are hard registers. This pass first looks for a
44 zero-extension instruction that could possibly be redundant. Such zero
45 extension instructions show up in RTL with the pattern :
46 (set (reg:DI x) (zero_extend:DI (reg:SI x))).
47 where x can be any one of the 64-bit hard registers.
48 Now, this pass tries to eliminate this instruction by merging the
49 zero-extension with the definitions of register x. For instance, if
50 one of the definitions of register x was :
51 (set (reg:SI x) (plus:SI (reg:SI z1) (reg:SI z2))),
52 then the combination converts this into :
53 (set (reg:DI x) (zero_extend:DI (plus:SI (reg:SI z1) (reg:SI z2)))).
54 If all the merged definitions are recognizable assembly instructions,
55 the zero-extension is effectively eliminated. For example, in x86_64,
56 implicit zero-extensions are captured with appropriate patterns in the
57 i386.md file. Hence, these merged definition can be matched to a single
58 assembly instruction. The original zero-extension instruction is then
59 deleted if all the definitions can be merged.
61 However, there are cases where the definition instruction cannot be
62 merged with a zero-extend. Examples are CALL instructions. In such
63 cases, the original zero extension is not redundant and this pass does
64 not delete it.
66 Handling conditional moves :
67 ----------------------------
69 Architectures like x86_64 support conditional moves whose semantics for
70 zero-extension differ from the other instructions. For instance, the
71 instruction *cmov ebx, eax*
72 zero-extends eax onto rax only when the move from ebx to eax happens.
73 Otherwise, eax may not be zero-extended. Conditional moves appear as
74 RTL instructions of the form
75 (set (reg:SI x) (if_then_else (cond) (reg:SI y) (reg:SI z))).
76 This pass tries to merge a zero-extension with a conditional move by
77 actually merging the defintions of y and z with a zero-extend and then
78 converting the conditional move into :
79 (set (reg:DI x) (if_then_else (cond) (reg:DI y) (reg:DI z))).
80 Since registers y and z are zero-extended, register x will also be
81 zero-extended after the conditional move. Note that this step has to
82 be done transitively since the definition of a conditional copy can be
83 another conditional copy.
85 Motivating Example I :
86 ---------------------
87 For this program :
88 **********************************************
89 bad_code.c
91 int mask[1000];
93 int foo(unsigned x)
95 if (x < 10)
96 x = x * 45;
97 else
98 x = x * 78;
99 return mask[x];
101 **********************************************
103 $ gcc -O2 -fsee bad_code.c (Turned on existing sign-extension elimination)
104 ........
105 400315: b8 4e 00 00 00 mov $0x4e,%eax
106 40031a: 0f af f8 imul %eax,%edi
107 40031d: 89 ff mov %edi,%edi --> Useless extend
108 40031f: 8b 04 bd 60 19 40 00 mov 0x401960(,%rdi,4),%eax
109 400326: c3 retq
110 ......
111 400330: ba 2d 00 00 00 mov $0x2d,%edx
112 400335: 0f af fa imul %edx,%edi
113 400338: 89 ff mov %edi,%edi --> Useless extend
114 40033a: 8b 04 bd 60 19 40 00 mov 0x401960(,%rdi,4),%eax
115 400341: c3 retq
117 $ gcc -O2 -fzee bad_code.c
118 ......
119 400315: 6b ff 4e imul $0x4e,%edi,%edi
120 400318: 8b 04 bd 40 19 40 00 mov 0x401940(,%rdi,4),%eax
121 40031f: c3 retq
122 400320: 6b ff 2d imul $0x2d,%edi,%edi
123 400323: 8b 04 bd 40 19 40 00 mov 0x401940(,%rdi,4),%eax
124 40032a: c3 retq
126 Motivating Example II :
127 ---------------------
129 Here is an example with a conditional move.
131 For this program :
132 **********************************************
134 unsigned long long foo(unsigned x , unsigned y)
136 unsigned z;
137 if (x > 100)
138 z = x + y;
139 else
140 z = x - y;
141 return (unsigned long long)(z);
144 $ gcc -O2 -fsee bad_code.c (Turned on existing sign-extension elimination)
145 ............
146 400360: 8d 14 3e lea (%rsi,%rdi,1),%edx
147 400363: 89 f8 mov %edi,%eax
148 400365: 29 f0 sub %esi,%eax
149 400367: 83 ff 65 cmp $0x65,%edi
150 40036a: 0f 43 c2 cmovae %edx,%eax
151 40036d: 89 c0 mov %eax,%eax --> Useless extend
152 40036f: c3 retq
154 $ gcc -O2 -fzee bad_code.c
155 .............
156 400360: 89 fa mov %edi,%edx
157 400362: 8d 04 3e lea (%rsi,%rdi,1),%eax
158 400365: 29 f2 sub %esi,%edx
159 400367: 83 ff 65 cmp $0x65,%edi
160 40036a: 89 d6 mov %edx,%esi
161 40036c: 48 0f 42 c6 cmovb %rsi,%rax
162 400370: c3 retq
165 Usefulness :
166 ----------
168 This pass reduces the dynamic instruction count of a compression benchmark
169 by 2.8% and improves its run time by about 1%. The compression benchmark
170 had the following code sequence in a very hot region of code before ZEE
171 optimized it :
173 shr $0x5, %edx
174 mov %edx, %edx --> Useless zero-extend */
177 #include "config.h"
178 #include "system.h"
179 #include "coretypes.h"
180 #include "tm.h"
181 #include "rtl.h"
182 #include "tree.h"
183 #include "tm_p.h"
184 #include "flags.h"
185 #include "regs.h"
186 #include "hard-reg-set.h"
187 #include "basic-block.h"
188 #include "insn-config.h"
189 #include "function.h"
190 #include "expr.h"
191 #include "insn-attr.h"
192 #include "recog.h"
193 #include "diagnostic-core.h"
194 #include "toplev.h"
195 #include "target.h"
196 #include "timevar.h"
197 #include "optabs.h"
198 #include "insn-codes.h"
199 #include "rtlhooks-def.h"
200 /* Include output.h for dump_file. */
201 #include "output.h"
202 #include "params.h"
203 #include "timevar.h"
204 #include "tree-pass.h"
205 #include "df.h"
206 #include "cgraph.h"
208 /* This says if a register is newly created for the purpose of
209 zero-extension. */
211 enum insn_merge_code
213 MERGE_NOT_ATTEMPTED = 0,
214 MERGE_SUCCESS
217 /* This says if a INSN UID or its definition has already been merged
218 with a zero-extend or not. */
220 static enum insn_merge_code *is_insn_merge_attempted;
221 static int max_insn_uid;
223 /* Returns the merge code status for INSN. */
225 static enum insn_merge_code
226 get_insn_status (rtx insn)
228 gcc_assert (INSN_UID (insn) < max_insn_uid);
229 return is_insn_merge_attempted[INSN_UID (insn)];
232 /* Sets the merge code status of INSN to CODE. */
234 static void
235 set_insn_status (rtx insn, enum insn_merge_code code)
237 gcc_assert (INSN_UID (insn) < max_insn_uid);
238 is_insn_merge_attempted[INSN_UID (insn)] = code;
241 /* Given a insn (CURR_INSN) and a pointer to the SET rtx (ORIG_SET)
242 that needs to be modified, this code modifies the SET rtx to a
243 new SET rtx that zero_extends the right hand expression into a DImode
244 register (NEWREG) on the left hand side. Note that multiple
245 assumptions are made about the nature of the set that needs
246 to be true for this to work and is called from merge_def_and_ze.
248 Original :
249 (set (reg:SI a) (expression))
251 Transform :
252 (set (reg:DI a) (zero_extend (expression)))
254 Special Cases :
255 If the expression is a constant or another zero_extend directly
256 assign it to the DI mode register. */
258 static bool
259 combine_set_zero_extend (rtx curr_insn, rtx *orig_set, rtx newreg)
261 rtx temp_extension, simplified_temp_extension, new_set, new_const_int;
262 rtx orig_src;
263 HOST_WIDE_INT val;
264 unsigned int mask, delta_width;
266 /* Change the SET rtx and validate it. */
267 orig_src = SET_SRC (*orig_set);
268 new_set = NULL_RTX;
270 /* The right hand side can also be VOIDmode. These cases have to be
271 handled differently. */
273 if (GET_MODE (orig_src) != SImode)
275 /* Merge constants by directly moving the constant into the
276 DImode register under some conditions. */
278 if (GET_CODE (orig_src) == CONST_INT
279 && HOST_BITS_PER_WIDE_INT >= GET_MODE_BITSIZE (SImode))
281 if (INTVAL (orig_src) >= 0)
282 new_set = gen_rtx_SET (VOIDmode, newreg, orig_src);
283 else if (INTVAL (orig_src) < 0)
285 /* Zero-extending a negative SImode integer into DImode
286 makes it a positive integer. Convert the given negative
287 integer into the appropriate integer when zero-extended. */
289 delta_width = HOST_BITS_PER_WIDE_INT - GET_MODE_BITSIZE (SImode);
290 mask = (~(unsigned HOST_WIDE_INT) 0) >> delta_width;
291 val = INTVAL (orig_src);
292 val = val & mask;
293 new_const_int = gen_rtx_CONST_INT (VOIDmode, val);
294 new_set = gen_rtx_SET (VOIDmode, newreg, new_const_int);
296 else
297 return false;
299 else
301 /* This is mostly due to a call insn that should not be
302 optimized. */
304 return false;
307 else if (GET_CODE (orig_src) == ZERO_EXTEND)
309 /* Here a zero-extend is used to get to SI. Why not make it
310 all the way till DI. */
312 temp_extension = gen_rtx_ZERO_EXTEND (DImode, XEXP (orig_src, 0));
313 simplified_temp_extension = simplify_rtx (temp_extension);
314 if (simplified_temp_extension)
315 temp_extension = simplified_temp_extension;
316 new_set = gen_rtx_SET (VOIDmode, newreg, temp_extension);
318 else if (GET_CODE (orig_src) == IF_THEN_ELSE)
320 /* Only IF_THEN_ELSE of phi-type copies are combined. Otherwise,
321 in general, IF_THEN_ELSE should not be combined. */
323 return false;
325 else
327 /* This is the normal case we expect. */
329 temp_extension = gen_rtx_ZERO_EXTEND (DImode, orig_src);
330 simplified_temp_extension = simplify_rtx (temp_extension);
331 if (simplified_temp_extension)
332 temp_extension = simplified_temp_extension;
333 new_set = gen_rtx_SET (VOIDmode, newreg, temp_extension);
336 gcc_assert (new_set != NULL_RTX);
338 /* This change is a part of a group of changes. Hence,
339 validate_change will not try to commit the change. */
341 if (validate_change (curr_insn, orig_set, new_set, true))
343 if (dump_file)
345 fprintf (dump_file, "Merged Instruction with ZERO_EXTEND:\n");
346 print_rtl_single (dump_file, curr_insn);
348 return true;
350 return false;
353 /* This returns the DI mode for the SI register REG_SI. */
355 static rtx
356 get_reg_di (rtx reg_si)
358 rtx newreg;
360 newreg = gen_rtx_REG (DImode, REGNO (reg_si));
361 gcc_assert (newreg);
362 return newreg;
365 /* Treat if_then_else insns, where the operands of both branches
366 are registers, as copies. For instance,
367 Original :
368 (set (reg:SI a) (if_then_else (cond) (reg:SI b) (reg:SI c)))
369 Transformed :
370 (set (reg:DI a) (if_then_else (cond) (reg:DI b) (reg:DI c)))
371 DEF_INSN is the if_then_else insn. */
373 static bool
374 transform_ifelse (rtx def_insn)
376 rtx set_insn = PATTERN (def_insn);
377 rtx srcreg, dstreg, srcreg2;
378 rtx map_srcreg, map_dstreg, map_srcreg2;
379 rtx ifexpr;
380 rtx cond;
381 rtx new_set;
383 gcc_assert (GET_CODE (set_insn) == SET);
384 cond = XEXP (SET_SRC (set_insn), 0);
385 dstreg = SET_DEST (set_insn);
386 srcreg = XEXP (SET_SRC (set_insn), 1);
387 srcreg2 = XEXP (SET_SRC (set_insn), 2);
388 map_srcreg = get_reg_di (srcreg);
389 map_srcreg2 = get_reg_di (srcreg2);
390 map_dstreg = get_reg_di (dstreg);
391 ifexpr = gen_rtx_IF_THEN_ELSE (DImode, cond, map_srcreg, map_srcreg2);
392 new_set = gen_rtx_SET (VOIDmode, map_dstreg, ifexpr);
394 if (validate_change (def_insn, &PATTERN (def_insn), new_set, true))
396 if (dump_file)
398 fprintf (dump_file, "Cond_Move Instruction's mode extended :\n");
399 print_rtl_single (dump_file, def_insn);
401 return true;
403 else
404 return false;
407 /* Function to get all the immediate definitions of an instruction.
408 The reaching definitions are desired for WHICH_REG used in
409 CURR_INSN. This function returns 0 if there was an error getting
410 a definition. Upon success, this function returns the number of
411 definitions and stores the definitions in DEST. */
413 static int
414 get_defs (rtx curr_insn, rtx which_reg, VEC (rtx,heap) **dest)
416 df_ref reg_info, *defs;
417 struct df_link *def_chain;
418 int n_refs = 0;
420 defs = DF_INSN_USES (curr_insn);
421 reg_info = NULL;
423 while (*defs)
425 reg_info = *defs;
426 if (GET_CODE (DF_REF_REG (reg_info)) == SUBREG)
427 return 0;
428 if (REGNO (DF_REF_REG (reg_info)) == REGNO (which_reg))
429 break;
430 defs++;
433 gcc_assert (reg_info != NULL && defs != NULL);
434 def_chain = DF_REF_CHAIN (reg_info);
436 while (def_chain)
438 /* Problem getting some definition for this instruction. */
440 if (def_chain->ref == NULL)
441 return 0;
442 if (DF_REF_INSN_INFO (def_chain->ref) == NULL)
443 return 0;
444 def_chain = def_chain->next;
447 def_chain = DF_REF_CHAIN (reg_info);
449 if (dest == NULL)
450 return 1;
452 while (def_chain)
454 VEC_safe_push (rtx, heap, *dest, DF_REF_INSN (def_chain->ref));
455 def_chain = def_chain->next;
456 n_refs++;
458 return n_refs;
461 /* rtx function to check if this SET insn, EXPR, is a conditional copy insn :
462 (set (reg:SI a ) (IF_THEN_ELSE (cond) (reg:SI b) (reg:SI c)))
463 Called from is_insn_cond_copy. DATA stores the two registers on each
464 side of the condition. */
466 static int
467 is_this_a_cmove (rtx expr, void *data)
469 /* Check for conditional (if-then-else) copy. */
471 if (GET_CODE (expr) == SET
472 && GET_CODE (SET_DEST (expr)) == REG
473 && GET_MODE (SET_DEST (expr)) == SImode
474 && GET_CODE (SET_SRC (expr)) == IF_THEN_ELSE
475 && GET_CODE (XEXP (SET_SRC (expr), 1)) == REG
476 && GET_MODE (XEXP (SET_SRC (expr), 1)) == SImode
477 && GET_CODE (XEXP (SET_SRC (expr), 2)) == REG
478 && GET_MODE (XEXP (SET_SRC (expr), 2)) == SImode)
480 ((rtx *)data)[0] = XEXP (SET_SRC (expr), 1);
481 ((rtx *)data)[1] = XEXP (SET_SRC (expr), 2);
482 return 1;
484 return 0;
487 /* This returns 1 if it found
488 (SET (reg:SI REGNO (def_reg)) (if_then_else (cond) (REG:SI x1) (REG:SI x2)))
489 in the DEF_INSN pattern. It stores the x1 and x2 in COPY_REG_1
490 and COPY_REG_2. */
492 static int
493 is_insn_cond_copy (rtx def_insn, rtx *copy_reg_1, rtx *copy_reg_2)
495 int type;
496 rtx set_expr;
497 rtx srcreg[2];
499 srcreg[0] = NULL_RTX;
500 srcreg[1] = NULL_RTX;
502 set_expr = single_set (def_insn);
504 if (set_expr == NULL_RTX)
505 return 0;
507 type = is_this_a_cmove (set_expr, (void *) srcreg);
509 if (type)
511 *copy_reg_1 = srcreg[0];
512 *copy_reg_2 = srcreg[1];
513 return type;
516 return 0;
519 /* Reaching Definitions of the zero-extended register could be conditional
520 copies or regular definitions. This function separates the two types into
521 two lists, DEFS_LIST and COPIES_LIST. This is necessary because, if a
522 reaching definition is a conditional copy, combining the zero_extend with
523 this definition is wrong. Conditional copies are merged by transitively
524 merging its definitions. The defs_list is populated with all the reaching
525 definitions of the zero-extension instruction (ZERO_EXTEND_INSN) which must
526 be merged with a zero_extend. The copies_list contains all the conditional
527 moves that will later be extended into a DI mode conditonal move if all the
528 merges are successful. The function returns false when there is a failure
529 in getting some definitions, like that of parameters. It returns 1 upon
530 success, 0 upon failure and 2 when all definitions of the ZERO_EXTEND_INSN
531 were merged previously. */
533 static int
534 make_defs_and_copies_lists (rtx zero_extend_insn, rtx set_pat,
535 VEC (rtx,heap) **defs_list,
536 VEC (rtx,heap) **copies_list)
538 bool *is_insn_visited;
539 VEC (rtx,heap) *work_list;
540 rtx srcreg, copy_reg_1, copy_reg_2;
541 rtx def_insn;
542 int n_defs = 0;
543 int vec_index = 0;
544 int n_worklist = 0;
545 int i, is_copy;
547 srcreg = XEXP (SET_SRC (set_pat), 0);
548 work_list = VEC_alloc (rtx, heap, 8);
550 /* Initialize the Work List */
551 n_worklist = get_defs (zero_extend_insn, srcreg, &work_list);
553 if (n_worklist == 0)
555 VEC_free (rtx, heap, work_list);
556 /* The number of defs being equal to zero can only imply that all of its
557 definitions have been previously merged. */
558 return 2;
561 is_insn_visited = XNEWVEC (bool, max_insn_uid);
563 for (i = 0; i < max_insn_uid; i++)
564 is_insn_visited[i] = false;
567 /* Perform transitive closure for conditional copies. */
568 while (n_worklist > vec_index)
570 def_insn = VEC_index (rtx, work_list, vec_index);
571 gcc_assert (INSN_UID (def_insn) < max_insn_uid);
573 if (is_insn_visited[INSN_UID (def_insn)])
575 vec_index++;
576 continue;
579 is_insn_visited[INSN_UID (def_insn)] = true;
580 copy_reg_1 = copy_reg_2 = NULL_RTX;
581 is_copy = is_insn_cond_copy (def_insn, &copy_reg_1, &copy_reg_2);
582 if (is_copy)
584 gcc_assert (copy_reg_1 && copy_reg_2);
586 /* Push it into the copy list first. */
588 VEC_safe_push (rtx, heap, *copies_list, def_insn);
590 /* Perform transitive closure here */
592 n_defs = get_defs (def_insn, copy_reg_1, &work_list);
594 if (n_defs == 0)
596 VEC_free (rtx, heap, work_list);
597 XDELETEVEC (is_insn_visited);
598 return 0;
600 n_worklist += n_defs;
602 n_defs = get_defs (def_insn, copy_reg_2, &work_list);
603 if (n_defs == 0)
605 VEC_free (rtx, heap, work_list);
606 XDELETEVEC (is_insn_visited);
607 return 0;
609 n_worklist += n_defs;
611 else
613 VEC_safe_push (rtx, heap, *defs_list, def_insn);
615 vec_index++;
618 VEC_free (rtx, heap, work_list);
619 XDELETEVEC (is_insn_visited);
620 return 1;
623 /* Merge the DEF_INSN with a zero-extend. Calls combine_set_zero_extend
624 on the SET pattern. */
626 static bool
627 merge_def_and_ze (rtx def_insn)
629 enum rtx_code code;
630 rtx setreg;
631 rtx *sub_rtx;
632 rtx s_expr;
633 int i;
635 code = GET_CODE (PATTERN (def_insn));
636 sub_rtx = NULL;
638 if (code == PARALLEL)
640 for (i = 0; i < XVECLEN (PATTERN (def_insn), 0); i++)
642 s_expr = XVECEXP (PATTERN (def_insn), 0, i);
643 if (GET_CODE (s_expr) != SET)
644 continue;
646 if (sub_rtx == NULL)
647 sub_rtx = &XVECEXP (PATTERN (def_insn), 0, i);
648 else
650 /* PARALLEL with multiple SETs. */
651 return false;
655 else if (code == SET)
656 sub_rtx = &PATTERN (def_insn);
657 else
659 /* It is not a PARALLEL or a SET, what could it be ? */
660 return false;
663 gcc_assert (sub_rtx != NULL);
665 if (GET_CODE (SET_DEST (*sub_rtx)) == REG
666 && GET_MODE (SET_DEST (*sub_rtx)) == SImode)
668 setreg = get_reg_di (SET_DEST (*sub_rtx));
669 return combine_set_zero_extend (def_insn, sub_rtx, setreg);
671 else
672 return false;
673 return true;
676 /* This function goes through all reaching defs of the source
677 of the zero extension instruction (ZERO_EXTEND_INSN) and
678 tries to combine the zero extension with the definition
679 instruction. The changes are made as a group so that even
680 if one definition cannot be merged, all reaching definitions
681 end up not being merged. When a conditional copy is encountered,
682 merging is attempted transitively on its definitions. It returns
683 true upon success and false upon failure. */
685 static bool
686 combine_reaching_defs (rtx zero_extend_insn, rtx set_pat)
688 rtx def_insn;
689 bool merge_successful = true;
690 int i;
691 int defs_ix;
692 int outcome;
694 /* To store the definitions that have been merged. */
696 VEC (rtx, heap) *defs_list, *copies_list, *vec;
697 enum insn_merge_code merge_code;
699 defs_list = VEC_alloc (rtx, heap, 8);
700 copies_list = VEC_alloc (rtx, heap, 8);
702 outcome = make_defs_and_copies_lists (zero_extend_insn,
703 set_pat, &defs_list, &copies_list);
705 /* outcome == 2 implies that all the definitions for this zero_extend were
706 merged while previously when handling other zero_extends. */
708 if (outcome == 2)
710 VEC_free (rtx, heap, defs_list);
711 VEC_free (rtx, heap, copies_list);
712 if (dump_file)
713 fprintf (dump_file, "All definitions have been merged previously.\n");
714 return true;
717 if (outcome == 0)
719 VEC_free (rtx, heap, defs_list);
720 VEC_free (rtx, heap, copies_list);
721 return false;
724 merge_successful = true;
726 /* Go through the defs vector and try to merge all the definitions
727 in this vector. */
729 vec = VEC_alloc (rtx, heap, 8);
730 FOR_EACH_VEC_ELT (rtx, defs_list, defs_ix, def_insn)
732 merge_code = get_insn_status (def_insn);
733 gcc_assert (merge_code == MERGE_NOT_ATTEMPTED);
735 if (merge_def_and_ze (def_insn))
736 VEC_safe_push (rtx, heap, vec, def_insn);
737 else
739 merge_successful = false;
740 break;
744 /* Now go through the conditional copies vector and try to merge all
745 the copies in this vector. */
747 if (merge_successful)
749 FOR_EACH_VEC_ELT (rtx, copies_list, i, def_insn)
751 if (transform_ifelse (def_insn))
753 VEC_safe_push (rtx, heap, vec, def_insn);
755 else
757 merge_successful = false;
758 break;
763 if (merge_successful)
765 /* Commit the changes here if possible */
766 /* XXX : Now, it is an all or nothing scenario. Even if one definition
767 cannot be merged we totally bail. In future, allow zero-extensions to
768 be partially eliminated along those paths where the definitions could
769 be merged. */
771 if (apply_change_group ())
773 if (dump_file)
774 fprintf (dump_file, "All merges were successful ....\n");
776 FOR_EACH_VEC_ELT (rtx, vec, i, def_insn)
778 set_insn_status (def_insn, MERGE_SUCCESS);
781 VEC_free (rtx, heap, vec);
782 VEC_free (rtx, heap, defs_list);
783 VEC_free (rtx, heap, copies_list);
784 return true;
786 else
788 /* Changes need not be cancelled explicitly as apply_change_group
789 does it. Print list of definitions in the dump_file for debug
790 purposes. This zero-extension cannot be deleted. */
792 if (dump_file)
794 FOR_EACH_VEC_ELT (rtx, vec, i, def_insn)
796 fprintf (dump_file, " Ummergable definitions : \n");
797 print_rtl_single (dump_file, def_insn);
802 else
804 /* Cancel any changes that have been made so far. */
805 cancel_changes (0);
808 VEC_free (rtx, heap, vec);
809 VEC_free (rtx, heap, defs_list);
810 VEC_free (rtx, heap, copies_list);
811 return false;
814 /* Carry information about zero-extensions while walking the RTL. */
816 struct zero_extend_info
818 /* The insn where the zero-extension is. */
819 rtx insn;
821 /* The list of candidates. */
822 VEC (rtx, heap) *insn_list;
825 /* Add a zero-extend pattern that could be eliminated. This is called via
826 note_stores from find_removable_zero_extends. */
828 static void
829 add_removable_zero_extend (rtx x ATTRIBUTE_UNUSED, const_rtx expr, void *data)
831 struct zero_extend_info *zei = (struct zero_extend_info *)data;
832 rtx src, dest;
834 /* We are looking for SET (REG:DI N) (ZERO_EXTEND (REG:SI N)). */
835 if (GET_CODE (expr) != SET)
836 return;
838 src = SET_SRC (expr);
839 dest = SET_DEST (expr);
841 if (REG_P (dest)
842 && GET_MODE (dest) == DImode
843 && GET_CODE (src) == ZERO_EXTEND
844 && REG_P (XEXP (src, 0))
845 && GET_MODE (XEXP (src, 0)) == SImode
846 && REGNO (dest) == REGNO (XEXP (src, 0)))
848 if (get_defs (zei->insn, XEXP (src, 0), NULL))
849 VEC_safe_push (rtx, heap, zei->insn_list, zei->insn);
850 else if (dump_file)
852 fprintf (dump_file, "Cannot eliminate zero-extension: \n");
853 print_rtl_single (dump_file, zei->insn);
854 fprintf (dump_file, "No defs. Could be extending parameters.\n");
859 /* Traverse the instruction stream looking for zero-extends and return the
860 list of candidates. */
862 static VEC (rtx,heap)*
863 find_removable_zero_extends (void)
865 struct zero_extend_info zei;
866 basic_block bb;
867 rtx insn;
869 zei.insn_list = VEC_alloc (rtx, heap, 8);
871 FOR_EACH_BB (bb)
872 FOR_BB_INSNS (bb, insn)
874 if (!NONDEBUG_INSN_P (insn))
875 continue;
877 zei.insn = insn;
878 note_stores (PATTERN (insn), add_removable_zero_extend, &zei);
881 return zei.insn_list;
884 /* This is the main function that checks the insn stream for redundant
885 zero extensions and tries to remove them if possible. */
887 static unsigned int
888 find_and_remove_ze (void)
890 rtx curr_insn = NULL_RTX;
891 int i;
892 int ix;
893 long long num_realized = 0;
894 long long num_ze_opportunities = 0;
895 VEC (rtx, heap) *zeinsn_list;
896 VEC (rtx, heap) *zeinsn_del_list;
898 /* Construct DU chain to get all reaching definitions of each
899 zero-extension instruction. */
901 df_chain_add_problem (DF_UD_CHAIN + DF_DU_CHAIN);
902 df_analyze ();
904 max_insn_uid = get_max_uid ();
906 is_insn_merge_attempted
907 = XNEWVEC (enum insn_merge_code,
908 sizeof (enum insn_merge_code) * max_insn_uid);
910 for (i = 0; i < max_insn_uid; i++)
911 is_insn_merge_attempted[i] = MERGE_NOT_ATTEMPTED;
913 num_ze_opportunities = num_realized = 0;
915 zeinsn_del_list = VEC_alloc (rtx, heap, 4);
917 zeinsn_list = find_removable_zero_extends ();
919 FOR_EACH_VEC_ELT (rtx, zeinsn_list, ix, curr_insn)
921 num_ze_opportunities++;
922 /* Try to combine the zero-extends with the definition here. */
924 if (dump_file)
926 fprintf (dump_file, "Trying to eliminate zero extension : \n");
927 print_rtl_single (dump_file, curr_insn);
930 if (combine_reaching_defs (curr_insn, PATTERN (curr_insn)))
932 if (dump_file)
933 fprintf (dump_file, "Eliminated the zero extension...\n");
934 num_realized++;
935 VEC_safe_push (rtx, heap, zeinsn_del_list, curr_insn);
939 /* Delete all useless zero extensions here in one sweep. */
940 FOR_EACH_VEC_ELT (rtx, zeinsn_del_list, ix, curr_insn)
941 delete_insn (curr_insn);
943 free (is_insn_merge_attempted);
944 VEC_free (rtx, heap, zeinsn_list);
945 VEC_free (rtx, heap, zeinsn_del_list);
947 if (dump_file && num_ze_opportunities > 0)
948 fprintf (dump_file, "\n %s : num_zee_opportunities = %lld "
949 "num_realized = %lld \n",
950 current_function_name (),
951 num_ze_opportunities, num_realized);
953 df_finish_pass (false);
954 return 0;
957 /* Find and remove redundant zero extensions. */
959 static unsigned int
960 rest_of_handle_zee (void)
962 timevar_push (TV_ZEE);
963 find_and_remove_ze ();
964 timevar_pop (TV_ZEE);
965 return 0;
968 /* Run zee pass when flag_zee is set at optimization level > 0. */
970 static bool
971 gate_handle_zee (void)
973 return (optimize > 0 && flag_zee);
976 struct rtl_opt_pass pass_implicit_zee =
979 RTL_PASS,
980 "zee", /* name */
981 gate_handle_zee, /* gate */
982 rest_of_handle_zee, /* execute */
983 NULL, /* sub */
984 NULL, /* next */
985 0, /* static_pass_number */
986 TV_ZEE, /* tv_id */
987 0, /* properties_required */
988 0, /* properties_provided */
989 0, /* properties_destroyed */
990 0, /* todo_flags_start */
991 TODO_ggc_collect |
992 TODO_dump_func |
993 TODO_verify_rtl_sharing, /* todo_flags_finish */