1 /* Redundant Zero-extension elimination for targets that implicitly
2 zero-extend writes to the lower 32-bit portion of 64-bit registers.
3 Copyright (C) 2010 Free Software Foundation, Inc.
4 Contributed by Sriraman Tallam (tmsriram@google.com) and
5 Silvius Rus (rus@google.com)
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
24 /* Problem Description :
26 This pass is intended to be applicable only to targets that implicitly
27 zero-extend 64-bit registers after writing to their lower 32-bit half.
28 For instance, x86_64 zero-extends the upper bits of a register
29 implicitly whenever an instruction writes to its lower 32-bit half.
30 For example, the instruction *add edi,eax* also zero-extends the upper
31 32-bits of rax after doing the addition. These zero extensions come
32 for free and GCC does not always exploit this well. That is, it has
33 been observed that there are plenty of cases where GCC explicitly
34 zero-extends registers for x86_64 that are actually useless because
35 these registers were already implicitly zero-extended in a prior
36 instruction. This pass tries to eliminate such useless zero extension
39 How does this pass work ?
40 --------------------------
42 This pass is run after register allocation. Hence, all registers that
43 this pass deals with are hard registers. This pass first looks for a
44 zero-extension instruction that could possibly be redundant. Such zero
45 extension instructions show up in RTL with the pattern :
46 (set (reg:DI x) (zero_extend:DI (reg:SI x))).
47 where x can be any one of the 64-bit hard registers.
48 Now, this pass tries to eliminate this instruction by merging the
49 zero-extension with the definitions of register x. For instance, if
50 one of the definitions of register x was :
51 (set (reg:SI x) (plus:SI (reg:SI z1) (reg:SI z2))),
52 then the combination converts this into :
53 (set (reg:DI x) (zero_extend:DI (plus:SI (reg:SI z1) (reg:SI z2)))).
54 If all the merged definitions are recognizable assembly instructions,
55 the zero-extension is effectively eliminated. For example, in x86_64,
56 implicit zero-extensions are captured with appropriate patterns in the
57 i386.md file. Hence, these merged definitions can be matched to a single
58 assembly instruction. The original zero-extension instruction is then
59 deleted if all the definitions can be merged.
61 However, there are cases where the definition instruction cannot be
62 merged with a zero-extend. Examples are CALL instructions. In such
63 cases, the original zero extension is not redundant and this pass does
66 Handling conditional moves :
67 ----------------------------
69 Architectures like x86_64 support conditional moves whose semantics for
70 zero-extension differ from the other instructions. For instance, the
71 instruction *cmov ebx, eax*
72 zero-extends eax onto rax only when the move from ebx to eax happens.
73 Otherwise, eax may not be zero-extended. Conditional moves appear as
74 RTL instructions of the form
75 (set (reg:SI x) (if_then_else (cond) (reg:SI y) (reg:SI z))).
76 This pass tries to merge a zero-extension with a conditional move by
77 actually merging the definitions of y and z with a zero-extend and then
78 converting the conditional move into :
79 (set (reg:DI x) (if_then_else (cond) (reg:DI y) (reg:DI z))).
80 Since registers y and z are zero-extended, register x will also be
81 zero-extended after the conditional move. Note that this step has to
82 be done transitively since the definition of a conditional copy can be
83 another conditional copy.
85 Motivating Example I :
88 **********************************************
101 **********************************************
103 $ gcc -O2 -fsee bad_code.c (Turned on existing sign-extension elimination)
105 400315: b8 4e 00 00 00 mov $0x4e,%eax
106 40031a: 0f af f8 imul %eax,%edi
107 40031d: 89 ff mov %edi,%edi --> Useless extend
108 40031f: 8b 04 bd 60 19 40 00 mov 0x401960(,%rdi,4),%eax
111 400330: ba 2d 00 00 00 mov $0x2d,%edx
112 400335: 0f af fa imul %edx,%edi
113 400338: 89 ff mov %edi,%edi --> Useless extend
114 40033a: 8b 04 bd 60 19 40 00 mov 0x401960(,%rdi,4),%eax
117 $ gcc -O2 -fzee bad_code.c
119 400315: 6b ff 4e imul $0x4e,%edi,%edi
120 400318: 8b 04 bd 40 19 40 00 mov 0x401940(,%rdi,4),%eax
122 400320: 6b ff 2d imul $0x2d,%edi,%edi
123 400323: 8b 04 bd 40 19 40 00 mov 0x401940(,%rdi,4),%eax
126 Motivating Example II :
127 ---------------------
129 Here is an example with a conditional move.
132 **********************************************
134 unsigned long long foo(unsigned x , unsigned y)
141 return (unsigned long long)(z);
144 $ gcc -O2 -fsee bad_code.c (Turned on existing sign-extension elimination)
146 400360: 8d 14 3e lea (%rsi,%rdi,1),%edx
147 400363: 89 f8 mov %edi,%eax
148 400365: 29 f0 sub %esi,%eax
149 400367: 83 ff 65 cmp $0x65,%edi
150 40036a: 0f 43 c2 cmovae %edx,%eax
151 40036d: 89 c0 mov %eax,%eax --> Useless extend
154 $ gcc -O2 -fzee bad_code.c
156 400360: 89 fa mov %edi,%edx
157 400362: 8d 04 3e lea (%rsi,%rdi,1),%eax
158 400365: 29 f2 sub %esi,%edx
159 400367: 83 ff 65 cmp $0x65,%edi
160 40036a: 89 d6 mov %edx,%esi
161 40036c: 48 0f 42 c6 cmovb %rsi,%rax
168 This pass reduces the dynamic instruction count of a compression benchmark
169 by 2.8% and improves its run time by about 1%. The compression benchmark
170 had the following code sequence in a very hot region of code before ZEE
174 mov %edx, %edx --> Useless zero-extend */
179 #include "coretypes.h"
186 #include "hard-reg-set.h"
187 #include "basic-block.h"
188 #include "insn-config.h"
189 #include "function.h"
191 #include "insn-attr.h"
193 #include "diagnostic-core.h"
197 #include "insn-codes.h"
198 #include "rtlhooks-def.h"
199 /* Include output.h for dump_file. */
203 #include "tree-pass.h"
207 /* This says if a register is newly created for the purpose of
212 MERGE_NOT_ATTEMPTED
= 0,
216 /* This says if an INSN UID or its definition has already been merged
217 with a zero-extend or not. */
219 static enum insn_merge_code
*is_insn_merge_attempted
;
220 static int max_insn_uid
;
222 /* Returns the merge code status for INSN. */
224 static enum insn_merge_code
225 get_insn_status (rtx insn
)
227 gcc_assert (INSN_UID (insn
) < max_insn_uid
);
228 return is_insn_merge_attempted
[INSN_UID (insn
)];
231 /* Sets the merge code status of INSN to CODE. */
234 set_insn_status (rtx insn
, enum insn_merge_code code
)
236 gcc_assert (INSN_UID (insn
) < max_insn_uid
);
237 is_insn_merge_attempted
[INSN_UID (insn
)] = code
;
240 /* Given an insn (CURR_INSN) and a pointer to the SET rtx (ORIG_SET)
241 that needs to be modified, this code modifies the SET rtx to a
242 new SET rtx that zero_extends the right hand expression into a DImode
243 register (NEWREG) on the left hand side. Note that several
244 assumptions are made about the nature of the set, all of which need
245 to be true for this to work. This is called from merge_def_and_ze.
248 (set (reg:SI a) (expression))
251 (set (reg:DI a) (zero_extend (expression)))
254 If the expression is a constant or another zero_extend directly
255 assign it to the DI mode register. */
258 combine_set_zero_extend (rtx curr_insn
, rtx
*orig_set
, rtx newreg
)
260 rtx temp_extension
, simplified_temp_extension
, new_set
, new_const_int
;
263 unsigned int mask
, delta_width
;
265 /* Change the SET rtx and validate it. */
266 orig_src
= SET_SRC (*orig_set
);
269 /* The right hand side can also be VOIDmode. These cases have to be
270 handled differently. */
272 if (GET_MODE (orig_src
) != SImode
)
274 /* Merge constants by directly moving the constant into the
275 DImode register under some conditions. */
277 if (GET_CODE (orig_src
) == CONST_INT
278 && HOST_BITS_PER_WIDE_INT
>= GET_MODE_BITSIZE (SImode
))
280 if (INTVAL (orig_src
) >= 0)
281 new_set
= gen_rtx_SET (VOIDmode
, newreg
, orig_src
);
282 else if (INTVAL (orig_src
) < 0)
284 /* Zero-extending a negative SImode integer into DImode
285 makes it a positive integer. Convert the given negative
286 integer into the appropriate integer when zero-extended. */
288 delta_width
= HOST_BITS_PER_WIDE_INT
- GET_MODE_BITSIZE (SImode
);
289 mask
= (~(unsigned HOST_WIDE_INT
) 0) >> delta_width
;
290 val
= INTVAL (orig_src
);
292 new_const_int
= gen_rtx_CONST_INT (VOIDmode
, val
);
293 new_set
= gen_rtx_SET (VOIDmode
, newreg
, new_const_int
);
300 /* This is mostly due to a call insn that should not be
306 else if (GET_CODE (orig_src
) == ZERO_EXTEND
)
308 /* Here a zero-extend is used to get to SI. Extend it
309 all the way to DI instead. */
311 temp_extension
= gen_rtx_ZERO_EXTEND (DImode
, XEXP (orig_src
, 0));
312 simplified_temp_extension
= simplify_rtx (temp_extension
);
313 if (simplified_temp_extension
)
314 temp_extension
= simplified_temp_extension
;
315 new_set
= gen_rtx_SET (VOIDmode
, newreg
, temp_extension
);
317 else if (GET_CODE (orig_src
) == IF_THEN_ELSE
)
319 /* Only IF_THEN_ELSE of phi-type copies are combined. Otherwise,
320 in general, IF_THEN_ELSE should not be combined. */
326 /* This is the normal case we expect. */
328 temp_extension
= gen_rtx_ZERO_EXTEND (DImode
, orig_src
);
329 simplified_temp_extension
= simplify_rtx (temp_extension
);
330 if (simplified_temp_extension
)
331 temp_extension
= simplified_temp_extension
;
332 new_set
= gen_rtx_SET (VOIDmode
, newreg
, temp_extension
);
335 gcc_assert (new_set
!= NULL_RTX
);
337 /* This change is a part of a group of changes. Hence,
338 validate_change will not try to commit the change. */
340 if (validate_change (curr_insn
, orig_set
, new_set
, true))
344 fprintf (dump_file
, "Merged Instruction with ZERO_EXTEND:\n");
345 print_rtl_single (dump_file
, curr_insn
);
352 /* This returns the DImode register for the SImode register REG_SI. */
355 get_reg_di (rtx reg_si
)
359 newreg
= gen_rtx_REG (DImode
, REGNO (reg_si
));
364 /* Treat if_then_else insns, where the operands of both branches
365 are registers, as copies. For instance,
367 (set (reg:SI a) (if_then_else (cond) (reg:SI b) (reg:SI c)))
369 (set (reg:DI a) (if_then_else (cond) (reg:DI b) (reg:DI c)))
370 DEF_INSN is the if_then_else insn. */
373 transform_ifelse (rtx def_insn
)
375 rtx set_insn
= PATTERN (def_insn
);
376 rtx srcreg
, dstreg
, srcreg2
;
377 rtx map_srcreg
, map_dstreg
, map_srcreg2
;
382 gcc_assert (GET_CODE (set_insn
) == SET
);
383 cond
= XEXP (SET_SRC (set_insn
), 0);
384 dstreg
= SET_DEST (set_insn
);
385 srcreg
= XEXP (SET_SRC (set_insn
), 1);
386 srcreg2
= XEXP (SET_SRC (set_insn
), 2);
387 map_srcreg
= get_reg_di (srcreg
);
388 map_srcreg2
= get_reg_di (srcreg2
);
389 map_dstreg
= get_reg_di (dstreg
);
390 ifexpr
= gen_rtx_IF_THEN_ELSE (DImode
, cond
, map_srcreg
, map_srcreg2
);
391 new_set
= gen_rtx_SET (VOIDmode
, map_dstreg
, ifexpr
);
393 if (validate_change (def_insn
, &PATTERN (def_insn
), new_set
, true))
397 fprintf (dump_file
, "Cond_Move Instruction's mode extended :\n");
398 print_rtl_single (dump_file
, def_insn
);
406 /* Function to get all the immediate definitions of an instruction.
407 The reaching definitions are desired for WHICH_REG used in
408 CURR_INSN. This function returns 0 if there was an error getting
409 a definition. Upon success, this function returns the number of
410 definitions and stores the definitions in DEST. */
413 get_defs (rtx curr_insn
, rtx which_reg
, VEC (rtx
,heap
) **dest
)
415 df_ref reg_info
, *defs
;
416 struct df_link
*def_chain
;
419 defs
= DF_INSN_USES (curr_insn
);
425 if (GET_CODE (DF_REF_REG (reg_info
)) == SUBREG
)
427 if (REGNO (DF_REF_REG (reg_info
)) == REGNO (which_reg
))
432 gcc_assert (reg_info
!= NULL
&& defs
!= NULL
);
433 def_chain
= DF_REF_CHAIN (reg_info
);
437 /* Problem getting some definition for this instruction. */
439 if (def_chain
->ref
== NULL
)
441 if (DF_REF_INSN_INFO (def_chain
->ref
) == NULL
)
443 def_chain
= def_chain
->next
;
446 def_chain
= DF_REF_CHAIN (reg_info
);
453 VEC_safe_push (rtx
, heap
, *dest
, DF_REF_INSN (def_chain
->ref
));
454 def_chain
= def_chain
->next
;
460 /* rtx function to check if this SET insn, EXPR, is a conditional copy insn :
461 (set (reg:SI a ) (IF_THEN_ELSE (cond) (reg:SI b) (reg:SI c)))
462 Called from is_insn_cond_copy. DATA stores the two registers on each
463 side of the condition. */
466 is_this_a_cmove (rtx expr
, void *data
)
468 /* Check for conditional (if-then-else) copy. */
470 if (GET_CODE (expr
) == SET
471 && GET_CODE (SET_DEST (expr
)) == REG
472 && GET_MODE (SET_DEST (expr
)) == SImode
473 && GET_CODE (SET_SRC (expr
)) == IF_THEN_ELSE
474 && GET_CODE (XEXP (SET_SRC (expr
), 1)) == REG
475 && GET_MODE (XEXP (SET_SRC (expr
), 1)) == SImode
476 && GET_CODE (XEXP (SET_SRC (expr
), 2)) == REG
477 && GET_MODE (XEXP (SET_SRC (expr
), 2)) == SImode
)
479 ((rtx
*)data
)[0] = XEXP (SET_SRC (expr
), 1);
480 ((rtx
*)data
)[1] = XEXP (SET_SRC (expr
), 2);
486 /* This returns 1 if it found
487 (SET (reg:SI REGNO (def_reg)) (if_then_else (cond) (REG:SI x1) (REG:SI x2)))
488 in the DEF_INSN pattern. It stores the x1 and x2 in COPY_REG_1
492 is_insn_cond_copy (rtx def_insn
, rtx
*copy_reg_1
, rtx
*copy_reg_2
)
498 srcreg
[0] = NULL_RTX
;
499 srcreg
[1] = NULL_RTX
;
501 set_expr
= single_set (def_insn
);
503 if (set_expr
== NULL_RTX
)
506 type
= is_this_a_cmove (set_expr
, (void *) srcreg
);
510 *copy_reg_1
= srcreg
[0];
511 *copy_reg_2
= srcreg
[1];
518 /* Reaching Definitions of the zero-extended register could be conditional
519 copies or regular definitions. This function separates the two types into
520 two lists, DEFS_LIST and COPIES_LIST. This is necessary because, if a
521 reaching definition is a conditional copy, combining the zero_extend with
522 this definition is wrong. Conditional copies are merged by transitively
523 merging its definitions. The defs_list is populated with all the reaching
524 definitions of the zero-extension instruction (ZERO_EXTEND_INSN) which must
525 be merged with a zero_extend. The copies_list contains all the conditional
526 moves that will later be extended into a DI mode conditional move if all the
527 merges are successful. The function returns 0 when it fails to get
528 some definition, such as that of a parameter. It returns 1 upon
529 success and 2 when all definitions of the ZERO_EXTEND_INSN
530 were merged previously. */
533 make_defs_and_copies_lists (rtx zero_extend_insn
, rtx set_pat
,
534 VEC (rtx
,heap
) **defs_list
,
535 VEC (rtx
,heap
) **copies_list
)
537 bool *is_insn_visited
;
538 VEC (rtx
,heap
) *work_list
;
539 rtx srcreg
, copy_reg_1
, copy_reg_2
;
546 srcreg
= XEXP (SET_SRC (set_pat
), 0);
547 work_list
= VEC_alloc (rtx
, heap
, 8);
549 /* Initialize the Work List */
550 n_worklist
= get_defs (zero_extend_insn
, srcreg
, &work_list
);
554 VEC_free (rtx
, heap
, work_list
);
555 /* The number of defs being equal to zero can only imply that all of its
556 definitions have been previously merged. */
560 is_insn_visited
= XNEWVEC (bool, max_insn_uid
);
562 for (i
= 0; i
< max_insn_uid
; i
++)
563 is_insn_visited
[i
] = false;
566 /* Perform transitive closure for conditional copies. */
567 while (n_worklist
> vec_index
)
569 def_insn
= VEC_index (rtx
, work_list
, vec_index
);
570 gcc_assert (INSN_UID (def_insn
) < max_insn_uid
);
572 if (is_insn_visited
[INSN_UID (def_insn
)])
578 is_insn_visited
[INSN_UID (def_insn
)] = true;
579 copy_reg_1
= copy_reg_2
= NULL_RTX
;
580 is_copy
= is_insn_cond_copy (def_insn
, ©_reg_1
, ©_reg_2
);
583 gcc_assert (copy_reg_1
&& copy_reg_2
);
585 /* Push it into the copy list first. */
587 VEC_safe_push (rtx
, heap
, *copies_list
, def_insn
);
589 /* Perform transitive closure here */
591 n_defs
= get_defs (def_insn
, copy_reg_1
, &work_list
);
595 VEC_free (rtx
, heap
, work_list
);
596 XDELETEVEC (is_insn_visited
);
599 n_worklist
+= n_defs
;
601 n_defs
= get_defs (def_insn
, copy_reg_2
, &work_list
);
604 VEC_free (rtx
, heap
, work_list
);
605 XDELETEVEC (is_insn_visited
);
608 n_worklist
+= n_defs
;
612 VEC_safe_push (rtx
, heap
, *defs_list
, def_insn
);
617 VEC_free (rtx
, heap
, work_list
);
618 XDELETEVEC (is_insn_visited
);
622 /* Merge the DEF_INSN with a zero-extend. Calls combine_set_zero_extend
623 on the SET pattern. */
626 merge_def_and_ze (rtx def_insn
)
634 code
= GET_CODE (PATTERN (def_insn
));
637 if (code
== PARALLEL
)
639 for (i
= 0; i
< XVECLEN (PATTERN (def_insn
), 0); i
++)
641 s_expr
= XVECEXP (PATTERN (def_insn
), 0, i
);
642 if (GET_CODE (s_expr
) != SET
)
646 sub_rtx
= &XVECEXP (PATTERN (def_insn
), 0, i
);
649 /* PARALLEL with multiple SETs. */
654 else if (code
== SET
)
655 sub_rtx
= &PATTERN (def_insn
);
658 /* It is not a PARALLEL or a SET, what could it be ? */
662 gcc_assert (sub_rtx
!= NULL
);
664 if (GET_CODE (SET_DEST (*sub_rtx
)) == REG
665 && GET_MODE (SET_DEST (*sub_rtx
)) == SImode
)
667 setreg
= get_reg_di (SET_DEST (*sub_rtx
));
668 return combine_set_zero_extend (def_insn
, sub_rtx
, setreg
);
675 /* This function goes through all reaching defs of the source
676 of the zero extension instruction (ZERO_EXTEND_INSN) and
677 tries to combine the zero extension with the definition
678 instruction. The changes are made as a group so that even
679 if one definition cannot be merged, all reaching definitions
680 end up not being merged. When a conditional copy is encountered,
681 merging is attempted transitively on its definitions. It returns
682 true upon success and false upon failure. */
685 combine_reaching_defs (rtx zero_extend_insn
, rtx set_pat
)
688 bool merge_successful
= true;
693 /* To store the definitions that have been merged. */
695 VEC (rtx
, heap
) *defs_list
, *copies_list
, *vec
;
696 enum insn_merge_code merge_code
;
698 defs_list
= VEC_alloc (rtx
, heap
, 8);
699 copies_list
= VEC_alloc (rtx
, heap
, 8);
701 outcome
= make_defs_and_copies_lists (zero_extend_insn
,
702 set_pat
, &defs_list
, &copies_list
);
704 /* outcome == 2 implies that all the definitions for this zero_extend were
705 merged previously, while handling other zero_extends. */
709 VEC_free (rtx
, heap
, defs_list
);
710 VEC_free (rtx
, heap
, copies_list
);
712 fprintf (dump_file
, "All definitions have been merged previously.\n");
718 VEC_free (rtx
, heap
, defs_list
);
719 VEC_free (rtx
, heap
, copies_list
);
723 merge_successful
= true;
725 /* Go through the defs vector and try to merge all the definitions
728 vec
= VEC_alloc (rtx
, heap
, 8);
729 FOR_EACH_VEC_ELT (rtx
, defs_list
, defs_ix
, def_insn
)
731 merge_code
= get_insn_status (def_insn
);
732 gcc_assert (merge_code
== MERGE_NOT_ATTEMPTED
);
734 if (merge_def_and_ze (def_insn
))
735 VEC_safe_push (rtx
, heap
, vec
, def_insn
);
738 merge_successful
= false;
743 /* Now go through the conditional copies vector and try to merge all
744 the copies in this vector. */
746 if (merge_successful
)
748 FOR_EACH_VEC_ELT (rtx
, copies_list
, i
, def_insn
)
750 if (transform_ifelse (def_insn
))
752 VEC_safe_push (rtx
, heap
, vec
, def_insn
);
756 merge_successful
= false;
762 if (merge_successful
)
764 /* Commit the changes here if possible */
765 /* XXX : Now, it is an all or nothing scenario. Even if one definition
766 cannot be merged we totally bail. In future, allow zero-extensions to
767 be partially eliminated along those paths where the definitions could
770 if (apply_change_group ())
773 fprintf (dump_file
, "All merges were successful ....\n");
775 FOR_EACH_VEC_ELT (rtx
, vec
, i
, def_insn
)
777 set_insn_status (def_insn
, MERGE_SUCCESS
);
780 VEC_free (rtx
, heap
, vec
);
781 VEC_free (rtx
, heap
, defs_list
);
782 VEC_free (rtx
, heap
, copies_list
);
787 /* Changes need not be cancelled explicitly as apply_change_group
788 does it. Print list of definitions in the dump_file for debug
789 purposes. This zero-extension cannot be deleted. */
793 FOR_EACH_VEC_ELT (rtx
, vec
, i
, def_insn
)
795 fprintf (dump_file
, " Ummergable definitions : \n");
796 print_rtl_single (dump_file
, def_insn
);
803 /* Cancel any changes that have been made so far. */
807 VEC_free (rtx
, heap
, vec
);
808 VEC_free (rtx
, heap
, defs_list
);
809 VEC_free (rtx
, heap
, copies_list
);
813 /* Carry information about zero-extensions while walking the RTL. */
815 struct zero_extend_info
817 /* The insn where the zero-extension is. */
820 /* The list of candidates. */
821 VEC (rtx
, heap
) *insn_list
;
824 /* Add a zero-extend pattern that could be eliminated. This is called via
825 note_stores from find_removable_zero_extends. */
828 add_removable_zero_extend (rtx x ATTRIBUTE_UNUSED
, const_rtx expr
, void *data
)
830 struct zero_extend_info
*zei
= (struct zero_extend_info
*)data
;
833 /* We are looking for SET (REG:DI N) (ZERO_EXTEND (REG:SI N)). */
834 if (GET_CODE (expr
) != SET
)
837 src
= SET_SRC (expr
);
838 dest
= SET_DEST (expr
);
841 && GET_MODE (dest
) == DImode
842 && GET_CODE (src
) == ZERO_EXTEND
843 && REG_P (XEXP (src
, 0))
844 && GET_MODE (XEXP (src
, 0)) == SImode
845 && REGNO (dest
) == REGNO (XEXP (src
, 0)))
847 if (get_defs (zei
->insn
, XEXP (src
, 0), NULL
))
848 VEC_safe_push (rtx
, heap
, zei
->insn_list
, zei
->insn
);
851 fprintf (dump_file
, "Cannot eliminate zero-extension: \n");
852 print_rtl_single (dump_file
, zei
->insn
);
853 fprintf (dump_file
, "No defs. Could be extending parameters.\n");
858 /* Traverse the instruction stream looking for zero-extends and return the
859 list of candidates. */
861 static VEC (rtx
,heap
)*
862 find_removable_zero_extends (void)
864 struct zero_extend_info zei
;
868 zei
.insn_list
= VEC_alloc (rtx
, heap
, 8);
871 FOR_BB_INSNS (bb
, insn
)
873 if (!NONDEBUG_INSN_P (insn
))
877 note_stores (PATTERN (insn
), add_removable_zero_extend
, &zei
);
880 return zei
.insn_list
;
883 /* This is the main function that checks the insn stream for redundant
884 zero extensions and tries to remove them if possible. */
887 find_and_remove_ze (void)
889 rtx curr_insn
= NULL_RTX
;
892 long long num_realized
= 0;
893 long long num_ze_opportunities
= 0;
894 VEC (rtx
, heap
) *zeinsn_list
;
895 VEC (rtx
, heap
) *zeinsn_del_list
;
897 /* Construct the UD and DU chains to get all reaching definitions of each
898 zero-extension instruction. */
900 df_chain_add_problem (DF_UD_CHAIN
+ DF_DU_CHAIN
);
903 max_insn_uid
= get_max_uid ();
905 is_insn_merge_attempted
906 = XNEWVEC (enum insn_merge_code
,
907 sizeof (enum insn_merge_code
) * max_insn_uid
);
909 for (i
= 0; i
< max_insn_uid
; i
++)
910 is_insn_merge_attempted
[i
] = MERGE_NOT_ATTEMPTED
;
912 num_ze_opportunities
= num_realized
= 0;
914 zeinsn_del_list
= VEC_alloc (rtx
, heap
, 4);
916 zeinsn_list
= find_removable_zero_extends ();
918 FOR_EACH_VEC_ELT (rtx
, zeinsn_list
, ix
, curr_insn
)
920 num_ze_opportunities
++;
921 /* Try to combine the zero-extends with the definition here. */
925 fprintf (dump_file
, "Trying to eliminate zero extension : \n");
926 print_rtl_single (dump_file
, curr_insn
);
929 if (combine_reaching_defs (curr_insn
, PATTERN (curr_insn
)))
932 fprintf (dump_file
, "Eliminated the zero extension...\n");
934 VEC_safe_push (rtx
, heap
, zeinsn_del_list
, curr_insn
);
938 /* Delete all useless zero extensions here in one sweep. */
939 FOR_EACH_VEC_ELT (rtx
, zeinsn_del_list
, ix
, curr_insn
)
940 delete_insn (curr_insn
);
942 free (is_insn_merge_attempted
);
943 VEC_free (rtx
, heap
, zeinsn_list
);
944 VEC_free (rtx
, heap
, zeinsn_del_list
);
946 if (dump_file
&& num_ze_opportunities
> 0)
947 fprintf (dump_file
, "\n %s : num_zee_opportunities = %lld "
948 "num_realized = %lld \n",
949 current_function_name (),
950 num_ze_opportunities
, num_realized
);
952 df_finish_pass (false);
956 /* Find and remove redundant zero extensions. */
959 rest_of_handle_zee (void)
961 timevar_push (TV_ZEE
);
962 find_and_remove_ze ();
963 timevar_pop (TV_ZEE
);
967 /* Run zee pass when flag_zee is set at optimization level > 0. */
970 gate_handle_zee (void)
972 return (optimize
> 0 && flag_zee
);
975 struct rtl_opt_pass pass_implicit_zee
=
980 gate_handle_zee
, /* gate */
981 rest_of_handle_zee
, /* execute */
984 0, /* static_pass_number */
986 0, /* properties_required */
987 0, /* properties_provided */
988 0, /* properties_destroyed */
989 0, /* todo_flags_start */
992 TODO_verify_rtl_sharing
, /* todo_flags_finish */