PR target/50465
[official-gcc.git] / gcc / implicit-zee.c
blobdb422482c901fc4091c5d04f48f5fb036d94d947
1 /* Redundant Zero-extension elimination for targets that implicitly
2 zero-extend writes to the lower 32-bit portion of 64-bit registers.
3 Copyright (C) 2010 Free Software Foundation, Inc.
4 Contributed by Sriraman Tallam (tmsriram@google.com) and
5 Silvius Rus (rus@google.com)
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
24 /* Problem Description :
25 --------------------
26 This pass is intended to be applicable only to targets that implicitly
27 zero-extend 64-bit registers after writing to their lower 32-bit half.
28 For instance, x86_64 zero-extends the upper bits of a register
29 implicitly whenever an instruction writes to its lower 32-bit half.
30 For example, the instruction *add edi,eax* also zero-extends the upper
31 32-bits of rax after doing the addition. These zero extensions come
32 for free and GCC does not always exploit this well. That is, it has
33 been observed that there are plenty of cases where GCC explicitly
34 zero-extends registers for x86_64 that are actually useless because
35 these registers were already implicitly zero-extended in a prior
36 instruction. This pass tries to eliminate such useless zero extension
37 instructions.
39 How does this pass work ?
40 --------------------------
42 This pass is run after register allocation. Hence, all registers that
43 this pass deals with are hard registers. This pass first looks for a
44 zero-extension instruction that could possibly be redundant. Such zero
45 extension instructions show up in RTL with the pattern :
46 (set (reg:DI x) (zero_extend:DI (reg:SI x))).
47 where x can be any one of the 64-bit hard registers.
48 Now, this pass tries to eliminate this instruction by merging the
49 zero-extension with the definitions of register x. For instance, if
50 one of the definitions of register x was :
51 (set (reg:SI x) (plus:SI (reg:SI z1) (reg:SI z2))),
52 then the combination converts this into :
53 (set (reg:DI x) (zero_extend:DI (plus:SI (reg:SI z1) (reg:SI z2)))).
54 If all the merged definitions are recognizable assembly instructions,
55 the zero-extension is effectively eliminated. For example, in x86_64,
56 implicit zero-extensions are captured with appropriate patterns in the
57 i386.md file. Hence, these merged definition can be matched to a single
58 assembly instruction. The original zero-extension instruction is then
59 deleted if all the definitions can be merged.
61 However, there are cases where the definition instruction cannot be
62 merged with a zero-extend. Examples are CALL instructions. In such
63 cases, the original zero extension is not redundant and this pass does
64 not delete it.
66 Handling conditional moves :
67 ----------------------------
69 Architectures like x86_64 support conditional moves whose semantics for
70 zero-extension differ from the other instructions. For instance, the
71 instruction *cmov ebx, eax*
72 zero-extends eax onto rax only when the move from ebx to eax happens.
73 Otherwise, eax may not be zero-extended. Conditional moves appear as
74 RTL instructions of the form
75 (set (reg:SI x) (if_then_else (cond) (reg:SI y) (reg:SI z))).
76 This pass tries to merge a zero-extension with a conditional move by
77 actually merging the defintions of y and z with a zero-extend and then
78 converting the conditional move into :
79 (set (reg:DI x) (if_then_else (cond) (reg:DI y) (reg:DI z))).
80 Since registers y and z are zero-extended, register x will also be
81 zero-extended after the conditional move. Note that this step has to
82 be done transitively since the definition of a conditional copy can be
83 another conditional copy.
85 Motivating Example I :
86 ---------------------
87 For this program :
88 **********************************************
89 bad_code.c
91 int mask[1000];
93 int foo(unsigned x)
95 if (x < 10)
96 x = x * 45;
97 else
98 x = x * 78;
99 return mask[x];
101 **********************************************
103 $ gcc -O2 -fsee bad_code.c (Turned on existing sign-extension elimination)
104 ........
105 400315: b8 4e 00 00 00 mov $0x4e,%eax
106 40031a: 0f af f8 imul %eax,%edi
107 40031d: 89 ff mov %edi,%edi --> Useless extend
108 40031f: 8b 04 bd 60 19 40 00 mov 0x401960(,%rdi,4),%eax
109 400326: c3 retq
110 ......
111 400330: ba 2d 00 00 00 mov $0x2d,%edx
112 400335: 0f af fa imul %edx,%edi
113 400338: 89 ff mov %edi,%edi --> Useless extend
114 40033a: 8b 04 bd 60 19 40 00 mov 0x401960(,%rdi,4),%eax
115 400341: c3 retq
117 $ gcc -O2 -fzee bad_code.c
118 ......
119 400315: 6b ff 4e imul $0x4e,%edi,%edi
120 400318: 8b 04 bd 40 19 40 00 mov 0x401940(,%rdi,4),%eax
121 40031f: c3 retq
122 400320: 6b ff 2d imul $0x2d,%edi,%edi
123 400323: 8b 04 bd 40 19 40 00 mov 0x401940(,%rdi,4),%eax
124 40032a: c3 retq
126 Motivating Example II :
127 ---------------------
129 Here is an example with a conditional move.
131 For this program :
132 **********************************************
134 unsigned long long foo(unsigned x , unsigned y)
136 unsigned z;
137 if (x > 100)
138 z = x + y;
139 else
140 z = x - y;
141 return (unsigned long long)(z);
144 $ gcc -O2 -fsee bad_code.c (Turned on existing sign-extension elimination)
145 ............
146 400360: 8d 14 3e lea (%rsi,%rdi,1),%edx
147 400363: 89 f8 mov %edi,%eax
148 400365: 29 f0 sub %esi,%eax
149 400367: 83 ff 65 cmp $0x65,%edi
150 40036a: 0f 43 c2 cmovae %edx,%eax
151 40036d: 89 c0 mov %eax,%eax --> Useless extend
152 40036f: c3 retq
154 $ gcc -O2 -fzee bad_code.c
155 .............
156 400360: 89 fa mov %edi,%edx
157 400362: 8d 04 3e lea (%rsi,%rdi,1),%eax
158 400365: 29 f2 sub %esi,%edx
159 400367: 83 ff 65 cmp $0x65,%edi
160 40036a: 89 d6 mov %edx,%esi
161 40036c: 48 0f 42 c6 cmovb %rsi,%rax
162 400370: c3 retq
165 Usefulness :
166 ----------
168 This pass reduces the dynamic instruction count of a compression benchmark
169 by 2.8% and improves its run time by about 1%. The compression benchmark
170 had the following code sequence in a very hot region of code before ZEE
171 optimized it :
173 shr $0x5, %edx
174 mov %edx, %edx --> Useless zero-extend */
177 #include "config.h"
178 #include "system.h"
179 #include "coretypes.h"
180 #include "tm.h"
181 #include "rtl.h"
182 #include "tree.h"
183 #include "tm_p.h"
184 #include "flags.h"
185 #include "regs.h"
186 #include "hard-reg-set.h"
187 #include "basic-block.h"
188 #include "insn-config.h"
189 #include "function.h"
190 #include "expr.h"
191 #include "insn-attr.h"
192 #include "recog.h"
193 #include "diagnostic-core.h"
194 #include "target.h"
195 #include "timevar.h"
196 #include "optabs.h"
197 #include "insn-codes.h"
198 #include "rtlhooks-def.h"
199 /* Include output.h for dump_file. */
200 #include "output.h"
201 #include "params.h"
202 #include "timevar.h"
203 #include "tree-pass.h"
204 #include "df.h"
205 #include "cgraph.h"
207 /* This says if a register is newly created for the purpose of
208 zero-extension. */
210 enum insn_merge_code
212 MERGE_NOT_ATTEMPTED = 0,
213 MERGE_SUCCESS
216 /* This says if a INSN UID or its definition has already been merged
217 with a zero-extend or not. */
219 static enum insn_merge_code *is_insn_merge_attempted;
220 static int max_insn_uid;
222 /* Returns the merge code status for INSN. */
224 static enum insn_merge_code
225 get_insn_status (rtx insn)
227 gcc_assert (INSN_UID (insn) < max_insn_uid);
228 return is_insn_merge_attempted[INSN_UID (insn)];
231 /* Sets the merge code status of INSN to CODE. */
233 static void
234 set_insn_status (rtx insn, enum insn_merge_code code)
236 gcc_assert (INSN_UID (insn) < max_insn_uid);
237 is_insn_merge_attempted[INSN_UID (insn)] = code;
240 /* Given a insn (CURR_INSN) and a pointer to the SET rtx (ORIG_SET)
241 that needs to be modified, this code modifies the SET rtx to a
242 new SET rtx that zero_extends the right hand expression into a DImode
243 register (NEWREG) on the left hand side. Note that multiple
244 assumptions are made about the nature of the set that needs
245 to be true for this to work and is called from merge_def_and_ze.
247 Original :
248 (set (reg:SI a) (expression))
250 Transform :
251 (set (reg:DI a) (zero_extend (expression)))
253 Special Cases :
254 If the expression is a constant or another zero_extend directly
255 assign it to the DI mode register. */
257 static bool
258 combine_set_zero_extend (rtx curr_insn, rtx *orig_set, rtx newreg)
260 rtx temp_extension, simplified_temp_extension, new_set, new_const_int;
261 rtx orig_src;
262 HOST_WIDE_INT val;
263 unsigned int mask, delta_width;
265 /* Change the SET rtx and validate it. */
266 orig_src = SET_SRC (*orig_set);
267 new_set = NULL_RTX;
269 /* The right hand side can also be VOIDmode. These cases have to be
270 handled differently. */
272 if (GET_MODE (orig_src) != SImode)
274 /* Merge constants by directly moving the constant into the
275 DImode register under some conditions. */
277 if (GET_CODE (orig_src) == CONST_INT
278 && HOST_BITS_PER_WIDE_INT >= GET_MODE_BITSIZE (SImode))
280 if (INTVAL (orig_src) >= 0)
281 new_set = gen_rtx_SET (VOIDmode, newreg, orig_src);
282 else if (INTVAL (orig_src) < 0)
284 /* Zero-extending a negative SImode integer into DImode
285 makes it a positive integer. Convert the given negative
286 integer into the appropriate integer when zero-extended. */
288 delta_width = HOST_BITS_PER_WIDE_INT - GET_MODE_BITSIZE (SImode);
289 mask = (~(unsigned HOST_WIDE_INT) 0) >> delta_width;
290 val = INTVAL (orig_src);
291 val = val & mask;
292 new_const_int = gen_rtx_CONST_INT (VOIDmode, val);
293 new_set = gen_rtx_SET (VOIDmode, newreg, new_const_int);
295 else
296 return false;
298 else
300 /* This is mostly due to a call insn that should not be
301 optimized. */
303 return false;
306 else if (GET_CODE (orig_src) == ZERO_EXTEND)
308 /* Here a zero-extend is used to get to SI. Why not make it
309 all the way till DI. */
311 temp_extension = gen_rtx_ZERO_EXTEND (DImode, XEXP (orig_src, 0));
312 simplified_temp_extension = simplify_rtx (temp_extension);
313 if (simplified_temp_extension)
314 temp_extension = simplified_temp_extension;
315 new_set = gen_rtx_SET (VOIDmode, newreg, temp_extension);
317 else if (GET_CODE (orig_src) == IF_THEN_ELSE)
319 /* Only IF_THEN_ELSE of phi-type copies are combined. Otherwise,
320 in general, IF_THEN_ELSE should not be combined. */
322 return false;
324 else
326 /* This is the normal case we expect. */
328 temp_extension = gen_rtx_ZERO_EXTEND (DImode, orig_src);
329 simplified_temp_extension = simplify_rtx (temp_extension);
330 if (simplified_temp_extension)
331 temp_extension = simplified_temp_extension;
332 new_set = gen_rtx_SET (VOIDmode, newreg, temp_extension);
335 gcc_assert (new_set != NULL_RTX);
337 /* This change is a part of a group of changes. Hence,
338 validate_change will not try to commit the change. */
340 if (validate_change (curr_insn, orig_set, new_set, true))
342 if (dump_file)
344 fprintf (dump_file, "Merged Instruction with ZERO_EXTEND:\n");
345 print_rtl_single (dump_file, curr_insn);
347 return true;
349 return false;
352 /* This returns the DI mode for the SI register REG_SI. */
354 static rtx
355 get_reg_di (rtx reg_si)
357 rtx newreg;
359 newreg = gen_rtx_REG (DImode, REGNO (reg_si));
360 gcc_assert (newreg);
361 return newreg;
364 /* Treat if_then_else insns, where the operands of both branches
365 are registers, as copies. For instance,
366 Original :
367 (set (reg:SI a) (if_then_else (cond) (reg:SI b) (reg:SI c)))
368 Transformed :
369 (set (reg:DI a) (if_then_else (cond) (reg:DI b) (reg:DI c)))
370 DEF_INSN is the if_then_else insn. */
372 static bool
373 transform_ifelse (rtx def_insn)
375 rtx set_insn = PATTERN (def_insn);
376 rtx srcreg, dstreg, srcreg2;
377 rtx map_srcreg, map_dstreg, map_srcreg2;
378 rtx ifexpr;
379 rtx cond;
380 rtx new_set;
382 gcc_assert (GET_CODE (set_insn) == SET);
383 cond = XEXP (SET_SRC (set_insn), 0);
384 dstreg = SET_DEST (set_insn);
385 srcreg = XEXP (SET_SRC (set_insn), 1);
386 srcreg2 = XEXP (SET_SRC (set_insn), 2);
387 map_srcreg = get_reg_di (srcreg);
388 map_srcreg2 = get_reg_di (srcreg2);
389 map_dstreg = get_reg_di (dstreg);
390 ifexpr = gen_rtx_IF_THEN_ELSE (DImode, cond, map_srcreg, map_srcreg2);
391 new_set = gen_rtx_SET (VOIDmode, map_dstreg, ifexpr);
393 if (validate_change (def_insn, &PATTERN (def_insn), new_set, true))
395 if (dump_file)
397 fprintf (dump_file, "Cond_Move Instruction's mode extended :\n");
398 print_rtl_single (dump_file, def_insn);
400 return true;
402 else
403 return false;
406 /* Function to get all the immediate definitions of an instruction.
407 The reaching definitions are desired for WHICH_REG used in
408 CURR_INSN. This function returns 0 if there was an error getting
409 a definition. Upon success, this function returns the number of
410 definitions and stores the definitions in DEST. */
412 static int
413 get_defs (rtx curr_insn, rtx which_reg, VEC (rtx,heap) **dest)
415 df_ref reg_info, *defs;
416 struct df_link *def_chain;
417 int n_refs = 0;
419 defs = DF_INSN_USES (curr_insn);
420 reg_info = NULL;
422 while (*defs)
424 reg_info = *defs;
425 if (GET_CODE (DF_REF_REG (reg_info)) == SUBREG)
426 return 0;
427 if (REGNO (DF_REF_REG (reg_info)) == REGNO (which_reg))
428 break;
429 defs++;
432 gcc_assert (reg_info != NULL && defs != NULL);
433 def_chain = DF_REF_CHAIN (reg_info);
435 while (def_chain)
437 /* Problem getting some definition for this instruction. */
439 if (def_chain->ref == NULL)
440 return 0;
441 if (DF_REF_INSN_INFO (def_chain->ref) == NULL)
442 return 0;
443 def_chain = def_chain->next;
446 def_chain = DF_REF_CHAIN (reg_info);
448 if (dest == NULL)
449 return 1;
451 while (def_chain)
453 VEC_safe_push (rtx, heap, *dest, DF_REF_INSN (def_chain->ref));
454 def_chain = def_chain->next;
455 n_refs++;
457 return n_refs;
460 /* rtx function to check if this SET insn, EXPR, is a conditional copy insn :
461 (set (reg:SI a ) (IF_THEN_ELSE (cond) (reg:SI b) (reg:SI c)))
462 Called from is_insn_cond_copy. DATA stores the two registers on each
463 side of the condition. */
465 static int
466 is_this_a_cmove (rtx expr, void *data)
468 /* Check for conditional (if-then-else) copy. */
470 if (GET_CODE (expr) == SET
471 && GET_CODE (SET_DEST (expr)) == REG
472 && GET_MODE (SET_DEST (expr)) == SImode
473 && GET_CODE (SET_SRC (expr)) == IF_THEN_ELSE
474 && GET_CODE (XEXP (SET_SRC (expr), 1)) == REG
475 && GET_MODE (XEXP (SET_SRC (expr), 1)) == SImode
476 && GET_CODE (XEXP (SET_SRC (expr), 2)) == REG
477 && GET_MODE (XEXP (SET_SRC (expr), 2)) == SImode)
479 ((rtx *)data)[0] = XEXP (SET_SRC (expr), 1);
480 ((rtx *)data)[1] = XEXP (SET_SRC (expr), 2);
481 return 1;
483 return 0;
486 /* This returns 1 if it found
487 (SET (reg:SI REGNO (def_reg)) (if_then_else (cond) (REG:SI x1) (REG:SI x2)))
488 in the DEF_INSN pattern. It stores the x1 and x2 in COPY_REG_1
489 and COPY_REG_2. */
491 static int
492 is_insn_cond_copy (rtx def_insn, rtx *copy_reg_1, rtx *copy_reg_2)
494 int type;
495 rtx set_expr;
496 rtx srcreg[2];
498 srcreg[0] = NULL_RTX;
499 srcreg[1] = NULL_RTX;
501 set_expr = single_set (def_insn);
503 if (set_expr == NULL_RTX)
504 return 0;
506 type = is_this_a_cmove (set_expr, (void *) srcreg);
508 if (type)
510 *copy_reg_1 = srcreg[0];
511 *copy_reg_2 = srcreg[1];
512 return type;
515 return 0;
518 /* Reaching Definitions of the zero-extended register could be conditional
519 copies or regular definitions. This function separates the two types into
520 two lists, DEFS_LIST and COPIES_LIST. This is necessary because, if a
521 reaching definition is a conditional copy, combining the zero_extend with
522 this definition is wrong. Conditional copies are merged by transitively
523 merging its definitions. The defs_list is populated with all the reaching
524 definitions of the zero-extension instruction (ZERO_EXTEND_INSN) which must
525 be merged with a zero_extend. The copies_list contains all the conditional
526 moves that will later be extended into a DI mode conditonal move if all the
527 merges are successful. The function returns false when there is a failure
528 in getting some definitions, like that of parameters. It returns 1 upon
529 success, 0 upon failure and 2 when all definitions of the ZERO_EXTEND_INSN
530 were merged previously. */
532 static int
533 make_defs_and_copies_lists (rtx zero_extend_insn, rtx set_pat,
534 VEC (rtx,heap) **defs_list,
535 VEC (rtx,heap) **copies_list)
537 bool *is_insn_visited;
538 VEC (rtx,heap) *work_list;
539 rtx srcreg, copy_reg_1, copy_reg_2;
540 rtx def_insn;
541 int n_defs = 0;
542 int vec_index = 0;
543 int n_worklist = 0;
544 int i, is_copy;
546 srcreg = XEXP (SET_SRC (set_pat), 0);
547 work_list = VEC_alloc (rtx, heap, 8);
549 /* Initialize the Work List */
550 n_worklist = get_defs (zero_extend_insn, srcreg, &work_list);
552 if (n_worklist == 0)
554 VEC_free (rtx, heap, work_list);
555 /* The number of defs being equal to zero can only imply that all of its
556 definitions have been previously merged. */
557 return 2;
560 is_insn_visited = XNEWVEC (bool, max_insn_uid);
562 for (i = 0; i < max_insn_uid; i++)
563 is_insn_visited[i] = false;
566 /* Perform transitive closure for conditional copies. */
567 while (n_worklist > vec_index)
569 def_insn = VEC_index (rtx, work_list, vec_index);
570 gcc_assert (INSN_UID (def_insn) < max_insn_uid);
572 if (is_insn_visited[INSN_UID (def_insn)])
574 vec_index++;
575 continue;
578 is_insn_visited[INSN_UID (def_insn)] = true;
579 copy_reg_1 = copy_reg_2 = NULL_RTX;
580 is_copy = is_insn_cond_copy (def_insn, &copy_reg_1, &copy_reg_2);
581 if (is_copy)
583 gcc_assert (copy_reg_1 && copy_reg_2);
585 /* Push it into the copy list first. */
587 VEC_safe_push (rtx, heap, *copies_list, def_insn);
589 /* Perform transitive closure here */
591 n_defs = get_defs (def_insn, copy_reg_1, &work_list);
593 if (n_defs == 0)
595 VEC_free (rtx, heap, work_list);
596 XDELETEVEC (is_insn_visited);
597 return 0;
599 n_worklist += n_defs;
601 n_defs = get_defs (def_insn, copy_reg_2, &work_list);
602 if (n_defs == 0)
604 VEC_free (rtx, heap, work_list);
605 XDELETEVEC (is_insn_visited);
606 return 0;
608 n_worklist += n_defs;
610 else
612 VEC_safe_push (rtx, heap, *defs_list, def_insn);
614 vec_index++;
617 VEC_free (rtx, heap, work_list);
618 XDELETEVEC (is_insn_visited);
619 return 1;
622 /* Merge the DEF_INSN with a zero-extend. Calls combine_set_zero_extend
623 on the SET pattern. */
625 static bool
626 merge_def_and_ze (rtx def_insn)
628 enum rtx_code code;
629 rtx setreg;
630 rtx *sub_rtx;
631 rtx s_expr;
632 int i;
634 code = GET_CODE (PATTERN (def_insn));
635 sub_rtx = NULL;
637 if (code == PARALLEL)
639 for (i = 0; i < XVECLEN (PATTERN (def_insn), 0); i++)
641 s_expr = XVECEXP (PATTERN (def_insn), 0, i);
642 if (GET_CODE (s_expr) != SET)
643 continue;
645 if (sub_rtx == NULL)
646 sub_rtx = &XVECEXP (PATTERN (def_insn), 0, i);
647 else
649 /* PARALLEL with multiple SETs. */
650 return false;
654 else if (code == SET)
655 sub_rtx = &PATTERN (def_insn);
656 else
658 /* It is not a PARALLEL or a SET, what could it be ? */
659 return false;
662 gcc_assert (sub_rtx != NULL);
664 if (GET_CODE (SET_DEST (*sub_rtx)) == REG
665 && GET_MODE (SET_DEST (*sub_rtx)) == SImode)
667 setreg = get_reg_di (SET_DEST (*sub_rtx));
668 return combine_set_zero_extend (def_insn, sub_rtx, setreg);
670 else
671 return false;
672 return true;
675 /* This function goes through all reaching defs of the source
676 of the zero extension instruction (ZERO_EXTEND_INSN) and
677 tries to combine the zero extension with the definition
678 instruction. The changes are made as a group so that even
679 if one definition cannot be merged, all reaching definitions
680 end up not being merged. When a conditional copy is encountered,
681 merging is attempted transitively on its definitions. It returns
682 true upon success and false upon failure. */
684 static bool
685 combine_reaching_defs (rtx zero_extend_insn, rtx set_pat)
687 rtx def_insn;
688 bool merge_successful = true;
689 int i;
690 int defs_ix;
691 int outcome;
693 /* To store the definitions that have been merged. */
695 VEC (rtx, heap) *defs_list, *copies_list, *vec;
696 enum insn_merge_code merge_code;
698 defs_list = VEC_alloc (rtx, heap, 8);
699 copies_list = VEC_alloc (rtx, heap, 8);
701 outcome = make_defs_and_copies_lists (zero_extend_insn,
702 set_pat, &defs_list, &copies_list);
704 /* outcome == 2 implies that all the definitions for this zero_extend were
705 merged while previously when handling other zero_extends. */
707 if (outcome == 2)
709 VEC_free (rtx, heap, defs_list);
710 VEC_free (rtx, heap, copies_list);
711 if (dump_file)
712 fprintf (dump_file, "All definitions have been merged previously.\n");
713 return true;
716 if (outcome == 0)
718 VEC_free (rtx, heap, defs_list);
719 VEC_free (rtx, heap, copies_list);
720 return false;
723 merge_successful = true;
725 /* Go through the defs vector and try to merge all the definitions
726 in this vector. */
728 vec = VEC_alloc (rtx, heap, 8);
729 FOR_EACH_VEC_ELT (rtx, defs_list, defs_ix, def_insn)
731 merge_code = get_insn_status (def_insn);
732 gcc_assert (merge_code == MERGE_NOT_ATTEMPTED);
734 if (merge_def_and_ze (def_insn))
735 VEC_safe_push (rtx, heap, vec, def_insn);
736 else
738 merge_successful = false;
739 break;
743 /* Now go through the conditional copies vector and try to merge all
744 the copies in this vector. */
746 if (merge_successful)
748 FOR_EACH_VEC_ELT (rtx, copies_list, i, def_insn)
750 if (transform_ifelse (def_insn))
752 VEC_safe_push (rtx, heap, vec, def_insn);
754 else
756 merge_successful = false;
757 break;
762 if (merge_successful)
764 /* Commit the changes here if possible */
765 /* XXX : Now, it is an all or nothing scenario. Even if one definition
766 cannot be merged we totally bail. In future, allow zero-extensions to
767 be partially eliminated along those paths where the definitions could
768 be merged. */
770 if (apply_change_group ())
772 if (dump_file)
773 fprintf (dump_file, "All merges were successful ....\n");
775 FOR_EACH_VEC_ELT (rtx, vec, i, def_insn)
777 set_insn_status (def_insn, MERGE_SUCCESS);
780 VEC_free (rtx, heap, vec);
781 VEC_free (rtx, heap, defs_list);
782 VEC_free (rtx, heap, copies_list);
783 return true;
785 else
787 /* Changes need not be cancelled explicitly as apply_change_group
788 does it. Print list of definitions in the dump_file for debug
789 purposes. This zero-extension cannot be deleted. */
791 if (dump_file)
793 FOR_EACH_VEC_ELT (rtx, vec, i, def_insn)
795 fprintf (dump_file, " Ummergable definitions : \n");
796 print_rtl_single (dump_file, def_insn);
801 else
803 /* Cancel any changes that have been made so far. */
804 cancel_changes (0);
807 VEC_free (rtx, heap, vec);
808 VEC_free (rtx, heap, defs_list);
809 VEC_free (rtx, heap, copies_list);
810 return false;
813 /* Carry information about zero-extensions while walking the RTL. */
815 struct zero_extend_info
817 /* The insn where the zero-extension is. */
818 rtx insn;
820 /* The list of candidates. */
821 VEC (rtx, heap) *insn_list;
824 /* Add a zero-extend pattern that could be eliminated. This is called via
825 note_stores from find_removable_zero_extends. */
827 static void
828 add_removable_zero_extend (rtx x ATTRIBUTE_UNUSED, const_rtx expr, void *data)
830 struct zero_extend_info *zei = (struct zero_extend_info *)data;
831 rtx src, dest;
833 /* We are looking for SET (REG:DI N) (ZERO_EXTEND (REG:SI N)). */
834 if (GET_CODE (expr) != SET)
835 return;
837 src = SET_SRC (expr);
838 dest = SET_DEST (expr);
840 if (REG_P (dest)
841 && GET_MODE (dest) == DImode
842 && GET_CODE (src) == ZERO_EXTEND
843 && REG_P (XEXP (src, 0))
844 && GET_MODE (XEXP (src, 0)) == SImode
845 && REGNO (dest) == REGNO (XEXP (src, 0)))
847 if (get_defs (zei->insn, XEXP (src, 0), NULL))
848 VEC_safe_push (rtx, heap, zei->insn_list, zei->insn);
849 else if (dump_file)
851 fprintf (dump_file, "Cannot eliminate zero-extension: \n");
852 print_rtl_single (dump_file, zei->insn);
853 fprintf (dump_file, "No defs. Could be extending parameters.\n");
858 /* Traverse the instruction stream looking for zero-extends and return the
859 list of candidates. */
861 static VEC (rtx,heap)*
862 find_removable_zero_extends (void)
864 struct zero_extend_info zei;
865 basic_block bb;
866 rtx insn;
868 zei.insn_list = VEC_alloc (rtx, heap, 8);
870 FOR_EACH_BB (bb)
871 FOR_BB_INSNS (bb, insn)
873 if (!NONDEBUG_INSN_P (insn))
874 continue;
876 zei.insn = insn;
877 note_stores (PATTERN (insn), add_removable_zero_extend, &zei);
880 return zei.insn_list;
883 /* This is the main function that checks the insn stream for redundant
884 zero extensions and tries to remove them if possible. */
886 static unsigned int
887 find_and_remove_ze (void)
889 rtx curr_insn = NULL_RTX;
890 int i;
891 int ix;
892 long long num_realized = 0;
893 long long num_ze_opportunities = 0;
894 VEC (rtx, heap) *zeinsn_list;
895 VEC (rtx, heap) *zeinsn_del_list;
897 /* Construct DU chain to get all reaching definitions of each
898 zero-extension instruction. */
900 df_chain_add_problem (DF_UD_CHAIN + DF_DU_CHAIN);
901 df_analyze ();
903 max_insn_uid = get_max_uid ();
905 is_insn_merge_attempted
906 = XNEWVEC (enum insn_merge_code,
907 sizeof (enum insn_merge_code) * max_insn_uid);
909 for (i = 0; i < max_insn_uid; i++)
910 is_insn_merge_attempted[i] = MERGE_NOT_ATTEMPTED;
912 num_ze_opportunities = num_realized = 0;
914 zeinsn_del_list = VEC_alloc (rtx, heap, 4);
916 zeinsn_list = find_removable_zero_extends ();
918 FOR_EACH_VEC_ELT (rtx, zeinsn_list, ix, curr_insn)
920 num_ze_opportunities++;
921 /* Try to combine the zero-extends with the definition here. */
923 if (dump_file)
925 fprintf (dump_file, "Trying to eliminate zero extension : \n");
926 print_rtl_single (dump_file, curr_insn);
929 if (combine_reaching_defs (curr_insn, PATTERN (curr_insn)))
931 if (dump_file)
932 fprintf (dump_file, "Eliminated the zero extension...\n");
933 num_realized++;
934 VEC_safe_push (rtx, heap, zeinsn_del_list, curr_insn);
938 /* Delete all useless zero extensions here in one sweep. */
939 FOR_EACH_VEC_ELT (rtx, zeinsn_del_list, ix, curr_insn)
940 delete_insn (curr_insn);
942 free (is_insn_merge_attempted);
943 VEC_free (rtx, heap, zeinsn_list);
944 VEC_free (rtx, heap, zeinsn_del_list);
946 if (dump_file && num_ze_opportunities > 0)
947 fprintf (dump_file, "\n %s : num_zee_opportunities = %lld "
948 "num_realized = %lld \n",
949 current_function_name (),
950 num_ze_opportunities, num_realized);
952 df_finish_pass (false);
953 return 0;
956 /* Find and remove redundant zero extensions. */
958 static unsigned int
959 rest_of_handle_zee (void)
961 timevar_push (TV_ZEE);
962 find_and_remove_ze ();
963 timevar_pop (TV_ZEE);
964 return 0;
967 /* Run zee pass when flag_zee is set at optimization level > 0. */
969 static bool
970 gate_handle_zee (void)
972 return (optimize > 0 && flag_zee);
975 struct rtl_opt_pass pass_implicit_zee =
978 RTL_PASS,
979 "zee", /* name */
980 gate_handle_zee, /* gate */
981 rest_of_handle_zee, /* execute */
982 NULL, /* sub */
983 NULL, /* next */
984 0, /* static_pass_number */
985 TV_ZEE, /* tv_id */
986 0, /* properties_required */
987 0, /* properties_provided */
988 0, /* properties_destroyed */
989 0, /* todo_flags_start */
990 TODO_ggc_collect |
991 TODO_verify_rtl_sharing, /* todo_flags_finish */