Remove outermost loop parameter.
[official-gcc/graphite-test-results.git] / gcc / implicit-zee.c
blob3344d7f8af2d2c738a62e0f9bcb7c1be426e1278
1 /* Redundant Zero-extension elimination for targets that implicitly
2 zero-extend writes to the lower 32-bit portion of 64-bit registers.
3 Copyright (C) 2010 Free Software Foundation, Inc.
4 Contributed by Sriraman Tallam (tmsriram@google.com) and
5 Silvius Rus (rus@google.com)
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
24 /* Problem Description :
25 --------------------
26 This pass is intended to be applicable only to targets that implicitly
27 zero-extend 64-bit registers after writing to their lower 32-bit half.
28 For instance, x86_64 zero-extends the upper bits of a register
29 implicitly whenever an instruction writes to its lower 32-bit half.
30 For example, the instruction *add edi,eax* also zero-extends the upper
31 32-bits of rax after doing the addition. These zero extensions come
32 for free and GCC does not always exploit this well. That is, it has
33 been observed that there are plenty of cases where GCC explicitly
34 zero-extends registers for x86_64 that are actually useless because
35 these registers were already implicitly zero-extended in a prior
36 instruction. This pass tries to eliminate such useless zero extension
37 instructions.
39 How does this pass work ?
40 --------------------------
42 This pass is run after register allocation. Hence, all registers that
43 this pass deals with are hard registers. This pass first looks for a
44 zero-extension instruction that could possibly be redundant. Such zero
45 extension instructions show up in RTL with the pattern :
46 (set (reg:DI x) (zero_extend:DI (reg:SI x))).
47 where x can be any one of the 64-bit hard registers.
48 Now, this pass tries to eliminate this instruction by merging the
49 zero-extension with the definitions of register x. For instance, if
50 one of the definitions of register x was :
51 (set (reg:SI x) (plus:SI (reg:SI z1) (reg:SI z2))),
52 then the combination converts this into :
53 (set (reg:DI x) (zero_extend:DI (plus:SI (reg:SI z1) (reg:SI z2)))).
54 If all the merged definitions are recognizable assembly instructions,
55 the zero-extension is effectively eliminated. For example, in x86_64,
56 implicit zero-extensions are captured with appropriate patterns in the
57 i386.md file. Hence, these merged definition can be matched to a single
58 assembly instruction. The original zero-extension instruction is then
59 deleted if all the definitions can be merged.
61 However, there are cases where the definition instruction cannot be
62 merged with a zero-extend. Examples are CALL instructions. In such
63 cases, the original zero extension is not redundant and this pass does
64 not delete it.
66 Handling conditional moves :
67 ----------------------------
69 Architectures like x86_64 support conditional moves whose semantics for
70 zero-extension differ from the other instructions. For instance, the
71 instruction *cmov ebx, eax*
72 zero-extends eax onto rax only when the move from ebx to eax happens.
73 Otherwise, eax may not be zero-extended. Conditional moves appear as
74 RTL instructions of the form
75 (set (reg:SI x) (if_then_else (cond) (reg:SI y) (reg:SI z))).
76 This pass tries to merge a zero-extension with a conditional move by
77 actually merging the defintions of y and z with a zero-extend and then
78 converting the conditional move into :
79 (set (reg:DI x) (if_then_else (cond) (reg:DI y) (reg:DI z))).
80 Since registers y and z are zero-extended, register x will also be
81 zero-extended after the conditional move. Note that this step has to
82 be done transitively since the definition of a conditional copy can be
83 another conditional copy.
85 Motivating Example I :
86 ---------------------
87 For this program :
88 **********************************************
89 bad_code.c
91 int mask[1000];
93 int foo(unsigned x)
95 if (x < 10)
96 x = x * 45;
97 else
98 x = x * 78;
99 return mask[x];
101 **********************************************
103 $ gcc -O2 -fsee bad_code.c (Turned on existing sign-extension elimination.)
104 ........
105 400315: b8 4e 00 00 00 mov $0x4e,%eax
106 40031a: 0f af f8 imul %eax,%edi
107 40031d: 89 ff mov %edi,%edi ---> Useless extend.
108 40031f: 8b 04 bd 60 19 40 00 mov 0x401960(,%rdi,4),%eax
109 400326: c3 retq
110 ......
111 400330: ba 2d 00 00 00 mov $0x2d,%edx
112 400335: 0f af fa imul %edx,%edi
113 400338: 89 ff mov %edi,%edi ---> Useless extend.
114 40033a: 8b 04 bd 60 19 40 00 mov 0x401960(,%rdi,4),%eax
115 400341: c3 retq
117 $ gcc -O2 -fzee bad_code.c
118 ......
119 400315: 6b ff 4e imul $0x4e,%edi,%edi
120 400318: 8b 04 bd 40 19 40 00 mov 0x401940(,%rdi,4),%eax
121 40031f: c3 retq
122 400320: 6b ff 2d imul $0x2d,%edi,%edi
123 400323: 8b 04 bd 40 19 40 00 mov 0x401940(,%rdi,4),%eax
124 40032a: c3 retq
126 Motivating Example II :
127 ---------------------
129 Here is an example with a conditional move.
131 For this program :
132 **********************************************
134 unsigned long long foo(unsigned x , unsigned y)
136 unsigned z;
137 if (x > 100)
138 z = x + y;
139 else
140 z = x - y;
141 return (unsigned long long)(z);
144 $ gcc -O2 -fsee bad_code.c (Turned on existing sign-extension elimination.)
145 ............
146 400360: 8d 14 3e lea (%rsi,%rdi,1),%edx
147 400363: 89 f8 mov %edi,%eax
148 400365: 29 f0 sub %esi,%eax
149 400367: 83 ff 65 cmp $0x65,%edi
150 40036a: 0f 43 c2 cmovae %edx,%eax
151 40036d: 89 c0 mov %eax,%eax ---> Useless extend.
152 40036f: c3 retq
154 $ gcc -O2 -fzee bad_code.c
155 .............
156 400360: 89 fa mov %edi,%edx
157 400362: 8d 04 3e lea (%rsi,%rdi,1),%eax
158 400365: 29 f2 sub %esi,%edx
159 400367: 83 ff 65 cmp $0x65,%edi
160 40036a: 89 d6 mov %edx,%esi
161 40036c: 48 0f 42 c6 cmovb %rsi,%rax
162 400370: c3 retq
165 Usefulness :
166 ----------
168 This pass reduces the dynamic instruction count of a compression benchmark by
169 2.8% and improves its run-time by about 1%. The compression benchmark had the
170 following code sequence in a very hot region of code before ZEE optimized it :
172 shr $0x5, %edx
173 mov %edx, %edx --> Useless zero-extend.
175 How to turn on ?
176 ----------------
177 -fzee -O2. */
180 #include "config.h"
181 #include "system.h"
182 #include "coretypes.h"
183 #include "tm.h"
184 #include "rtl.h"
185 #include "tree.h"
186 #include "tm_p.h"
187 #include "flags.h"
188 #include "regs.h"
189 #include "hard-reg-set.h"
190 #include "basic-block.h"
191 #include "insn-config.h"
192 #include "function.h"
193 #include "expr.h"
194 #include "insn-attr.h"
195 #include "recog.h"
196 #include "toplev.h"
197 #include "target.h"
198 #include "timevar.h"
199 #include "optabs.h"
200 #include "insn-codes.h"
201 #include "rtlhooks-def.h"
202 /* Include output.h for dump_file. */
203 #include "output.h"
204 #include "params.h"
205 #include "timevar.h"
206 #include "tree-pass.h"
207 #include "df.h"
208 #include "cgraph.h"
210 /* This says if a register is newly created for the purpose of
211 zero-extension. */
213 enum insn_merge_code
215 MERGE_NOT_ATTEMPTED = 0,
216 MERGE_SUCCESS
219 /* This says if a INSN UID or its definition has already been merged
220 with a zero-extend or not. */
222 static enum insn_merge_code *is_insn_merge_attempted;
223 static int max_insn_uid;
225 /* Returns the merge code status for INSN. */
227 static enum insn_merge_code
228 get_insn_status (rtx insn)
230 gcc_assert (INSN_UID (insn) < max_insn_uid);
231 return is_insn_merge_attempted[INSN_UID (insn)];
234 /* Sets the merge code status of INSN to CODE. */
236 static void
237 set_insn_status (rtx insn, enum insn_merge_code code)
239 gcc_assert (INSN_UID (insn) < max_insn_uid);
240 is_insn_merge_attempted[INSN_UID (insn)] = code;
243 /* Check to see if this zero-extend matches a pattern
244 that could be eliminated. This is called via
245 for_each_rtx in function find_and_remove_ze. */
247 static int
248 is_set_with_extension_DI (rtx *expr, void *data)
250 /* Looking only for patterns of the type :
251 SET (REG:DI X) (ZERO_EXTEND (REG:SI x))
254 if (GET_CODE (*expr) == SET
255 && GET_MODE (SET_DEST (*expr)) == DImode
256 && GET_CODE (SET_DEST (*expr)) == REG
257 && GET_CODE (SET_SRC (*expr)) == ZERO_EXTEND
258 && GET_CODE (XEXP (SET_SRC (*expr),0)) == REG
259 && GET_MODE (XEXP (SET_SRC (*expr),0)) == SImode
260 && REGNO (SET_DEST (*expr)) == REGNO (XEXP (SET_SRC (*expr),0)))
262 *(rtx **)(data) = expr;
263 return 1;
265 return 0;
268 /* Given a insn (CURR_INSN) and a pointer to the SET rtx (ORIG_SET)
269 that needs to be modified, this code modifies the SET rtx to a
270 new SET rtx that zero_extends the right hand expression into a DImode
271 register (NEWREG) on the left hand side. Note that multiple
272 assumptions are made about the nature of the set that needs
273 to be true for this to work and is called from merge_def_and_ze.
275 Original :
276 (set (reg:SI a) (expression))
278 Transform :
279 (set (reg:DI a) (zero_extend (expression)))
281 Special Cases :
282 If the expression is a constant or another zero_extend directly
283 assign it to the DI mode register. */
285 static bool
286 combine_set_zero_extend (rtx curr_insn, rtx *orig_set, rtx newreg)
288 rtx temp_extension, simplified_temp_extension, new_set, new_const_int;
289 rtx orig_src;
290 HOST_WIDE_INT val;
291 unsigned int mask, delta_width;
293 /* Change the SET rtx and validate it. */
294 orig_src = SET_SRC (*orig_set);
295 new_set = NULL_RTX;
297 /* The right hand side can also be VOIDmode. These cases have to be
298 handled differently. */
300 if (GET_MODE (orig_src) != SImode)
302 /* Merge constants by directly moving the constant into the
303 DImode register under some conditions. */
305 if (GET_CODE (orig_src) == CONST_INT
306 && HOST_BITS_PER_WIDE_INT >= GET_MODE_BITSIZE (SImode))
308 if (INTVAL (orig_src) >= 0)
309 new_set = gen_rtx_SET (VOIDmode, newreg, orig_src);
310 else if (INTVAL (orig_src) < 0)
312 /* Zero-extending a negative SImode integer into DImode
313 makes it a positive integer. Convert the given negative
314 integer into the appropriate integer when zero-extended. */
316 delta_width = HOST_BITS_PER_WIDE_INT - GET_MODE_BITSIZE (SImode);
317 mask = (~(unsigned HOST_WIDE_INT) 0) >> delta_width;
318 val = INTVAL (orig_src);
319 val = val & mask;
320 new_const_int = gen_rtx_CONST_INT (VOIDmode, val);
321 new_set = gen_rtx_SET (VOIDmode, newreg, new_const_int);
323 else
324 return false;
326 else
328 /* This is mostly due to a call insn that should not be
329 optimized. */
331 return false;
334 else if (GET_CODE (orig_src) == ZERO_EXTEND)
336 /* Here a zero-extend is used to get to SI. Why not make it
337 all the way till DI. */
339 temp_extension = gen_rtx_ZERO_EXTEND (DImode, XEXP (orig_src, 0));
340 simplified_temp_extension = simplify_rtx (temp_extension);
341 if (simplified_temp_extension)
342 temp_extension = simplified_temp_extension;
343 new_set = gen_rtx_SET (VOIDmode, newreg, temp_extension);
345 else if (GET_CODE (orig_src) == IF_THEN_ELSE)
347 /* Only IF_THEN_ELSE of phi-type copies are combined. Otherwise,
348 in general, IF_THEN_ELSE should not be combined. */
350 return false;
352 else
354 /* This is the normal case we expect. */
356 temp_extension = gen_rtx_ZERO_EXTEND (DImode, orig_src);
357 simplified_temp_extension = simplify_rtx (temp_extension);
358 if (simplified_temp_extension)
359 temp_extension = simplified_temp_extension;
360 new_set = gen_rtx_SET (VOIDmode, newreg, temp_extension);
363 gcc_assert (new_set != NULL_RTX);
365 /* This change is a part of a group of changes. Hence,
366 validate_change will not try to commit the change. */
368 if (validate_change (curr_insn, orig_set, new_set, true))
370 if (dump_file)
372 fprintf (dump_file, "Merged Instruction with ZERO_EXTEND:\n");
373 print_rtl_single (dump_file, curr_insn);
375 return true;
377 return false;
380 /* This returns the DI mode for the SI register REG_SI. */
382 static rtx
383 get_reg_di (rtx reg_si)
385 rtx newreg;
387 newreg = gen_rtx_REG (DImode, REGNO (reg_si));
388 gcc_assert (newreg);
389 return newreg;
392 /* Treat if_then_else insns, where the operands of both branches
393 are registers, as copies. For instance,
394 Original :
395 (set (reg:SI a) (if_then_else (cond) (reg:SI b) (reg:SI c)))
396 Transformed :
397 (set (reg:DI a) (if_then_else (cond) (reg:DI b) (reg:DI c)))
398 DEF_INSN is the if_then_else insn. */
400 static bool
401 transform_ifelse (rtx def_insn)
403 rtx set_insn = PATTERN (def_insn);
404 rtx srcreg, dstreg, srcreg2;
405 rtx map_srcreg, map_dstreg, map_srcreg2;
406 rtx ifexpr;
407 rtx cond;
408 rtx new_set;
410 gcc_assert (GET_CODE (set_insn) == SET);
411 cond = XEXP (SET_SRC (set_insn), 0);
412 dstreg = SET_DEST (set_insn);
413 srcreg = XEXP (SET_SRC (set_insn), 1);
414 srcreg2 = XEXP (SET_SRC (set_insn), 2);
415 map_srcreg = get_reg_di (srcreg);
416 map_srcreg2 = get_reg_di (srcreg2);
417 map_dstreg = get_reg_di (dstreg);
418 ifexpr = gen_rtx_IF_THEN_ELSE (DImode, cond, map_srcreg, map_srcreg2);
419 new_set = gen_rtx_SET (VOIDmode, map_dstreg, ifexpr);
421 if (validate_change (def_insn, &PATTERN (def_insn), new_set, true))
423 if (dump_file)
425 fprintf (dump_file, "Cond_Move Instruction's mode extended :\n");
426 print_rtl_single (dump_file, def_insn);
428 return true;
430 else
431 return false;
434 /* Function to get all the immediate definitions of an instruction.
435 The reaching definitions are desired for WHICH_REG used in
436 CURR_INSN. This function returns 0 if there was an error getting
437 a definition. Upon success, this function returns the number of
438 definitions and stores the definitions in DEST. */
440 static int
441 get_defs (rtx curr_insn, rtx which_reg, VEC (rtx,heap) **dest)
443 df_ref reg_info, *defs;
444 struct df_link *def_chain;
445 int n_refs = 0;
447 defs = DF_INSN_USES (curr_insn);
448 reg_info = NULL;
450 while (*defs)
452 reg_info = *defs;
453 if (GET_CODE (DF_REF_REG (reg_info)) == SUBREG)
454 return 0;
455 if (REGNO (DF_REF_REG (reg_info)) == REGNO (which_reg))
456 break;
457 defs++;
460 gcc_assert (reg_info != NULL && defs != NULL);
461 def_chain = DF_REF_CHAIN (reg_info);
463 while (def_chain)
465 /* Problem getting some definition for this instruction. */
467 if (def_chain->ref == NULL)
468 return 0;
469 if (DF_REF_INSN_INFO (def_chain->ref) == NULL)
470 return 0;
471 def_chain = def_chain->next;
474 def_chain = DF_REF_CHAIN (reg_info);
476 if (dest == NULL)
477 return 1;
479 while (def_chain)
481 VEC_safe_push (rtx, heap, *dest, DF_REF_INSN (def_chain->ref));
482 def_chain = def_chain->next;
483 n_refs++;
485 return n_refs;
488 /* rtx function to check if this SET insn, EXPR, is a conditional copy insn :
489 (set (reg:SI a ) (IF_THEN_ELSE (cond) (reg:SI b) (reg:SI c)))
490 Called from is_insn_cond_copy. DATA stores the two registers on each
491 side of the condition. */
493 static int
494 is_this_a_cmove (rtx expr, void *data)
496 /* Check for conditional (if-then-else) copy. */
498 if (GET_CODE (expr) == SET
499 && GET_CODE (SET_DEST (expr)) == REG
500 && GET_MODE (SET_DEST (expr)) == SImode
501 && GET_CODE (SET_SRC (expr)) == IF_THEN_ELSE
502 && GET_CODE (XEXP (SET_SRC (expr), 1)) == REG
503 && GET_MODE (XEXP (SET_SRC (expr), 1)) == SImode
504 && GET_CODE (XEXP (SET_SRC (expr), 2)) == REG
505 && GET_MODE (XEXP (SET_SRC (expr), 2)) == SImode)
507 ((rtx *)data)[0] = XEXP (SET_SRC (expr), 1);
508 ((rtx *)data)[1] = XEXP (SET_SRC (expr), 2);
509 return 1;
511 return 0;
514 /* This returns 1 if it found
515 (SET (reg:SI REGNO (def_reg)) (if_then_else (cond) (REG:SI x1) (REG:SI x2)))
516 in the DEF_INSN pattern. It stores the x1 and x2 in COPY_REG_1
517 and COPY_REG_2. */
519 static int
520 is_insn_cond_copy (rtx def_insn, rtx *copy_reg_1, rtx *copy_reg_2)
522 int type;
523 rtx set_expr;
524 rtx srcreg[2];
526 srcreg[0] = NULL_RTX;
527 srcreg[1] = NULL_RTX;
529 set_expr = single_set (def_insn);
531 if (set_expr == NULL_RTX)
532 return 0;
534 type = is_this_a_cmove (set_expr, (void *) srcreg);
536 if (type)
538 *copy_reg_1 = srcreg[0];
539 *copy_reg_2 = srcreg[1];
540 return type;
543 return 0;
546 /* Reaching Definitions of the zero-extended register could be conditional
547 copies or regular definitions. This function separates the two types into
548 two lists, DEFS_LIST and COPIES_LIST. This is necessary because, if a
549 reaching definition is a conditional copy, combining the zero_extend with
550 this definition is wrong. Conditional copies are merged by transitively
551 merging its definitions. The defs_list is populated with all the reaching
552 definitions of the zero-extension instruction (ZERO_EXTEND_INSN) which must
553 be merged with a zero_extend. The copies_list contains all the conditional
554 moves that will later be extended into a DI mode conditonal move if all the
555 merges are successful. The function returns false when there is a failure
556 in getting some definitions, like that of parameters. It returns 1 upon
557 success, 0 upon failure and 2 when all definitions of the ZERO_EXTEND_INSN
558 were merged previously. */
560 static int
561 make_defs_and_copies_lists (rtx zero_extend_insn, rtx set_pat,
562 VEC (rtx,heap) **defs_list,
563 VEC (rtx,heap) **copies_list)
565 bool *is_insn_visited;
566 VEC (rtx,heap) *work_list;
567 rtx srcreg, copy_reg_1, copy_reg_2;
568 rtx def_insn;
569 int n_defs = 0;
570 int vec_index = 0;
571 int n_worklist = 0;
572 int i, is_copy;
574 srcreg = XEXP (SET_SRC (set_pat), 0);
575 work_list = VEC_alloc (rtx, heap, 8);
577 /* Initialize the Work List */
578 n_worklist = get_defs (zero_extend_insn, srcreg, &work_list);
580 if (n_worklist == 0)
582 VEC_free (rtx, heap, work_list);
583 /* The number of defs being equal to zero can only imply that all of its
584 definitions have been previously merged. */
585 return 2;
588 is_insn_visited = XNEWVEC (bool, max_insn_uid);
590 for (i = 0; i < max_insn_uid; i++)
591 is_insn_visited[i] = false;
594 /* Perform transitive closure for conditional copies. */
595 while (n_worklist > vec_index)
597 def_insn = VEC_index (rtx, work_list, vec_index);
598 gcc_assert (INSN_UID (def_insn) < max_insn_uid);
600 if (is_insn_visited[INSN_UID (def_insn)])
602 vec_index++;
603 continue;
606 is_insn_visited[INSN_UID (def_insn)] = true;
607 copy_reg_1 = copy_reg_2 = NULL_RTX;
608 is_copy = is_insn_cond_copy (def_insn, &copy_reg_1, &copy_reg_2);
609 if (is_copy)
611 gcc_assert (copy_reg_1 && copy_reg_2);
613 /* Push it into the copy list first. */
615 VEC_safe_push (rtx, heap, *copies_list, def_insn);
617 /* Perform transitive closure here */
619 n_defs = get_defs (def_insn, copy_reg_1, &work_list);
621 if (n_defs == 0)
623 VEC_free (rtx, heap, work_list);
624 XDELETEVEC (is_insn_visited);
625 return 0;
627 n_worklist += n_defs;
629 n_defs = get_defs (def_insn, copy_reg_2, &work_list);
630 if (n_defs == 0)
632 VEC_free (rtx, heap, work_list);
633 XDELETEVEC (is_insn_visited);
634 return 0;
636 n_worklist += n_defs;
638 else
640 VEC_safe_push (rtx, heap, *defs_list, def_insn);
642 vec_index++;
645 VEC_free (rtx, heap, work_list);
646 XDELETEVEC (is_insn_visited);
647 return 1;
650 /* Merge the DEF_INSN with a zero-extend. Calls combine_set_zero_extend
651 on the SET pattern. */
653 static bool
654 merge_def_and_ze (rtx def_insn)
656 enum rtx_code code;
657 rtx setreg;
658 rtx *sub_rtx;
659 rtx s_expr;
660 int i;
662 code = GET_CODE (PATTERN (def_insn));
663 sub_rtx = NULL;
665 if (code == PARALLEL)
667 for (i = 0; i < XVECLEN (PATTERN (def_insn), 0); i++)
669 s_expr = XVECEXP (PATTERN (def_insn), 0, i);
670 if (GET_CODE (s_expr) != SET)
671 continue;
673 if (sub_rtx == NULL)
674 sub_rtx = &XVECEXP (PATTERN (def_insn), 0, i);
675 else
677 /* PARALLEL with multiple SETs. */
678 return false;
682 else if (code == SET)
683 sub_rtx = &PATTERN (def_insn);
684 else
686 /* It is not a PARALLEL or a SET, what could it be ? */
687 return false;
690 gcc_assert (sub_rtx != NULL);
692 if (GET_CODE (SET_DEST (*sub_rtx)) == REG
693 && GET_MODE (SET_DEST (*sub_rtx)) == SImode)
695 setreg = get_reg_di (SET_DEST (*sub_rtx));
696 return combine_set_zero_extend (def_insn, sub_rtx, setreg);
698 else
699 return false;
700 return true;
703 /* This function goes through all reaching defs of the source
704 of the zero extension instruction (ZERO_EXTEND_INSN) and
705 tries to combine the zero extension with the definition
706 instruction. The changes are made as a group so that even
707 if one definition cannot be merged, all reaching definitions
708 end up not being merged. When a conditional copy is encountered,
709 merging is attempted transitively on its definitions. It returns
710 true upon success and false upon failure. */
712 static bool
713 combine_reaching_defs (rtx zero_extend_insn, rtx set_pat)
715 rtx def_insn;
716 bool merge_successful = true;
717 int i;
718 int defs_ix;
719 int outcome;
721 /* To store the definitions that have been merged. */
723 VEC (rtx, heap) *defs_list, *copies_list, *vec;
724 enum insn_merge_code merge_code;
726 defs_list = VEC_alloc (rtx, heap, 8);
727 copies_list = VEC_alloc (rtx, heap, 8);
729 outcome = make_defs_and_copies_lists (zero_extend_insn,
730 set_pat, &defs_list, &copies_list);
732 /* outcome == 2 implies that all the definitions for this zero_extend were
733 merged while previously when handling other zero_extends. */
735 if (outcome == 2)
737 VEC_free (rtx, heap, defs_list);
738 VEC_free (rtx, heap, copies_list);
739 if (dump_file)
740 fprintf (dump_file, "All definitions have been merged previously...\n");
741 return true;
744 if (outcome == 0)
746 VEC_free (rtx, heap, defs_list);
747 VEC_free (rtx, heap, copies_list);
748 return false;
751 merge_successful = true;
753 /* Go through the defs vector and try to merge all the definitions
754 in this vector. */
756 vec = VEC_alloc (rtx, heap, 8);
757 for (defs_ix = 0; VEC_iterate (rtx, defs_list, defs_ix, def_insn); defs_ix++)
759 merge_code = get_insn_status (def_insn);
760 gcc_assert (merge_code == MERGE_NOT_ATTEMPTED);
762 if (merge_def_and_ze (def_insn))
763 VEC_safe_push (rtx, heap, vec, def_insn);
764 else
766 merge_successful = false;
767 break;
771 /* Now go through the conditional copies vector and try to merge all
772 the copies in this vector. */
774 if (merge_successful)
776 for (i = 0; VEC_iterate (rtx, copies_list, i, def_insn); i++)
778 if (transform_ifelse (def_insn))
780 VEC_safe_push (rtx, heap, vec, def_insn);
782 else
784 merge_successful = false;
785 break;
790 if (merge_successful)
792 /* Commit the changes here if possible */
793 /* XXX : Now, it is an all or nothing scenario. Even if one definition
794 cannot be merged we totally bail. In future, allow zero-extensions to
795 be partially eliminated along those paths where the definitions could
796 be merged. */
798 if (apply_change_group ())
800 if (dump_file)
801 fprintf (dump_file, "All merges were successful ....\n");
803 for (i = 0; VEC_iterate (rtx, vec, i, def_insn); i++)
805 set_insn_status (def_insn, MERGE_SUCCESS);
808 VEC_free (rtx, heap, vec);
809 VEC_free (rtx, heap, defs_list);
810 VEC_free (rtx, heap, copies_list);
811 return true;
813 else
815 /* Changes need not be cancelled explicitly as apply_change_group ()
816 does it. Print list of definitions in the dump_file for debug
817 purposes. This zero-extension cannot be deleted. */
819 if (dump_file)
821 for (i = 0; VEC_iterate (rtx, vec, i, def_insn); i++)
823 fprintf (dump_file, " Ummergable definitions : \n");
824 print_rtl_single (dump_file, def_insn);
829 else
831 /* Cancel any changes that have been made so far. */
832 cancel_changes (0);
835 VEC_free (rtx, heap, vec);
836 VEC_free (rtx, heap, defs_list);
837 VEC_free (rtx, heap, copies_list);
838 return false;
841 /* Goes through the instruction stream looking for zero-extends. If the zero
842 extension instruction has atleast one def it adds it to a list of possible
843 candidates for deletion. It returns the list of candidates. */
845 static VEC (rtx,heap)*
846 find_removable_zero_extends (void)
848 VEC (rtx, heap) *zeinsn_list;
849 basic_block curr_block;
850 rtx curr_insn;
851 rtx *set_insn;
852 rtx which_reg;
853 int type ;
854 int has_defs;
856 zeinsn_list = VEC_alloc (rtx, heap, 8);
857 FOR_EACH_BB (curr_block)
859 FOR_BB_INSNS (curr_block, curr_insn)
861 if (!INSN_P (curr_insn))
862 continue;
864 type = for_each_rtx (&PATTERN (curr_insn),
865 is_set_with_extension_DI,
866 (void *)&set_insn);
868 if (!type)
869 continue;
871 which_reg = XEXP (SET_SRC (*set_insn), 0);
872 has_defs = get_defs (curr_insn, which_reg, NULL);
873 if (has_defs)
874 VEC_safe_push (rtx, heap, zeinsn_list, curr_insn);
875 else if (dump_file)
877 fprintf (dump_file, "Cannot eliminate zero extension : \n");
878 print_rtl_single (dump_file, curr_insn);
879 fprintf (dump_file,
880 "This has no defs. Could be extending parameters.\n");
884 return zeinsn_list;
887 /* This is the main function that checks the insn stream for redundant
888 zero extensions and tries to remove them if possible. */
890 static unsigned int
891 find_and_remove_ze (void)
893 rtx curr_insn = NULL_RTX;
894 int i;
895 int ix;
896 long long num_realized = 0;
897 long long num_ze_opportunities = 0;
898 VEC (rtx, heap) *zeinsn_list;
899 VEC (rtx, heap) *zeinsn_del_list;
901 /* Construct DU chain to get all reaching definitions of each
902 zero-extension instruction. */
904 df_chain_add_problem (DF_UD_CHAIN + DF_DU_CHAIN);
905 df_analyze ();
907 max_insn_uid = get_max_uid ();
909 is_insn_merge_attempted = XNEWVEC (enum insn_merge_code,
910 sizeof (enum insn_merge_code)* max_insn_uid);
912 for (i = 0; i < max_insn_uid; i++)
914 is_insn_merge_attempted[i] = MERGE_NOT_ATTEMPTED;
917 num_ze_opportunities = num_realized = 0;
919 zeinsn_del_list = VEC_alloc (rtx, heap, 4);
921 zeinsn_list = find_removable_zero_extends ();
923 for (ix = 0; VEC_iterate (rtx, zeinsn_list, ix, curr_insn); ix++)
925 num_ze_opportunities++;
926 /* Try to combine the zero-extends with the definition here. */
928 if (dump_file)
930 fprintf (dump_file, "Trying to eliminate zero extension : \n");
931 print_rtl_single (dump_file, curr_insn);
934 if (combine_reaching_defs (curr_insn, PATTERN (curr_insn)))
936 if (dump_file)
937 fprintf (dump_file, "Eliminated the zero extension...\n");
938 num_realized++;
939 VEC_safe_push (rtx, heap, zeinsn_del_list, curr_insn);
943 /* Delete all useless zero extensions here in one sweep. */
944 for (ix = 0; VEC_iterate (rtx, zeinsn_del_list, ix, curr_insn); ix++)
946 delete_insn (curr_insn);
949 free (is_insn_merge_attempted);
950 VEC_free (rtx, heap, zeinsn_list);
951 VEC_free (rtx, heap, zeinsn_del_list);
953 if (dump_file && num_ze_opportunities > 0)
954 fprintf (dump_file, "\n %s : num_zee_opportunities = %lld "
955 "num_realized = %lld \n",
956 current_function_name (),
957 num_ze_opportunities, num_realized);
959 df_finish_pass (false);
960 return 0;
963 /* Find and remove redundant zero extensions. */
965 static unsigned int
966 rest_of_handle_zee (void)
968 timevar_push (TV_ZEE);
969 find_and_remove_ze ();
970 timevar_pop (TV_ZEE);
971 return 0;
974 /* Run zee pass when flag_zee is set at optimization level > 0. */
976 static bool
977 gate_handle_zee (void)
979 return (optimize > 0 && flag_zee);
982 struct rtl_opt_pass pass_implicit_zee =
985 RTL_PASS,
986 "zee", /* name */
987 gate_handle_zee, /* gate */
988 rest_of_handle_zee, /* execute */
989 NULL, /* sub */
990 NULL, /* next */
991 0, /* static_pass_number */
992 TV_ZEE, /* tv_id */
993 0, /* properties_required */
994 0, /* properties_provided */
995 0, /* properties_destroyed */
996 0, /* todo_flags_start */
997 TODO_ggc_collect |
998 TODO_dump_func |
999 TODO_verify_rtl_sharing, /* todo_flags_finish */