1 /* Sign extension elimination optimization for GNU compiler.
2 Copyright (C) 2005, 2007 Free Software Foundation, Inc.
3 Contributed by Leehod Baruch <leehod@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 -Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>.
23 In order to support 32bit computations on a 64bit machine, sign
24 extension instructions are generated to ensure the correctness of
26 A possible policy (as currently implemented) is to generate a sign
27 extension right after each 32bit computation.
28 Depending on the instruction set of the architecture, some of these
29 sign extension instructions may be redundant.
30 There are two cases in which the extension may be redundant:
33 The instruction that uses the 64bit operands that are sign
34 extended has a dual mode that works with 32bit operands.
44 cmpd a, b --> cmpw a, b //half word compare
47 The instruction that defines the 64bit operand (which is later sign
48 extended) has a dual mode that defines and sign-extends simultaneously
49 a 32bit operand. For example:
53 ld a --> lwa a // load half word and sign extend
59 General idea for solution:
60 --------------------------
61 First, try to merge the sign extension with the instruction that
62 defines the source of the extension and (separately) with the
63 instructions that uses the extended result. By doing this, both cases
64 of redundancies (as described above) will be eliminated.
66 Then, use partial redundancy elimination to place the non redundant
67 ones at optimal placements.
70 Implementation by example:
71 --------------------------
72 Note: The instruction stream is not changed till the last phase.
74 Phase 0: Initial code, as currently generated by gcc.
87 set ((reg:SI 10) (..def1rhs..))
88 set ((reg:DI 100) (sign_extend:DI (reg:SI 10)))
91 set ((reg:DI 100) (const_int 7))
94 set ((reg:SI 20) (..def3rhs..))
95 set ((reg:DI 100) (sign_extend:DI (reg:SI 20)))
98 set ((reg:CC...) (compare:CC (reg:DI 100) (...)))
101 set ((...) (reg:DI 100))
103 Phase 1: Propagate extensions to uses.
118 From here, all of the subregs are lowpart !
120 def1, def2, def3: No change.
123 set ((reg:DI 100) (sign_extend:DI ((subreg:SI (reg:DI 100)))))
124 set ((reg:CC...) (compare:CC (reg:DI 100) (...)))
127 set ((reg:DI 100) (sign_extend:DI ((subreg:SI (reg:DI 100)))))
128 set ((...) (reg:DI 100))
131 Phase 2: Merge and eliminate locally redundant extensions.
147 The instructions that were changed at this phase are marked with
151 Remove the sign extension instruction, modify def1 and
152 insert a move instruction to assure to correctness of the code.
153 set ((subreg:SI (reg:DI 100)) (..def1rhs..))
154 set ((reg:SI 10) (subreg:SI (reg:DI 100)))
156 def2 + se: There is no need for merge.
157 Def2 is not changed but a sign extension instruction is
159 set ((reg:DI 100) (const_int 7))
160 set ((reg:DI 100) (sign_extend:DI ((subreg:SI (reg:DI 100)))))
162 *def3 + se3: Merge succeeded.
163 set ((reg:DI 100) (sign_extend:DI (..def3rhs..)))
164 set ((reg:SI 20) (reg:DI 100))
165 set ((reg:DI 100) (sign_extend:DI (reg:SI 20)))
166 (The extension instruction is the original one).
168 *use1: Merge succeeded. Remove the sign extension instruction.
170 (compare:CC (subreg:SI (reg:DI 100)) (...)))
172 use2, use3: Merge failed. No change.
174 use4: The extension is locally redundant, therefore it is eliminated
178 Phase 3: Eliminate globally redundant extensions.
180 Following the LCM output:
195 set ((reg:DI 100) (sign_extend:DI ((subreg:SI (reg:DI 100)))))
198 set ((reg:DI 100) (sign_extend:DI (reg:SI 20)))
201 Phase 4: Commit changes to the insn stream.
204 def1 def3 *def1 def2 *def3
205 se1 def2 se3 [se removed] [se removed]
207 | \ | / | ------> | \ | / |
208 | \ | / | ------> | se | / |
212 use1 use2 use3 *use1 use2 use3
215 The instructions that were changed during the whole optimization are
216 marked with asterisk.
221 [ set ((reg:SI 10) (..def1rhs..)) ] - Deleted
222 [ set ((reg:DI 100) (sign_extend:DI (reg:SI 10))) ] - Deleted
223 set ((subreg:SI (reg:DI 100)) (..def3rhs..)) - Inserted
224 set ((reg:SI 10) (subreg:SI (reg:DI 100))) - Inserted
227 set ((reg:DI 100) (const_int 7)) - No change
230 [ set ((reg:SI 20) (..def3rhs..)) ] - Deleted
231 [ set ((reg:DI 100) (sign_extend:DI (reg:SI 20))) ] - Deleted
232 set ((reg:DI 100) (sign_extend:DI (..def3rhs..))) - Inserted
233 set ((reg:SI 20) (reg:DI 100)) - Inserted
236 [ set ((reg:CC...) (compare:CC (reg:DI 100) (...))) ] - Deleted
237 set ((reg:CC...) - Inserted
238 (compare:CC (subreg:SI (reg:DI 100)) (...)))
241 set ((...) (reg:DI 100)) - No change
244 set ((reg:DI 100) (sign_extend:DI ((subreg:SI (reg:DI 100)))))
246 Note: Most of the simple move instructions that were inserted will be
247 trivially dead and therefore eliminated.
249 The implementation outline:
250 ---------------------------
252 A web is RELEVANT if at the end of phase 1, his leader's
253 relevancy is {ZERO, SIGN}_EXTENDED_DEF. The source_mode of
254 the web is the source_mode of his leader.
255 A definition is a candidate for the optimization if it is part
256 of a RELEVANT web and his local source_mode is not narrower
257 then the source_mode of its web.
258 A use is a candidate for the optimization if it is part of a
260 A simple explicit extension is a single set instruction that
261 extends a register (or a subregister) to a register (or
263 A complex explicit extension is an explicit extension instruction
265 A def extension is a simple explicit extension that is
266 also a candidate for the optimization. This extension is part
267 of the instruction stream, it is not generated by this
269 A use extension is a simple explicit extension that is generated
270 and stored for candidate use during this optimization. It is
271 not emitted to the instruction stream till the last phase of
273 A reference is an instruction that satisfy at least on of these
275 - It contains a definition with EXTENDED_DEF relevancy in a RELEVANT web.
276 - It is followed by a def extension.
277 - It contains a candidate use.
279 Phase 1: Propagate extensions to uses.
280 In this phase, we find candidate extensions for the optimization
281 and we generate (but not emit) proper extensions "right before the
284 a. Build a DF object.
285 b. Traverse over all the instructions that contains a definition
286 and set their local relevancy and local source_mode like this:
287 - If the instruction is a simple explicit extension instruction,
288 mark it as {ZERO, SIGN}_EXTENDED_DEF according to the extension
289 type and mark its source_mode to be the mode of the quantity
290 that is been extended.
291 - Otherwise, If the instruction has an implicit extension,
292 which means that its high part is an extension of its low part,
293 or if it is a complicated explicit extension, mark it as
294 EXTENDED_DEF and set its source_mode to be the narrowest
295 mode that is been extended in the instruction.
296 c. Traverse over all the instructions that contains a use and set
297 their local relevancy to RELEVANT_USE (except for few corner
299 d. Produce the web. During union of two entries, update the
300 relevancy and source_mode of the leader. There are two major
301 guide lines for this update:
302 - If one of the entries is NOT_RELEVANT, mark the leader
304 - If one is ZERO_EXTENDED_DEF and the other is SIGN_EXTENDED_DEF
305 (or vice versa) mark the leader as NOT_RELEVANT. We don't
306 handle this kind of mixed webs.
307 (For more details about this update process,
308 see see_update_leader_extra_info ()).
309 e. Generate uses extensions according to the relevancy and
310 source_mode of the webs.
312 Phase 2: Merge and eliminate locally redundant extensions.
313 In this phase, we try to merge def extensions and use
314 extensions with their references, and eliminate redundant extensions
315 in the same basic block.
317 Traverse over all the references. Do this in basic block number and
318 luid number forward order.
319 For each reference do:
320 a. Peephole optimization - try to merge it with all its
321 def extensions and use extensions in the following
323 - Try to merge only the def extensions, one by one.
324 - Try to merge only the use extensions, one by one.
325 - Try to merge any couple of use extensions simultaneously.
326 - Try to merge any def extension with one or two uses
327 extensions simultaneously.
328 b. Handle each EXTENDED_DEF in it as if it was already merged with
331 During the merge process we save the following data for each
332 register in each basic block:
333 a. The first instruction that defines the register in the basic
335 b. The last instruction that defines the register in the basic
337 c. The first extension of this register before the first
338 instruction that defines it in the basic block.
339 c. The first extension of this register after the last
340 instruction that defines it in the basic block.
341 This data will help us eliminate (or more precisely, not generate)
342 locally redundant extensions, and will be useful in the next stage.
344 While merging extensions with their reference there are 4 possible
346 a. A use extension was merged with the reference:
347 Delete the extension instruction and save the merged reference
348 for phase 4. (For details, see see_use_extension_merged ())
349 b. A use extension failed to be merged with the reference:
350 If there is already such an extension in the same basic block
351 and it is not dead at this point, delete the unmerged extension
352 (it is locally redundant), otherwise properly update the above
354 (For details, see see_merge_one_use_extension ())
355 c. A def extension was merged with the reference:
356 Mark this extension as a merged_def extension and properly
357 update the above basic block data.
358 (For details, see see_merge_one_def_extension ())
359 d. A def extension failed to be merged with the reference:
360 Replace the definition of the NARROWmode register in the
361 reference with the proper subreg of WIDEmode register and save
362 the result as a merged reference. Also, properly update the
363 the above basic block data.
364 (For details, see see_def_extension_not_merged ())
366 Phase 3: Eliminate globally redundant extensions.
367 In this phase, we set the bit vectors input of the edge based LCM
368 using the recorded data on the registers in each basic block.
369 We also save pointers for all the anticipatable and available
370 occurrences of the relevant extensions. Then we run the LCM.
372 a. Initialize the comp, antloc, kill bit vectors to zero and the
373 transp bit vector to ones.
375 b. Traverse over all the references. Do this in basic block number
376 and luid number forward order. For each reference:
377 - Go over all its use extensions. For each such extension -
378 If it is not dead from the beginning of the basic block SET
379 the antloc bit of the current extension in the current
381 If it is not dead till the end of the basic block SET the
382 comp bit of the current extension in the current basic
384 - Go over all its def extensions that were merged with
385 it. For each such extension -
386 If it is not dead till the end of the basic block SET the
387 comp bit of the current extension in the current basic
389 RESET the proper transp and kill bits.
390 - Go over all its def extensions that were not merged
391 with it. For each such extension -
392 RESET the transp bit and SET the kill bit of the current
393 extension in the current basic block bits.
395 c. Run the edge based LCM.
397 Phase 4: Commit changes to the insn stream.
398 This is the only phase that actually changes the instruction stream.
399 Up to this point the optimization could be aborted at any time.
400 Here we insert extensions at their best placements and delete the
401 redundant ones according to the output of the LCM. We also replace
402 some of the instructions according to the second phase merges results.
404 a. Use the pre_delete_map (from the output of the LCM) in order to
405 delete redundant extensions. This will prevent them from been
406 emitted in the first place.
408 b. Insert extensions on edges where needed according to
409 pre_insert_map and edge_list (from the output of the LCM).
411 c. For each reference do-
412 - Emit all the uses extensions that were not deleted until now,
413 right before the reference.
414 - Delete all the merged and unmerged def extensions from
415 the instruction stream.
416 - Replace the reference with the merged one, if exist.
418 The implementation consists of four data structures:
420 Purpose: To handle the relevancy of the uses, definitions and webs.
421 Relevant structures: web_entry (from df.h), see_entry_extra_info.
422 Details: This is a disjoint-set data structure. Most of its functions are
423 implemented in web.c. Each definition and use in the code are
424 elements. A web_entry structure is allocated for each element to
425 hold the element's relevancy and source_mode. The union rules are
426 defined in see_update_leader_extra_info ().
428 Purpose: To store references and their extensions (uses and defs)
429 and to enable traverse over these references according to basic
431 Relevant structure: see_ref_s.
432 Details: This data structure consists of an array of splay trees. One splay
433 tree for each basic block. The splay tree nodes are references and
434 the keys are the luids of the references.
435 A see_ref_s structure is allocated for each reference. It holds the
436 reference itself, its def and uses extensions and later the merged
437 version of the reference.
438 Using this data structure we can traverse over all the references of
439 a basic block and their extensions in forward order.
440 - Data structure III.
441 Purpose: To store local properties of registers for each basic block.
442 This data will later help us build the LCM sbitmap_vectors
444 Relevant structure: see_register_properties.
445 Details: This data structure consists of an array of hash tables. One hash
446 for each basic block. The hash node are a register properties
447 and the keys are the numbers of the registers.
448 A see_register_properties structure is allocated for each register
449 that we might be interested in its properties.
450 Using this data structure we can easily find the properties of a
451 register in a specific basic block. This is necessary for locally
452 redundancy elimination and for setting up the LCM input.
454 Purpose: To store the extensions that are candidate for PRE and their
455 anticipatable and available occurrences.
456 Relevant structure: see_occr, see_pre_extension_expr.
457 Details: This data structure is a hash tables. Its nodes are the extensions
458 that are candidate for PRE.
459 A see_pre_extension_expr structure is allocated for each candidate
460 extension. It holds a copy of the extension and a linked list of all
461 the anticipatable and available occurrences of it.
462 We use this data structure when we read the output of the LCM. */
466 #include "coretypes.h"
473 #include "insn-config.h"
476 #include "splay-tree.h"
480 #include "tree-pass.h"
482 /* Used to classify defs and uses according to relevancy. */
491 /* Used to classify extensions in relevant webs. */
492 enum extension_type
{
494 EXPLICIT_DEF_EXTENSION
,
495 IMPLICIT_DEF_EXTENSION
,
499 /* Global data structures and flags. */
501 /* This structure will be assigned for each web_entry structure (defined
502 in df.h). It is placed in the extra_info field of a web_entry and holds the
503 relevancy and source mode of the web_entry. */
505 struct see_entry_extra_info
507 /* The relevancy of the ref. */
508 enum entry_type relevancy
;
509 /* The relevancy of the ref.
510 This field is updated only once - when this structure is created. */
511 enum entry_type local_relevancy
;
512 /* The source register mode. */
513 enum machine_mode source_mode
;
514 /* This field is used only if the relevancy is ZERO/SIGN_EXTENDED_DEF.
515 It is updated only once when this structure is created. */
516 enum machine_mode local_source_mode
;
517 /* This field is used only if the relevancy is EXTENDED_DEF.
518 It holds the narrowest mode that is sign extended. */
519 enum machine_mode source_mode_signed
;
520 /* This field is used only if the relevancy is EXTENDED_DEF.
521 It holds the narrowest mode that is zero extended. */
522 enum machine_mode source_mode_unsigned
;
525 /* There is one such structure for every reference. It stores the reference
526 itself as well as its extensions (uses and definitions).
527 Used as the value in splay_tree see_bb_splay_ar[]. */
530 /* The luid of the insn. */
532 /* The insn of the ref. */
534 /* The merged insn that was formed from the reference's insn and extensions.
535 If all merges failed, it remains NULL. */
537 /* The def extensions of the reference that were not merged with
539 htab_t unmerged_def_se_hash
;
540 /* The def extensions of the reference that were merged with
541 it. Implicit extensions of the reference will be stored here too. */
542 htab_t merged_def_se_hash
;
543 /* The uses extensions of reference. */
547 /* There is one such structure for every relevant extended register in a
548 specific basic block. This data will help us build the LCM sbitmap_vectors
550 struct see_register_properties
552 /* The register number. */
554 /* The last luid of the reference that defines this register in this basic
557 /* The luid of the reference that has the first extension of this register
558 that appears before any definition in this basic block. */
559 int first_se_before_any_def
;
560 /* The luid of the reference that has the first extension of this register
561 that appears after the last definition in this basic block. */
562 int first_se_after_last_def
;
565 /* Occurrence of an expression.
566 There must be at most one available occurrence and at most one anticipatable
567 occurrence per basic block. */
570 /* Next occurrence of this expression. */
571 struct see_occr
*next
;
572 /* The insn that computes the expression. */
577 /* There is one such structure for every relevant extension expression.
578 It holds a copy of this extension instruction as well as a linked lists of
579 pointers to all the antic and avail occurrences of it. */
580 struct see_pre_extension_expr
582 /* A copy of the extension instruction. */
584 /* Index in the available expression bitmaps. */
586 /* List of anticipatable occurrences in basic blocks in the function.
587 An "anticipatable occurrence" is the first occurrence in the basic block,
588 the operands are not modified in the basic block prior to the occurrence
589 and the output is not used between the start of the block and the
591 struct see_occr
*antic_occr
;
592 /* List of available occurrence in basic blocks in the function.
593 An "available occurrence" is the last occurrence in the basic block and
594 the operands are not modified by following statements in the basic block
595 [including this insn]. */
596 struct see_occr
*avail_occr
;
599 /* Helper structure for the note_uses and see_replace_src functions. */
600 struct see_replace_data
606 /* Helper structure for the note_uses and see_mentioned_reg functions. */
607 struct see_mentioned_reg_data
613 /* A data flow object that will be created once and used throughout the
615 static struct df
*df
= NULL
;
616 /* An array of web_entries. The i'th definition in the df object is associated
618 static struct web_entry
*def_entry
= NULL
;
619 /* An array of web_entries. The i'th use in the df object is associated with
621 static struct web_entry
*use_entry
= NULL
;
622 /* Array of splay_trees.
623 see_bb_splay_ar[i] refers to the splay tree of the i'th basic block.
624 The splay tree will hold see_ref_s structures. The key is the luid
625 of the insn. This way we can traverse over the references of each basic
626 block in forward or backward order. */
627 static splay_tree
*see_bb_splay_ar
= NULL
;
629 see_bb_hash_ar[i] refers to the hash of the i'th basic block.
630 The hash will hold see_register_properties structure. The key is regno. */
631 static htab_t
*see_bb_hash_ar
= NULL
;
632 /* Hash table that holds a copy of all the extensions. The key is the right
633 hand side of the se_insn field. */
634 static htab_t see_pre_extension_hash
= NULL
;
636 /* Local LCM properties of expressions. */
637 /* Nonzero for expressions that are transparent in the block. */
638 static sbitmap
*transp
= NULL
;
639 /* Nonzero for expressions that are computed (available) in the block. */
640 static sbitmap
*comp
= NULL
;
641 /* Nonzero for expressions that are locally anticipatable in the block. */
642 static sbitmap
*antloc
= NULL
;
643 /* Nonzero for expressions that are locally killed in the block. */
644 static sbitmap
*ae_kill
= NULL
;
645 /* Nonzero for expressions which should be inserted on a specific edge. */
646 static sbitmap
*pre_insert_map
= NULL
;
647 /* Nonzero for expressions which should be deleted in a specific block. */
648 static sbitmap
*pre_delete_map
= NULL
;
649 /* Contains the edge_list returned by pre_edge_lcm. */
650 static struct edge_list
*edge_list
= NULL
;
651 /* Records the last basic block at the beginning of the optimization. */
653 /* Records the number of uses at the beginning of the optimization. */
654 static unsigned int uses_num
;
655 /* Records the number of definitions at the beginning of the optimization. */
656 static unsigned int defs_num
;
658 #define ENTRY_EI(ENTRY) ((struct see_entry_extra_info *) (ENTRY)->extra_info)
660 /* Functions implementation. */
662 /* Verifies that EXTENSION's pattern is this:
664 set (reg/subreg reg1) (sign/zero_extend:WIDEmode (reg/subreg reg2))
666 If it doesn't have the expected pattern return NULL.
667 Otherwise, if RETURN_DEST_REG is set, return reg1 else return reg2. */
670 see_get_extension_reg (rtx extension
, bool return_dest_reg
)
676 /* Parallel pattern for extension not supported for the moment. */
677 if (GET_CODE (PATTERN (extension
)) == PARALLEL
)
680 set
= single_set (extension
);
683 lhs
= SET_DEST (set
);
688 else if (REG_P (SUBREG_REG (lhs
)))
689 reg1
= SUBREG_REG (lhs
);
693 if (GET_CODE (rhs
) != SIGN_EXTEND
&& GET_CODE (rhs
) != ZERO_EXTEND
)
699 else if (REG_P (SUBREG_REG (rhs
)))
700 reg2
= SUBREG_REG (rhs
);
709 /* Verifies that EXTENSION's pattern is this:
711 set (reg/subreg reg1) (sign/zero_extend: (...expr...)
713 If it doesn't have the expected pattern return UNKNOWN.
714 Otherwise, set SOURCE_MODE to be the mode of the extended expr and return
715 the rtx code of the extension. */
718 see_get_extension_data (rtx extension
, enum machine_mode
*source_mode
)
722 if (!extension
|| !INSN_P (extension
))
725 /* Parallel pattern for extension not supported for the moment. */
726 if (GET_CODE (PATTERN (extension
)) == PARALLEL
)
729 set
= single_set (extension
);
733 lhs
= SET_DEST (set
);
735 /* Don't handle extensions to something other then register or
737 if (!REG_P (lhs
) && !SUBREG_REG (lhs
))
740 if (GET_CODE (rhs
) != SIGN_EXTEND
&& GET_CODE (rhs
) != ZERO_EXTEND
)
743 if (!REG_P (XEXP (rhs
, 0))
744 && !(GET_CODE (XEXP (rhs
, 0)) == SUBREG
745 && REG_P (SUBREG_REG (XEXP (rhs
, 0)))))
748 *source_mode
= GET_MODE (XEXP (rhs
, 0));
750 if (GET_CODE (rhs
) == SIGN_EXTEND
)
756 /* Generate instruction with the pattern:
757 set ((reg r) (sign/zero_extend (subreg:mode (reg r))))
758 (the register r on both sides of the set is the same register).
760 If the recognition failed, this is very bad, return NULL (This will abort
761 the entire optimization).
762 Otherwise, return the generated instruction. */
765 see_gen_normalized_extension (rtx reg
, enum rtx_code extension_code
,
766 enum machine_mode mode
)
769 rtx extension
= NULL
;
773 || (extension_code
!= SIGN_EXTEND
&& extension_code
!= ZERO_EXTEND
))
776 subreg
= gen_lowpart_SUBREG (mode
, reg
);
777 if (extension_code
== SIGN_EXTEND
)
778 extension
= gen_rtx_SIGN_EXTEND (GET_MODE (reg
), subreg
);
780 extension
= gen_rtx_ZERO_EXTEND (GET_MODE (reg
), subreg
);
783 emit_insn (gen_rtx_SET (VOIDmode
, reg
, extension
));
787 if (insn_invalid_p (insn
))
788 /* Recognition failed, this is very bad for this optimization.
789 Abort the optimization. */
794 /* Hashes and splay_trees related functions implementation. */
796 /* Helper functions for the pre_extension hash.
797 This kind of hash will hold see_pre_extension_expr structures.
799 The key is the right hand side of the se_insn field.
800 Note that the se_insn is an expression that looks like:
802 set ((reg:WIDEmode r1) (sign_extend:WIDEmode
803 (subreg:NARROWmode (reg:WIDEmode r2)))) */
805 /* Return TRUE if P1 has the same value in its rhs as P2.
806 Otherwise, return FALSE.
807 P1 and P2 are see_pre_extension_expr structures. */
810 eq_descriptor_pre_extension (const void *p1
, const void *p2
)
812 const struct see_pre_extension_expr
*extension1
= p1
;
813 const struct see_pre_extension_expr
*extension2
= p2
;
814 rtx set1
= single_set (extension1
->se_insn
);
815 rtx set2
= single_set (extension2
->se_insn
);
818 gcc_assert (set1
&& set2
);
819 rhs1
= SET_SRC (set1
);
820 rhs2
= SET_SRC (set2
);
822 return rtx_equal_p (rhs1
, rhs2
);
826 /* P is a see_pre_extension_expr struct, use the RHS of the se_insn field.
827 Note that the RHS is an expression that looks like this:
828 (sign_extend:WIDEmode (subreg:NARROWmode (reg:WIDEmode r))) */
831 hash_descriptor_pre_extension (const void *p
)
833 const struct see_pre_extension_expr
*extension
= p
;
834 rtx set
= single_set (extension
->se_insn
);
840 return hash_rtx (rhs
, GET_MODE (rhs
), 0, NULL
, 0);
844 /* Free the allocated memory of the current see_pre_extension_expr struct.
846 It frees the two linked list of the occurrences structures. */
849 hash_del_pre_extension (void *p
)
851 struct see_pre_extension_expr
*extension
= p
;
852 struct see_occr
*curr_occr
= extension
->antic_occr
;
853 struct see_occr
*next_occr
= NULL
;
855 /* Free the linked list of the anticipatable occurrences. */
858 next_occr
= curr_occr
->next
;
860 curr_occr
= next_occr
;
863 /* Free the linked list of the available occurrences. */
864 curr_occr
= extension
->avail_occr
;
867 next_occr
= curr_occr
->next
;
869 curr_occr
= next_occr
;
872 /* Free the see_pre_extension_expr structure itself. */
877 /* Helper functions for the register_properties hash.
878 This kind of hash will hold see_register_properties structures.
880 The value of the key is the regno field of the structure. */
882 /* Return TRUE if P1 has the same value in the regno field as P2.
883 Otherwise, return FALSE.
884 Where P1 and P2 are see_register_properties structures. */
887 eq_descriptor_properties (const void *p1
, const void *p2
)
889 const struct see_register_properties
*curr_prop1
= p1
;
890 const struct see_register_properties
*curr_prop2
= p2
;
892 return curr_prop1
->regno
== curr_prop2
->regno
;
896 /* P is a see_register_properties struct, use the register number in the
900 hash_descriptor_properties (const void *p
)
902 const struct see_register_properties
*curr_prop
= p
;
903 return curr_prop
->regno
;
907 /* Free the allocated memory of the current see_register_properties struct. */
909 hash_del_properties (void *p
)
911 struct see_register_properties
*curr_prop
= p
;
916 /* Helper functions for an extension hash.
917 This kind of hash will hold insns that look like:
919 set ((reg:WIDEmode r1) (sign_extend:WIDEmode
920 (subreg:NARROWmode (reg:WIDEmode r2))))
922 set ((reg:WIDEmode r1) (sign_extend:WIDEmode (reg:NARROWmode r2)))
924 The value of the key is (REGNO (reg:WIDEmode r1))
925 It is possible to search this hash in two ways:
926 1. By a register rtx. The Value that is been compared to the keys is the
928 2. By an insn with the above pattern. The Value that is been compared to
929 the keys is the REGNO of the reg on the lhs. */
931 /* Return TRUE if P1 has the same value as P2. Otherwise, return FALSE.
932 Where P1 is an insn and P2 is an insn or a register. */
935 eq_descriptor_extension (const void *p1
, const void *p2
)
937 const rtx insn
= (rtx
) p1
;
938 const rtx element
= (rtx
) p2
;
939 rtx set1
= single_set (insn
);
942 rtx dest_reg2
= NULL
;
944 gcc_assert (set1
&& element
&& (REG_P (element
) || INSN_P (element
)));
946 dest_reg1
= SET_DEST (set1
);
948 if (INSN_P (element
))
950 set2
= single_set (element
);
951 dest_reg2
= SET_DEST (set2
);
956 return REGNO (dest_reg1
) == REGNO (dest_reg2
);
960 /* If P is an insn, use the register number of its lhs
961 otherwise, P is a register, use its number. */
964 hash_descriptor_extension (const void *p
)
966 const rtx r
= (rtx
) p
;
972 gcc_assert (r
&& INSN_P (r
));
973 set
= single_set (r
);
975 lhs
= SET_DEST (set
);
980 /* Helper function for a see_bb_splay_ar[i] splay tree.
981 It frees all the allocated memory of a struct see_ref_s pointer.
983 VALUE is the value of a splay tree node. */
986 see_free_ref_s (splay_tree_value value
)
988 struct see_ref_s
*ref_s
= (struct see_ref_s
*)value
;
990 if (ref_s
->unmerged_def_se_hash
)
991 htab_delete (ref_s
->unmerged_def_se_hash
);
992 if (ref_s
->merged_def_se_hash
)
993 htab_delete (ref_s
->merged_def_se_hash
);
994 if (ref_s
->use_se_hash
)
995 htab_delete (ref_s
->use_se_hash
);
1000 /* Rest of the implementation. */
1002 /* Search the extension hash for a suitable entry for EXTENSION.
1003 TYPE is the type of EXTENSION (USE_EXTENSION or DEF_EXTENSION).
1005 If TYPE is DEF_EXTENSION we need to normalize EXTENSION before searching the
1008 If a suitable entry was found, return the slot. Otherwise, store EXTENSION
1009 in the hash and return NULL. */
1011 static struct see_pre_extension_expr
*
1012 see_seek_pre_extension_expr (rtx extension
, enum extension_type type
)
1014 struct see_pre_extension_expr
**slot_pre_exp
, temp_pre_exp
;
1015 rtx dest_extension_reg
= see_get_extension_reg (extension
, 1);
1016 enum rtx_code extension_code
;
1017 enum machine_mode source_extension_mode
;
1019 if (type
== DEF_EXTENSION
)
1021 extension_code
= see_get_extension_data (extension
,
1022 &source_extension_mode
);
1023 gcc_assert (extension_code
!= UNKNOWN
);
1025 see_gen_normalized_extension (dest_extension_reg
, extension_code
,
1026 source_extension_mode
);
1028 temp_pre_exp
.se_insn
= extension
;
1030 (struct see_pre_extension_expr
**) htab_find_slot (see_pre_extension_hash
,
1031 &temp_pre_exp
, INSERT
);
1032 if (*slot_pre_exp
== NULL
)
1033 /* This is the first time this extension instruction is encountered. Store
1036 (*slot_pre_exp
) = xmalloc (sizeof (struct see_pre_extension_expr
));
1037 (*slot_pre_exp
)->se_insn
= extension
;
1038 (*slot_pre_exp
)->bitmap_index
=
1039 (htab_elements (see_pre_extension_hash
) - 1);
1040 (*slot_pre_exp
)->antic_occr
= NULL
;
1041 (*slot_pre_exp
)->avail_occr
= NULL
;
1044 return *slot_pre_exp
;
1048 /* This function defines how to update the extra_info of the web_entry.
1050 FIRST is the pointer of the extra_info of the first web_entry.
1051 SECOND is the pointer of the extra_info of the second web_entry.
1052 The first web_entry will be the predecessor (leader) of the second web_entry
1055 Return true if FIRST and SECOND points to the same web entry structure and
1056 nothing is done. Otherwise, return false. */
1059 see_update_leader_extra_info (struct web_entry
*first
, struct web_entry
*second
)
1061 struct see_entry_extra_info
*first_ei
, *second_ei
;
1063 first
= unionfind_root (first
);
1064 second
= unionfind_root (second
);
1066 if (unionfind_union (first
, second
))
1069 first_ei
= (struct see_entry_extra_info
*) first
->extra_info
;
1070 second_ei
= (struct see_entry_extra_info
*) second
->extra_info
;
1072 gcc_assert (first_ei
&& second_ei
);
1074 if (second_ei
->relevancy
== NOT_RELEVANT
)
1076 first_ei
->relevancy
= NOT_RELEVANT
;
1079 switch (first_ei
->relevancy
)
1084 switch (second_ei
->relevancy
)
1089 first_ei
->relevancy
= second_ei
->relevancy
;
1090 first_ei
->source_mode_signed
= second_ei
->source_mode_signed
;
1091 first_ei
->source_mode_unsigned
= second_ei
->source_mode_unsigned
;
1093 case SIGN_EXTENDED_DEF
:
1094 case ZERO_EXTENDED_DEF
:
1095 first_ei
->relevancy
= second_ei
->relevancy
;
1096 first_ei
->source_mode
= second_ei
->source_mode
;
1102 case SIGN_EXTENDED_DEF
:
1103 switch (second_ei
->relevancy
)
1105 case SIGN_EXTENDED_DEF
:
1106 /* The mode of the root should be the wider one in this case. */
1107 first_ei
->source_mode
=
1108 (first_ei
->source_mode
> second_ei
->source_mode
) ?
1109 first_ei
->source_mode
: second_ei
->source_mode
;
1113 case ZERO_EXTENDED_DEF
:
1114 /* Don't mix webs with zero extend and sign extend. */
1115 first_ei
->relevancy
= NOT_RELEVANT
;
1118 if (second_ei
->source_mode_signed
== MAX_MACHINE_MODE
)
1119 first_ei
->relevancy
= NOT_RELEVANT
;
1121 /* The mode of the root should be the wider one in this case. */
1122 first_ei
->source_mode
=
1123 (first_ei
->source_mode
> second_ei
->source_mode_signed
) ?
1124 first_ei
->source_mode
: second_ei
->source_mode_signed
;
1130 /* This case is similar to the previous one, with little changes. */
1131 case ZERO_EXTENDED_DEF
:
1132 switch (second_ei
->relevancy
)
1134 case SIGN_EXTENDED_DEF
:
1135 /* Don't mix webs with zero extend and sign extend. */
1136 first_ei
->relevancy
= NOT_RELEVANT
;
1140 case ZERO_EXTENDED_DEF
:
1141 /* The mode of the root should be the wider one in this case. */
1142 first_ei
->source_mode
=
1143 (first_ei
->source_mode
> second_ei
->source_mode
) ?
1144 first_ei
->source_mode
: second_ei
->source_mode
;
1147 if (second_ei
->source_mode_unsigned
== MAX_MACHINE_MODE
)
1148 first_ei
->relevancy
= NOT_RELEVANT
;
1150 /* The mode of the root should be the wider one in this case. */
1151 first_ei
->source_mode
=
1152 (first_ei
->source_mode
> second_ei
->source_mode_unsigned
) ?
1153 first_ei
->source_mode
: second_ei
->source_mode_unsigned
;
1160 if (first_ei
->source_mode_signed
!= MAX_MACHINE_MODE
1161 && first_ei
->source_mode_unsigned
!= MAX_MACHINE_MODE
)
1163 switch (second_ei
->relevancy
)
1165 case SIGN_EXTENDED_DEF
:
1166 first_ei
->relevancy
= SIGN_EXTENDED_DEF
;
1167 first_ei
->source_mode
=
1168 (first_ei
->source_mode_signed
> second_ei
->source_mode
) ?
1169 first_ei
->source_mode_signed
: second_ei
->source_mode
;
1173 case ZERO_EXTENDED_DEF
:
1174 first_ei
->relevancy
= ZERO_EXTENDED_DEF
;
1175 first_ei
->source_mode
=
1176 (first_ei
->source_mode_unsigned
> second_ei
->source_mode
) ?
1177 first_ei
->source_mode_unsigned
: second_ei
->source_mode
;
1180 if (second_ei
->source_mode_unsigned
!= MAX_MACHINE_MODE
)
1181 first_ei
->source_mode_unsigned
=
1182 (first_ei
->source_mode_unsigned
>
1183 second_ei
->source_mode_unsigned
) ?
1184 first_ei
->source_mode_unsigned
:
1185 second_ei
->source_mode_unsigned
;
1186 if (second_ei
->source_mode_signed
!= MAX_MACHINE_MODE
)
1187 first_ei
->source_mode_signed
=
1188 (first_ei
->source_mode_signed
>
1189 second_ei
->source_mode_signed
) ?
1190 first_ei
->source_mode_signed
: second_ei
->source_mode_signed
;
1196 else if (first_ei
->source_mode_signed
== MAX_MACHINE_MODE
)
1198 gcc_assert (first_ei
->source_mode_unsigned
!= MAX_MACHINE_MODE
);
1199 switch (second_ei
->relevancy
)
1201 case SIGN_EXTENDED_DEF
:
1202 first_ei
->relevancy
= NOT_RELEVANT
;
1206 case ZERO_EXTENDED_DEF
:
1207 first_ei
->relevancy
= ZERO_EXTENDED_DEF
;
1208 first_ei
->source_mode
=
1209 (first_ei
->source_mode_unsigned
> second_ei
->source_mode
) ?
1210 first_ei
->source_mode_unsigned
: second_ei
->source_mode
;
1213 if (second_ei
->source_mode_unsigned
== MAX_MACHINE_MODE
)
1214 first_ei
->relevancy
= NOT_RELEVANT
;
1216 first_ei
->source_mode_unsigned
=
1217 (first_ei
->source_mode_unsigned
>
1218 second_ei
->source_mode_unsigned
) ?
1219 first_ei
->source_mode_unsigned
:
1220 second_ei
->source_mode_unsigned
;
1228 gcc_assert (first_ei
->source_mode_unsigned
== MAX_MACHINE_MODE
);
1229 gcc_assert (first_ei
->source_mode_signed
!= MAX_MACHINE_MODE
);
1230 switch (second_ei
->relevancy
)
1232 case SIGN_EXTENDED_DEF
:
1233 first_ei
->relevancy
= SIGN_EXTENDED_DEF
;
1234 first_ei
->source_mode
=
1235 (first_ei
->source_mode_signed
> second_ei
->source_mode
) ?
1236 first_ei
->source_mode_signed
: second_ei
->source_mode
;
1240 case ZERO_EXTENDED_DEF
:
1241 first_ei
->relevancy
= NOT_RELEVANT
;
1244 if (second_ei
->source_mode_signed
== MAX_MACHINE_MODE
)
1245 first_ei
->relevancy
= NOT_RELEVANT
;
1247 first_ei
->source_mode_signed
=
1248 (first_ei
->source_mode_signed
>
1249 second_ei
->source_mode_signed
) ?
1250 first_ei
->source_mode_signed
: second_ei
->source_mode_signed
;
1258 /* Unknown patern type. */
1266 /* Free global data structures. */
1269 see_free_data_structures (void)
1274 /* Free the bitmap vectors. */
1277 sbitmap_vector_free (transp
);
1279 sbitmap_vector_free (comp
);
1281 sbitmap_vector_free (antloc
);
1283 sbitmap_vector_free (ae_kill
);
1288 sbitmap_vector_free (pre_insert_map
);
1289 pre_insert_map
= NULL
;
1293 sbitmap_vector_free (pre_delete_map
);
1294 pre_delete_map
= NULL
;
1298 free_edge_list (edge_list
);
1302 /* Free the extension hash. */
1303 htab_delete (see_pre_extension_hash
);
1305 /* Free the array of hashes. */
1306 for (i
= 0; i
< last_bb
; i
++)
1307 if (see_bb_hash_ar
[i
])
1308 htab_delete (see_bb_hash_ar
[i
]);
1309 free (see_bb_hash_ar
);
1311 /* Free the array of splay trees. */
1312 for (i
= 0; i
< last_bb
; i
++)
1313 if (see_bb_splay_ar
[i
])
1314 splay_tree_delete (see_bb_splay_ar
[i
]);
1315 free (see_bb_splay_ar
);
1317 /* Free the array of web entries and their extra info field. */
1318 for (j
= 0; j
< defs_num
; j
++)
1319 free (def_entry
[j
].extra_info
);
1321 for (j
= 0; j
< uses_num
; j
++)
1322 free (use_entry
[j
].extra_info
);
1327 /* Initialize global data structures and variables. */
1330 see_initialize_data_structures (void)
1332 /* Build the df object. */
1333 df
= df_init (DF_HARD_REGS
| DF_EQUIV_NOTES
| DF_SUBREGS
);
1334 df_rd_add_problem (df
, 0);
1335 df_chain_add_problem (df
, DF_DU_CHAIN
| DF_UD_CHAIN
);
1339 df_dump (df
, dump_file
);
1341 /* Record the last basic block at the beginning of the optimization. */
1342 last_bb
= last_basic_block
;
1343 /* Record the number of uses at the beginning of the optimization. */
1344 uses_num
= DF_USES_SIZE (df
);
1345 /* Record the number of definitions at the beginning of the optimization. */
1346 defs_num
= DF_DEFS_SIZE (df
);
1348 /* Allocate web entries array for the union-find data structure. */
1349 def_entry
= xcalloc (defs_num
, sizeof (struct web_entry
));
1350 use_entry
= xcalloc (uses_num
, sizeof (struct web_entry
));
1352 /* Allocate an array of splay trees.
1353 One splay tree for each basic block. */
1354 see_bb_splay_ar
= xcalloc (last_bb
, sizeof (splay_tree
));
1356 /* Allocate an array of hashes.
1357 One hash for each basic block. */
1358 see_bb_hash_ar
= xcalloc (last_bb
, sizeof (htab_t
));
1360 /* Allocate the extension hash. It will hold the extensions that we want
1362 see_pre_extension_hash
= htab_create (10,
1363 hash_descriptor_pre_extension
,
1364 eq_descriptor_pre_extension
,
1365 hash_del_pre_extension
);
1369 /* Function called by note_uses to check if a register is used in a
1372 X is a pointer to the subexpression and DATA is a pointer to a
1373 see_mentioned_reg_data structure that contains the register to look for and
1374 a place for the result. */
1377 see_mentioned_reg (rtx
*x
, void *data
)
1379 struct see_mentioned_reg_data
*d
1380 = (struct see_mentioned_reg_data
*) data
;
1382 if (reg_mentioned_p (d
->reg
, *x
))
1383 d
->mentioned
= true;
1387 /* We don't want to merge a use extension with a reference if the extended
1388 register is used only in a simple move instruction. We also don't want to
1389 merge a def extension with a reference if the source register of the
1390 extension is defined only in a simple move in the reference.
1392 REF is the reference instruction.
1393 EXTENSION is the use extension or def extension instruction.
1394 TYPE is the type of the extension (use or def).
1396 Return true if the reference is complicated enough, so we would like to merge
1397 it with the extension. Otherwise, return false. */
1400 see_want_to_be_merged_with_extension (rtx ref
, rtx extension
,
1401 enum extension_type type
)
1404 rtx dest_extension_reg
= see_get_extension_reg (extension
, 1);
1405 rtx source_extension_reg
= see_get_extension_reg (extension
, 0);
1407 struct see_mentioned_reg_data d
;
1410 pat
= PATTERN (ref
);
1411 code
= GET_CODE (pat
);
1413 if (code
== PARALLEL
)
1415 for (i
= 0; i
< XVECLEN (pat
, 0); i
++)
1417 rtx sub
= XVECEXP (pat
, 0, i
);
1419 if (GET_CODE (sub
) == SET
1420 && (REG_P (SET_DEST (sub
))
1421 || (GET_CODE (SET_DEST (sub
)) == SUBREG
1422 && REG_P (SUBREG_REG (SET_DEST (sub
)))))
1423 && (REG_P (SET_SRC (sub
))
1424 || (GET_CODE (SET_SRC (sub
)) == SUBREG
1425 && REG_P (SUBREG_REG (SET_SRC (sub
))))))
1427 /* This is a simple move SET. */
1428 if (type
== DEF_EXTENSION
1429 && reg_mentioned_p (source_extension_reg
, SET_DEST (sub
)))
1434 /* This is not a simple move SET.
1435 Check if it uses the source of the extension. */
1436 if (type
== USE_EXTENSION
)
1438 d
.reg
= dest_extension_reg
;
1439 d
.mentioned
= false;
1440 note_uses (&sub
, see_mentioned_reg
, &d
);
1446 if (type
== USE_EXTENSION
)
1452 && (REG_P (SET_DEST (pat
))
1453 || (GET_CODE (SET_DEST (pat
)) == SUBREG
1454 && REG_P (SUBREG_REG (SET_DEST (pat
)))))
1455 && (REG_P (SET_SRC (pat
))
1456 || (GET_CODE (SET_SRC (pat
)) == SUBREG
1457 && REG_P (SUBREG_REG (SET_SRC (pat
))))))
1458 /* This is a simple move SET. */
1466 /* Print the register number of the current see_register_properties
1469 This is a subroutine of see_main called via htab_traverse.
1470 SLOT contains the current see_register_properties structure pointer. */
1473 see_print_register_properties (void **slot
, void *b ATTRIBUTE_UNUSED
)
1475 struct see_register_properties
*prop
= *slot
;
1478 fprintf (dump_file
, "Property found for register %d\n", prop
->regno
);
1483 /* Print the extension instruction of the current see_register_properties
1486 This is a subroutine of see_main called via htab_traverse.
1487 SLOT contains the current see_pre_extension_expr structure pointer. */
1490 see_print_pre_extension_expr (void **slot
, void *b ATTRIBUTE_UNUSED
)
1492 struct see_pre_extension_expr
*pre_extension
= *slot
;
1494 gcc_assert (pre_extension
1495 && pre_extension
->se_insn
1496 && INSN_P (pre_extension
->se_insn
));
1498 fprintf (dump_file
, "Index %d for:\n", pre_extension
->bitmap_index
);
1499 print_rtl_single (dump_file
, pre_extension
->se_insn
);
1505 /* Phase 4 implementation: Commit changes to the insn stream. */
1507 /* Delete the merged def extension.
1509 This is a subroutine of see_commit_ref_changes called via htab_traverse.
1511 SLOT contains the current def extension instruction.
1512 B is the see_ref_s structure pointer. */
1515 see_delete_merged_def_extension (void **slot
, void *b ATTRIBUTE_UNUSED
)
1521 fprintf (dump_file
, "Deleting merged def extension:\n");
1522 print_rtl_single (dump_file
, def_se
);
1525 if (INSN_DELETED_P (def_se
))
1526 /* This def extension is an implicit one. No need to delete it since
1527 it is not in the insn stream. */
1530 delete_insn (def_se
);
1535 /* Delete the unmerged def extension.
1537 This is a subroutine of see_commit_ref_changes called via htab_traverse.
1539 SLOT contains the current def extension instruction.
1540 B is the see_ref_s structure pointer. */
1543 see_delete_unmerged_def_extension (void **slot
, void *b ATTRIBUTE_UNUSED
)
1549 fprintf (dump_file
, "Deleting unmerged def extension:\n");
1550 print_rtl_single (dump_file
, def_se
);
1553 delete_insn (def_se
);
1558 /* Emit the non-redundant use extension to the instruction stream.
1560 This is a subroutine of see_commit_ref_changes called via htab_traverse.
1562 SLOT contains the current use extension instruction.
1563 B is the see_ref_s structure pointer. */
1566 see_emit_use_extension (void **slot
, void *b
)
1569 struct see_ref_s
*curr_ref_s
= (struct see_ref_s
*) b
;
1571 if (INSN_DELETED_P (use_se
))
1572 /* This use extension was previously removed according to the lcm
1578 fprintf (dump_file
, "Inserting use extension:\n");
1579 print_rtl_single (dump_file
, use_se
);
1582 add_insn_before (use_se
, curr_ref_s
->insn
);
1588 /* For each relevant reference:
1589 a. Emit the non-redundant use extensions.
1590 b. Delete the def extensions.
1591 c. Replace the original reference with the merged one (if exists) and add the
1592 move instructions that were generated.
1594 This is a subroutine of see_commit_changes called via splay_tree_foreach.
1596 STN is the current node in the see_bb_splay_ar[i] splay tree. It holds a
1597 see_ref_s structure. */
1600 see_commit_ref_changes (splay_tree_node stn
,
1601 void *data ATTRIBUTE_UNUSED
)
1603 htab_t use_se_hash
= ((struct see_ref_s
*) (stn
->value
))->use_se_hash
;
1604 htab_t unmerged_def_se_hash
=
1605 ((struct see_ref_s
*) (stn
->value
))->unmerged_def_se_hash
;
1606 htab_t merged_def_se_hash
=
1607 ((struct see_ref_s
*) (stn
->value
))->merged_def_se_hash
;
1608 rtx ref
= ((struct see_ref_s
*) (stn
->value
))->insn
;
1609 rtx merged_ref
= ((struct see_ref_s
*) (stn
->value
))->merged_insn
;
1611 /* Emit the non-redundant use extensions. */
1613 htab_traverse_noresize (use_se_hash
, see_emit_use_extension
,
1614 (PTR
) (stn
->value
));
1616 /* Delete the def extensions. */
1617 if (unmerged_def_se_hash
)
1618 htab_traverse (unmerged_def_se_hash
, see_delete_unmerged_def_extension
,
1619 (PTR
) (stn
->value
));
1621 if (merged_def_se_hash
)
1622 htab_traverse (merged_def_se_hash
, see_delete_merged_def_extension
,
1623 (PTR
) (stn
->value
));
1625 /* Replace the original reference with the merged one (if exists) and add the
1626 move instructions that were generated. */
1627 if (merged_ref
&& !INSN_DELETED_P (ref
))
1631 fprintf (dump_file
, "Replacing orig reference:\n");
1632 print_rtl_single (dump_file
, ref
);
1633 fprintf (dump_file
, "With merged reference:\n");
1634 print_rtl_single (dump_file
, merged_ref
);
1636 emit_insn_after (merged_ref
, ref
);
1640 /* Continue to the next reference. */
1645 /* Insert partially redundant expressions on edges to make the expressions fully
1648 INDEX_MAP is a mapping of an index to an expression.
1649 Return true if an instruction was inserted on an edge.
1650 Otherwise, return false. */
1653 see_pre_insert_extensions (struct see_pre_extension_expr
**index_map
)
1655 int num_edges
= NUM_EDGES (edge_list
);
1656 int set_size
= pre_insert_map
[0]->size
;
1657 size_t pre_extension_num
= htab_elements (see_pre_extension_hash
);
1664 for (e
= 0; e
< num_edges
; e
++)
1667 basic_block bb
= INDEX_EDGE_PRED_BB (edge_list
, e
);
1669 for (i
= indx
= 0; i
< set_size
; i
++, indx
+= SBITMAP_ELT_BITS
)
1671 SBITMAP_ELT_TYPE insert
= pre_insert_map
[e
]->elms
[i
];
1673 for (j
= indx
; insert
&& j
< (int) pre_extension_num
;
1677 struct see_pre_extension_expr
*expr
= index_map
[j
];
1678 int idx
= expr
->bitmap_index
;
1680 edge eg
= INDEX_EDGE (edge_list
, e
);
1683 emit_insn (PATTERN (expr
->se_insn
));
1684 se_insn
= get_insns ();
1687 if (eg
->flags
& EDGE_ABNORMAL
)
1689 rtx new_insn
= NULL
;
1691 new_insn
= insert_insn_end_bb_new (se_insn
, bb
);
1692 gcc_assert (new_insn
&& INSN_P (new_insn
));
1697 "PRE: end of bb %d, insn %d, ",
1698 bb
->index
, INSN_UID (new_insn
));
1700 "inserting expression %d\n", idx
);
1705 insert_insn_on_edge (se_insn
, eg
);
1709 fprintf (dump_file
, "PRE: edge (%d,%d), ",
1711 INDEX_EDGE_SUCC_BB (edge_list
, e
)->index
);
1712 fprintf (dump_file
, "inserting expression %d\n", idx
);
1723 /* Since all the redundant extensions must be anticipatable, they must be a use
1724 extensions. Mark them as deleted. This will prevent them from been emitted
1727 This is a subroutine of see_commit_changes called via htab_traverse.
1729 SLOT contains the current see_pre_extension_expr structure pointer. */
1732 see_pre_delete_extension (void **slot
, void *b ATTRIBUTE_UNUSED
)
1734 struct see_pre_extension_expr
*expr
= *slot
;
1735 struct see_occr
*occr
;
1736 int indx
= expr
->bitmap_index
;
1738 for (occr
= expr
->antic_occr
; occr
!= NULL
; occr
= occr
->next
)
1740 if (TEST_BIT (pre_delete_map
[occr
->block_num
], indx
))
1742 /* Mark as deleted. */
1743 INSN_DELETED_P (occr
->insn
) = 1;
1746 fprintf (dump_file
,"Redundant extension deleted:\n");
1747 print_rtl_single (dump_file
, occr
->insn
);
1755 /* Create the index_map mapping of an index to an expression.
1757 This is a subroutine of see_commit_changes called via htab_traverse.
1759 SLOT contains the current see_pre_extension_expr structure pointer.
1760 B a pointer to see_pre_extension_expr structure pointer. */
1763 see_map_extension (void **slot
, void *b
)
1765 struct see_pre_extension_expr
*expr
= *slot
;
1766 struct see_pre_extension_expr
**index_map
=
1767 (struct see_pre_extension_expr
**) b
;
1769 index_map
[expr
->bitmap_index
] = expr
;
1775 /* Phase 4 top level function.
1776 In this phase we finally change the instruction stream.
1777 Here we insert extensions at their best placements and delete the
1778 redundant ones according to the output of the LCM. We also replace
1779 some of the instructions according to phase 2 merges results. */
1782 see_commit_changes (void)
1784 struct see_pre_extension_expr
**index_map
;
1785 size_t pre_extension_num
= htab_elements (see_pre_extension_hash
);
1786 bool did_insert
= false;
1789 index_map
= xcalloc (pre_extension_num
,
1790 sizeof (struct see_pre_extension_expr
*));
1794 "* Phase 4: Commit changes to the insn stream. *\n");
1796 /* Produce a mapping of all the pre_extensions. */
1797 htab_traverse (see_pre_extension_hash
, see_map_extension
, (PTR
) index_map
);
1799 /* Delete redundant extension. This will prevent them from been emitted in
1801 htab_traverse (see_pre_extension_hash
, see_pre_delete_extension
, NULL
);
1803 /* At this point, we must free the DF object, since the number of basic blocks
1808 /* Insert extensions on edges, according to the LCM result. */
1809 did_insert
= see_pre_insert_extensions (index_map
);
1812 commit_edge_insertions ();
1814 /* Commit the rest of the changes. */
1815 for (i
= 0; i
< last_bb
; i
++)
1817 if (see_bb_splay_ar
[i
])
1819 /* Traverse over all the references in the basic block in forward
1821 splay_tree_foreach (see_bb_splay_ar
[i
],
1822 see_commit_ref_changes
, NULL
);
1830 /* Phase 3 implementation: Eliminate globally redundant extensions. */
1832 /* Analyze the properties of a merged def extension for the LCM and record avail
1835 This is a subroutine of see_analyze_ref_local_prop called
1838 SLOT contains the current def extension instruction.
1839 B is the see_ref_s structure pointer. */
1842 see_analyze_merged_def_local_prop (void **slot
, void *b
)
1845 struct see_ref_s
*curr_ref_s
= (struct see_ref_s
*) b
;
1846 rtx ref
= curr_ref_s
->insn
;
1847 struct see_pre_extension_expr
*extension_expr
;
1849 int bb_num
= BLOCK_NUM (ref
);
1850 htab_t curr_bb_hash
;
1851 struct see_register_properties
*curr_prop
, **slot_prop
;
1852 struct see_register_properties temp_prop
;
1853 rtx dest_extension_reg
= see_get_extension_reg (def_se
, 1);
1854 struct see_occr
*curr_occr
= NULL
;
1855 struct see_occr
*tmp_occr
= NULL
;
1857 extension_expr
= see_seek_pre_extension_expr (def_se
, DEF_EXTENSION
);
1858 /* The extension_expr must be found. */
1859 gcc_assert (extension_expr
);
1861 curr_bb_hash
= see_bb_hash_ar
[bb_num
];
1862 gcc_assert (curr_bb_hash
);
1863 temp_prop
.regno
= REGNO (dest_extension_reg
);
1865 (struct see_register_properties
**) htab_find_slot (curr_bb_hash
,
1866 &temp_prop
, INSERT
);
1867 curr_prop
= *slot_prop
;
1868 gcc_assert (curr_prop
);
1870 indx
= extension_expr
->bitmap_index
;
1872 /* Reset the transparency bit. */
1873 RESET_BIT (transp
[bb_num
], indx
);
1874 /* Reset the killed bit. */
1875 RESET_BIT (ae_kill
[bb_num
], indx
);
1877 if (curr_prop
->first_se_after_last_def
== DF_INSN_LUID (df
, ref
))
1879 /* Set the available bit. */
1880 SET_BIT (comp
[bb_num
], indx
);
1881 /* Record the available occurrence. */
1882 curr_occr
= xmalloc (sizeof (struct see_occr
));
1883 curr_occr
->next
= NULL
;
1884 curr_occr
->insn
= def_se
;
1885 curr_occr
->block_num
= bb_num
;
1886 tmp_occr
= extension_expr
->avail_occr
;
1888 extension_expr
->avail_occr
= curr_occr
;
1891 while (tmp_occr
->next
)
1892 tmp_occr
= tmp_occr
->next
;
1893 tmp_occr
->next
= curr_occr
;
1901 /* Analyze the properties of a unmerged def extension for the LCM.
1903 This is a subroutine of see_analyze_ref_local_prop called
1906 SLOT contains the current def extension instruction.
1907 B is the see_ref_s structure pointer. */
1910 see_analyze_unmerged_def_local_prop (void **slot
, void *b
)
1913 struct see_ref_s
*curr_ref_s
= (struct see_ref_s
*) b
;
1914 rtx ref
= curr_ref_s
->insn
;
1915 struct see_pre_extension_expr
*extension_expr
;
1917 int bb_num
= BLOCK_NUM (ref
);
1918 htab_t curr_bb_hash
;
1919 struct see_register_properties
*curr_prop
, **slot_prop
;
1920 struct see_register_properties temp_prop
;
1921 rtx dest_extension_reg
= see_get_extension_reg (def_se
, 1);
1923 extension_expr
= see_seek_pre_extension_expr (def_se
, DEF_EXTENSION
);
1924 /* The extension_expr must be found. */
1925 gcc_assert (extension_expr
);
1927 curr_bb_hash
= see_bb_hash_ar
[bb_num
];
1928 gcc_assert (curr_bb_hash
);
1929 temp_prop
.regno
= REGNO (dest_extension_reg
);
1931 (struct see_register_properties
**) htab_find_slot (curr_bb_hash
,
1932 &temp_prop
, INSERT
);
1933 curr_prop
= *slot_prop
;
1934 gcc_assert (curr_prop
);
1936 indx
= extension_expr
->bitmap_index
;
1938 /* Reset the transparency bit. */
1939 RESET_BIT (transp
[bb_num
], indx
);
1940 /* Set the killed bit. */
1941 SET_BIT (ae_kill
[bb_num
], indx
);
1947 /* Analyze the properties of a use extension for the LCM and record anic and
1950 This is a subroutine of see_analyze_ref_local_prop called
1953 SLOT contains the current use extension instruction.
1954 B is the see_ref_s structure pointer. */
1957 see_analyze_use_local_prop (void **slot
, void *b
)
1959 struct see_ref_s
*curr_ref_s
= (struct see_ref_s
*) b
;
1961 rtx ref
= curr_ref_s
->insn
;
1962 rtx dest_extension_reg
= see_get_extension_reg (use_se
, 1);
1963 struct see_pre_extension_expr
*extension_expr
;
1964 struct see_register_properties
*curr_prop
, **slot_prop
;
1965 struct see_register_properties temp_prop
;
1966 struct see_occr
*curr_occr
= NULL
;
1967 struct see_occr
*tmp_occr
= NULL
;
1968 htab_t curr_bb_hash
;
1970 int bb_num
= BLOCK_NUM (ref
);
1972 extension_expr
= see_seek_pre_extension_expr (use_se
, USE_EXTENSION
);
1973 /* The extension_expr must be found. */
1974 gcc_assert (extension_expr
);
1976 curr_bb_hash
= see_bb_hash_ar
[bb_num
];
1977 gcc_assert (curr_bb_hash
);
1978 temp_prop
.regno
= REGNO (dest_extension_reg
);
1980 (struct see_register_properties
**) htab_find_slot (curr_bb_hash
,
1981 &temp_prop
, INSERT
);
1982 curr_prop
= *slot_prop
;
1983 gcc_assert (curr_prop
);
1985 indx
= extension_expr
->bitmap_index
;
1987 if (curr_prop
->first_se_before_any_def
== DF_INSN_LUID (df
, ref
))
1989 /* Set the anticipatable bit. */
1990 SET_BIT (antloc
[bb_num
], indx
);
1991 /* Record the anticipatable occurrence. */
1992 curr_occr
= xmalloc (sizeof (struct see_occr
));
1993 curr_occr
->next
= NULL
;
1994 curr_occr
->insn
= use_se
;
1995 curr_occr
->block_num
= bb_num
;
1996 tmp_occr
= extension_expr
->antic_occr
;
1998 extension_expr
->antic_occr
= curr_occr
;
2001 while (tmp_occr
->next
)
2002 tmp_occr
= tmp_occr
->next
;
2003 tmp_occr
->next
= curr_occr
;
2005 if (curr_prop
->last_def
< 0)
2007 /* Set the available bit. */
2008 SET_BIT (comp
[bb_num
], indx
);
2009 /* Record the available occurrence. */
2010 curr_occr
= xmalloc (sizeof (struct see_occr
));
2011 curr_occr
->next
= NULL
;
2012 curr_occr
->insn
= use_se
;
2013 curr_occr
->block_num
= bb_num
;
2014 tmp_occr
= extension_expr
->avail_occr
;
2016 extension_expr
->avail_occr
= curr_occr
;
2019 while (tmp_occr
->next
)
2020 tmp_occr
= tmp_occr
->next
;
2021 tmp_occr
->next
= curr_occr
;
2024 /* Note: there is no need to reset the killed bit since it must be zero at
2027 else if (curr_prop
->first_se_after_last_def
== DF_INSN_LUID (df
, ref
))
2029 /* Set the available bit. */
2030 SET_BIT (comp
[bb_num
], indx
);
2031 /* Reset the killed bit. */
2032 RESET_BIT (ae_kill
[bb_num
], indx
);
2033 /* Record the available occurrence. */
2034 curr_occr
= xmalloc (sizeof (struct see_occr
));
2035 curr_occr
->next
= NULL
;
2036 curr_occr
->insn
= use_se
;
2037 curr_occr
->block_num
= bb_num
;
2038 tmp_occr
= extension_expr
->avail_occr
;
2040 extension_expr
->avail_occr
= curr_occr
;
2043 while (tmp_occr
->next
)
2044 tmp_occr
= tmp_occr
->next
;
2045 tmp_occr
->next
= curr_occr
;
2052 /* Here we traverse over all the merged and unmerged extensions of the reference
2053 and analyze their properties for the LCM.
2055 This is a subroutine of see_execute_LCM called via splay_tree_foreach.
2057 STN is the current node in the see_bb_splay_ar[i] splay tree. It holds a
2058 see_ref_s structure. */
2061 see_analyze_ref_local_prop (splay_tree_node stn
,
2062 void *data ATTRIBUTE_UNUSED
)
2064 htab_t use_se_hash
= ((struct see_ref_s
*) (stn
->value
))->use_se_hash
;
2065 htab_t unmerged_def_se_hash
=
2066 ((struct see_ref_s
*) (stn
->value
))->unmerged_def_se_hash
;
2067 htab_t merged_def_se_hash
=
2068 ((struct see_ref_s
*) (stn
->value
))->merged_def_se_hash
;
2070 /* Analyze use extensions that were not merged with the reference. */
2072 htab_traverse_noresize (use_se_hash
, see_analyze_use_local_prop
,
2073 (PTR
) (stn
->value
));
2075 /* Analyze def extensions that were not merged with the reference. */
2076 if (unmerged_def_se_hash
)
2077 htab_traverse (unmerged_def_se_hash
, see_analyze_unmerged_def_local_prop
,
2078 (PTR
) (stn
->value
));
2080 /* Analyze def extensions that were merged with the reference. */
2081 if (merged_def_se_hash
)
2082 htab_traverse (merged_def_se_hash
, see_analyze_merged_def_local_prop
,
2083 (PTR
) (stn
->value
));
2085 /* Continue to the next definition. */
2090 /* Phase 3 top level function.
2091 In this phase, we set the input bit vectors of the LCM according to data
2092 gathered in phase 2.
2093 Then we run the edge based LCM. */
2096 see_execute_LCM (void)
2098 size_t pre_extension_num
= htab_elements (see_pre_extension_hash
);
2103 "* Phase 3: Eliminate globally redundant extensions. *\n");
2105 /* Initialize the global sbitmap vectors. */
2106 transp
= sbitmap_vector_alloc (last_bb
, pre_extension_num
);
2107 comp
= sbitmap_vector_alloc (last_bb
, pre_extension_num
);
2108 antloc
= sbitmap_vector_alloc (last_bb
, pre_extension_num
);
2109 ae_kill
= sbitmap_vector_alloc (last_bb
, pre_extension_num
);
2110 sbitmap_vector_ones (transp
, last_bb
);
2111 sbitmap_vector_zero (comp
, last_bb
);
2112 sbitmap_vector_zero (antloc
, last_bb
);
2113 sbitmap_vector_zero (ae_kill
, last_bb
);
2115 /* Traverse over all the splay trees of the basic blocks. */
2116 for (i
= 0; i
< last_bb
; i
++)
2118 if (see_bb_splay_ar
[i
])
2120 /* Traverse over all the references in the basic block in forward
2122 splay_tree_foreach (see_bb_splay_ar
[i
],
2123 see_analyze_ref_local_prop
, NULL
);
2127 /* Add fake exit edges before running the lcm. */
2128 add_noreturn_fake_exit_edges ();
2131 edge_list
= pre_edge_lcm (pre_extension_num
, transp
, comp
, antloc
,
2132 ae_kill
, &pre_insert_map
, &pre_delete_map
);
2134 /* Remove the fake edges. */
2135 remove_fake_exit_edges ();
2139 /* Phase 2 implementation: Merge and eliminate locally redundant extensions. */
2141 /* In this function we set the register properties for the register that is
2142 defined and extended in the reference.
2143 The properties are defined in see_register_properties structure which is
2144 allocated per basic block and per register.
2145 Later the extension is inserted into the see_pre_extension_hash for the next
2146 phase of the optimization.
2148 This is a subroutine of see_handle_extensions_for_one_ref called
2151 SLOT contains the current def extension instruction.
2152 B is the see_ref_s structure pointer. */
2155 see_set_prop_merged_def (void **slot
, void *b
)
2158 struct see_ref_s
*curr_ref_s
= (struct see_ref_s
*) b
;
2159 rtx insn
= curr_ref_s
->insn
;
2160 rtx dest_extension_reg
= see_get_extension_reg (def_se
, 1);
2161 htab_t curr_bb_hash
;
2162 struct see_register_properties
*curr_prop
= NULL
;
2163 struct see_register_properties
**slot_prop
;
2164 struct see_register_properties temp_prop
;
2165 int ref_luid
= DF_INSN_LUID (df
, insn
);
2167 curr_bb_hash
= see_bb_hash_ar
[BLOCK_NUM (curr_ref_s
->insn
)];
2170 /* The hash doesn't exist yet. Create it. */
2171 curr_bb_hash
= htab_create (10,
2172 hash_descriptor_properties
,
2173 eq_descriptor_properties
,
2174 hash_del_properties
);
2175 see_bb_hash_ar
[BLOCK_NUM (curr_ref_s
->insn
)] = curr_bb_hash
;
2178 /* Find the right register properties in the right basic block. */
2179 temp_prop
.regno
= REGNO (dest_extension_reg
);
2181 (struct see_register_properties
**) htab_find_slot (curr_bb_hash
,
2182 &temp_prop
, INSERT
);
2184 if (slot_prop
&& *slot_prop
!= NULL
)
2186 /* Property already exists. */
2187 curr_prop
= *slot_prop
;
2188 gcc_assert (curr_prop
->regno
== REGNO (dest_extension_reg
));
2190 curr_prop
->last_def
= ref_luid
;
2191 curr_prop
->first_se_after_last_def
= ref_luid
;
2195 /* Property doesn't exist yet. */
2196 curr_prop
= xmalloc (sizeof (struct see_register_properties
));
2197 curr_prop
->regno
= REGNO (dest_extension_reg
);
2198 curr_prop
->last_def
= ref_luid
;
2199 curr_prop
->first_se_before_any_def
= -1;
2200 curr_prop
->first_se_after_last_def
= ref_luid
;
2201 *slot_prop
= curr_prop
;
2204 /* Insert the def_se into see_pre_extension_hash if it isn't already
2206 see_seek_pre_extension_expr (def_se
, DEF_EXTENSION
);
2212 /* In this function we set the register properties for the register that is
2213 defined but not extended in the reference.
2214 The properties are defined in see_register_properties structure which is
2215 allocated per basic block and per register.
2216 Later the extension is inserted into the see_pre_extension_hash for the next
2217 phase of the optimization.
2219 This is a subroutine of see_handle_extensions_for_one_ref called
2222 SLOT contains the current def extension instruction.
2223 B is the see_ref_s structure pointer. */
2226 see_set_prop_unmerged_def (void **slot
, void *b
)
2229 struct see_ref_s
*curr_ref_s
= (struct see_ref_s
*) b
;
2230 rtx insn
= curr_ref_s
->insn
;
2231 rtx dest_extension_reg
= see_get_extension_reg (def_se
, 1);
2232 htab_t curr_bb_hash
;
2233 struct see_register_properties
*curr_prop
= NULL
;
2234 struct see_register_properties
**slot_prop
;
2235 struct see_register_properties temp_prop
;
2236 int ref_luid
= DF_INSN_LUID (df
, insn
);
2238 curr_bb_hash
= see_bb_hash_ar
[BLOCK_NUM (curr_ref_s
->insn
)];
2241 /* The hash doesn't exist yet. Create it. */
2242 curr_bb_hash
= htab_create (10,
2243 hash_descriptor_properties
,
2244 eq_descriptor_properties
,
2245 hash_del_properties
);
2246 see_bb_hash_ar
[BLOCK_NUM (curr_ref_s
->insn
)] = curr_bb_hash
;
2249 /* Find the right register properties in the right basic block. */
2250 temp_prop
.regno
= REGNO (dest_extension_reg
);
2252 (struct see_register_properties
**) htab_find_slot (curr_bb_hash
,
2253 &temp_prop
, INSERT
);
2255 if (slot_prop
&& *slot_prop
!= NULL
)
2257 /* Property already exists. */
2258 curr_prop
= *slot_prop
;
2259 gcc_assert (curr_prop
->regno
== REGNO (dest_extension_reg
));
2261 curr_prop
->last_def
= ref_luid
;
2262 curr_prop
->first_se_after_last_def
= -1;
2266 /* Property doesn't exist yet. */
2267 curr_prop
= xmalloc (sizeof (struct see_register_properties
));
2268 curr_prop
->regno
= REGNO (dest_extension_reg
);
2269 curr_prop
->last_def
= ref_luid
;
2270 curr_prop
->first_se_before_any_def
= -1;
2271 curr_prop
->first_se_after_last_def
= -1;
2272 *slot_prop
= curr_prop
;
2275 /* Insert the def_se into see_pre_extension_hash if it isn't already
2277 see_seek_pre_extension_expr (def_se
, DEF_EXTENSION
);
2283 /* In this function we set the register properties for the register that is used
2285 The properties are defined in see_register_properties structure which is
2286 allocated per basic block and per register.
2287 When a redundant use extension is found it is removed from the hash of the
2289 If the extension is non redundant it is inserted into the
2290 see_pre_extension_hash for the next phase of the optimization.
2292 This is a subroutine of see_handle_extensions_for_one_ref called
2295 SLOT contains the current use extension instruction.
2296 B is the see_ref_s structure pointer. */
2299 see_set_prop_unmerged_use (void **slot
, void *b
)
2302 struct see_ref_s
*curr_ref_s
= (struct see_ref_s
*) b
;
2303 rtx insn
= curr_ref_s
->insn
;
2304 rtx dest_extension_reg
= see_get_extension_reg (use_se
, 1);
2305 htab_t curr_bb_hash
;
2306 struct see_register_properties
*curr_prop
= NULL
;
2307 struct see_register_properties
**slot_prop
;
2308 struct see_register_properties temp_prop
;
2309 bool locally_redundant
= false;
2310 int ref_luid
= DF_INSN_LUID (df
, insn
);
2312 curr_bb_hash
= see_bb_hash_ar
[BLOCK_NUM (curr_ref_s
->insn
)];
2315 /* The hash doesn't exist yet. Create it. */
2316 curr_bb_hash
= htab_create (10,
2317 hash_descriptor_properties
,
2318 eq_descriptor_properties
,
2319 hash_del_properties
);
2320 see_bb_hash_ar
[BLOCK_NUM (curr_ref_s
->insn
)] = curr_bb_hash
;
2323 /* Find the right register properties in the right basic block. */
2324 temp_prop
.regno
= REGNO (dest_extension_reg
);
2326 (struct see_register_properties
**) htab_find_slot (curr_bb_hash
,
2327 &temp_prop
, INSERT
);
2329 if (slot_prop
&& *slot_prop
!= NULL
)
2331 /* Property already exists. */
2332 curr_prop
= *slot_prop
;
2333 gcc_assert (curr_prop
->regno
== REGNO (dest_extension_reg
));
2336 if (curr_prop
->last_def
< 0 && curr_prop
->first_se_before_any_def
< 0)
2337 curr_prop
->first_se_before_any_def
= ref_luid
;
2338 else if (curr_prop
->last_def
< 0
2339 && curr_prop
->first_se_before_any_def
>= 0)
2341 /* In this case the extension is locally redundant. */
2342 htab_clear_slot (curr_ref_s
->use_se_hash
, (PTR
*)slot
);
2343 locally_redundant
= true;
2345 else if (curr_prop
->last_def
>= 0
2346 && curr_prop
->first_se_after_last_def
< 0)
2347 curr_prop
->first_se_after_last_def
= ref_luid
;
2348 else if (curr_prop
->last_def
>= 0
2349 && curr_prop
->first_se_after_last_def
>= 0)
2351 /* In this case the extension is locally redundant. */
2352 htab_clear_slot (curr_ref_s
->use_se_hash
, (PTR
*)slot
);
2353 locally_redundant
= true;
2360 /* Property doesn't exist yet. Create a new one. */
2361 curr_prop
= xmalloc (sizeof (struct see_register_properties
));
2362 curr_prop
->regno
= REGNO (dest_extension_reg
);
2363 curr_prop
->last_def
= -1;
2364 curr_prop
->first_se_before_any_def
= ref_luid
;
2365 curr_prop
->first_se_after_last_def
= -1;
2366 *slot_prop
= curr_prop
;
2369 /* Insert the use_se into see_pre_extension_hash if it isn't already
2371 if (!locally_redundant
)
2372 see_seek_pre_extension_expr (use_se
, USE_EXTENSION
);
2373 if (locally_redundant
&& dump_file
)
2375 fprintf (dump_file
, "Locally redundant extension:\n");
2376 print_rtl_single (dump_file
, use_se
);
2382 /* Print an extension instruction.
2384 This is a subroutine of see_handle_extensions_for_one_ref called
2386 SLOT contains the extension instruction. */
2389 see_print_one_extension (void **slot
, void *b ATTRIBUTE_UNUSED
)
2393 gcc_assert (def_se
&& INSN_P (def_se
));
2394 print_rtl_single (dump_file
, def_se
);
2399 /* Function called by note_uses to replace used subexpressions.
2401 X is a pointer to the subexpression and DATA is a pointer to a
2402 see_replace_data structure that contains the data for the replacement. */
2405 see_replace_src (rtx
*x
, void *data
)
2407 struct see_replace_data
*d
2408 = (struct see_replace_data
*) data
;
2410 *x
= replace_rtx (*x
, d
->from
, d
->to
);
2414 /* At this point the pattern is expected to be:
2416 ref: set (dest_reg) (rhs)
2417 def_se: set (dest_extension_reg) (sign/zero_extend (source_extension_reg))
2419 The merge of these two instructions didn't succeed.
2421 We try to generate the pattern:
2422 set (subreg (dest_extension_reg)) (rhs)
2424 We do this in 4 steps:
2425 a. Replace every use of dest_reg with a new pseudo register.
2426 b. Replace every instance of dest_reg with the subreg.
2427 c. Replace every use of the new pseudo register back to dest_reg.
2428 d. Try to recognize and simplify.
2430 If the manipulation failed, leave the original ref but try to generate and
2431 recognize a simple move instruction:
2432 set (subreg (dest_extension_reg)) (dest_reg)
2433 This move instruction will be emitted right after the ref to the instruction
2434 stream and assure the correctness of the code after def_se will be removed.
2436 CURR_REF_S is the current reference.
2437 DEF_SE is the extension that couldn't be merged. */
2440 see_def_extension_not_merged (struct see_ref_s
*curr_ref_s
, rtx def_se
)
2442 struct see_replace_data d
;
2443 /* If the original insn was already merged with an extension before,
2444 take the merged one. */
2445 rtx ref
= (curr_ref_s
->merged_insn
) ? curr_ref_s
->merged_insn
:
2447 rtx merged_ref_next
= (curr_ref_s
->merged_insn
) ?
2448 NEXT_INSN (curr_ref_s
->merged_insn
): NULL_RTX
;
2449 rtx ref_copy
= copy_rtx (ref
);
2450 rtx source_extension_reg
= see_get_extension_reg (def_se
, 0);
2451 rtx dest_extension_reg
= see_get_extension_reg (def_se
, 1);
2452 rtx move_insn
= NULL
;
2454 rtx dest_reg
, dest_real_reg
;
2455 rtx new_pseudo_reg
, subreg
;
2456 enum machine_mode source_extension_mode
= GET_MODE (source_extension_reg
);
2457 enum machine_mode dest_mode
;
2459 set
= single_set (def_se
);
2461 rhs
= SET_SRC (set
);
2462 gcc_assert (GET_CODE (rhs
) == SIGN_EXTEND
2463 || GET_CODE (rhs
) == ZERO_EXTEND
);
2464 dest_reg
= XEXP (rhs
, 0);
2465 gcc_assert (REG_P (dest_reg
)
2466 || (GET_CODE (dest_reg
) == SUBREG
2467 && REG_P (SUBREG_REG (dest_reg
))));
2468 dest_real_reg
= REG_P (dest_reg
) ? dest_reg
: SUBREG_REG (dest_reg
);
2469 dest_mode
= GET_MODE (dest_reg
);
2471 subreg
= gen_lowpart_SUBREG (dest_mode
, dest_extension_reg
);
2472 new_pseudo_reg
= gen_reg_rtx (source_extension_mode
);
2474 /* Step a: Replace every use of dest_real_reg with a new pseudo register. */
2475 d
.from
= dest_real_reg
;
2476 d
.to
= new_pseudo_reg
;
2477 note_uses (&PATTERN (ref_copy
), see_replace_src
, &d
);
2478 /* Step b: Replace every instance of dest_reg with the subreg. */
2479 ref_copy
= replace_rtx (ref_copy
, dest_reg
, subreg
);
2481 /* Step c: Replace every use of the new pseudo register back to
2483 d
.from
= new_pseudo_reg
;
2484 d
.to
= dest_real_reg
;
2485 note_uses (&PATTERN (ref_copy
), see_replace_src
, &d
);
2487 if (rtx_equal_p (PATTERN (ref
), PATTERN (ref_copy
))
2488 || insn_invalid_p (ref_copy
))
2490 /* The manipulation failed. */
2492 /* Create a new copy. */
2493 ref_copy
= copy_rtx (ref
);
2495 /* Create a simple move instruction that will replace the def_se. */
2497 emit_move_insn (subreg
, dest_reg
);
2498 move_insn
= get_insns ();
2501 /* Link the manipulated instruction to the newly created move instruction
2502 and to the former created move instructions. */
2503 PREV_INSN (ref_copy
) = NULL_RTX
;
2504 NEXT_INSN (ref_copy
) = move_insn
;
2505 PREV_INSN (move_insn
) = ref_copy
;
2506 NEXT_INSN (move_insn
) = merged_ref_next
;
2507 if (merged_ref_next
!= NULL_RTX
)
2508 PREV_INSN (merged_ref_next
) = move_insn
;
2509 curr_ref_s
->merged_insn
= ref_copy
;
2513 fprintf (dump_file
, "Following def merge failure a move ");
2514 fprintf (dump_file
, "insn was added after the ref.\n");
2515 fprintf (dump_file
, "Original ref:\n");
2516 print_rtl_single (dump_file
, ref
);
2517 fprintf (dump_file
, "Move insn that was added:\n");
2518 print_rtl_single (dump_file
, move_insn
);
2523 /* The manipulation succeeded. Store the new manipulated reference. */
2525 /* Try to simplify the new manipulated insn. */
2526 validate_simplify_insn (ref_copy
);
2528 /* Create a simple move instruction to assure the correctness of the code. */
2530 emit_move_insn (dest_reg
, subreg
);
2531 move_insn
= get_insns ();
2534 /* Link the manipulated instruction to the newly created move instruction and
2535 to the former created move instructions. */
2536 PREV_INSN (ref_copy
) = NULL_RTX
;
2537 NEXT_INSN (ref_copy
) = move_insn
;
2538 PREV_INSN (move_insn
) = ref_copy
;
2539 NEXT_INSN (move_insn
) = merged_ref_next
;
2540 if (merged_ref_next
!= NULL_RTX
)
2541 PREV_INSN (merged_ref_next
) = move_insn
;
2542 curr_ref_s
->merged_insn
= ref_copy
;
2546 fprintf (dump_file
, "Following merge failure the ref was transformed!\n");
2547 fprintf (dump_file
, "Original ref:\n");
2548 print_rtl_single (dump_file
, ref
);
2549 fprintf (dump_file
, "Transformed ref:\n");
2550 print_rtl_single (dump_file
, ref_copy
);
2551 fprintf (dump_file
, "Move insn that was added:\n");
2552 print_rtl_single (dump_file
, move_insn
);
2557 /* Merge the reference instruction (ref) with the current use extension.
2559 use_se extends a NARROWmode register to a WIDEmode register.
2560 ref uses the WIDEmode register.
2562 The pattern we try to merge is this:
2563 use_se: set (dest_extension_reg) (sign/zero_extend (source_extension_reg))
2564 ref: use (dest_extension_reg)
2566 where dest_extension_reg and source_extension_reg can be subregs.
2568 The merge is done by generating, simplifying and recognizing the pattern:
2569 use (sign/zero_extend (source_extension_reg))
2571 If ref is too simple (according to see_want_to_be_merged_with_extension ())
2572 we don't try to merge it with use_se and we continue as if the merge failed.
2574 This is a subroutine of see_handle_extensions_for_one_ref called
2576 SLOT contains the current use extension instruction.
2577 B is the see_ref_s structure pointer. */
2580 see_merge_one_use_extension (void **slot
, void *b
)
2582 struct see_ref_s
*curr_ref_s
= (struct see_ref_s
*) b
;
2584 rtx ref
= (curr_ref_s
->merged_insn
) ? curr_ref_s
->merged_insn
:
2586 rtx merged_ref_next
= (curr_ref_s
->merged_insn
) ?
2587 NEXT_INSN (curr_ref_s
->merged_insn
): NULL_RTX
;
2588 rtx ref_copy
= copy_rtx (ref
);
2589 rtx extension_set
= single_set (use_se
);
2590 rtx extension_rhs
= NULL
;
2591 rtx dest_extension_reg
= see_get_extension_reg (use_se
, 1);
2593 rtx simplified_note
= NULL
;
2595 gcc_assert (use_se
&& curr_ref_s
&& extension_set
);
2597 extension_rhs
= SET_SRC (extension_set
);
2599 /* In REG_EQUIV and REG_EQUAL notes that mention the register we need to
2600 replace the uses of the dest_extension_reg with the rhs of the extension
2601 instruction. This is necessary since there might not be an extension in
2602 the path between the definition and the note when this optimization is
2604 note
= find_reg_equal_equiv_note (ref_copy
);
2607 simplified_note
= simplify_replace_rtx (XEXP (note
, 0),
2610 if (rtx_equal_p (XEXP (note
, 0), simplified_note
))
2611 /* Replacement failed. Remove the note. */
2612 remove_note (ref_copy
, note
);
2614 XEXP (note
, 0) = simplified_note
;
2617 if (!see_want_to_be_merged_with_extension (ref
, use_se
, USE_EXTENSION
))
2619 /* The use in the reference is too simple. Don't try to merge. */
2622 fprintf (dump_file
, "Use merge skipped!\n");
2623 fprintf (dump_file
, "Original instructions:\n");
2624 print_rtl_single (dump_file
, use_se
);
2625 print_rtl_single (dump_file
, ref
);
2627 /* Don't remove the current use_se from the use_se_hash and continue to
2628 the next extension. */
2632 validate_replace_src_group (dest_extension_reg
, extension_rhs
, ref_copy
);
2634 if (!num_changes_pending ())
2635 /* In this case this is not a real use (the only use is/was in the notes
2636 list). Remove the use extension from the hash. This will prevent it
2637 from been emitted in the first place. */
2641 fprintf (dump_file
, "Use extension not necessary before:\n");
2642 print_rtl_single (dump_file
, ref
);
2644 htab_clear_slot (curr_ref_s
->use_se_hash
, (PTR
*)slot
);
2645 PREV_INSN (ref_copy
) = NULL_RTX
;
2646 NEXT_INSN (ref_copy
) = merged_ref_next
;
2647 if (merged_ref_next
!= NULL_RTX
)
2648 PREV_INSN (merged_ref_next
) = ref_copy
;
2649 curr_ref_s
->merged_insn
= ref_copy
;
2653 if (!apply_change_group ())
2655 /* The merge failed. */
2658 fprintf (dump_file
, "Use merge failed!\n");
2659 fprintf (dump_file
, "Original instructions:\n");
2660 print_rtl_single (dump_file
, use_se
);
2661 print_rtl_single (dump_file
, ref
);
2663 /* Don't remove the current use_se from the use_se_hash and continue to
2664 the next extension. */
2668 /* The merge succeeded! */
2670 /* Try to simplify the new merged insn. */
2671 validate_simplify_insn (ref_copy
);
2673 PREV_INSN (ref_copy
) = NULL_RTX
;
2674 NEXT_INSN (ref_copy
) = merged_ref_next
;
2675 if (merged_ref_next
!= NULL_RTX
)
2676 PREV_INSN (merged_ref_next
) = ref_copy
;
2677 curr_ref_s
->merged_insn
= ref_copy
;
2681 fprintf (dump_file
, "Use merge succeeded!\n");
2682 fprintf (dump_file
, "Original instructions:\n");
2683 print_rtl_single (dump_file
, use_se
);
2684 print_rtl_single (dump_file
, ref
);
2685 fprintf (dump_file
, "Merged instruction:\n");
2686 print_rtl_single (dump_file
, ref_copy
);
2689 /* Remove the current use_se from the use_se_hash. This will prevent it from
2690 been emitted in the first place. */
2691 htab_clear_slot (curr_ref_s
->use_se_hash
, (PTR
*)slot
);
2696 /* Merge the reference instruction (ref) with the extension that follows it
2697 in the same basic block (def_se).
2698 ref sets a NARROWmode register and def_se extends it to WIDEmode register.
2700 The pattern we try to merge is this:
2701 ref: set (dest_reg) (rhs)
2702 def_se: set (dest_extension_reg) (sign/zero_extend (source_extension_reg))
2704 where dest_reg and source_extension_reg can both be subregs (together)
2705 and (REGNO (dest_reg) == REGNO (source_extension_reg))
2707 The merge is done by generating, simplifying and recognizing the pattern:
2708 set (dest_extension_reg) (sign/zero_extend (rhs))
2709 If ref is a parallel instruction we just replace the relevant set in it.
2711 If ref is too simple (according to see_want_to_be_merged_with_extension ())
2712 we don't try to merge it with def_se and we continue as if the merge failed.
2714 This is a subroutine of see_handle_extensions_for_one_ref called
2717 SLOT contains the current def extension instruction.
2718 B is the see_ref_s structure pointer. */
2721 see_merge_one_def_extension (void **slot
, void *b
)
2723 struct see_ref_s
*curr_ref_s
= (struct see_ref_s
*) b
;
2725 /* If the original insn was already merged with an extension before,
2726 take the merged one. */
2727 rtx ref
= (curr_ref_s
->merged_insn
) ? curr_ref_s
->merged_insn
:
2729 rtx merged_ref_next
= (curr_ref_s
->merged_insn
) ?
2730 NEXT_INSN (curr_ref_s
->merged_insn
): NULL_RTX
;
2731 rtx ref_copy
= copy_rtx (ref
);
2733 rtx source_extension_reg
= see_get_extension_reg (def_se
, 0);
2734 rtx dest_extension_reg
= see_get_extension_reg (def_se
, 1);
2735 rtx move_insn
, *rtx_slot
, subreg
;
2736 rtx temp_extension
= NULL
;
2737 rtx simplified_temp_extension
= NULL
;
2740 enum rtx_code extension_code
;
2741 enum machine_mode source_extension_mode
;
2742 enum machine_mode source_mode
;
2743 enum machine_mode dest_extension_mode
;
2744 bool merge_success
= false;
2753 if (!see_want_to_be_merged_with_extension (ref
, def_se
, DEF_EXTENSION
))
2755 /* The definition in the reference is too simple. Don't try to merge. */
2758 fprintf (dump_file
, "Def merge skipped!\n");
2759 fprintf (dump_file
, "Original instructions:\n");
2760 print_rtl_single (dump_file
, ref
);
2761 print_rtl_single (dump_file
, def_se
);
2764 see_def_extension_not_merged (curr_ref_s
, def_se
);
2765 /* Continue to the next extension. */
2769 extension_code
= see_get_extension_data (def_se
, &source_mode
);
2771 /* Try to merge and simplify the extension. */
2772 source_extension_mode
= GET_MODE (source_extension_reg
);
2773 dest_extension_mode
= GET_MODE (dest_extension_reg
);
2775 pat
= &PATTERN (ref_copy
);
2776 code
= GET_CODE (*pat
);
2778 if (code
== PARALLEL
)
2780 bool need_to_apply_change
= false;
2782 for (i
= 0; i
< XVECLEN (*pat
, 0); i
++)
2784 rtx
*sub
= &XVECEXP (*pat
, 0, i
);
2786 if (GET_CODE (*sub
) == SET
2787 && GET_MODE (SET_SRC (*sub
)) != VOIDmode
2788 && GET_MODE (SET_DEST (*sub
)) == source_mode
2789 && ((REG_P (SET_DEST (*sub
))
2790 && REGNO (SET_DEST (*sub
)) == REGNO (source_extension_reg
))
2791 || (GET_CODE (SET_DEST (*sub
)) == SUBREG
2792 && REG_P (SUBREG_REG (SET_DEST (*sub
)))
2793 && (REGNO (SUBREG_REG (SET_DEST (*sub
))) ==
2794 REGNO (source_extension_reg
)))))
2796 rtx orig_src
= SET_SRC (*sub
);
2798 if (extension_code
== SIGN_EXTEND
)
2799 temp_extension
= gen_rtx_SIGN_EXTEND (dest_extension_mode
,
2802 temp_extension
= gen_rtx_ZERO_EXTEND (dest_extension_mode
,
2804 simplified_temp_extension
= simplify_rtx (temp_extension
);
2806 (simplified_temp_extension
) ? simplified_temp_extension
:
2808 new_set
= gen_rtx_SET (VOIDmode
, dest_extension_reg
,
2810 validate_change (ref_copy
, sub
, new_set
, 1);
2811 need_to_apply_change
= true;
2814 if (need_to_apply_change
)
2815 if (apply_change_group ())
2816 merge_success
= true;
2818 else if (code
== SET
2819 && GET_MODE (SET_SRC (*pat
)) != VOIDmode
2820 && GET_MODE (SET_DEST (*pat
)) == source_mode
2821 && ((REG_P (SET_DEST (*pat
))
2822 && REGNO (SET_DEST (*pat
)) == REGNO (source_extension_reg
))
2823 || (GET_CODE (SET_DEST (*pat
)) == SUBREG
2824 && REG_P (SUBREG_REG (SET_DEST (*pat
)))
2825 && (REGNO (SUBREG_REG (SET_DEST (*pat
))) ==
2826 REGNO (source_extension_reg
)))))
2828 rtx orig_src
= SET_SRC (*pat
);
2830 if (extension_code
== SIGN_EXTEND
)
2831 temp_extension
= gen_rtx_SIGN_EXTEND (dest_extension_mode
, orig_src
);
2833 temp_extension
= gen_rtx_ZERO_EXTEND (dest_extension_mode
, orig_src
);
2834 simplified_temp_extension
= simplify_rtx (temp_extension
);
2835 temp_extension
= (simplified_temp_extension
) ? simplified_temp_extension
:
2837 new_set
= gen_rtx_SET (VOIDmode
, dest_extension_reg
, temp_extension
);
2838 if (validate_change (ref_copy
, pat
, new_set
, 0))
2839 merge_success
= true;
2843 /* The merge failed. */
2846 fprintf (dump_file
, "Def merge failed!\n");
2847 fprintf (dump_file
, "Original instructions:\n");
2848 print_rtl_single (dump_file
, ref
);
2849 print_rtl_single (dump_file
, def_se
);
2852 see_def_extension_not_merged (curr_ref_s
, def_se
);
2853 /* Continue to the next extension. */
2857 /* The merge succeeded! */
2859 /* Create a simple move instruction to assure the correctness of the code. */
2860 subreg
= gen_lowpart_SUBREG (source_extension_mode
, dest_extension_reg
);
2862 emit_move_insn (source_extension_reg
, subreg
);
2863 move_insn
= get_insns ();
2866 /* Link the merged instruction to the newly created move instruction and
2867 to the former created move instructions. */
2868 PREV_INSN (ref_copy
) = NULL_RTX
;
2869 NEXT_INSN (ref_copy
) = move_insn
;
2870 PREV_INSN (move_insn
) = ref_copy
;
2871 NEXT_INSN (move_insn
) = merged_ref_next
;
2872 if (merged_ref_next
!= NULL_RTX
)
2873 PREV_INSN (merged_ref_next
) = move_insn
;
2874 curr_ref_s
->merged_insn
= ref_copy
;
2878 fprintf (dump_file
, "Def merge succeeded!\n");
2879 fprintf (dump_file
, "Original instructions:\n");
2880 print_rtl_single (dump_file
, ref
);
2881 print_rtl_single (dump_file
, def_se
);
2882 fprintf (dump_file
, "Merged instruction:\n");
2883 print_rtl_single (dump_file
, ref_copy
);
2884 fprintf (dump_file
, "Move instruction that was added:\n");
2885 print_rtl_single (dump_file
, move_insn
);
2888 /* Remove the current def_se from the unmerged_def_se_hash and insert it to
2889 the merged_def_se_hash. */
2890 htab_clear_slot (curr_ref_s
->unmerged_def_se_hash
, (PTR
*)slot
);
2891 if (!curr_ref_s
->merged_def_se_hash
)
2892 curr_ref_s
->merged_def_se_hash
= htab_create (10,
2893 hash_descriptor_extension
,
2894 eq_descriptor_extension
,
2896 rtx_slot
= (rtx
*) htab_find_slot (curr_ref_s
->merged_def_se_hash
,
2897 dest_extension_reg
, INSERT
);
2898 gcc_assert (*rtx_slot
== NULL
);
2905 /* Try to eliminate extensions in this order:
2906 a. Try to merge only the def extensions, one by one.
2907 b. Try to merge only the use extensions, one by one.
2910 Try to merge any couple of use extensions simultaneously.
2911 Try to merge any def extension with one or two uses extensions
2914 After all the merges are done, update the register properties for the basic
2915 block and eliminate locally redundant use extensions.
2917 This is a subroutine of see_merge_and_eliminate_extensions called
2918 via splay_tree_foreach.
2919 STN is the current node in the see_bb_splay_ar[i] splay tree. It holds a
2920 see_ref_s structure. */
2923 see_handle_extensions_for_one_ref (splay_tree_node stn
,
2924 void *data ATTRIBUTE_UNUSED
)
2926 htab_t use_se_hash
= ((struct see_ref_s
*) (stn
->value
))->use_se_hash
;
2927 htab_t unmerged_def_se_hash
=
2928 ((struct see_ref_s
*) (stn
->value
))->unmerged_def_se_hash
;
2929 htab_t merged_def_se_hash
;
2930 rtx ref
= ((struct see_ref_s
*) (stn
->value
))->insn
;
2934 fprintf (dump_file
, "Handling ref:\n");
2935 print_rtl_single (dump_file
, ref
);
2938 /* a. Try to eliminate only def extensions, one by one. */
2939 if (unmerged_def_se_hash
)
2940 htab_traverse_noresize (unmerged_def_se_hash
, see_merge_one_def_extension
,
2941 (PTR
) (stn
->value
));
2944 /* b. Try to eliminate only use extensions, one by one. */
2945 htab_traverse_noresize (use_se_hash
, see_merge_one_use_extension
,
2946 (PTR
) (stn
->value
));
2948 merged_def_se_hash
= ((struct see_ref_s
*) (stn
->value
))->merged_def_se_hash
;
2952 fprintf (dump_file
, "The hashes of the current reference:\n");
2953 if (unmerged_def_se_hash
)
2955 fprintf (dump_file
, "unmerged_def_se_hash:\n");
2956 htab_traverse (unmerged_def_se_hash
, see_print_one_extension
, NULL
);
2958 if (merged_def_se_hash
)
2960 fprintf (dump_file
, "merged_def_se_hash:\n");
2961 htab_traverse (merged_def_se_hash
, see_print_one_extension
, NULL
);
2965 fprintf (dump_file
, "use_se_hash:\n");
2966 htab_traverse (use_se_hash
, see_print_one_extension
, NULL
);
2970 /* Now that all the merges are done, update the register properties of the
2971 basic block and eliminate locally redundant extensions.
2972 It is important that we first traverse the use extensions hash and
2973 afterwards the def extensions hashes. */
2976 htab_traverse_noresize (use_se_hash
, see_set_prop_unmerged_use
,
2977 (PTR
) (stn
->value
));
2979 if (unmerged_def_se_hash
)
2980 htab_traverse (unmerged_def_se_hash
, see_set_prop_unmerged_def
,
2981 (PTR
) (stn
->value
));
2983 if (merged_def_se_hash
)
2984 htab_traverse (merged_def_se_hash
, see_set_prop_merged_def
,
2985 (PTR
) (stn
->value
));
2987 /* Continue to the next definition. */
2992 /* Phase 2 top level function.
2993 In this phase, we try to merge def extensions and use extensions with their
2994 references, and eliminate redundant extensions in the same basic block.
2995 We also gather information for the next phases. */
2998 see_merge_and_eliminate_extensions (void)
3004 "* Phase 2: Merge and eliminate locally redundant extensions. *\n");
3006 /* Traverse over all the splay trees of the basic blocks. */
3007 for (i
= 0; i
< last_bb
; i
++)
3009 if (see_bb_splay_ar
[i
])
3012 fprintf (dump_file
, "Handling references for bb %d\n", i
);
3013 /* Traverse over all the references in the basic block in forward
3015 splay_tree_foreach (see_bb_splay_ar
[i
],
3016 see_handle_extensions_for_one_ref
, NULL
);
3022 /* Phase 1 implementation: Propagate extensions to uses. */
3024 /* Insert REF_INSN into the splay tree of its basic block.
3025 SE_INSN is the extension to store in the proper hash according to TYPE.
3027 Return true if everything went well.
3028 Otherwise, return false (this will cause the optimization to be aborted). */
3031 see_store_reference_and_extension (rtx ref_insn
, rtx se_insn
,
3032 enum extension_type type
)
3036 splay_tree_node stn
= NULL
;
3037 htab_t se_hash
= NULL
;
3038 struct see_ref_s
*ref_s
= NULL
;
3040 /* Check the arguments. */
3041 gcc_assert (ref_insn
&& se_insn
);
3042 if (!see_bb_splay_ar
)
3045 curr_bb_num
= BLOCK_NUM (ref_insn
);
3046 gcc_assert (curr_bb_num
< last_bb
&& curr_bb_num
>= 0);
3048 /* Insert the reference to the splay tree of its basic block. */
3049 if (!see_bb_splay_ar
[curr_bb_num
])
3050 /* The splay tree for this block doesn't exist yet, create it. */
3051 see_bb_splay_ar
[curr_bb_num
] = splay_tree_new (splay_tree_compare_ints
,
3052 NULL
, see_free_ref_s
);
3054 /* Splay tree already exists, check if the current reference is already
3057 stn
= splay_tree_lookup (see_bb_splay_ar
[curr_bb_num
],
3058 DF_INSN_LUID (df
, ref_insn
));
3062 case EXPLICIT_DEF_EXTENSION
:
3064 ((struct see_ref_s
*) (stn
->value
))->unmerged_def_se_hash
;
3067 se_hash
= htab_create (10,
3068 hash_descriptor_extension
,
3069 eq_descriptor_extension
,
3071 ((struct see_ref_s
*) (stn
->value
))->unmerged_def_se_hash
=
3075 case IMPLICIT_DEF_EXTENSION
:
3076 se_hash
= ((struct see_ref_s
*) (stn
->value
))->merged_def_se_hash
;
3079 se_hash
= htab_create (10,
3080 hash_descriptor_extension
,
3081 eq_descriptor_extension
,
3083 ((struct see_ref_s
*) (stn
->value
))->merged_def_se_hash
=
3088 se_hash
= ((struct see_ref_s
*) (stn
->value
))->use_se_hash
;
3091 se_hash
= htab_create (10,
3092 hash_descriptor_extension
,
3093 eq_descriptor_extension
,
3095 ((struct see_ref_s
*) (stn
->value
))->use_se_hash
= se_hash
;
3103 /* Initialize a new see_ref_s structure and insert it to the splay
3107 ref_s
= xmalloc (sizeof (struct see_ref_s
));
3108 ref_s
->luid
= DF_INSN_LUID (df
, ref_insn
);
3109 ref_s
->insn
= ref_insn
;
3110 ref_s
->merged_insn
= NULL
;
3112 /* Initialize the hashes. */
3115 case EXPLICIT_DEF_EXTENSION
:
3116 ref_s
->unmerged_def_se_hash
= htab_create (10,
3117 hash_descriptor_extension
,
3118 eq_descriptor_extension
,
3120 se_hash
= ref_s
->unmerged_def_se_hash
;
3121 ref_s
->merged_def_se_hash
= NULL
;
3122 ref_s
->use_se_hash
= NULL
;
3124 case IMPLICIT_DEF_EXTENSION
:
3125 ref_s
->merged_def_se_hash
= htab_create (10,
3126 hash_descriptor_extension
,
3127 eq_descriptor_extension
,
3129 se_hash
= ref_s
->merged_def_se_hash
;
3130 ref_s
->unmerged_def_se_hash
= NULL
;
3131 ref_s
->use_se_hash
= NULL
;
3134 ref_s
->use_se_hash
= htab_create (10,
3135 hash_descriptor_extension
,
3136 eq_descriptor_extension
,
3138 se_hash
= ref_s
->use_se_hash
;
3139 ref_s
->unmerged_def_se_hash
= NULL
;
3140 ref_s
->merged_def_se_hash
= NULL
;
3147 /* Insert the new extension instruction into the correct se_hash of the
3148 current reference. */
3149 rtx_slot
= (rtx
*) htab_find_slot (se_hash
, se_insn
, INSERT
);
3150 if (*rtx_slot
!= NULL
)
3152 gcc_assert (type
== USE_EXTENSION
);
3153 gcc_assert (rtx_equal_p (PATTERN (*rtx_slot
), PATTERN (se_insn
)));
3156 *rtx_slot
= se_insn
;
3158 /* If this is a new reference, insert it into the splay_tree. */
3160 splay_tree_insert (see_bb_splay_ar
[curr_bb_num
],
3161 DF_INSN_LUID (df
, ref_insn
), (splay_tree_value
) ref_s
);
3166 /* Go over all the defs, for each relevant definition (defined below) store its
3167 instruction as a reference.
3169 A definition is relevant if its root has
3170 ((entry_type == SIGN_EXTENDED_DEF) || (entry_type == ZERO_EXTENDED_DEF)) and
3171 his source_mode is not narrower then the the roots source_mode.
3173 Return the number of relevant defs or negative number if something bad had
3174 happened and the optimization should be aborted. */
3177 see_handle_relevant_defs (void)
3182 rtx ref_insn
= NULL
;
3183 struct web_entry
*root_entry
= NULL
;
3185 int num_relevant_defs
= 0;
3186 enum rtx_code extension_code
;
3188 for (i
= 0; i
< defs_num
; i
++)
3190 insn
= DF_REF_INSN (DF_DEFS_GET (df
, i
));
3191 reg
= DF_REF_REAL_REG (DF_DEFS_GET (df
, i
));
3199 root_entry
= unionfind_root (&def_entry
[i
]);
3201 if (ENTRY_EI (root_entry
)->relevancy
!= SIGN_EXTENDED_DEF
3202 && ENTRY_EI (root_entry
)->relevancy
!= ZERO_EXTENDED_DEF
)
3203 /* The current web is not relevant. Continue to the next def. */
3206 if (root_entry
->reg
)
3207 /* It isn't possible to have two different register for the same
3209 gcc_assert (rtx_equal_p (root_entry
->reg
, reg
));
3211 root_entry
->reg
= reg
;
3213 /* The current definition is an EXTENDED_DEF or a definition that its
3214 source_mode is narrower then its web's source_mode.
3215 This means that we need to generate the implicit extension explicitly
3216 and store it in the current reference's merged_def_se_hash. */
3217 if (ENTRY_EI (&def_entry
[i
])->local_relevancy
== EXTENDED_DEF
3218 || (ENTRY_EI (&def_entry
[i
])->local_source_mode
<
3219 ENTRY_EI (root_entry
)->source_mode
))
3221 num_relevant_defs
++;
3223 if (ENTRY_EI (root_entry
)->relevancy
== SIGN_EXTENDED_DEF
)
3224 extension_code
= SIGN_EXTEND
;
3226 extension_code
= ZERO_EXTEND
;
3229 see_gen_normalized_extension (reg
, extension_code
,
3230 ENTRY_EI (root_entry
)->source_mode
);
3232 /* This is a dummy extension, mark it as deleted. */
3233 INSN_DELETED_P (se_insn
) = 1;
3235 if (!see_store_reference_and_extension (insn
, se_insn
,
3236 IMPLICIT_DEF_EXTENSION
))
3237 /* Something bad happened. Abort the optimization. */
3242 ref_insn
= PREV_INSN (insn
);
3243 gcc_assert (BLOCK_NUM (ref_insn
) == BLOCK_NUM (insn
));
3245 num_relevant_defs
++;
3247 if (!see_store_reference_and_extension (ref_insn
, insn
,
3248 EXPLICIT_DEF_EXTENSION
))
3249 /* Something bad happened. Abort the optimization. */
3252 return num_relevant_defs
;
3256 /* Go over all the uses, for each use in relevant web store its instruction as
3257 a reference and generate an extension before it.
3259 Return the number of relevant uses or negative number if something bad had
3260 happened and the optimization should be aborted. */
3263 see_handle_relevant_uses (void)
3267 struct web_entry
*root_entry
= NULL
;
3270 int num_relevant_uses
= 0;
3271 enum rtx_code extension_code
;
3273 for (i
= 0; i
< uses_num
; i
++)
3275 insn
= DF_REF_INSN (DF_USES_GET (df
, i
));
3276 reg
= DF_REF_REAL_REG (DF_USES_GET (df
, i
));
3284 root_entry
= unionfind_root (&use_entry
[i
]);
3286 if (ENTRY_EI (root_entry
)->relevancy
!= SIGN_EXTENDED_DEF
3287 && ENTRY_EI (root_entry
)->relevancy
!= ZERO_EXTENDED_DEF
)
3288 /* The current web is not relevant. Continue to the next use. */
3291 if (root_entry
->reg
)
3292 /* It isn't possible to have two different register for the same
3294 gcc_assert (rtx_equal_p (root_entry
->reg
, reg
));
3296 root_entry
->reg
= reg
;
3298 /* Generate the use extension. */
3299 if (ENTRY_EI (root_entry
)->relevancy
== SIGN_EXTENDED_DEF
)
3300 extension_code
= SIGN_EXTEND
;
3302 extension_code
= ZERO_EXTEND
;
3305 see_gen_normalized_extension (reg
, extension_code
,
3306 ENTRY_EI (root_entry
)->source_mode
);
3308 /* This is very bad, abort the transformation. */
3311 num_relevant_uses
++;
3313 if (!see_store_reference_and_extension (insn
, se_insn
,
3315 /* Something bad happened. Abort the optimization. */
3319 return num_relevant_uses
;
3323 /* Updates the relevancy of all the uses.
3324 The information of the i'th use is stored in use_entry[i].
3325 Currently all the uses are relevant for the optimization except for uses that
3326 are in LIBCALL or RETVAL instructions. */
3329 see_update_uses_relevancy (void)
3333 struct see_entry_extra_info
*curr_entry_extra_info
;
3337 if (!df
|| !use_entry
)
3340 for (i
= 0; i
< uses_num
; i
++)
3343 insn
= DF_REF_INSN (DF_USES_GET (df
, i
));
3344 reg
= DF_REF_REAL_REG (DF_USES_GET (df
, i
));
3352 if (insn
&& find_reg_note (insn
, REG_LIBCALL
, NULL_RTX
))
3354 if (find_reg_note (insn
, REG_RETVAL
, NULL_RTX
))
3362 fprintf (dump_file
, "u%i insn %i reg %i ",
3363 i
, (insn
? INSN_UID (insn
) : -1), REGNO (reg
));
3364 if (et
== NOT_RELEVANT
)
3365 fprintf (dump_file
, "NOT RELEVANT \n");
3367 fprintf (dump_file
, "RELEVANT USE \n");
3370 curr_entry_extra_info
= xmalloc (sizeof (struct see_entry_extra_info
));
3371 curr_entry_extra_info
->relevancy
= et
;
3372 curr_entry_extra_info
->local_relevancy
= et
;
3373 use_entry
[i
].extra_info
= curr_entry_extra_info
;
3374 use_entry
[i
].reg
= NULL
;
3375 use_entry
[i
].pred
= NULL
;
3380 /* A definition in a candidate for this optimization only if its pattern is
3381 recognized as relevant in this function.
3382 INSN is the instruction to be recognized.
3384 - If this is the pattern of a common sign extension after definition:
3385 PREV_INSN (INSN): def (reg:NARROWmode r)
3386 INSN: set ((reg:WIDEmode r')
3387 (sign_extend:WIDEmode (reg:NARROWmode r)))
3388 return SIGN_EXTENDED_DEF and set SOURCE_MODE to NARROWmode.
3390 - If this is the pattern of a common zero extension after definition:
3391 PREV_INSN (INSN): def (reg:NARROWmode r)
3392 INSN: set ((reg:WIDEmode r')
3393 (zero_extend:WIDEmode (reg:NARROWmode r)))
3394 return ZERO_EXTENDED_DEF and set SOURCE_MODE to NARROWmode.
3399 INSN: set ((reg:WIDEmode r) (sign_extend:WIDEmode (...expr...)))
3400 return EXTENDED_DEF and set SOURCE_MODE to the mode of expr.
3403 INSN: set ((reg:WIDEmode r) (zero_extend:WIDEmode (...expr...)))
3404 return EXTENDED_DEF and set SOURCE_MODE_UNSIGNED to the mode of expr.
3407 INSN: set ((reg:WIDEmode r) (CONST_INT (...)))
3408 return EXTENDED_DEF and set SOURCE_MODE(_UNSIGNED) to the narrowest mode that
3409 is implicitly sign(zero) extended to WIDEmode in the INSN.
3411 - FIXME: Extensions that are not adjacent to their definition and EXTENDED_DEF
3412 that is part of a PARALLEL instruction are not handled.
3413 These restriction can be relaxed. */
3415 static enum entry_type
3416 see_analyze_one_def (rtx insn
, enum machine_mode
*source_mode
,
3417 enum machine_mode
*source_mode_unsigned
)
3419 enum rtx_code extension_code
;
3423 rtx source_register
= NULL
;
3424 rtx prev_insn
= NULL
;
3425 rtx next_insn
= NULL
;
3426 enum machine_mode mode
;
3427 enum machine_mode next_source_mode
;
3428 HOST_WIDE_INT val
= 0;
3429 HOST_WIDE_INT val2
= 0;
3432 *source_mode
= MAX_MACHINE_MODE
;
3433 *source_mode_unsigned
= MAX_MACHINE_MODE
;
3436 return NOT_RELEVANT
;
3439 return NOT_RELEVANT
;
3441 extension_code
= see_get_extension_data (insn
, source_mode
);
3442 switch (extension_code
)
3446 source_register
= see_get_extension_reg (insn
, 0);
3447 /* FIXME: This restriction can be relaxed. The only thing that is
3448 important is that the reference would be inside the same basic block
3449 as the extension. */
3450 prev_insn
= PREV_INSN (insn
);
3451 if (!prev_insn
|| !INSN_P (prev_insn
))
3452 return NOT_RELEVANT
;
3454 if (!reg_set_between_p (source_register
, PREV_INSN (prev_insn
), insn
))
3455 return NOT_RELEVANT
;
3457 if (find_reg_note (prev_insn
, REG_LIBCALL
, NULL_RTX
))
3458 return NOT_RELEVANT
;
3460 if (find_reg_note (prev_insn
, REG_RETVAL
, NULL_RTX
))
3461 return NOT_RELEVANT
;
3463 /* If we can't use copy_rtx on the reference it can't be a reference. */
3464 if (GET_CODE (PATTERN (prev_insn
)) == PARALLEL
3465 && asm_noperands (PATTERN (prev_insn
)) >= 0)
3466 return NOT_RELEVANT
;
3468 /* Now, check if this extension is a reference itself. If so, it is not
3469 relevant. Handling this extension as relevant would make things much
3470 more complicated. */
3471 next_insn
= NEXT_INSN (insn
);
3473 && INSN_P (next_insn
)
3474 && (see_get_extension_data (next_insn
, &next_source_mode
) !=
3477 rtx curr_dest_register
= see_get_extension_reg (insn
, 1);
3478 rtx next_source_register
= see_get_extension_reg (next_insn
, 0);
3480 if (REGNO (curr_dest_register
) == REGNO (next_source_register
))
3481 return NOT_RELEVANT
;
3484 if (extension_code
== SIGN_EXTEND
)
3485 return SIGN_EXTENDED_DEF
;
3487 return ZERO_EXTENDED_DEF
;
3490 /* This may still be an EXTENDED_DEF. */
3492 /* FIXME: This restriction can be relaxed. It is possible to handle
3493 PARALLEL insns too. */
3494 set
= single_set (insn
);
3496 return NOT_RELEVANT
;
3497 rhs
= SET_SRC (set
);
3498 lhs
= SET_DEST (set
);
3500 /* Don't handle extensions to something other then register or
3502 if (!REG_P (lhs
) && !SUBREG_REG (lhs
))
3503 return NOT_RELEVANT
;
3505 switch (GET_CODE (rhs
))
3508 *source_mode
= GET_MODE (XEXP (rhs
, 0));
3509 *source_mode_unsigned
= MAX_MACHINE_MODE
;
3510 return EXTENDED_DEF
;
3512 *source_mode
= MAX_MACHINE_MODE
;
3513 *source_mode_unsigned
= GET_MODE (XEXP (rhs
, 0));
3514 return EXTENDED_DEF
;
3519 /* Find the narrowest mode, val could fit into. */
3520 for (mode
= GET_CLASS_NARROWEST_MODE (MODE_INT
), i
= 0;
3521 GET_MODE_BITSIZE (mode
) < BITS_PER_WORD
;
3522 mode
= GET_MODE_WIDER_MODE (mode
), i
++)
3524 val2
= trunc_int_for_mode (val
, mode
);
3525 if (val2
== val
&& *source_mode
== MAX_MACHINE_MODE
)
3526 *source_mode
= mode
;
3527 if (val
== (val
& (HOST_WIDE_INT
)GET_MODE_MASK (mode
))
3528 && *source_mode_unsigned
== MAX_MACHINE_MODE
)
3529 *source_mode_unsigned
= mode
;
3530 if (*source_mode
!= MAX_MACHINE_MODE
3531 && *source_mode_unsigned
!=MAX_MACHINE_MODE
)
3532 return EXTENDED_DEF
;
3534 if (*source_mode
!= MAX_MACHINE_MODE
3535 || *source_mode_unsigned
!=MAX_MACHINE_MODE
)
3536 return EXTENDED_DEF
;
3537 return NOT_RELEVANT
;
3539 return NOT_RELEVANT
;
3547 /* Updates the relevancy and source_mode of all the definitions.
3548 The information of the i'th definition is stored in def_entry[i]. */
3551 see_update_defs_relevancy (void)
3553 struct see_entry_extra_info
*curr_entry_extra_info
;
3558 enum machine_mode source_mode
;
3559 enum machine_mode source_mode_unsigned
;
3561 if (!df
|| !def_entry
)
3564 for (i
= 0; i
< defs_num
; i
++)
3566 insn
= DF_REF_INSN (DF_DEFS_GET (df
, i
));
3567 reg
= DF_REF_REAL_REG (DF_DEFS_GET (df
, i
));
3569 et
= see_analyze_one_def (insn
, &source_mode
, &source_mode_unsigned
);
3571 curr_entry_extra_info
= xmalloc (sizeof (struct see_entry_extra_info
));
3572 curr_entry_extra_info
->relevancy
= et
;
3573 curr_entry_extra_info
->local_relevancy
= et
;
3574 if (et
!= EXTENDED_DEF
)
3576 curr_entry_extra_info
->source_mode
= source_mode
;
3577 curr_entry_extra_info
->local_source_mode
= source_mode
;
3581 curr_entry_extra_info
->source_mode_signed
= source_mode
;
3582 curr_entry_extra_info
->source_mode_unsigned
= source_mode_unsigned
;
3584 def_entry
[i
].extra_info
= curr_entry_extra_info
;
3585 def_entry
[i
].reg
= NULL
;
3586 def_entry
[i
].pred
= NULL
;
3590 if (et
== NOT_RELEVANT
)
3592 fprintf (dump_file
, "d%i insn %i reg %i ",
3593 i
, (insn
? INSN_UID (insn
) : -1), REGNO (reg
));
3594 fprintf (dump_file
, "NOT RELEVANT \n");
3598 fprintf (dump_file
, "d%i insn %i reg %i ",
3599 i
,INSN_UID (insn
), REGNO (reg
));
3600 fprintf (dump_file
, "RELEVANT - ");
3603 case SIGN_EXTENDED_DEF
:
3604 fprintf (dump_file
, "SIGN_EXTENDED_DEF, source_mode = %s\n",
3605 GET_MODE_NAME (source_mode
));
3607 case ZERO_EXTENDED_DEF
:
3608 fprintf (dump_file
, "ZERO_EXTENDED_DEF, source_mode = %s\n",
3609 GET_MODE_NAME (source_mode
));
3612 fprintf (dump_file
, "EXTENDED_DEF, ");
3613 if (source_mode
!= MAX_MACHINE_MODE
3614 && source_mode_unsigned
!= MAX_MACHINE_MODE
)
3616 fprintf (dump_file
, "positive const, ");
3617 fprintf (dump_file
, "source_mode_signed = %s, ",
3618 GET_MODE_NAME (source_mode
));
3619 fprintf (dump_file
, "source_mode_unsigned = %s\n",
3620 GET_MODE_NAME (source_mode_unsigned
));
3622 else if (source_mode
!= MAX_MACHINE_MODE
)
3623 fprintf (dump_file
, "source_mode_signed = %s\n",
3624 GET_MODE_NAME (source_mode
));
3626 fprintf (dump_file
, "source_mode_unsigned = %s\n",
3627 GET_MODE_NAME (source_mode_unsigned
));
3638 /* Phase 1 top level function.
3639 In this phase the relevancy of all the definitions and uses are checked,
3640 later the webs are produces and the extensions are generated.
3641 These extensions are not emitted yet into the insns stream.
3643 returns true if at list one relevant web was found and there were no
3644 problems, otherwise return false. */
3647 see_propagate_extensions_to_uses (void)
3650 int num_relevant_uses
;
3651 int num_relevant_defs
;
3655 "* Phase 1: Propagate extensions to uses. *\n");
3657 /* Update the relevancy of references using the DF object. */
3658 see_update_defs_relevancy ();
3659 see_update_uses_relevancy ();
3661 /* Produce the webs and update the extra_info of the root.
3662 In general, a web is relevant if all its definitions and uses are relevant
3663 and there is at least one definition that was marked as SIGN_EXTENDED_DEF
3664 or ZERO_EXTENDED_DEF. */
3665 for (i
= 0; i
< uses_num
; i
++)
3666 union_defs (df
, DF_USES_GET (df
, i
), def_entry
, use_entry
,
3667 see_update_leader_extra_info
);
3669 /* Generate use extensions for references and insert these
3670 references to see_bb_splay_ar data structure. */
3671 num_relevant_uses
= see_handle_relevant_uses ();
3673 if (num_relevant_uses
< 0)
3676 /* Store the def extensions in their references structures and insert these
3677 references to see_bb_splay_ar data structure. */
3678 num_relevant_defs
= see_handle_relevant_defs ();
3680 if (num_relevant_defs
< 0)
3683 return num_relevant_uses
> 0 || num_relevant_defs
> 0;
3687 /* Main entry point for the sign extension elimination optimization. */
3695 /* Initialize global data structures. */
3696 see_initialize_data_structures ();
3698 /* Phase 1: Propagate extensions to uses. */
3699 cont
= see_propagate_extensions_to_uses ();
3705 /* Phase 2: Merge and eliminate locally redundant extensions. */
3706 see_merge_and_eliminate_extensions ();
3708 /* Phase 3: Eliminate globally redundant extensions. */
3711 /* Phase 4: Commit changes to the insn stream. */
3712 see_commit_changes ();
3716 /* For debug purpose only. */
3717 fprintf (dump_file
, "see_pre_extension_hash:\n");
3718 htab_traverse (see_pre_extension_hash
, see_print_pre_extension_expr
,
3721 for (i
= 0; i
< last_bb
; i
++)
3723 if (see_bb_hash_ar
[i
])
3724 /* Traverse over all the references in the basic block in
3728 "Searching register properties in bb %d\n", i
);
3729 htab_traverse (see_bb_hash_ar
[i
],
3730 see_print_register_properties
, NULL
);
3736 /* Free global data structures. */
3737 see_free_data_structures ();
3742 gate_handle_see (void)
3744 return optimize
> 1 && flag_see
;
3748 rest_of_handle_see (void)
3750 int no_new_pseudos_bcp
= no_new_pseudos
;
3754 no_new_pseudos
= no_new_pseudos_bcp
;
3756 delete_trivially_dead_insns (get_insns (), max_reg_num ());
3757 update_life_info_in_dirty_blocks (UPDATE_LIFE_GLOBAL_RM_NOTES
,
3758 (PROP_DEATH_NOTES
));
3759 cleanup_cfg (CLEANUP_EXPENSIVE
);
3760 reg_scan (get_insns (), max_reg_num ());
3765 struct tree_opt_pass pass_see
=
3768 gate_handle_see
, /* gate */
3769 rest_of_handle_see
, /* execute */
3772 0, /* static_pass_number */
3774 0, /* properties_required */
3775 0, /* properties_provided */
3776 0, /* properties_destroyed */
3777 0, /* todo_flags_start */
3778 TODO_dump_func
, /* todo_flags_finish */