* team.c (gomp_team_end): Free team immediately if it has
[official-gcc.git] / gcc / see.c
blob3dfaa41245f8c0eee3ab59d81a235a9045e02666
1 /* Sign extension elimination optimization for GNU compiler.
2 Copyright (C) 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
3 Contributed by Leehod Baruch <leehod@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 -Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>.
21 Problem description:
22 --------------------
23 In order to support 32bit computations on a 64bit machine, sign
24 extension instructions are generated to ensure the correctness of
25 the computation.
26 A possible policy (as currently implemented) is to generate a sign
27 extension right after each 32bit computation.
28 Depending on the instruction set of the architecture, some of these
29 sign extension instructions may be redundant.
30 There are two cases in which the extension may be redundant:
32 Case1:
33 The instruction that uses the 64bit operands that are sign
34 extended has a dual mode that works with 32bit operands.
35 For example:
37 int32 a, b;
39 a = .... --> a = ....
40 a = sign extend a -->
41 b = .... --> b = ....
42 b = sign extend a -->
43 -->
44 cmpd a, b --> cmpw a, b //half word compare
46 Case2:
47 The instruction that defines the 64bit operand (which is later sign
48 extended) has a dual mode that defines and sign-extends simultaneously
49 a 32bit operand. For example:
51 int32 a;
53 ld a --> lwa a // load half word and sign extend
54 a = sign extend a -->
55 -->
56 return a --> return a
59 General idea for solution:
60 --------------------------
61 First, try to merge the sign extension with the instruction that
62 defines the source of the extension and (separately) with the
63 instructions that uses the extended result. By doing this, both cases
64 of redundancies (as described above) will be eliminated.
66 Then, use partial redundancy elimination to place the non redundant
67 ones at optimal placements.
70 Implementation by example:
71 --------------------------
72 Note: The instruction stream is not changed till the last phase.
74 Phase 0: Initial code, as currently generated by gcc.
76 def1 def3
77 se1 def2 se3
78 | \ | / |
79 | \ | / |
80 | \ | / |
81 | \ | / |
82 | \ | / |
83 | \|/ |
84 use1 use2 use3
85 use4
86 def1 + se1:
87 set ((reg:SI 10) (..def1rhs..))
88 set ((reg:DI 100) (sign_extend:DI (reg:SI 10)))
90 def2:
91 set ((reg:DI 100) (const_int 7))
93 def3 + se3:
94 set ((reg:SI 20) (..def3rhs..))
95 set ((reg:DI 100) (sign_extend:DI (reg:SI 20)))
97 use1:
98 set ((reg:CC...) (compare:CC (reg:DI 100) (...)))
100 use2, use3, use4:
101 set ((...) (reg:DI 100))
103 Phase 1: Propagate extensions to uses.
105 def1 def3
106 se1 def2 se3
107 | \ | / |
108 | \ | / |
109 | \ | / |
110 | \ | / |
111 | \ | / |
112 | \|/ |
113 se se se
114 use1 use2 use3
116 use4
118 From here, all of the subregs are lowpart !
120 def1, def2, def3: No change.
122 use1:
123 set ((reg:DI 100) (sign_extend:DI ((subreg:SI (reg:DI 100)))))
124 set ((reg:CC...) (compare:CC (reg:DI 100) (...)))
126 use2, use3, use4:
127 set ((reg:DI 100) (sign_extend:DI ((subreg:SI (reg:DI 100)))))
128 set ((...) (reg:DI 100))
131 Phase 2: Merge and eliminate locally redundant extensions.
134 *def1 def2 *def3
135 [se removed] se se3
136 | \ | / |
137 | \ | / |
138 | \ | / |
139 | \ | / |
140 | \ | / |
141 | \|/ |
142 [se removed] se se
143 *use1 use2 use3
144 [se removed]
145 use4
147 The instructions that were changed at this phase are marked with
148 asterisk.
150 *def1: Merge failed.
151 Remove the sign extension instruction, modify def1 and
152 insert a move instruction to assure to correctness of the code.
153 set ((subreg:SI (reg:DI 100)) (..def1rhs..))
154 set ((reg:SI 10) (subreg:SI (reg:DI 100)))
156 def2 + se: There is no need for merge.
157 Def2 is not changed but a sign extension instruction is
158 created.
159 set ((reg:DI 100) (const_int 7))
160 set ((reg:DI 100) (sign_extend:DI ((subreg:SI (reg:DI 100)))))
162 *def3 + se3: Merge succeeded.
163 set ((reg:DI 100) (sign_extend:DI (..def3rhs..)))
164 set ((reg:SI 20) (reg:DI 100))
165 set ((reg:DI 100) (sign_extend:DI (reg:SI 20)))
166 (The extension instruction is the original one).
168 *use1: Merge succeeded. Remove the sign extension instruction.
169 set ((reg:CC...)
170 (compare:CC (subreg:SI (reg:DI 100)) (...)))
172 use2, use3: Merge failed. No change.
174 use4: The extension is locally redundant, therefore it is eliminated
175 at this point.
178 Phase 3: Eliminate globally redundant extensions.
180 Following the LCM output:
182 def1 def2 def3
183 se se3
184 | \ | / |
185 | \ | / |
186 | se | / |
187 | \ | / |
188 | \ | / |
189 | \|/ |
190 [ses removed]
191 use1 use2 use3
192 use4
195 set ((reg:DI 100) (sign_extend:DI ((subreg:SI (reg:DI 100)))))
197 se3:
198 set ((reg:DI 100) (sign_extend:DI (reg:SI 20)))
201 Phase 4: Commit changes to the insn stream.
204 def1 def3 *def1 def2 *def3
205 se1 def2 se3 [se removed] [se removed]
206 | \ | / | | \ | / |
207 | \ | / | ------> | \ | / |
208 | \ | / | ------> | se | / |
209 | \ | / | | \ | / |
210 | \ | / | | \ | / |
211 | \|/ | | \|/ |
212 use1 use2 use3 *use1 use2 use3
213 use4 use4
215 The instructions that were changed during the whole optimization are
216 marked with asterisk.
218 The result:
220 def1 + se1:
221 [ set ((reg:SI 10) (..def1rhs..)) ] - Deleted
222 [ set ((reg:DI 100) (sign_extend:DI (reg:SI 10))) ] - Deleted
223 set ((subreg:SI (reg:DI 100)) (..def3rhs..)) - Inserted
224 set ((reg:SI 10) (subreg:SI (reg:DI 100))) - Inserted
226 def2:
227 set ((reg:DI 100) (const_int 7)) - No change
229 def3 + se3:
230 [ set ((reg:SI 20) (..def3rhs..)) ] - Deleted
231 [ set ((reg:DI 100) (sign_extend:DI (reg:SI 20))) ] - Deleted
232 set ((reg:DI 100) (sign_extend:DI (..def3rhs..))) - Inserted
233 set ((reg:SI 20) (reg:DI 100)) - Inserted
235 use1:
236 [ set ((reg:CC...) (compare:CC (reg:DI 100) (...))) ] - Deleted
237 set ((reg:CC...) - Inserted
238 (compare:CC (subreg:SI (reg:DI 100)) (...)))
240 use2, use3, use4:
241 set ((...) (reg:DI 100)) - No change
243 se: - Inserted
244 set ((reg:DI 100) (sign_extend:DI ((subreg:SI (reg:DI 100)))))
246 Note: Most of the simple move instructions that were inserted will be
247 trivially dead and therefore eliminated.
249 The implementation outline:
250 ---------------------------
251 Some definitions:
252 A web is RELEVANT if at the end of phase 1, his leader's
253 relevancy is {ZERO, SIGN}_EXTENDED_DEF. The source_mode of
254 the web is the source_mode of his leader.
255 A definition is a candidate for the optimization if it is part
256 of a RELEVANT web and his local source_mode is not narrower
257 then the source_mode of its web.
258 A use is a candidate for the optimization if it is part of a
259 RELEVANT web.
260 A simple explicit extension is a single set instruction that
261 extends a register (or a subregister) to a register (or
262 subregister).
263 A complex explicit extension is an explicit extension instruction
264 that is not simple.
265 A def extension is a simple explicit extension that is
266 also a candidate for the optimization. This extension is part
267 of the instruction stream, it is not generated by this
268 optimization.
269 A use extension is a simple explicit extension that is generated
270 and stored for candidate use during this optimization. It is
271 not emitted to the instruction stream till the last phase of
272 the optimization.
273 A reference is an instruction that satisfy at least on of these
274 criteria:
275 - It contains a definition with EXTENDED_DEF relevancy in a RELEVANT web.
276 - It is followed by a def extension.
277 - It contains a candidate use.
279 Phase 1: Propagate extensions to uses.
280 In this phase, we find candidate extensions for the optimization
281 and we generate (but not emit) proper extensions "right before the
282 uses".
284 a. Build a DF object.
285 b. Traverse over all the instructions that contains a definition
286 and set their local relevancy and local source_mode like this:
287 - If the instruction is a simple explicit extension instruction,
288 mark it as {ZERO, SIGN}_EXTENDED_DEF according to the extension
289 type and mark its source_mode to be the mode of the quantity
290 that is been extended.
291 - Otherwise, If the instruction has an implicit extension,
292 which means that its high part is an extension of its low part,
293 or if it is a complicated explicit extension, mark it as
294 EXTENDED_DEF and set its source_mode to be the narrowest
295 mode that is been extended in the instruction.
296 c. Traverse over all the instructions that contains a use and set
297 their local relevancy to RELEVANT_USE (except for few corner
298 cases).
299 d. Produce the web. During union of two entries, update the
300 relevancy and source_mode of the leader. There are two major
301 guide lines for this update:
302 - If one of the entries is NOT_RELEVANT, mark the leader
303 NOT_RELEVANT.
304 - If one is ZERO_EXTENDED_DEF and the other is SIGN_EXTENDED_DEF
305 (or vice versa) mark the leader as NOT_RELEVANT. We don't
306 handle this kind of mixed webs.
307 (For more details about this update process,
308 see see_update_leader_extra_info ()).
309 e. Generate uses extensions according to the relevancy and
310 source_mode of the webs.
312 Phase 2: Merge and eliminate locally redundant extensions.
313 In this phase, we try to merge def extensions and use
314 extensions with their references, and eliminate redundant extensions
315 in the same basic block.
317 Traverse over all the references. Do this in basic block number and
318 luid number forward order.
319 For each reference do:
320 a. Peephole optimization - try to merge it with all its
321 def extensions and use extensions in the following
322 order:
323 - Try to merge only the def extensions, one by one.
324 - Try to merge only the use extensions, one by one.
325 - Try to merge any couple of use extensions simultaneously.
326 - Try to merge any def extension with one or two uses
327 extensions simultaneously.
328 b. Handle each EXTENDED_DEF in it as if it was already merged with
329 an extension.
331 During the merge process we save the following data for each
332 register in each basic block:
333 a. The first instruction that defines the register in the basic
334 block.
335 b. The last instruction that defines the register in the basic
336 block.
337 c. The first extension of this register before the first
338 instruction that defines it in the basic block.
339 c. The first extension of this register after the last
340 instruction that defines it in the basic block.
341 This data will help us eliminate (or more precisely, not generate)
342 locally redundant extensions, and will be useful in the next stage.
344 While merging extensions with their reference there are 4 possible
345 situations:
346 a. A use extension was merged with the reference:
347 Delete the extension instruction and save the merged reference
348 for phase 4. (For details, see see_use_extension_merged ())
349 b. A use extension failed to be merged with the reference:
350 If there is already such an extension in the same basic block
351 and it is not dead at this point, delete the unmerged extension
352 (it is locally redundant), otherwise properly update the above
353 basic block data.
354 (For details, see see_merge_one_use_extension ())
355 c. A def extension was merged with the reference:
356 Mark this extension as a merged_def extension and properly
357 update the above basic block data.
358 (For details, see see_merge_one_def_extension ())
359 d. A def extension failed to be merged with the reference:
360 Replace the definition of the NARROWmode register in the
361 reference with the proper subreg of WIDEmode register and save
362 the result as a merged reference. Also, properly update the
363 the above basic block data.
364 (For details, see see_def_extension_not_merged ())
366 Phase 3: Eliminate globally redundant extensions.
367 In this phase, we set the bit vectors input of the edge based LCM
368 using the recorded data on the registers in each basic block.
369 We also save pointers for all the anticipatable and available
370 occurrences of the relevant extensions. Then we run the LCM.
372 a. Initialize the comp, antloc, kill bit vectors to zero and the
373 transp bit vector to ones.
375 b. Traverse over all the references. Do this in basic block number
376 and luid number forward order. For each reference:
377 - Go over all its use extensions. For each such extension -
378 If it is not dead from the beginning of the basic block SET
379 the antloc bit of the current extension in the current
380 basic block bits.
381 If it is not dead till the end of the basic block SET the
382 comp bit of the current extension in the current basic
383 block bits.
384 - Go over all its def extensions that were merged with
385 it. For each such extension -
386 If it is not dead till the end of the basic block SET the
387 comp bit of the current extension in the current basic
388 block bits.
389 RESET the proper transp and kill bits.
390 - Go over all its def extensions that were not merged
391 with it. For each such extension -
392 RESET the transp bit and SET the kill bit of the current
393 extension in the current basic block bits.
395 c. Run the edge based LCM.
397 Phase 4: Commit changes to the insn stream.
398 This is the only phase that actually changes the instruction stream.
399 Up to this point the optimization could be aborted at any time.
400 Here we insert extensions at their best placements and delete the
401 redundant ones according to the output of the LCM. We also replace
402 some of the instructions according to the second phase merges results.
404 a. Use the pre_delete_map (from the output of the LCM) in order to
405 delete redundant extensions. This will prevent them from been
406 emitted in the first place.
408 b. Insert extensions on edges where needed according to
409 pre_insert_map and edge_list (from the output of the LCM).
411 c. For each reference do-
412 - Emit all the uses extensions that were not deleted until now,
413 right before the reference.
414 - Delete all the merged and unmerged def extensions from
415 the instruction stream.
416 - Replace the reference with the merged one, if exist.
418 The implementation consists of four data structures:
419 - Data structure I
420 Purpose: To handle the relevancy of the uses, definitions and webs.
421 Relevant structures: web_entry (from df.h), see_entry_extra_info.
422 Details: This is a disjoint-set data structure. Most of its functions are
423 implemented in web.c. Each definition and use in the code are
424 elements. A web_entry structure is allocated for each element to
425 hold the element's relevancy and source_mode. The union rules are
426 defined in see_update_leader_extra_info ().
427 - Data structure II
428 Purpose: To store references and their extensions (uses and defs)
429 and to enable traverse over these references according to basic
430 block order.
431 Relevant structure: see_ref_s.
432 Details: This data structure consists of an array of splay trees. One splay
433 tree for each basic block. The splay tree nodes are references and
434 the keys are the luids of the references.
435 A see_ref_s structure is allocated for each reference. It holds the
436 reference itself, its def and uses extensions and later the merged
437 version of the reference.
438 Using this data structure we can traverse over all the references of
439 a basic block and their extensions in forward order.
440 - Data structure III.
441 Purpose: To store local properties of registers for each basic block.
442 This data will later help us build the LCM sbitmap_vectors
443 input.
444 Relevant structure: see_register_properties.
445 Details: This data structure consists of an array of hash tables. One hash
446 for each basic block. The hash node are a register properties
447 and the keys are the numbers of the registers.
448 A see_register_properties structure is allocated for each register
449 that we might be interested in its properties.
450 Using this data structure we can easily find the properties of a
451 register in a specific basic block. This is necessary for locally
452 redundancy elimination and for setting up the LCM input.
453 - Data structure IV.
454 Purpose: To store the extensions that are candidate for PRE and their
455 anticipatable and available occurrences.
456 Relevant structure: see_occr, see_pre_extension_expr.
457 Details: This data structure is a hash tables. Its nodes are the extensions
458 that are candidate for PRE.
459 A see_pre_extension_expr structure is allocated for each candidate
460 extension. It holds a copy of the extension and a linked list of all
461 the anticipatable and available occurrences of it.
462 We use this data structure when we read the output of the LCM. */
464 #include "config.h"
465 #include "system.h"
466 #include "coretypes.h"
467 #include "tm.h"
469 #include "obstack.h"
470 #include "rtl.h"
471 #include "output.h"
472 #include "df.h"
473 #include "insn-config.h"
474 #include "recog.h"
475 #include "expr.h"
476 #include "splay-tree.h"
477 #include "hashtab.h"
478 #include "regs.h"
479 #include "timevar.h"
480 #include "tree-pass.h"
481 #include "dce.h"
483 /* Used to classify defs and uses according to relevancy. */
484 enum entry_type {
485 NOT_RELEVANT,
486 SIGN_EXTENDED_DEF,
487 ZERO_EXTENDED_DEF,
488 EXTENDED_DEF,
489 RELEVANT_USE
492 /* Used to classify extensions in relevant webs. */
493 enum extension_type {
494 DEF_EXTENSION,
495 EXPLICIT_DEF_EXTENSION,
496 IMPLICIT_DEF_EXTENSION,
497 USE_EXTENSION
500 /* Global data structures and flags. */
502 /* This structure will be assigned for each web_entry structure (defined
503 in df.h). It is placed in the extra_info field of a web_entry and holds the
504 relevancy and source mode of the web_entry. */
506 struct see_entry_extra_info
508 /* The relevancy of the ref. */
509 enum entry_type relevancy;
510 /* The relevancy of the ref.
511 This field is updated only once - when this structure is created. */
512 enum entry_type local_relevancy;
513 /* The source register mode. */
514 enum machine_mode source_mode;
515 /* This field is used only if the relevancy is ZERO/SIGN_EXTENDED_DEF.
516 It is updated only once when this structure is created. */
517 enum machine_mode local_source_mode;
518 /* This field is used only if the relevancy is EXTENDED_DEF.
519 It holds the narrowest mode that is sign extended. */
520 enum machine_mode source_mode_signed;
521 /* This field is used only if the relevancy is EXTENDED_DEF.
522 It holds the narrowest mode that is zero extended. */
523 enum machine_mode source_mode_unsigned;
526 /* There is one such structure for every reference. It stores the reference
527 itself as well as its extensions (uses and definitions).
528 Used as the value in splay_tree see_bb_splay_ar[]. */
529 struct see_ref_s
531 /* The luid of the insn. */
532 unsigned int luid;
533 /* The insn of the ref. */
534 rtx insn;
535 /* The merged insn that was formed from the reference's insn and extensions.
536 If all merges failed, it remains NULL. */
537 rtx merged_insn;
538 /* The def extensions of the reference that were not merged with
539 it. */
540 htab_t unmerged_def_se_hash;
541 /* The def extensions of the reference that were merged with
542 it. Implicit extensions of the reference will be stored here too. */
543 htab_t merged_def_se_hash;
544 /* The uses extensions of reference. */
545 htab_t use_se_hash;
548 /* There is one such structure for every relevant extended register in a
549 specific basic block. This data will help us build the LCM sbitmap_vectors
550 input. */
551 struct see_register_properties
553 /* The register number. */
554 unsigned int regno;
555 /* The last luid of the reference that defines this register in this basic
556 block. */
557 int last_def;
558 /* The luid of the reference that has the first extension of this register
559 that appears before any definition in this basic block. */
560 int first_se_before_any_def;
561 /* The luid of the reference that has the first extension of this register
562 that appears after the last definition in this basic block. */
563 int first_se_after_last_def;
566 /* Occurrence of an expression.
567 There must be at most one available occurrence and at most one anticipatable
568 occurrence per basic block. */
569 struct see_occr
571 /* Next occurrence of this expression. */
572 struct see_occr *next;
573 /* The insn that computes the expression. */
574 rtx insn;
575 int block_num;
578 /* There is one such structure for every relevant extension expression.
579 It holds a copy of this extension instruction as well as a linked lists of
580 pointers to all the antic and avail occurrences of it. */
581 struct see_pre_extension_expr
583 /* A copy of the extension instruction. */
584 rtx se_insn;
585 /* Index in the available expression bitmaps. */
586 int bitmap_index;
587 /* List of anticipatable occurrences in basic blocks in the function.
588 An "anticipatable occurrence" is the first occurrence in the basic block,
589 the operands are not modified in the basic block prior to the occurrence
590 and the output is not used between the start of the block and the
591 occurrence. */
592 struct see_occr *antic_occr;
593 /* List of available occurrence in basic blocks in the function.
594 An "available occurrence" is the last occurrence in the basic block and
595 the operands are not modified by following statements in the basic block
596 [including this insn]. */
597 struct see_occr *avail_occr;
600 /* Helper structure for the note_uses and see_replace_src functions. */
601 struct see_replace_data
603 rtx from;
604 rtx to;
607 /* Helper structure for the note_uses and see_mentioned_reg functions. */
608 struct see_mentioned_reg_data
610 rtx reg;
611 bool mentioned;
614 /* An array of web_entries. The i'th definition in the df object is associated
615 with def_entry[i] */
616 static struct web_entry *def_entry = NULL;
617 /* An array of web_entries. The i'th use in the df object is associated with
618 use_entry[i] */
619 static struct web_entry *use_entry = NULL;
620 /* Array of splay_trees.
621 see_bb_splay_ar[i] refers to the splay tree of the i'th basic block.
622 The splay tree will hold see_ref_s structures. The key is the luid
623 of the insn. This way we can traverse over the references of each basic
624 block in forward or backward order. */
625 static splay_tree *see_bb_splay_ar = NULL;
626 /* Array of hashes.
627 see_bb_hash_ar[i] refers to the hash of the i'th basic block.
628 The hash will hold see_register_properties structure. The key is regno. */
629 static htab_t *see_bb_hash_ar = NULL;
630 /* Hash table that holds a copy of all the extensions. The key is the right
631 hand side of the se_insn field. */
632 static htab_t see_pre_extension_hash = NULL;
634 /* Local LCM properties of expressions. */
635 /* Nonzero for expressions that are transparent in the block. */
636 static sbitmap *transp = NULL;
637 /* Nonzero for expressions that are computed (available) in the block. */
638 static sbitmap *comp = NULL;
639 /* Nonzero for expressions that are locally anticipatable in the block. */
640 static sbitmap *antloc = NULL;
641 /* Nonzero for expressions that are locally killed in the block. */
642 static sbitmap *ae_kill = NULL;
643 /* Nonzero for expressions which should be inserted on a specific edge. */
644 static sbitmap *pre_insert_map = NULL;
645 /* Nonzero for expressions which should be deleted in a specific block. */
646 static sbitmap *pre_delete_map = NULL;
647 /* Contains the edge_list returned by pre_edge_lcm. */
648 static struct edge_list *edge_list = NULL;
649 /* Records the last basic block at the beginning of the optimization. */
650 static int last_bb;
651 /* Records the number of uses at the beginning of the optimization. */
652 static unsigned int uses_num;
653 /* Records the number of definitions at the beginning of the optimization. */
654 static unsigned int defs_num;
656 #define ENTRY_EI(ENTRY) ((struct see_entry_extra_info *) (ENTRY)->extra_info)
658 /* Functions implementation. */
660 /* Verifies that EXTENSION's pattern is this:
662 set (reg/subreg reg1) (sign/zero_extend:WIDEmode (reg/subreg reg2))
664 If it doesn't have the expected pattern return NULL.
665 Otherwise, if RETURN_DEST_REG is set, return reg1 else return reg2. */
667 static rtx
668 see_get_extension_reg (rtx extension, bool return_dest_reg)
670 rtx set, rhs, lhs;
671 rtx reg1 = NULL;
672 rtx reg2 = NULL;
674 /* Parallel pattern for extension not supported for the moment. */
675 if (GET_CODE (PATTERN (extension)) == PARALLEL)
676 return NULL;
678 set = single_set (extension);
679 if (!set)
680 return NULL;
681 lhs = SET_DEST (set);
682 rhs = SET_SRC (set);
684 if (REG_P (lhs))
685 reg1 = lhs;
686 else if (REG_P (SUBREG_REG (lhs)))
687 reg1 = SUBREG_REG (lhs);
688 else
689 return NULL;
691 if (GET_CODE (rhs) != SIGN_EXTEND && GET_CODE (rhs) != ZERO_EXTEND)
692 return NULL;
694 rhs = XEXP (rhs, 0);
695 if (REG_P (rhs))
696 reg2 = rhs;
697 else if (REG_P (SUBREG_REG (rhs)))
698 reg2 = SUBREG_REG (rhs);
699 else
700 return NULL;
702 if (return_dest_reg)
703 return reg1;
704 return reg2;
707 /* Verifies that EXTENSION's pattern is this:
709 set (reg/subreg reg1) (sign/zero_extend: (...expr...)
711 If it doesn't have the expected pattern return UNKNOWN.
712 Otherwise, set SOURCE_MODE to be the mode of the extended expr and return
713 the rtx code of the extension. */
715 static enum rtx_code
716 see_get_extension_data (rtx extension, enum machine_mode *source_mode)
718 rtx rhs, lhs, set;
720 if (!extension || !INSN_P (extension))
721 return UNKNOWN;
723 /* Parallel pattern for extension not supported for the moment. */
724 if (GET_CODE (PATTERN (extension)) == PARALLEL)
725 return NOT_RELEVANT;
727 set = single_set (extension);
728 if (!set)
729 return NOT_RELEVANT;
730 rhs = SET_SRC (set);
731 lhs = SET_DEST (set);
733 /* Don't handle extensions to something other then register or
734 subregister. */
735 if (!REG_P (lhs) && GET_CODE (lhs) != SUBREG)
736 return UNKNOWN;
738 if (GET_CODE (rhs) != SIGN_EXTEND && GET_CODE (rhs) != ZERO_EXTEND)
739 return UNKNOWN;
741 if (!REG_P (XEXP (rhs, 0))
742 && !(GET_CODE (XEXP (rhs, 0)) == SUBREG
743 && REG_P (SUBREG_REG (XEXP (rhs, 0)))))
744 return UNKNOWN;
746 *source_mode = GET_MODE (XEXP (rhs, 0));
748 if (GET_CODE (rhs) == SIGN_EXTEND)
749 return SIGN_EXTEND;
750 return ZERO_EXTEND;
754 /* Generate instruction with the pattern:
755 set ((reg r) (sign/zero_extend (subreg:mode (reg r))))
756 (the register r on both sides of the set is the same register).
757 And recognize it.
758 If the recognition failed, this is very bad, return NULL (This will abort
759 the entire optimization).
760 Otherwise, return the generated instruction. */
762 static rtx
763 see_gen_normalized_extension (rtx reg, enum rtx_code extension_code,
764 enum machine_mode mode)
766 rtx subreg, insn;
767 rtx extension = NULL;
769 if (!reg
770 || !REG_P (reg)
771 || (extension_code != SIGN_EXTEND && extension_code != ZERO_EXTEND))
772 return NULL;
774 subreg = gen_lowpart_SUBREG (mode, reg);
775 if (extension_code == SIGN_EXTEND)
776 extension = gen_rtx_SIGN_EXTEND (GET_MODE (reg), subreg);
777 else
778 extension = gen_rtx_ZERO_EXTEND (GET_MODE (reg), subreg);
780 start_sequence ();
781 emit_insn (gen_rtx_SET (VOIDmode, reg, extension));
782 insn = get_insns ();
783 end_sequence ();
785 if (insn_invalid_p (insn))
786 /* Recognition failed, this is very bad for this optimization.
787 Abort the optimization. */
788 return NULL;
789 return insn;
792 /* Hashes and splay_trees related functions implementation. */
794 /* Helper functions for the pre_extension hash.
795 This kind of hash will hold see_pre_extension_expr structures.
797 The key is the right hand side of the se_insn field.
798 Note that the se_insn is an expression that looks like:
800 set ((reg:WIDEmode r1) (sign_extend:WIDEmode
801 (subreg:NARROWmode (reg:WIDEmode r2)))) */
803 /* Return TRUE if P1 has the same value in its rhs as P2.
804 Otherwise, return FALSE.
805 P1 and P2 are see_pre_extension_expr structures. */
807 static int
808 eq_descriptor_pre_extension (const void *p1, const void *p2)
810 const struct see_pre_extension_expr *const extension1 =
811 (const struct see_pre_extension_expr *) p1;
812 const struct see_pre_extension_expr *const extension2 =
813 (const struct see_pre_extension_expr *) p2;
814 rtx set1 = single_set (extension1->se_insn);
815 rtx set2 = single_set (extension2->se_insn);
816 rtx rhs1, rhs2;
818 gcc_assert (set1 && set2);
819 rhs1 = SET_SRC (set1);
820 rhs2 = SET_SRC (set2);
822 return rtx_equal_p (rhs1, rhs2);
826 /* P is a see_pre_extension_expr struct, use the RHS of the se_insn field.
827 Note that the RHS is an expression that looks like this:
828 (sign_extend:WIDEmode (subreg:NARROWmode (reg:WIDEmode r))) */
830 static hashval_t
831 hash_descriptor_pre_extension (const void *p)
833 const struct see_pre_extension_expr *const extension =
834 (const struct see_pre_extension_expr *) p;
835 rtx set = single_set (extension->se_insn);
836 rtx rhs;
838 gcc_assert (set);
839 rhs = SET_SRC (set);
841 return hash_rtx (rhs, GET_MODE (rhs), 0, NULL, 0);
845 /* Free the allocated memory of the current see_pre_extension_expr struct.
847 It frees the two linked list of the occurrences structures. */
849 static void
850 hash_del_pre_extension (void *p)
852 struct see_pre_extension_expr *const extension =
853 (struct see_pre_extension_expr *) p;
854 struct see_occr *curr_occr = extension->antic_occr;
855 struct see_occr *next_occr = NULL;
857 /* Free the linked list of the anticipatable occurrences. */
858 while (curr_occr)
860 next_occr = curr_occr->next;
861 free (curr_occr);
862 curr_occr = next_occr;
865 /* Free the linked list of the available occurrences. */
866 curr_occr = extension->avail_occr;
867 while (curr_occr)
869 next_occr = curr_occr->next;
870 free (curr_occr);
871 curr_occr = next_occr;
874 /* Free the see_pre_extension_expr structure itself. */
875 free (extension);
879 /* Helper functions for the register_properties hash.
880 This kind of hash will hold see_register_properties structures.
882 The value of the key is the regno field of the structure. */
884 /* Return TRUE if P1 has the same value in the regno field as P2.
885 Otherwise, return FALSE.
886 Where P1 and P2 are see_register_properties structures. */
888 static int
889 eq_descriptor_properties (const void *p1, const void *p2)
891 const struct see_register_properties *const curr_prop1 =
892 (const struct see_register_properties *) p1;
893 const struct see_register_properties *const curr_prop2 =
894 (const struct see_register_properties *) p2;
896 return curr_prop1->regno == curr_prop2->regno;
900 /* P is a see_register_properties struct, use the register number in the
901 regno field. */
903 static hashval_t
904 hash_descriptor_properties (const void *p)
906 const struct see_register_properties *const curr_prop =
907 (const struct see_register_properties *) p;
908 return curr_prop->regno;
912 /* Free the allocated memory of the current see_register_properties struct. */
913 static void
914 hash_del_properties (void *p)
916 struct see_register_properties *const curr_prop =
917 (struct see_register_properties *) p;
918 free (curr_prop);
922 /* Helper functions for an extension hash.
923 This kind of hash will hold insns that look like:
925 set ((reg:WIDEmode r1) (sign_extend:WIDEmode
926 (subreg:NARROWmode (reg:WIDEmode r2))))
928 set ((reg:WIDEmode r1) (sign_extend:WIDEmode (reg:NARROWmode r2)))
930 The value of the key is (REGNO (reg:WIDEmode r1))
931 It is possible to search this hash in two ways:
932 1. By a register rtx. The Value that is been compared to the keys is the
933 REGNO of it.
934 2. By an insn with the above pattern. The Value that is been compared to
935 the keys is the REGNO of the reg on the lhs. */
937 /* Return TRUE if P1 has the same value as P2. Otherwise, return FALSE.
938 Where P1 is an insn and P2 is an insn or a register. */
940 static int
941 eq_descriptor_extension (const void *p1, const void *p2)
943 const_rtx const insn = (const_rtx) p1;
944 const_rtx const element = (const_rtx) p2;
945 rtx set1 = single_set (insn);
946 rtx dest_reg1;
947 rtx set2 = NULL;
948 const_rtx dest_reg2 = NULL;
950 gcc_assert (set1 && element && (REG_P (element) || INSN_P (element)));
952 dest_reg1 = SET_DEST (set1);
954 if (INSN_P (element))
956 set2 = single_set (element);
957 dest_reg2 = SET_DEST (set2);
959 else
960 dest_reg2 = element;
962 return REGNO (dest_reg1) == REGNO (dest_reg2);
966 /* If P is an insn, use the register number of its lhs
967 otherwise, P is a register, use its number. */
969 static hashval_t
970 hash_descriptor_extension (const void *p)
972 const_rtx const r = (const_rtx) p;
973 rtx set, lhs;
975 if (r && REG_P (r))
976 return REGNO (r);
978 gcc_assert (r && INSN_P (r));
979 set = single_set (r);
980 gcc_assert (set);
981 lhs = SET_DEST (set);
982 return REGNO (lhs);
986 /* Helper function for a see_bb_splay_ar[i] splay tree.
987 It frees all the allocated memory of a struct see_ref_s pointer.
989 VALUE is the value of a splay tree node. */
991 static void
992 see_free_ref_s (splay_tree_value value)
994 struct see_ref_s *ref_s = (struct see_ref_s *)value;
996 if (ref_s->unmerged_def_se_hash)
997 htab_delete (ref_s->unmerged_def_se_hash);
998 if (ref_s->merged_def_se_hash)
999 htab_delete (ref_s->merged_def_se_hash);
1000 if (ref_s->use_se_hash)
1001 htab_delete (ref_s->use_se_hash);
1002 free (ref_s);
1006 /* Rest of the implementation. */
1008 /* Search the extension hash for a suitable entry for EXTENSION.
1009 TYPE is the type of EXTENSION (USE_EXTENSION or DEF_EXTENSION).
1011 If TYPE is DEF_EXTENSION we need to normalize EXTENSION before searching the
1012 extension hash.
1014 If a suitable entry was found, return the slot. Otherwise, store EXTENSION
1015 in the hash and return NULL. */
1017 static struct see_pre_extension_expr *
1018 see_seek_pre_extension_expr (rtx extension, enum extension_type type)
1020 struct see_pre_extension_expr **slot_pre_exp, temp_pre_exp;
1021 rtx dest_extension_reg = see_get_extension_reg (extension, 1);
1022 enum rtx_code extension_code;
1023 enum machine_mode source_extension_mode;
1025 if (type == DEF_EXTENSION)
1027 extension_code = see_get_extension_data (extension,
1028 &source_extension_mode);
1029 gcc_assert (extension_code != UNKNOWN);
1030 extension =
1031 see_gen_normalized_extension (dest_extension_reg, extension_code,
1032 source_extension_mode);
1034 temp_pre_exp.se_insn = extension;
1035 slot_pre_exp =
1036 (struct see_pre_extension_expr **) htab_find_slot (see_pre_extension_hash,
1037 &temp_pre_exp, INSERT);
1038 if (*slot_pre_exp == NULL)
1039 /* This is the first time this extension instruction is encountered. Store
1040 it in the hash. */
1042 (*slot_pre_exp) = XNEW (struct see_pre_extension_expr);
1043 (*slot_pre_exp)->se_insn = extension;
1044 (*slot_pre_exp)->bitmap_index =
1045 (htab_elements (see_pre_extension_hash) - 1);
1046 (*slot_pre_exp)->antic_occr = NULL;
1047 (*slot_pre_exp)->avail_occr = NULL;
1048 return NULL;
1050 return *slot_pre_exp;
1054 /* This function defines how to update the extra_info of the web_entry.
1056 FIRST is the pointer of the extra_info of the first web_entry.
1057 SECOND is the pointer of the extra_info of the second web_entry.
1058 The first web_entry will be the predecessor (leader) of the second web_entry
1059 after the union.
1061 Return true if FIRST and SECOND points to the same web entry structure and
1062 nothing is done. Otherwise, return false. */
1064 static bool
1065 see_update_leader_extra_info (struct web_entry *first, struct web_entry *second)
1067 struct see_entry_extra_info *first_ei, *second_ei;
1069 first = unionfind_root (first);
1070 second = unionfind_root (second);
1072 if (unionfind_union (first, second))
1073 return true;
1075 first_ei = (struct see_entry_extra_info *) first->extra_info;
1076 second_ei = (struct see_entry_extra_info *) second->extra_info;
1078 gcc_assert (first_ei && second_ei);
1080 if (second_ei->relevancy == NOT_RELEVANT)
1082 first_ei->relevancy = NOT_RELEVANT;
1083 return false;
1085 switch (first_ei->relevancy)
1087 case NOT_RELEVANT:
1088 break;
1089 case RELEVANT_USE:
1090 switch (second_ei->relevancy)
1092 case RELEVANT_USE:
1093 break;
1094 case EXTENDED_DEF:
1095 first_ei->relevancy = second_ei->relevancy;
1096 first_ei->source_mode_signed = second_ei->source_mode_signed;
1097 first_ei->source_mode_unsigned = second_ei->source_mode_unsigned;
1098 break;
1099 case SIGN_EXTENDED_DEF:
1100 case ZERO_EXTENDED_DEF:
1101 first_ei->relevancy = second_ei->relevancy;
1102 first_ei->source_mode = second_ei->source_mode;
1103 break;
1104 default:
1105 gcc_unreachable ();
1107 break;
1108 case SIGN_EXTENDED_DEF:
1109 switch (second_ei->relevancy)
1111 case SIGN_EXTENDED_DEF:
1112 /* The mode of the root should be the wider one in this case. */
1113 first_ei->source_mode =
1114 (first_ei->source_mode > second_ei->source_mode) ?
1115 first_ei->source_mode : second_ei->source_mode;
1116 break;
1117 case RELEVANT_USE:
1118 break;
1119 case ZERO_EXTENDED_DEF:
1120 /* Don't mix webs with zero extend and sign extend. */
1121 first_ei->relevancy = NOT_RELEVANT;
1122 break;
1123 case EXTENDED_DEF:
1124 if (second_ei->source_mode_signed == MAX_MACHINE_MODE)
1125 first_ei->relevancy = NOT_RELEVANT;
1126 else
1127 /* The mode of the root should be the wider one in this case. */
1128 first_ei->source_mode =
1129 (first_ei->source_mode > second_ei->source_mode_signed) ?
1130 first_ei->source_mode : second_ei->source_mode_signed;
1131 break;
1132 default:
1133 gcc_unreachable ();
1135 break;
1136 /* This case is similar to the previous one, with little changes. */
1137 case ZERO_EXTENDED_DEF:
1138 switch (second_ei->relevancy)
1140 case SIGN_EXTENDED_DEF:
1141 /* Don't mix webs with zero extend and sign extend. */
1142 first_ei->relevancy = NOT_RELEVANT;
1143 break;
1144 case RELEVANT_USE:
1145 break;
1146 case ZERO_EXTENDED_DEF:
1147 /* The mode of the root should be the wider one in this case. */
1148 first_ei->source_mode =
1149 (first_ei->source_mode > second_ei->source_mode) ?
1150 first_ei->source_mode : second_ei->source_mode;
1151 break;
1152 case EXTENDED_DEF:
1153 if (second_ei->source_mode_unsigned == MAX_MACHINE_MODE)
1154 first_ei->relevancy = NOT_RELEVANT;
1155 else
1156 /* The mode of the root should be the wider one in this case. */
1157 first_ei->source_mode =
1158 (first_ei->source_mode > second_ei->source_mode_unsigned) ?
1159 first_ei->source_mode : second_ei->source_mode_unsigned;
1160 break;
1161 default:
1162 gcc_unreachable ();
1164 break;
1165 case EXTENDED_DEF:
1166 if (first_ei->source_mode_signed != MAX_MACHINE_MODE
1167 && first_ei->source_mode_unsigned != MAX_MACHINE_MODE)
1169 switch (second_ei->relevancy)
1171 case SIGN_EXTENDED_DEF:
1172 first_ei->relevancy = SIGN_EXTENDED_DEF;
1173 first_ei->source_mode =
1174 (first_ei->source_mode_signed > second_ei->source_mode) ?
1175 first_ei->source_mode_signed : second_ei->source_mode;
1176 break;
1177 case RELEVANT_USE:
1178 break;
1179 case ZERO_EXTENDED_DEF:
1180 first_ei->relevancy = ZERO_EXTENDED_DEF;
1181 first_ei->source_mode =
1182 (first_ei->source_mode_unsigned > second_ei->source_mode) ?
1183 first_ei->source_mode_unsigned : second_ei->source_mode;
1184 break;
1185 case EXTENDED_DEF:
1186 if (second_ei->source_mode_unsigned != MAX_MACHINE_MODE)
1187 first_ei->source_mode_unsigned =
1188 (first_ei->source_mode_unsigned >
1189 second_ei->source_mode_unsigned) ?
1190 first_ei->source_mode_unsigned :
1191 second_ei->source_mode_unsigned;
1192 if (second_ei->source_mode_signed != MAX_MACHINE_MODE)
1193 first_ei->source_mode_signed =
1194 (first_ei->source_mode_signed >
1195 second_ei->source_mode_signed) ?
1196 first_ei->source_mode_signed : second_ei->source_mode_signed;
1197 break;
1198 default:
1199 gcc_unreachable ();
1202 else if (first_ei->source_mode_signed == MAX_MACHINE_MODE)
1204 gcc_assert (first_ei->source_mode_unsigned != MAX_MACHINE_MODE);
1205 switch (second_ei->relevancy)
1207 case SIGN_EXTENDED_DEF:
1208 first_ei->relevancy = NOT_RELEVANT;
1209 break;
1210 case RELEVANT_USE:
1211 break;
1212 case ZERO_EXTENDED_DEF:
1213 first_ei->relevancy = ZERO_EXTENDED_DEF;
1214 first_ei->source_mode =
1215 (first_ei->source_mode_unsigned > second_ei->source_mode) ?
1216 first_ei->source_mode_unsigned : second_ei->source_mode;
1217 break;
1218 case EXTENDED_DEF:
1219 if (second_ei->source_mode_unsigned == MAX_MACHINE_MODE)
1220 first_ei->relevancy = NOT_RELEVANT;
1221 else
1222 first_ei->source_mode_unsigned =
1223 (first_ei->source_mode_unsigned >
1224 second_ei->source_mode_unsigned) ?
1225 first_ei->source_mode_unsigned :
1226 second_ei->source_mode_unsigned;
1227 break;
1228 default:
1229 gcc_unreachable ();
1232 else
1234 gcc_assert (first_ei->source_mode_unsigned == MAX_MACHINE_MODE);
1235 gcc_assert (first_ei->source_mode_signed != MAX_MACHINE_MODE);
1236 switch (second_ei->relevancy)
1238 case SIGN_EXTENDED_DEF:
1239 first_ei->relevancy = SIGN_EXTENDED_DEF;
1240 first_ei->source_mode =
1241 (first_ei->source_mode_signed > second_ei->source_mode) ?
1242 first_ei->source_mode_signed : second_ei->source_mode;
1243 break;
1244 case RELEVANT_USE:
1245 break;
1246 case ZERO_EXTENDED_DEF:
1247 first_ei->relevancy = NOT_RELEVANT;
1248 break;
1249 case EXTENDED_DEF:
1250 if (second_ei->source_mode_signed == MAX_MACHINE_MODE)
1251 first_ei->relevancy = NOT_RELEVANT;
1252 else
1253 first_ei->source_mode_signed =
1254 (first_ei->source_mode_signed >
1255 second_ei->source_mode_signed) ?
1256 first_ei->source_mode_signed : second_ei->source_mode_signed;
1257 break;
1258 default:
1259 gcc_unreachable ();
1262 break;
1263 default:
1264 /* Unknown pattern type. */
1265 gcc_unreachable ();
1268 return false;
1272 /* Free global data structures. */
1274 static void
1275 see_free_data_structures (void)
1277 int i;
1278 unsigned int j;
1280 /* Free the bitmap vectors. */
1281 if (transp)
1283 sbitmap_vector_free (transp);
1284 transp = NULL;
1285 sbitmap_vector_free (comp);
1286 comp = NULL;
1287 sbitmap_vector_free (antloc);
1288 antloc = NULL;
1289 sbitmap_vector_free (ae_kill);
1290 ae_kill = NULL;
1292 if (pre_insert_map)
1294 sbitmap_vector_free (pre_insert_map);
1295 pre_insert_map = NULL;
1297 if (pre_delete_map)
1299 sbitmap_vector_free (pre_delete_map);
1300 pre_delete_map = NULL;
1302 if (edge_list)
1304 free_edge_list (edge_list);
1305 edge_list = NULL;
1308 /* Free the extension hash. */
1309 htab_delete (see_pre_extension_hash);
1311 /* Free the array of hashes. */
1312 for (i = 0; i < last_bb; i++)
1313 if (see_bb_hash_ar[i])
1314 htab_delete (see_bb_hash_ar[i]);
1315 free (see_bb_hash_ar);
1317 /* Free the array of splay trees. */
1318 for (i = 0; i < last_bb; i++)
1319 if (see_bb_splay_ar[i])
1320 splay_tree_delete (see_bb_splay_ar[i]);
1321 free (see_bb_splay_ar);
1323 /* Free the array of web entries and their extra info field. */
1324 for (j = 0; j < defs_num; j++)
1325 free (def_entry[j].extra_info);
1326 free (def_entry);
1327 for (j = 0; j < uses_num; j++)
1328 free (use_entry[j].extra_info);
1329 free (use_entry);
1333 /* Initialize global data structures and variables. */
1335 static void
1336 see_initialize_data_structures (void)
1338 unsigned int max_reg = max_reg_num ();
1339 unsigned int i;
1341 /* Build the df object. */
1342 df_set_flags (DF_EQ_NOTES);
1343 df_chain_add_problem (DF_DU_CHAIN + DF_UD_CHAIN);
1344 df_analyze ();
1345 df_set_flags (DF_DEFER_INSN_RESCAN);
1347 if (dump_file)
1348 df_dump (dump_file);
1350 /* Record the last basic block at the beginning of the optimization. */
1351 last_bb = last_basic_block;
1353 /* Record the number of uses and defs at the beginning of the optimization. */
1354 uses_num = 0;
1355 defs_num = 0;
1356 for (i = 0; i < max_reg; i++)
1358 uses_num += DF_REG_USE_COUNT (i) + DF_REG_EQ_USE_COUNT (i);
1359 defs_num += DF_REG_DEF_COUNT (i);
1362 /* Allocate web entries array for the union-find data structure. */
1363 def_entry = XCNEWVEC (struct web_entry, defs_num);
1364 use_entry = XCNEWVEC (struct web_entry, uses_num);
1366 /* Allocate an array of splay trees.
1367 One splay tree for each basic block. */
1368 see_bb_splay_ar = XCNEWVEC (splay_tree, last_bb);
1370 /* Allocate an array of hashes.
1371 One hash for each basic block. */
1372 see_bb_hash_ar = XCNEWVEC (htab_t, last_bb);
1374 /* Allocate the extension hash. It will hold the extensions that we want
1375 to PRE. */
1376 see_pre_extension_hash = htab_create (10,
1377 hash_descriptor_pre_extension,
1378 eq_descriptor_pre_extension,
1379 hash_del_pre_extension);
1383 /* Function called by note_uses to check if a register is used in a
1384 subexpressions.
1386 X is a pointer to the subexpression and DATA is a pointer to a
1387 see_mentioned_reg_data structure that contains the register to look for and
1388 a place for the result. */
1390 static void
1391 see_mentioned_reg (rtx *x, void *data)
1393 struct see_mentioned_reg_data *d
1394 = (struct see_mentioned_reg_data *) data;
1396 if (reg_mentioned_p (d->reg, *x))
1397 d->mentioned = true;
1401 /* We don't want to merge a use extension with a reference if the extended
1402 register is used only in a simple move instruction. We also don't want to
1403 merge a def extension with a reference if the source register of the
1404 extension is defined only in a simple move in the reference.
1406 REF is the reference instruction.
1407 EXTENSION is the use extension or def extension instruction.
1408 TYPE is the type of the extension (use or def).
1410 Return true if the reference is complicated enough, so we would like to merge
1411 it with the extension. Otherwise, return false. */
1413 static bool
1414 see_want_to_be_merged_with_extension (rtx ref, rtx extension,
1415 enum extension_type type)
1417 rtx pat;
1418 rtx dest_extension_reg = see_get_extension_reg (extension, 1);
1419 rtx source_extension_reg = see_get_extension_reg (extension, 0);
1420 enum rtx_code code;
1421 struct see_mentioned_reg_data d;
1422 int i;
1424 pat = PATTERN (ref);
1425 code = GET_CODE (pat);
1427 if (code == PARALLEL)
1429 for (i = 0; i < XVECLEN (pat, 0); i++)
1431 rtx sub = XVECEXP (pat, 0, i);
1433 if (GET_CODE (sub) == SET
1434 && (REG_P (SET_DEST (sub))
1435 || (GET_CODE (SET_DEST (sub)) == SUBREG
1436 && REG_P (SUBREG_REG (SET_DEST (sub)))))
1437 && (REG_P (SET_SRC (sub))
1438 || (GET_CODE (SET_SRC (sub)) == SUBREG
1439 && REG_P (SUBREG_REG (SET_SRC (sub))))))
1441 /* This is a simple move SET. */
1442 if (type == DEF_EXTENSION
1443 && reg_mentioned_p (source_extension_reg, SET_DEST (sub)))
1444 return false;
1446 else
1448 /* This is not a simple move SET.
1449 Check if it uses the source of the extension. */
1450 if (type == USE_EXTENSION)
1452 d.reg = dest_extension_reg;
1453 d.mentioned = false;
1454 note_uses (&sub, see_mentioned_reg, &d);
1455 if (d.mentioned)
1456 return true;
1460 if (type == USE_EXTENSION)
1461 return false;
1463 else
1465 if (code == SET
1466 && (REG_P (SET_DEST (pat))
1467 || (GET_CODE (SET_DEST (pat)) == SUBREG
1468 && REG_P (SUBREG_REG (SET_DEST (pat)))))
1469 && (REG_P (SET_SRC (pat))
1470 || (GET_CODE (SET_SRC (pat)) == SUBREG
1471 && REG_P (SUBREG_REG (SET_SRC (pat))))))
1472 /* This is a simple move SET. */
1473 return false;
1476 return true;
1480 /* Print the register number of the current see_register_properties
1481 structure.
1483 This is a subroutine of see_main called via htab_traverse.
1484 SLOT contains the current see_register_properties structure pointer. */
1486 static int
1487 see_print_register_properties (void **slot, void *b ATTRIBUTE_UNUSED)
1489 const struct see_register_properties *const prop =
1490 (const struct see_register_properties *) *slot;
1492 gcc_assert (prop);
1493 fprintf (dump_file, "Property found for register %d\n", prop->regno);
1494 return 1;
1498 /* Print the extension instruction of the current see_register_properties
1499 structure.
1501 This is a subroutine of see_main called via htab_traverse.
1502 SLOT contains the current see_pre_extension_expr structure pointer. */
1504 static int
1505 see_print_pre_extension_expr (void **slot, void *b ATTRIBUTE_UNUSED)
1507 const struct see_pre_extension_expr *const pre_extension =
1508 (const struct see_pre_extension_expr *) *slot;
1510 gcc_assert (pre_extension
1511 && pre_extension->se_insn
1512 && INSN_P (pre_extension->se_insn));
1514 fprintf (dump_file, "Index %d for:\n", pre_extension->bitmap_index);
1515 print_rtl_single (dump_file, pre_extension->se_insn);
1517 return 1;
1521 /* Phase 4 implementation: Commit changes to the insn stream. */
1523 /* Delete the merged def extension.
1525 This is a subroutine of see_commit_ref_changes called via htab_traverse.
1527 SLOT contains the current def extension instruction.
1528 B is the see_ref_s structure pointer. */
1530 static int
1531 see_delete_merged_def_extension (void **slot, void *b ATTRIBUTE_UNUSED)
1533 rtx def_se = (rtx) *slot;
1535 if (dump_file)
1537 fprintf (dump_file, "Deleting merged def extension:\n");
1538 print_rtl_single (dump_file, def_se);
1541 if (INSN_DELETED_P (def_se))
1542 /* This def extension is an implicit one. No need to delete it since
1543 it is not in the insn stream. */
1544 return 1;
1546 delete_insn (def_se);
1547 return 1;
1551 /* Delete the unmerged def extension.
1553 This is a subroutine of see_commit_ref_changes called via htab_traverse.
1555 SLOT contains the current def extension instruction.
1556 B is the see_ref_s structure pointer. */
1558 static int
1559 see_delete_unmerged_def_extension (void **slot, void *b ATTRIBUTE_UNUSED)
1561 rtx def_se = (rtx) *slot;
1563 if (dump_file)
1565 fprintf (dump_file, "Deleting unmerged def extension:\n");
1566 print_rtl_single (dump_file, def_se);
1569 delete_insn (def_se);
1570 return 1;
1574 /* Emit the non-redundant use extension to the instruction stream.
1576 This is a subroutine of see_commit_ref_changes called via htab_traverse.
1578 SLOT contains the current use extension instruction.
1579 B is the see_ref_s structure pointer. */
1581 static int
1582 see_emit_use_extension (void **slot, void *b)
1584 rtx use_se = (rtx) *slot;
1585 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b;
1587 if (INSN_DELETED_P (use_se))
1588 /* This use extension was previously removed according to the lcm
1589 output. */
1590 return 1;
1592 if (dump_file)
1594 fprintf (dump_file, "Inserting use extension:\n");
1595 print_rtl_single (dump_file, use_se);
1598 add_insn_before (use_se, curr_ref_s->insn, NULL);
1600 return 1;
1604 /* For each relevant reference:
1605 a. Emit the non-redundant use extensions.
1606 b. Delete the def extensions.
1607 c. Replace the original reference with the merged one (if exists) and add the
1608 move instructions that were generated.
1610 This is a subroutine of see_commit_changes called via splay_tree_foreach.
1612 STN is the current node in the see_bb_splay_ar[i] splay tree. It holds a
1613 see_ref_s structure. */
1615 static int
1616 see_commit_ref_changes (splay_tree_node stn,
1617 void *data ATTRIBUTE_UNUSED)
1619 htab_t use_se_hash = ((struct see_ref_s *) (stn->value))->use_se_hash;
1620 htab_t unmerged_def_se_hash =
1621 ((struct see_ref_s *) (stn->value))->unmerged_def_se_hash;
1622 htab_t merged_def_se_hash =
1623 ((struct see_ref_s *) (stn->value))->merged_def_se_hash;
1624 rtx ref = ((struct see_ref_s *) (stn->value))->insn;
1625 rtx merged_ref = ((struct see_ref_s *) (stn->value))->merged_insn;
1627 /* Emit the non-redundant use extensions. */
1628 if (use_se_hash)
1629 htab_traverse_noresize (use_se_hash, see_emit_use_extension,
1630 (PTR) (stn->value));
1632 /* Delete the def extensions. */
1633 if (unmerged_def_se_hash)
1634 htab_traverse (unmerged_def_se_hash, see_delete_unmerged_def_extension,
1635 (PTR) (stn->value));
1637 if (merged_def_se_hash)
1638 htab_traverse (merged_def_se_hash, see_delete_merged_def_extension,
1639 (PTR) (stn->value));
1641 /* Replace the original reference with the merged one (if exists) and add the
1642 move instructions that were generated. */
1643 if (merged_ref && !INSN_DELETED_P (ref))
1645 if (dump_file)
1647 fprintf (dump_file, "Replacing orig reference:\n");
1648 print_rtl_single (dump_file, ref);
1649 fprintf (dump_file, "With merged reference:\n");
1650 print_rtl_single (dump_file, merged_ref);
1652 emit_insn_after (merged_ref, ref);
1653 delete_insn (ref);
1656 /* Continue to the next reference. */
1657 return 0;
1661 /* Insert partially redundant expressions on edges to make the expressions fully
1662 redundant.
1664 INDEX_MAP is a mapping of an index to an expression.
1665 Return true if an instruction was inserted on an edge.
1666 Otherwise, return false. */
1668 static bool
1669 see_pre_insert_extensions (struct see_pre_extension_expr **index_map)
1671 int num_edges = NUM_EDGES (edge_list);
1672 int set_size = pre_insert_map[0]->size;
1673 size_t pre_extension_num = htab_elements (see_pre_extension_hash);
1675 int did_insert = 0;
1676 int e;
1677 int i;
1678 int j;
1680 for (e = 0; e < num_edges; e++)
1682 int indx;
1683 basic_block bb = INDEX_EDGE_PRED_BB (edge_list, e);
1685 for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS)
1687 SBITMAP_ELT_TYPE insert = pre_insert_map[e]->elms[i];
1689 for (j = indx; insert && j < (int) pre_extension_num;
1690 j++, insert >>= 1)
1691 if (insert & 1)
1693 struct see_pre_extension_expr *expr = index_map[j];
1694 int idx = expr->bitmap_index;
1695 rtx se_insn = NULL;
1696 edge eg = INDEX_EDGE (edge_list, e);
1698 start_sequence ();
1699 emit_insn (copy_insn (PATTERN (expr->se_insn)));
1700 se_insn = get_insns ();
1701 end_sequence ();
1703 if (eg->flags & EDGE_ABNORMAL)
1705 rtx new_insn = NULL;
1707 new_insn = insert_insn_end_bb_new (se_insn, bb);
1708 gcc_assert (new_insn && INSN_P (new_insn));
1710 if (dump_file)
1712 fprintf (dump_file,
1713 "PRE: end of bb %d, insn %d, ",
1714 bb->index, INSN_UID (new_insn));
1715 fprintf (dump_file,
1716 "inserting expression %d\n", idx);
1719 else
1721 insert_insn_on_edge (se_insn, eg);
1723 if (dump_file)
1725 fprintf (dump_file, "PRE: edge (%d,%d), ",
1726 bb->index,
1727 INDEX_EDGE_SUCC_BB (edge_list, e)->index);
1728 fprintf (dump_file, "inserting expression %d\n", idx);
1731 did_insert = true;
1735 return did_insert;
1739 /* Since all the redundant extensions must be anticipatable, they must be a use
1740 extensions. Mark them as deleted. This will prevent them from been emitted
1741 in the first place.
1743 This is a subroutine of see_commit_changes called via htab_traverse.
1745 SLOT contains the current see_pre_extension_expr structure pointer. */
1747 static int
1748 see_pre_delete_extension (void **slot, void *b ATTRIBUTE_UNUSED)
1750 struct see_pre_extension_expr *const expr =
1751 (struct see_pre_extension_expr *) *slot;
1752 struct see_occr *occr;
1753 int indx = expr->bitmap_index;
1755 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
1757 if (TEST_BIT (pre_delete_map[occr->block_num], indx))
1759 /* Mark as deleted. */
1760 INSN_DELETED_P (occr->insn) = 1;
1761 if (dump_file)
1763 fprintf (dump_file,"Redundant extension deleted:\n");
1764 print_rtl_single (dump_file, occr->insn);
1768 return 1;
1772 /* Create the index_map mapping of an index to an expression.
1774 This is a subroutine of see_commit_changes called via htab_traverse.
1776 SLOT contains the current see_pre_extension_expr structure pointer.
1777 B a pointer to see_pre_extension_expr structure pointer. */
1779 static int
1780 see_map_extension (void **slot, void *b)
1782 struct see_pre_extension_expr *const expr =
1783 (struct see_pre_extension_expr *) *slot;
1784 struct see_pre_extension_expr **const index_map =
1785 (struct see_pre_extension_expr **) b;
1787 index_map[expr->bitmap_index] = expr;
1789 return 1;
1793 /* Phase 4 top level function.
1794 In this phase we finally change the instruction stream.
1795 Here we insert extensions at their best placements and delete the
1796 redundant ones according to the output of the LCM. We also replace
1797 some of the instructions according to phase 2 merges results. */
1799 static void
1800 see_commit_changes (void)
1802 struct see_pre_extension_expr **index_map;
1803 size_t pre_extension_num = htab_elements (see_pre_extension_hash);
1804 bool did_insert = false;
1805 int i;
1807 index_map = XCNEWVEC (struct see_pre_extension_expr *, pre_extension_num);
1809 if (dump_file)
1810 fprintf (dump_file,
1811 "* Phase 4: Commit changes to the insn stream. *\n");
1813 /* Produce a mapping of all the pre_extensions. */
1814 htab_traverse (see_pre_extension_hash, see_map_extension, (PTR) index_map);
1816 /* Delete redundant extension. This will prevent them from been emitted in
1817 the first place. */
1818 htab_traverse (see_pre_extension_hash, see_pre_delete_extension, NULL);
1820 /* Insert extensions on edges, according to the LCM result. */
1821 did_insert = see_pre_insert_extensions (index_map);
1823 if (did_insert)
1824 commit_edge_insertions ();
1826 /* Commit the rest of the changes. */
1827 for (i = 0; i < last_bb; i++)
1829 if (see_bb_splay_ar[i])
1831 /* Traverse over all the references in the basic block in forward
1832 order. */
1833 splay_tree_foreach (see_bb_splay_ar[i],
1834 see_commit_ref_changes, NULL);
1838 free (index_map);
1842 /* Phase 3 implementation: Eliminate globally redundant extensions. */
1844 /* Analyze the properties of a merged def extension for the LCM and record avail
1845 occurrences.
1847 This is a subroutine of see_analyze_ref_local_prop called
1848 via htab_traverse.
1850 SLOT contains the current def extension instruction.
1851 B is the see_ref_s structure pointer. */
1853 static int
1854 see_analyze_merged_def_local_prop (void **slot, void *b)
1856 rtx def_se = (rtx) *slot;
1857 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b;
1858 rtx ref = curr_ref_s->insn;
1859 struct see_pre_extension_expr *extension_expr;
1860 int indx;
1861 int bb_num = BLOCK_NUM (ref);
1862 htab_t curr_bb_hash;
1863 struct see_register_properties *curr_prop, **slot_prop;
1864 struct see_register_properties temp_prop;
1865 rtx dest_extension_reg = see_get_extension_reg (def_se, 1);
1866 struct see_occr *curr_occr = NULL;
1867 struct see_occr *tmp_occr = NULL;
1869 extension_expr = see_seek_pre_extension_expr (def_se, DEF_EXTENSION);
1870 /* The extension_expr must be found. */
1871 gcc_assert (extension_expr);
1873 curr_bb_hash = see_bb_hash_ar[bb_num];
1874 gcc_assert (curr_bb_hash);
1875 temp_prop.regno = REGNO (dest_extension_reg);
1876 slot_prop =
1877 (struct see_register_properties **) htab_find_slot (curr_bb_hash,
1878 &temp_prop, INSERT);
1879 curr_prop = *slot_prop;
1880 gcc_assert (curr_prop);
1882 indx = extension_expr->bitmap_index;
1884 /* Reset the transparency bit. */
1885 RESET_BIT (transp[bb_num], indx);
1886 /* Reset the killed bit. */
1887 RESET_BIT (ae_kill[bb_num], indx);
1889 if (curr_prop->first_se_after_last_def == DF_INSN_LUID (ref))
1891 /* Set the available bit. */
1892 SET_BIT (comp[bb_num], indx);
1893 /* Record the available occurrence. */
1894 curr_occr = XNEW (struct see_occr);
1895 curr_occr->next = NULL;
1896 curr_occr->insn = def_se;
1897 curr_occr->block_num = bb_num;
1898 tmp_occr = extension_expr->avail_occr;
1899 if (!tmp_occr)
1900 extension_expr->avail_occr = curr_occr;
1901 else
1903 while (tmp_occr->next)
1904 tmp_occr = tmp_occr->next;
1905 tmp_occr->next = curr_occr;
1909 return 1;
1913 /* Analyze the properties of a unmerged def extension for the LCM.
1915 This is a subroutine of see_analyze_ref_local_prop called
1916 via htab_traverse.
1918 SLOT contains the current def extension instruction.
1919 B is the see_ref_s structure pointer. */
1921 static int
1922 see_analyze_unmerged_def_local_prop (void **slot, void *b)
1924 rtx def_se = (rtx) *slot;
1925 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b;
1926 rtx ref = curr_ref_s->insn;
1927 struct see_pre_extension_expr *extension_expr;
1928 int indx;
1929 int bb_num = BLOCK_NUM (ref);
1930 htab_t curr_bb_hash;
1931 struct see_register_properties *curr_prop, **slot_prop;
1932 struct see_register_properties temp_prop;
1933 rtx dest_extension_reg = see_get_extension_reg (def_se, 1);
1935 extension_expr = see_seek_pre_extension_expr (def_se, DEF_EXTENSION);
1936 /* The extension_expr must be found. */
1937 gcc_assert (extension_expr);
1939 curr_bb_hash = see_bb_hash_ar[bb_num];
1940 gcc_assert (curr_bb_hash);
1941 temp_prop.regno = REGNO (dest_extension_reg);
1942 slot_prop =
1943 (struct see_register_properties **) htab_find_slot (curr_bb_hash,
1944 &temp_prop, INSERT);
1945 curr_prop = *slot_prop;
1946 gcc_assert (curr_prop);
1948 indx = extension_expr->bitmap_index;
1950 /* Reset the transparency bit. */
1951 RESET_BIT (transp[bb_num], indx);
1952 /* Set the killed bit. */
1953 SET_BIT (ae_kill[bb_num], indx);
1955 return 1;
1959 /* Analyze the properties of a use extension for the LCM and record any and
1960 avail occurrences.
1962 This is a subroutine of see_analyze_ref_local_prop called
1963 via htab_traverse.
1965 SLOT contains the current use extension instruction.
1966 B is the see_ref_s structure pointer. */
1968 static int
1969 see_analyze_use_local_prop (void **slot, void *b)
1971 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b;
1972 rtx use_se = (rtx) *slot;
1973 rtx ref = curr_ref_s->insn;
1974 rtx dest_extension_reg = see_get_extension_reg (use_se, 1);
1975 struct see_pre_extension_expr *extension_expr;
1976 struct see_register_properties *curr_prop, **slot_prop;
1977 struct see_register_properties temp_prop;
1978 struct see_occr *curr_occr = NULL;
1979 struct see_occr *tmp_occr = NULL;
1980 htab_t curr_bb_hash;
1981 int indx;
1982 int bb_num = BLOCK_NUM (ref);
1984 extension_expr = see_seek_pre_extension_expr (use_se, USE_EXTENSION);
1985 /* The extension_expr must be found. */
1986 gcc_assert (extension_expr);
1988 curr_bb_hash = see_bb_hash_ar[bb_num];
1989 gcc_assert (curr_bb_hash);
1990 temp_prop.regno = REGNO (dest_extension_reg);
1991 slot_prop =
1992 (struct see_register_properties **) htab_find_slot (curr_bb_hash,
1993 &temp_prop, INSERT);
1994 curr_prop = *slot_prop;
1995 gcc_assert (curr_prop);
1997 indx = extension_expr->bitmap_index;
1999 if (curr_prop->first_se_before_any_def == DF_INSN_LUID (ref))
2001 /* Set the anticipatable bit. */
2002 SET_BIT (antloc[bb_num], indx);
2003 /* Record the anticipatable occurrence. */
2004 curr_occr = XNEW (struct see_occr);
2005 curr_occr->next = NULL;
2006 curr_occr->insn = use_se;
2007 curr_occr->block_num = bb_num;
2008 tmp_occr = extension_expr->antic_occr;
2009 if (!tmp_occr)
2010 extension_expr->antic_occr = curr_occr;
2011 else
2013 while (tmp_occr->next)
2014 tmp_occr = tmp_occr->next;
2015 tmp_occr->next = curr_occr;
2017 if (curr_prop->last_def < 0)
2019 /* Set the available bit. */
2020 SET_BIT (comp[bb_num], indx);
2021 /* Record the available occurrence. */
2022 curr_occr = XNEW (struct see_occr);
2023 curr_occr->next = NULL;
2024 curr_occr->insn = use_se;
2025 curr_occr->block_num = bb_num;
2026 tmp_occr = extension_expr->avail_occr;
2027 if (!tmp_occr)
2028 extension_expr->avail_occr = curr_occr;
2029 else
2031 while (tmp_occr->next)
2032 tmp_occr = tmp_occr->next;
2033 tmp_occr->next = curr_occr;
2036 /* Note: there is no need to reset the killed bit since it must be zero at
2037 this point. */
2039 else if (curr_prop->first_se_after_last_def == DF_INSN_LUID (ref))
2041 /* Set the available bit. */
2042 SET_BIT (comp[bb_num], indx);
2043 /* Reset the killed bit. */
2044 RESET_BIT (ae_kill[bb_num], indx);
2045 /* Record the available occurrence. */
2046 curr_occr = XNEW (struct see_occr);
2047 curr_occr->next = NULL;
2048 curr_occr->insn = use_se;
2049 curr_occr->block_num = bb_num;
2050 tmp_occr = extension_expr->avail_occr;
2051 if (!tmp_occr)
2052 extension_expr->avail_occr = curr_occr;
2053 else
2055 while (tmp_occr->next)
2056 tmp_occr = tmp_occr->next;
2057 tmp_occr->next = curr_occr;
2060 return 1;
2064 /* Here we traverse over all the merged and unmerged extensions of the reference
2065 and analyze their properties for the LCM.
2067 This is a subroutine of see_execute_LCM called via splay_tree_foreach.
2069 STN is the current node in the see_bb_splay_ar[i] splay tree. It holds a
2070 see_ref_s structure. */
2072 static int
2073 see_analyze_ref_local_prop (splay_tree_node stn,
2074 void *data ATTRIBUTE_UNUSED)
2076 htab_t use_se_hash = ((struct see_ref_s *) (stn->value))->use_se_hash;
2077 htab_t unmerged_def_se_hash =
2078 ((struct see_ref_s *) (stn->value))->unmerged_def_se_hash;
2079 htab_t merged_def_se_hash =
2080 ((struct see_ref_s *) (stn->value))->merged_def_se_hash;
2082 /* Analyze use extensions that were not merged with the reference. */
2083 if (use_se_hash)
2084 htab_traverse_noresize (use_se_hash, see_analyze_use_local_prop,
2085 (PTR) (stn->value));
2087 /* Analyze def extensions that were not merged with the reference. */
2088 if (unmerged_def_se_hash)
2089 htab_traverse (unmerged_def_se_hash, see_analyze_unmerged_def_local_prop,
2090 (PTR) (stn->value));
2092 /* Analyze def extensions that were merged with the reference. */
2093 if (merged_def_se_hash)
2094 htab_traverse (merged_def_se_hash, see_analyze_merged_def_local_prop,
2095 (PTR) (stn->value));
2097 /* Continue to the next definition. */
2098 return 0;
2102 /* Phase 3 top level function.
2103 In this phase, we set the input bit vectors of the LCM according to data
2104 gathered in phase 2.
2105 Then we run the edge based LCM. */
2107 static void
2108 see_execute_LCM (void)
2110 size_t pre_extension_num = htab_elements (see_pre_extension_hash);
2111 int i = 0;
2113 if (dump_file)
2114 fprintf (dump_file,
2115 "* Phase 3: Eliminate globally redundant extensions. *\n");
2117 /* Initialize the global sbitmap vectors. */
2118 transp = sbitmap_vector_alloc (last_bb, pre_extension_num);
2119 comp = sbitmap_vector_alloc (last_bb, pre_extension_num);
2120 antloc = sbitmap_vector_alloc (last_bb, pre_extension_num);
2121 ae_kill = sbitmap_vector_alloc (last_bb, pre_extension_num);
2122 sbitmap_vector_ones (transp, last_bb);
2123 sbitmap_vector_zero (comp, last_bb);
2124 sbitmap_vector_zero (antloc, last_bb);
2125 sbitmap_vector_zero (ae_kill, last_bb);
2127 /* Traverse over all the splay trees of the basic blocks. */
2128 for (i = 0; i < last_bb; i++)
2130 if (see_bb_splay_ar[i])
2132 /* Traverse over all the references in the basic block in forward
2133 order. */
2134 splay_tree_foreach (see_bb_splay_ar[i],
2135 see_analyze_ref_local_prop, NULL);
2139 /* Add fake exit edges before running the lcm. */
2140 add_noreturn_fake_exit_edges ();
2142 /* Run the LCM. */
2143 edge_list = pre_edge_lcm (pre_extension_num, transp, comp, antloc,
2144 ae_kill, &pre_insert_map, &pre_delete_map);
2146 /* Remove the fake edges. */
2147 remove_fake_exit_edges ();
2151 /* Phase 2 implementation: Merge and eliminate locally redundant extensions. */
2153 /* In this function we set the register properties for the register that is
2154 defined and extended in the reference.
2155 The properties are defined in see_register_properties structure which is
2156 allocated per basic block and per register.
2157 Later the extension is inserted into the see_pre_extension_hash for the next
2158 phase of the optimization.
2160 This is a subroutine of see_handle_extensions_for_one_ref called
2161 via htab_traverse.
2163 SLOT contains the current def extension instruction.
2164 B is the see_ref_s structure pointer. */
2166 static int
2167 see_set_prop_merged_def (void **slot, void *b)
2169 rtx def_se = (rtx) *slot;
2170 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b;
2171 rtx insn = curr_ref_s->insn;
2172 rtx dest_extension_reg = see_get_extension_reg (def_se, 1);
2173 htab_t curr_bb_hash;
2174 struct see_register_properties *curr_prop = NULL;
2175 struct see_register_properties **slot_prop;
2176 struct see_register_properties temp_prop;
2177 int ref_luid = DF_INSN_LUID (insn);
2179 curr_bb_hash = see_bb_hash_ar[BLOCK_NUM (curr_ref_s->insn)];
2180 if (!curr_bb_hash)
2182 /* The hash doesn't exist yet. Create it. */
2183 curr_bb_hash = htab_create (10,
2184 hash_descriptor_properties,
2185 eq_descriptor_properties,
2186 hash_del_properties);
2187 see_bb_hash_ar[BLOCK_NUM (curr_ref_s->insn)] = curr_bb_hash;
2190 /* Find the right register properties in the right basic block. */
2191 temp_prop.regno = REGNO (dest_extension_reg);
2192 slot_prop =
2193 (struct see_register_properties **) htab_find_slot (curr_bb_hash,
2194 &temp_prop, INSERT);
2196 if (slot_prop && *slot_prop != NULL)
2198 /* Property already exists. */
2199 curr_prop = *slot_prop;
2200 gcc_assert (curr_prop->regno == REGNO (dest_extension_reg));
2202 curr_prop->last_def = ref_luid;
2203 curr_prop->first_se_after_last_def = ref_luid;
2205 else
2207 /* Property doesn't exist yet. */
2208 curr_prop = XNEW (struct see_register_properties);
2209 curr_prop->regno = REGNO (dest_extension_reg);
2210 curr_prop->last_def = ref_luid;
2211 curr_prop->first_se_before_any_def = -1;
2212 curr_prop->first_se_after_last_def = ref_luid;
2213 *slot_prop = curr_prop;
2216 /* Insert the def_se into see_pre_extension_hash if it isn't already
2217 there. */
2218 see_seek_pre_extension_expr (def_se, DEF_EXTENSION);
2220 return 1;
2224 /* In this function we set the register properties for the register that is
2225 defined but not extended in the reference.
2226 The properties are defined in see_register_properties structure which is
2227 allocated per basic block and per register.
2228 Later the extension is inserted into the see_pre_extension_hash for the next
2229 phase of the optimization.
2231 This is a subroutine of see_handle_extensions_for_one_ref called
2232 via htab_traverse.
2234 SLOT contains the current def extension instruction.
2235 B is the see_ref_s structure pointer. */
2237 static int
2238 see_set_prop_unmerged_def (void **slot, void *b)
2240 rtx def_se = (rtx) *slot;
2241 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b;
2242 rtx insn = curr_ref_s->insn;
2243 rtx dest_extension_reg = see_get_extension_reg (def_se, 1);
2244 htab_t curr_bb_hash;
2245 struct see_register_properties *curr_prop = NULL;
2246 struct see_register_properties **slot_prop;
2247 struct see_register_properties temp_prop;
2248 int ref_luid = DF_INSN_LUID (insn);
2250 curr_bb_hash = see_bb_hash_ar[BLOCK_NUM (curr_ref_s->insn)];
2251 if (!curr_bb_hash)
2253 /* The hash doesn't exist yet. Create it. */
2254 curr_bb_hash = htab_create (10,
2255 hash_descriptor_properties,
2256 eq_descriptor_properties,
2257 hash_del_properties);
2258 see_bb_hash_ar[BLOCK_NUM (curr_ref_s->insn)] = curr_bb_hash;
2261 /* Find the right register properties in the right basic block. */
2262 temp_prop.regno = REGNO (dest_extension_reg);
2263 slot_prop =
2264 (struct see_register_properties **) htab_find_slot (curr_bb_hash,
2265 &temp_prop, INSERT);
2267 if (slot_prop && *slot_prop != NULL)
2269 /* Property already exists. */
2270 curr_prop = *slot_prop;
2271 gcc_assert (curr_prop->regno == REGNO (dest_extension_reg));
2273 curr_prop->last_def = ref_luid;
2274 curr_prop->first_se_after_last_def = -1;
2276 else
2278 /* Property doesn't exist yet. */
2279 curr_prop = XNEW (struct see_register_properties);
2280 curr_prop->regno = REGNO (dest_extension_reg);
2281 curr_prop->last_def = ref_luid;
2282 curr_prop->first_se_before_any_def = -1;
2283 curr_prop->first_se_after_last_def = -1;
2284 *slot_prop = curr_prop;
2287 /* Insert the def_se into see_pre_extension_hash if it isn't already
2288 there. */
2289 see_seek_pre_extension_expr (def_se, DEF_EXTENSION);
2291 return 1;
2295 /* In this function we set the register properties for the register that is used
2296 in the reference.
2297 The properties are defined in see_register_properties structure which is
2298 allocated per basic block and per register.
2299 When a redundant use extension is found it is removed from the hash of the
2300 reference.
2301 If the extension is non redundant it is inserted into the
2302 see_pre_extension_hash for the next phase of the optimization.
2304 This is a subroutine of see_handle_extensions_for_one_ref called
2305 via htab_traverse.
2307 SLOT contains the current use extension instruction.
2308 B is the see_ref_s structure pointer. */
2310 static int
2311 see_set_prop_unmerged_use (void **slot, void *b)
2313 rtx use_se = (rtx) *slot;
2314 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b;
2315 rtx insn = curr_ref_s->insn;
2316 rtx dest_extension_reg = see_get_extension_reg (use_se, 1);
2317 htab_t curr_bb_hash;
2318 struct see_register_properties *curr_prop = NULL;
2319 struct see_register_properties **slot_prop;
2320 struct see_register_properties temp_prop;
2321 bool locally_redundant = false;
2322 int ref_luid = DF_INSN_LUID (insn);
2324 curr_bb_hash = see_bb_hash_ar[BLOCK_NUM (curr_ref_s->insn)];
2325 if (!curr_bb_hash)
2327 /* The hash doesn't exist yet. Create it. */
2328 curr_bb_hash = htab_create (10,
2329 hash_descriptor_properties,
2330 eq_descriptor_properties,
2331 hash_del_properties);
2332 see_bb_hash_ar[BLOCK_NUM (curr_ref_s->insn)] = curr_bb_hash;
2335 /* Find the right register properties in the right basic block. */
2336 temp_prop.regno = REGNO (dest_extension_reg);
2337 slot_prop =
2338 (struct see_register_properties **) htab_find_slot (curr_bb_hash,
2339 &temp_prop, INSERT);
2341 if (slot_prop && *slot_prop != NULL)
2343 /* Property already exists. */
2344 curr_prop = *slot_prop;
2345 gcc_assert (curr_prop->regno == REGNO (dest_extension_reg));
2348 if (curr_prop->last_def < 0 && curr_prop->first_se_before_any_def < 0)
2349 curr_prop->first_se_before_any_def = ref_luid;
2350 else if (curr_prop->last_def < 0
2351 && curr_prop->first_se_before_any_def >= 0)
2353 /* In this case the extension is locally redundant. */
2354 htab_clear_slot (curr_ref_s->use_se_hash, (PTR *)slot);
2355 locally_redundant = true;
2357 else if (curr_prop->last_def >= 0
2358 && curr_prop->first_se_after_last_def < 0)
2359 curr_prop->first_se_after_last_def = ref_luid;
2360 else if (curr_prop->last_def >= 0
2361 && curr_prop->first_se_after_last_def >= 0)
2363 /* In this case the extension is locally redundant. */
2364 htab_clear_slot (curr_ref_s->use_se_hash, (PTR *)slot);
2365 locally_redundant = true;
2367 else
2368 gcc_unreachable ();
2370 else
2372 /* Property doesn't exist yet. Create a new one. */
2373 curr_prop = XNEW (struct see_register_properties);
2374 curr_prop->regno = REGNO (dest_extension_reg);
2375 curr_prop->last_def = -1;
2376 curr_prop->first_se_before_any_def = ref_luid;
2377 curr_prop->first_se_after_last_def = -1;
2378 *slot_prop = curr_prop;
2381 /* Insert the use_se into see_pre_extension_hash if it isn't already
2382 there. */
2383 if (!locally_redundant)
2384 see_seek_pre_extension_expr (use_se, USE_EXTENSION);
2385 if (locally_redundant && dump_file)
2387 fprintf (dump_file, "Locally redundant extension:\n");
2388 print_rtl_single (dump_file, use_se);
2390 return 1;
2394 /* Print an extension instruction.
2396 This is a subroutine of see_handle_extensions_for_one_ref called
2397 via htab_traverse.
2398 SLOT contains the extension instruction. */
2400 static int
2401 see_print_one_extension (void **slot, void *b ATTRIBUTE_UNUSED)
2403 rtx def_se = (rtx) *slot;
2405 gcc_assert (def_se && INSN_P (def_se));
2406 print_rtl_single (dump_file, def_se);
2408 return 1;
2411 /* Function called by note_uses to replace used subexpressions.
2413 X is a pointer to the subexpression and DATA is a pointer to a
2414 see_replace_data structure that contains the data for the replacement. */
2416 static void
2417 see_replace_src (rtx *x, void *data)
2419 struct see_replace_data *d
2420 = (struct see_replace_data *) data;
2422 *x = replace_rtx (*x, d->from, d->to);
2426 static rtx
2427 see_copy_insn (rtx insn)
2429 rtx pat = copy_insn (PATTERN (insn)), ret;
2431 if (NONJUMP_INSN_P (insn))
2432 ret = make_insn_raw (pat);
2433 else if (JUMP_P (insn))
2434 ret = make_jump_insn_raw (pat);
2435 else if (CALL_P (insn))
2437 start_sequence ();
2438 ret = emit_call_insn (pat);
2439 end_sequence ();
2440 if (CALL_INSN_FUNCTION_USAGE (insn))
2441 CALL_INSN_FUNCTION_USAGE (ret)
2442 = copy_rtx (CALL_INSN_FUNCTION_USAGE (insn));
2443 SIBLING_CALL_P (ret) = SIBLING_CALL_P (insn);
2444 RTL_CONST_CALL_P (ret) = RTL_CONST_CALL_P (insn);
2445 RTL_PURE_CALL_P (ret) = RTL_PURE_CALL_P (insn);
2446 RTL_LOOPING_CONST_OR_PURE_CALL_P (ret)
2447 = RTL_LOOPING_CONST_OR_PURE_CALL_P (insn);
2449 else
2450 gcc_unreachable ();
2451 if (REG_NOTES (insn))
2452 REG_NOTES (ret) = copy_rtx (REG_NOTES (insn));
2453 INSN_LOCATOR (ret) = INSN_LOCATOR (insn);
2454 RTX_FRAME_RELATED_P (ret) = RTX_FRAME_RELATED_P (insn);
2455 PREV_INSN (ret) = NULL_RTX;
2456 NEXT_INSN (ret) = NULL_RTX;
2457 return ret;
2461 /* At this point the pattern is expected to be:
2463 ref: set (dest_reg) (rhs)
2464 def_se: set (dest_extension_reg) (sign/zero_extend (source_extension_reg))
2466 The merge of these two instructions didn't succeed.
2468 We try to generate the pattern:
2469 set (subreg (dest_extension_reg)) (rhs)
2471 We do this in 4 steps:
2472 a. Replace every use of dest_reg with a new pseudo register.
2473 b. Replace every instance of dest_reg with the subreg.
2474 c. Replace every use of the new pseudo register back to dest_reg.
2475 d. Try to recognize and simplify.
2477 If the manipulation failed, leave the original ref but try to generate and
2478 recognize a simple move instruction:
2479 set (subreg (dest_extension_reg)) (dest_reg)
2480 This move instruction will be emitted right after the ref to the instruction
2481 stream and assure the correctness of the code after def_se will be removed.
2483 CURR_REF_S is the current reference.
2484 DEF_SE is the extension that couldn't be merged. */
2486 static void
2487 see_def_extension_not_merged (struct see_ref_s *curr_ref_s, rtx def_se)
2489 struct see_replace_data d;
2490 /* If the original insn was already merged with an extension before,
2491 take the merged one. */
2492 rtx ref = curr_ref_s->merged_insn
2493 ? curr_ref_s->merged_insn : curr_ref_s->insn;
2494 rtx merged_ref_next = curr_ref_s->merged_insn
2495 ? NEXT_INSN (curr_ref_s->merged_insn) : NULL_RTX;
2496 rtx ref_copy = see_copy_insn (ref);
2497 rtx source_extension_reg = see_get_extension_reg (def_se, 0);
2498 rtx dest_extension_reg = see_get_extension_reg (def_se, 1);
2499 rtx set, rhs;
2500 rtx dest_reg, dest_real_reg;
2501 rtx new_pseudo_reg, subreg;
2502 enum machine_mode source_extension_mode = GET_MODE (source_extension_reg);
2503 enum machine_mode dest_mode;
2505 set = single_set (def_se);
2506 gcc_assert (set);
2507 rhs = SET_SRC (set);
2508 gcc_assert (GET_CODE (rhs) == SIGN_EXTEND
2509 || GET_CODE (rhs) == ZERO_EXTEND);
2510 dest_reg = XEXP (rhs, 0);
2511 gcc_assert (REG_P (dest_reg)
2512 || (GET_CODE (dest_reg) == SUBREG
2513 && REG_P (SUBREG_REG (dest_reg))));
2514 dest_real_reg = REG_P (dest_reg) ? dest_reg : SUBREG_REG (dest_reg);
2515 dest_mode = GET_MODE (dest_reg);
2517 subreg = gen_lowpart_SUBREG (dest_mode, dest_extension_reg);
2518 new_pseudo_reg = gen_reg_rtx (source_extension_mode);
2520 /* Step a: Replace every use of dest_real_reg with a new pseudo register. */
2521 d.from = dest_real_reg;
2522 d.to = new_pseudo_reg;
2523 note_uses (&PATTERN (ref_copy), see_replace_src, &d);
2524 /* Step b: Replace every instance of dest_reg with the subreg. */
2525 ref_copy = replace_rtx (ref_copy, dest_reg, copy_rtx (subreg));
2527 /* Step c: Replace every use of the new pseudo register back to
2528 dest_real_reg. */
2529 d.from = new_pseudo_reg;
2530 d.to = dest_real_reg;
2531 note_uses (&PATTERN (ref_copy), see_replace_src, &d);
2533 if (rtx_equal_p (PATTERN (ref), PATTERN (ref_copy))
2534 || insn_invalid_p (ref_copy))
2536 /* The manipulation failed. */
2537 df_insn_delete (NULL, INSN_UID (ref_copy));
2539 /* Create a new copy. */
2540 ref_copy = see_copy_insn (ref);
2542 if (curr_ref_s->merged_insn)
2543 df_insn_delete (NULL, INSN_UID (curr_ref_s->merged_insn));
2545 /* Create a simple move instruction that will replace the def_se. */
2546 start_sequence ();
2547 emit_insn (ref_copy);
2548 emit_move_insn (subreg, dest_reg);
2549 if (merged_ref_next != NULL_RTX)
2550 emit_insn (merged_ref_next);
2551 curr_ref_s->merged_insn = get_insns ();
2552 end_sequence ();
2554 if (dump_file)
2556 fprintf (dump_file, "Following def merge failure a move ");
2557 fprintf (dump_file, "insn was added after the ref.\n");
2558 fprintf (dump_file, "Original ref:\n");
2559 print_rtl_single (dump_file, ref);
2560 fprintf (dump_file, "Move insn that was added:\n");
2561 print_rtl_single (dump_file, NEXT_INSN (curr_ref_s->merged_insn));
2563 return;
2566 /* The manipulation succeeded. Store the new manipulated reference. */
2568 /* It is possible for dest_reg to appear multiple times in ref_copy. In this
2569 case, ref_copy now has invalid sharing. Copying solves the problem.
2570 We don't use copy_rtx as an optimization for the common case (no sharing).
2571 We can't just use copy_rtx_if_shared since it does nothing on INSNs.
2572 Another possible solution would be to make validate_replace_rtx_1
2573 public and use it instead of replace_rtx. */
2574 reset_used_flags (PATTERN (ref_copy));
2575 reset_used_flags (REG_NOTES (ref_copy));
2576 PATTERN (ref_copy) = copy_rtx_if_shared (PATTERN (ref_copy));
2577 REG_NOTES (ref_copy) = copy_rtx_if_shared (REG_NOTES (ref_copy));
2579 /* Try to simplify the new manipulated insn. */
2580 validate_simplify_insn (ref_copy);
2582 if (curr_ref_s->merged_insn)
2583 df_insn_delete (NULL, INSN_UID (curr_ref_s->merged_insn));
2585 /* Create a simple move instruction to assure the correctness of the code. */
2586 start_sequence ();
2587 emit_insn (ref_copy);
2588 emit_move_insn (dest_reg, subreg);
2589 if (merged_ref_next != NULL_RTX)
2590 emit_insn (merged_ref_next);
2591 curr_ref_s->merged_insn = get_insns ();
2592 end_sequence ();
2594 if (dump_file)
2596 fprintf (dump_file, "Following merge failure the ref was transformed!\n");
2597 fprintf (dump_file, "Original ref:\n");
2598 print_rtl_single (dump_file, ref);
2599 fprintf (dump_file, "Transformed ref:\n");
2600 print_rtl_single (dump_file, curr_ref_s->merged_insn);
2601 fprintf (dump_file, "Move insn that was added:\n");
2602 print_rtl_single (dump_file, NEXT_INSN (curr_ref_s->merged_insn));
2607 /* Merge the reference instruction (ref) with the current use extension.
2609 use_se extends a NARROWmode register to a WIDEmode register.
2610 ref uses the WIDEmode register.
2612 The pattern we try to merge is this:
2613 use_se: set (dest_extension_reg) (sign/zero_extend (source_extension_reg))
2614 ref: use (dest_extension_reg)
2616 where dest_extension_reg and source_extension_reg can be subregs.
2618 The merge is done by generating, simplifying and recognizing the pattern:
2619 use (sign/zero_extend (source_extension_reg))
2621 If ref is too simple (according to see_want_to_be_merged_with_extension ())
2622 we don't try to merge it with use_se and we continue as if the merge failed.
2624 This is a subroutine of see_handle_extensions_for_one_ref called
2625 via htab_traverse.
2626 SLOT contains the current use extension instruction.
2627 B is the see_ref_s structure pointer. */
2629 static int
2630 see_merge_one_use_extension (void **slot, void *b)
2632 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b;
2633 rtx use_se = (rtx) *slot;
2634 rtx ref = curr_ref_s->merged_insn
2635 ? curr_ref_s->merged_insn : curr_ref_s->insn;
2636 rtx merged_ref_next = curr_ref_s->merged_insn
2637 ? NEXT_INSN (curr_ref_s->merged_insn) : NULL_RTX;
2638 rtx ref_copy = see_copy_insn (ref);
2639 rtx extension_set = single_set (use_se);
2640 rtx extension_rhs = NULL;
2641 rtx dest_extension_reg = see_get_extension_reg (use_se, 1);
2642 rtx note = NULL;
2643 rtx simplified_note = NULL;
2645 gcc_assert (use_se && curr_ref_s && extension_set);
2647 extension_rhs = SET_SRC (extension_set);
2649 /* In REG_EQUIV and REG_EQUAL notes that mention the register we need to
2650 replace the uses of the dest_extension_reg with the rhs of the extension
2651 instruction. This is necessary since there might not be an extension in
2652 the path between the definition and the note when this optimization is
2653 over. */
2654 note = find_reg_equal_equiv_note (ref_copy);
2655 if (note)
2657 simplified_note = simplify_replace_rtx (XEXP (note, 0),
2658 dest_extension_reg,
2659 extension_rhs);
2660 if (rtx_equal_p (XEXP (note, 0), simplified_note))
2661 /* Replacement failed. Remove the note. */
2662 remove_note (ref_copy, note);
2663 else
2664 set_unique_reg_note (ref_copy, REG_NOTE_KIND (note),
2665 simplified_note);
2668 if (!see_want_to_be_merged_with_extension (ref, use_se, USE_EXTENSION))
2670 /* The use in the reference is too simple. Don't try to merge. */
2671 if (dump_file)
2673 fprintf (dump_file, "Use merge skipped!\n");
2674 fprintf (dump_file, "Original instructions:\n");
2675 print_rtl_single (dump_file, use_se);
2676 print_rtl_single (dump_file, ref);
2678 df_insn_delete (NULL, INSN_UID (ref_copy));
2679 /* Don't remove the current use_se from the use_se_hash and continue to
2680 the next extension. */
2681 return 1;
2684 validate_replace_src_group (dest_extension_reg, extension_rhs, ref_copy);
2686 if (!num_changes_pending ())
2687 /* In this case this is not a real use (the only use is/was in the notes
2688 list). Remove the use extension from the hash. This will prevent it
2689 from been emitted in the first place. */
2691 if (dump_file)
2693 fprintf (dump_file, "Use extension not necessary before:\n");
2694 print_rtl_single (dump_file, ref);
2696 htab_clear_slot (curr_ref_s->use_se_hash, (PTR *)slot);
2698 if (curr_ref_s->merged_insn)
2699 df_insn_delete (NULL, INSN_UID (curr_ref_s->merged_insn));
2701 if (merged_ref_next != NULL_RTX)
2703 start_sequence ();
2704 emit_insn (ref_copy);
2705 emit_insn (merged_ref_next);
2706 curr_ref_s->merged_insn = get_insns ();
2707 end_sequence ();
2709 else
2710 curr_ref_s->merged_insn = ref_copy;
2711 return 1;
2714 if (!apply_change_group ())
2716 /* The merge failed. */
2717 if (dump_file)
2719 fprintf (dump_file, "Use merge failed!\n");
2720 fprintf (dump_file, "Original instructions:\n");
2721 print_rtl_single (dump_file, use_se);
2722 print_rtl_single (dump_file, ref);
2724 df_insn_delete (NULL, INSN_UID (ref_copy));
2725 /* Don't remove the current use_se from the use_se_hash and continue to
2726 the next extension. */
2727 return 1;
2730 /* The merge succeeded! */
2732 /* Try to simplify the new merged insn. */
2733 validate_simplify_insn (ref_copy);
2735 if (curr_ref_s->merged_insn)
2736 df_insn_delete (NULL, INSN_UID (curr_ref_s->merged_insn));
2738 if (merged_ref_next != NULL_RTX)
2740 start_sequence ();
2741 emit_insn (ref_copy);
2742 emit_insn (merged_ref_next);
2743 curr_ref_s->merged_insn = get_insns ();
2744 end_sequence ();
2746 else
2747 curr_ref_s->merged_insn = ref_copy;
2749 if (dump_file)
2751 fprintf (dump_file, "Use merge succeeded!\n");
2752 fprintf (dump_file, "Original instructions:\n");
2753 print_rtl_single (dump_file, use_se);
2754 print_rtl_single (dump_file, ref);
2755 fprintf (dump_file, "Merged instruction:\n");
2756 print_rtl_single (dump_file, curr_ref_s->merged_insn);
2759 /* Remove the current use_se from the use_se_hash. This will prevent it from
2760 been emitted in the first place. */
2761 htab_clear_slot (curr_ref_s->use_se_hash, (PTR *)slot);
2762 return 1;
2766 /* Merge the reference instruction (ref) with the extension that follows it
2767 in the same basic block (def_se).
2768 ref sets a NARROWmode register and def_se extends it to WIDEmode register.
2770 The pattern we try to merge is this:
2771 ref: set (dest_reg) (rhs)
2772 def_se: set (dest_extension_reg) (sign/zero_extend (source_extension_reg))
2774 where dest_reg and source_extension_reg can both be subregs (together)
2775 and (REGNO (dest_reg) == REGNO (source_extension_reg))
2777 The merge is done by generating, simplifying and recognizing the pattern:
2778 set (dest_extension_reg) (sign/zero_extend (rhs))
2779 If ref is a parallel instruction we just replace the relevant set in it.
2781 If ref is too simple (according to see_want_to_be_merged_with_extension ())
2782 we don't try to merge it with def_se and we continue as if the merge failed.
2784 This is a subroutine of see_handle_extensions_for_one_ref called
2785 via htab_traverse.
2787 SLOT contains the current def extension instruction.
2788 B is the see_ref_s structure pointer. */
2790 static int
2791 see_merge_one_def_extension (void **slot, void *b)
2793 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b;
2794 rtx def_se = (rtx) *slot;
2795 /* If the original insn was already merged with an extension before,
2796 take the merged one. */
2797 rtx ref = curr_ref_s->merged_insn
2798 ? curr_ref_s->merged_insn : curr_ref_s->insn;
2799 rtx merged_ref_next = curr_ref_s->merged_insn
2800 ? NEXT_INSN (curr_ref_s->merged_insn) : NULL_RTX;
2801 rtx ref_copy = see_copy_insn (ref);
2802 rtx new_set = NULL;
2803 rtx source_extension_reg = see_get_extension_reg (def_se, 0);
2804 rtx dest_extension_reg = see_get_extension_reg (def_se, 1);
2805 rtx *rtx_slot, subreg;
2806 rtx temp_extension = NULL;
2807 rtx simplified_temp_extension = NULL;
2808 rtx *pat;
2809 enum rtx_code code;
2810 enum rtx_code extension_code;
2811 enum machine_mode source_extension_mode;
2812 enum machine_mode source_mode;
2813 enum machine_mode dest_extension_mode;
2814 bool merge_success = false;
2815 int i;
2817 gcc_assert (def_se
2818 && INSN_P (def_se)
2819 && curr_ref_s
2820 && ref
2821 && INSN_P (ref));
2823 if (!see_want_to_be_merged_with_extension (ref, def_se, DEF_EXTENSION))
2825 /* The definition in the reference is too simple. Don't try to merge. */
2826 if (dump_file)
2828 fprintf (dump_file, "Def merge skipped!\n");
2829 fprintf (dump_file, "Original instructions:\n");
2830 print_rtl_single (dump_file, ref);
2831 print_rtl_single (dump_file, def_se);
2834 df_insn_delete (NULL, INSN_UID (ref_copy));
2835 see_def_extension_not_merged (curr_ref_s, def_se);
2836 /* Continue to the next extension. */
2837 return 1;
2840 extension_code = see_get_extension_data (def_se, &source_mode);
2842 /* Try to merge and simplify the extension. */
2843 source_extension_mode = GET_MODE (source_extension_reg);
2844 dest_extension_mode = GET_MODE (dest_extension_reg);
2846 pat = &PATTERN (ref_copy);
2847 code = GET_CODE (*pat);
2849 if (code == PARALLEL)
2851 bool need_to_apply_change = false;
2853 for (i = 0; i < XVECLEN (*pat, 0); i++)
2855 rtx *sub = &XVECEXP (*pat, 0, i);
2857 if (GET_CODE (*sub) == SET
2858 && GET_MODE (SET_SRC (*sub)) != VOIDmode
2859 && GET_MODE (SET_DEST (*sub)) == source_mode
2860 && ((REG_P (SET_DEST (*sub))
2861 && REGNO (SET_DEST (*sub)) == REGNO (source_extension_reg))
2862 || (GET_CODE (SET_DEST (*sub)) == SUBREG
2863 && REG_P (SUBREG_REG (SET_DEST (*sub)))
2864 && (REGNO (SUBREG_REG (SET_DEST (*sub))) ==
2865 REGNO (source_extension_reg)))))
2867 rtx orig_src = SET_SRC (*sub);
2869 if (extension_code == SIGN_EXTEND)
2870 temp_extension = gen_rtx_SIGN_EXTEND (dest_extension_mode,
2871 orig_src);
2872 else
2873 temp_extension = gen_rtx_ZERO_EXTEND (dest_extension_mode,
2874 orig_src);
2875 simplified_temp_extension = simplify_rtx (temp_extension);
2876 temp_extension =
2877 (simplified_temp_extension) ? simplified_temp_extension :
2878 temp_extension;
2879 new_set = gen_rtx_SET (VOIDmode, dest_extension_reg,
2880 temp_extension);
2881 validate_change (ref_copy, sub, new_set, 1);
2882 need_to_apply_change = true;
2885 if (need_to_apply_change)
2886 if (apply_change_group ())
2887 merge_success = true;
2889 else if (code == SET
2890 && GET_MODE (SET_SRC (*pat)) != VOIDmode
2891 && GET_MODE (SET_DEST (*pat)) == source_mode
2892 && ((REG_P (SET_DEST (*pat))
2893 && REGNO (SET_DEST (*pat)) == REGNO (source_extension_reg))
2894 || (GET_CODE (SET_DEST (*pat)) == SUBREG
2895 && REG_P (SUBREG_REG (SET_DEST (*pat)))
2896 && (REGNO (SUBREG_REG (SET_DEST (*pat))) ==
2897 REGNO (source_extension_reg)))))
2899 rtx orig_src = SET_SRC (*pat);
2901 if (extension_code == SIGN_EXTEND)
2902 temp_extension = gen_rtx_SIGN_EXTEND (dest_extension_mode, orig_src);
2903 else
2904 temp_extension = gen_rtx_ZERO_EXTEND (dest_extension_mode, orig_src);
2905 simplified_temp_extension = simplify_rtx (temp_extension);
2906 temp_extension = (simplified_temp_extension) ? simplified_temp_extension :
2907 temp_extension;
2908 new_set = gen_rtx_SET (VOIDmode, dest_extension_reg, temp_extension);
2909 if (validate_change (ref_copy, pat, new_set, 0))
2910 merge_success = true;
2912 if (!merge_success)
2914 /* The merge failed. */
2915 if (dump_file)
2917 fprintf (dump_file, "Def merge failed!\n");
2918 fprintf (dump_file, "Original instructions:\n");
2919 print_rtl_single (dump_file, ref);
2920 print_rtl_single (dump_file, def_se);
2923 df_insn_delete (NULL, INSN_UID (ref_copy));
2924 see_def_extension_not_merged (curr_ref_s, def_se);
2925 /* Continue to the next extension. */
2926 return 1;
2929 /* The merge succeeded! */
2930 if (curr_ref_s->merged_insn)
2931 df_insn_delete (NULL, INSN_UID (curr_ref_s->merged_insn));
2933 /* Create a simple move instruction to assure the correctness of the code. */
2934 subreg = gen_lowpart_SUBREG (source_extension_mode, dest_extension_reg);
2935 start_sequence ();
2936 emit_insn (ref_copy);
2937 emit_move_insn (source_extension_reg, subreg);
2938 if (merged_ref_next != NULL_RTX)
2939 emit_insn (merged_ref_next);
2940 curr_ref_s->merged_insn = get_insns ();
2941 end_sequence ();
2943 if (dump_file)
2945 fprintf (dump_file, "Def merge succeeded!\n");
2946 fprintf (dump_file, "Original instructions:\n");
2947 print_rtl_single (dump_file, ref);
2948 print_rtl_single (dump_file, def_se);
2949 fprintf (dump_file, "Merged instruction:\n");
2950 print_rtl_single (dump_file, curr_ref_s->merged_insn);
2951 fprintf (dump_file, "Move instruction that was added:\n");
2952 print_rtl_single (dump_file, NEXT_INSN (curr_ref_s->merged_insn));
2955 /* Remove the current def_se from the unmerged_def_se_hash and insert it to
2956 the merged_def_se_hash. */
2957 htab_clear_slot (curr_ref_s->unmerged_def_se_hash, (PTR *)slot);
2958 if (!curr_ref_s->merged_def_se_hash)
2959 curr_ref_s->merged_def_se_hash = htab_create (10,
2960 hash_descriptor_extension,
2961 eq_descriptor_extension,
2962 NULL);
2963 rtx_slot = (rtx *) htab_find_slot (curr_ref_s->merged_def_se_hash,
2964 dest_extension_reg, INSERT);
2965 gcc_assert (*rtx_slot == NULL);
2966 *rtx_slot = def_se;
2968 return 1;
2972 /* Try to eliminate extensions in this order:
2973 a. Try to merge only the def extensions, one by one.
2974 b. Try to merge only the use extensions, one by one.
2976 TODO:
2977 Try to merge any couple of use extensions simultaneously.
2978 Try to merge any def extension with one or two uses extensions
2979 simultaneously.
2981 After all the merges are done, update the register properties for the basic
2982 block and eliminate locally redundant use extensions.
2984 This is a subroutine of see_merge_and_eliminate_extensions called
2985 via splay_tree_foreach.
2986 STN is the current node in the see_bb_splay_ar[i] splay tree. It holds a
2987 see_ref_s structure. */
2989 static int
2990 see_handle_extensions_for_one_ref (splay_tree_node stn,
2991 void *data ATTRIBUTE_UNUSED)
2993 htab_t use_se_hash = ((struct see_ref_s *) (stn->value))->use_se_hash;
2994 htab_t unmerged_def_se_hash =
2995 ((struct see_ref_s *) (stn->value))->unmerged_def_se_hash;
2996 htab_t merged_def_se_hash;
2997 rtx ref = ((struct see_ref_s *) (stn->value))->insn;
2999 if (dump_file)
3001 fprintf (dump_file, "Handling ref:\n");
3002 print_rtl_single (dump_file, ref);
3005 /* a. Try to eliminate only def extensions, one by one. */
3006 if (unmerged_def_se_hash)
3007 htab_traverse_noresize (unmerged_def_se_hash, see_merge_one_def_extension,
3008 (PTR) (stn->value));
3010 if (use_se_hash)
3011 /* b. Try to eliminate only use extensions, one by one. */
3012 htab_traverse_noresize (use_se_hash, see_merge_one_use_extension,
3013 (PTR) (stn->value));
3015 merged_def_se_hash = ((struct see_ref_s *) (stn->value))->merged_def_se_hash;
3017 if (dump_file)
3019 fprintf (dump_file, "The hashes of the current reference:\n");
3020 if (unmerged_def_se_hash)
3022 fprintf (dump_file, "unmerged_def_se_hash:\n");
3023 htab_traverse (unmerged_def_se_hash, see_print_one_extension, NULL);
3025 if (merged_def_se_hash)
3027 fprintf (dump_file, "merged_def_se_hash:\n");
3028 htab_traverse (merged_def_se_hash, see_print_one_extension, NULL);
3030 if (use_se_hash)
3032 fprintf (dump_file, "use_se_hash:\n");
3033 htab_traverse (use_se_hash, see_print_one_extension, NULL);
3037 /* Now that all the merges are done, update the register properties of the
3038 basic block and eliminate locally redundant extensions.
3039 It is important that we first traverse the use extensions hash and
3040 afterwards the def extensions hashes. */
3042 if (use_se_hash)
3043 htab_traverse_noresize (use_se_hash, see_set_prop_unmerged_use,
3044 (PTR) (stn->value));
3046 if (unmerged_def_se_hash)
3047 htab_traverse (unmerged_def_se_hash, see_set_prop_unmerged_def,
3048 (PTR) (stn->value));
3050 if (merged_def_se_hash)
3051 htab_traverse (merged_def_se_hash, see_set_prop_merged_def,
3052 (PTR) (stn->value));
3054 /* Continue to the next definition. */
3055 return 0;
3059 /* Phase 2 top level function.
3060 In this phase, we try to merge def extensions and use extensions with their
3061 references, and eliminate redundant extensions in the same basic block.
3062 We also gather information for the next phases. */
3064 static void
3065 see_merge_and_eliminate_extensions (void)
3067 int i = 0;
3069 if (dump_file)
3070 fprintf (dump_file,
3071 "* Phase 2: Merge and eliminate locally redundant extensions. *\n");
3073 /* Traverse over all the splay trees of the basic blocks. */
3074 for (i = 0; i < last_bb; i++)
3076 if (see_bb_splay_ar[i])
3078 if (dump_file)
3079 fprintf (dump_file, "Handling references for bb %d\n", i);
3080 /* Traverse over all the references in the basic block in forward
3081 order. */
3082 splay_tree_foreach (see_bb_splay_ar[i],
3083 see_handle_extensions_for_one_ref, NULL);
3089 /* Phase 1 implementation: Propagate extensions to uses. */
3091 /* Insert REF_INSN into the splay tree of its basic block.
3092 SE_INSN is the extension to store in the proper hash according to TYPE.
3094 Return true if everything went well.
3095 Otherwise, return false (this will cause the optimization to be aborted). */
3097 static bool
3098 see_store_reference_and_extension (rtx ref_insn, rtx se_insn,
3099 enum extension_type type)
3101 rtx *rtx_slot;
3102 int curr_bb_num;
3103 splay_tree_node stn = NULL;
3104 htab_t se_hash = NULL;
3105 struct see_ref_s *ref_s = NULL;
3107 /* Check the arguments. */
3108 gcc_assert (ref_insn && se_insn);
3109 if (!see_bb_splay_ar)
3110 return false;
3112 curr_bb_num = BLOCK_NUM (ref_insn);
3113 gcc_assert (curr_bb_num < last_bb && curr_bb_num >= 0);
3115 /* Insert the reference to the splay tree of its basic block. */
3116 if (!see_bb_splay_ar[curr_bb_num])
3117 /* The splay tree for this block doesn't exist yet, create it. */
3118 see_bb_splay_ar[curr_bb_num] = splay_tree_new (splay_tree_compare_ints,
3119 NULL, see_free_ref_s);
3120 else
3121 /* Splay tree already exists, check if the current reference is already
3122 in it. */
3124 stn = splay_tree_lookup (see_bb_splay_ar[curr_bb_num],
3125 DF_INSN_LUID (ref_insn));
3126 if (stn)
3127 switch (type)
3129 case EXPLICIT_DEF_EXTENSION:
3130 se_hash =
3131 ((struct see_ref_s *) (stn->value))->unmerged_def_se_hash;
3132 if (!se_hash)
3134 se_hash = htab_create (10,
3135 hash_descriptor_extension,
3136 eq_descriptor_extension,
3137 NULL);
3138 ((struct see_ref_s *) (stn->value))->unmerged_def_se_hash =
3139 se_hash;
3141 break;
3142 case IMPLICIT_DEF_EXTENSION:
3143 se_hash = ((struct see_ref_s *) (stn->value))->merged_def_se_hash;
3144 if (!se_hash)
3146 se_hash = htab_create (10,
3147 hash_descriptor_extension,
3148 eq_descriptor_extension,
3149 NULL);
3150 ((struct see_ref_s *) (stn->value))->merged_def_se_hash =
3151 se_hash;
3153 break;
3154 case USE_EXTENSION:
3155 se_hash = ((struct see_ref_s *) (stn->value))->use_se_hash;
3156 if (!se_hash)
3158 se_hash = htab_create (10,
3159 hash_descriptor_extension,
3160 eq_descriptor_extension,
3161 NULL);
3162 ((struct see_ref_s *) (stn->value))->use_se_hash = se_hash;
3164 break;
3165 default:
3166 gcc_unreachable ();
3170 /* Initialize a new see_ref_s structure and insert it to the splay
3171 tree. */
3172 if (!stn)
3174 ref_s = XNEW (struct see_ref_s);
3175 ref_s->luid = DF_INSN_LUID (ref_insn);
3176 ref_s->insn = ref_insn;
3177 ref_s->merged_insn = NULL;
3179 /* Initialize the hashes. */
3180 switch (type)
3182 case EXPLICIT_DEF_EXTENSION:
3183 ref_s->unmerged_def_se_hash = htab_create (10,
3184 hash_descriptor_extension,
3185 eq_descriptor_extension,
3186 NULL);
3187 se_hash = ref_s->unmerged_def_se_hash;
3188 ref_s->merged_def_se_hash = NULL;
3189 ref_s->use_se_hash = NULL;
3190 break;
3191 case IMPLICIT_DEF_EXTENSION:
3192 ref_s->merged_def_se_hash = htab_create (10,
3193 hash_descriptor_extension,
3194 eq_descriptor_extension,
3195 NULL);
3196 se_hash = ref_s->merged_def_se_hash;
3197 ref_s->unmerged_def_se_hash = NULL;
3198 ref_s->use_se_hash = NULL;
3199 break;
3200 case USE_EXTENSION:
3201 ref_s->use_se_hash = htab_create (10,
3202 hash_descriptor_extension,
3203 eq_descriptor_extension,
3204 NULL);
3205 se_hash = ref_s->use_se_hash;
3206 ref_s->unmerged_def_se_hash = NULL;
3207 ref_s->merged_def_se_hash = NULL;
3208 break;
3209 default:
3210 gcc_unreachable ();
3214 /* Insert the new extension instruction into the correct se_hash of the
3215 current reference. */
3216 rtx_slot = (rtx *) htab_find_slot (se_hash, se_insn, INSERT);
3217 if (*rtx_slot != NULL)
3219 gcc_assert (type == USE_EXTENSION);
3220 gcc_assert (rtx_equal_p (PATTERN (*rtx_slot), PATTERN (se_insn)));
3222 else
3223 *rtx_slot = se_insn;
3225 /* If this is a new reference, insert it into the splay_tree. */
3226 if (!stn)
3227 splay_tree_insert (see_bb_splay_ar[curr_bb_num],
3228 DF_INSN_LUID (ref_insn), (splay_tree_value) ref_s);
3229 return true;
3233 /* Go over all the defs, for each relevant definition (defined below) store its
3234 instruction as a reference.
3236 A definition is relevant if its root has
3237 ((entry_type == SIGN_EXTENDED_DEF) || (entry_type == ZERO_EXTENDED_DEF)) and
3238 his source_mode is not narrower then the roots source_mode.
3240 Return the number of relevant defs or negative number if something bad had
3241 happened and the optimization should be aborted. */
3243 static int
3244 see_handle_relevant_defs (struct df_ref *ref, rtx insn)
3246 struct web_entry *root_entry = NULL;
3247 rtx se_insn = NULL;
3248 enum rtx_code extension_code;
3249 rtx reg = DF_REF_REAL_REG (ref);
3250 rtx ref_insn = NULL;
3251 unsigned int i = DF_REF_ID (ref);
3253 root_entry = unionfind_root (&def_entry[DF_REF_ID (ref)]);
3255 if (ENTRY_EI (root_entry)->relevancy != SIGN_EXTENDED_DEF
3256 && ENTRY_EI (root_entry)->relevancy != ZERO_EXTENDED_DEF)
3257 /* The current web is not relevant. Continue to the next def. */
3258 return 0;
3260 if (root_entry->reg)
3261 /* It isn't possible to have two different register for the same
3262 web. */
3263 gcc_assert (rtx_equal_p (root_entry->reg, reg));
3264 else
3265 root_entry->reg = reg;
3267 /* The current definition is an EXTENDED_DEF or a definition that its
3268 source_mode is narrower then its web's source_mode.
3269 This means that we need to generate the implicit extension explicitly
3270 and store it in the current reference's merged_def_se_hash. */
3271 if (ENTRY_EI (&def_entry[i])->local_relevancy == EXTENDED_DEF
3272 || (ENTRY_EI (&def_entry[i])->local_source_mode <
3273 ENTRY_EI (root_entry)->source_mode))
3276 if (ENTRY_EI (root_entry)->relevancy == SIGN_EXTENDED_DEF)
3277 extension_code = SIGN_EXTEND;
3278 else
3279 extension_code = ZERO_EXTEND;
3281 se_insn =
3282 see_gen_normalized_extension (reg, extension_code,
3283 ENTRY_EI (root_entry)->source_mode);
3285 /* This is a dummy extension, mark it as deleted. */
3286 INSN_DELETED_P (se_insn) = 1;
3288 if (!see_store_reference_and_extension (insn, se_insn,
3289 IMPLICIT_DEF_EXTENSION))
3290 /* Something bad happened. Abort the optimization. */
3291 return -1;
3292 return 1;
3295 ref_insn = PREV_INSN (insn);
3296 gcc_assert (BLOCK_NUM (ref_insn) == BLOCK_NUM (insn));
3298 if (!see_store_reference_and_extension (ref_insn, insn,
3299 EXPLICIT_DEF_EXTENSION))
3300 /* Something bad happened. Abort the optimization. */
3301 return -1;
3303 return 0;
3306 /* Go over all the uses, for each use in relevant web store its instruction as
3307 a reference and generate an extension before it.
3309 Return the number of relevant uses or negative number if something bad had
3310 happened and the optimization should be aborted. */
3312 static int
3313 see_handle_relevant_uses (struct df_ref *ref, rtx insn)
3315 struct web_entry *root_entry = NULL;
3316 rtx se_insn = NULL;
3317 enum rtx_code extension_code;
3318 rtx reg = DF_REF_REAL_REG (ref);
3320 root_entry = unionfind_root (&use_entry[DF_REF_ID (ref)]);
3322 if (ENTRY_EI (root_entry)->relevancy != SIGN_EXTENDED_DEF
3323 && ENTRY_EI (root_entry)->relevancy != ZERO_EXTENDED_DEF)
3324 /* The current web is not relevant. Continue to the next use. */
3325 return 0;
3327 if (root_entry->reg)
3328 /* It isn't possible to have two different register for the same
3329 web. */
3330 gcc_assert (rtx_equal_p (root_entry->reg, reg));
3331 else
3332 root_entry->reg = reg;
3334 /* Generate the use extension. */
3335 if (ENTRY_EI (root_entry)->relevancy == SIGN_EXTENDED_DEF)
3336 extension_code = SIGN_EXTEND;
3337 else
3338 extension_code = ZERO_EXTEND;
3340 se_insn =
3341 see_gen_normalized_extension (reg, extension_code,
3342 ENTRY_EI (root_entry)->source_mode);
3343 if (!se_insn)
3344 /* This is very bad, abort the transformation. */
3345 return -1;
3347 if (!see_store_reference_and_extension (insn, se_insn,
3348 USE_EXTENSION))
3349 /* Something bad happened. Abort the optimization. */
3350 return -1;
3351 return 1;
3354 static int
3355 see_handle_relevant_refs (void)
3357 int num_relevant_refs = 0;
3358 basic_block bb;
3360 FOR_ALL_BB (bb)
3362 rtx insn;
3363 FOR_BB_INSNS (bb, insn)
3365 unsigned int uid = INSN_UID (insn);
3367 if (INSN_P (insn))
3369 struct df_ref **use_rec;
3370 struct df_ref **def_rec;
3372 for (use_rec = DF_INSN_UID_USES (uid); *use_rec; use_rec++)
3374 struct df_ref *use = *use_rec;
3375 int result = see_handle_relevant_uses (use, insn);
3376 if (result == -1)
3377 return -1;
3378 num_relevant_refs += result;
3380 for (use_rec = DF_INSN_UID_EQ_USES (uid); *use_rec; use_rec++)
3382 struct df_ref *use = *use_rec;
3383 int result = see_handle_relevant_uses (use, insn);
3384 if (result == -1)
3385 return -1;
3386 num_relevant_refs += result;
3388 for (def_rec = DF_INSN_UID_DEFS (uid); *def_rec; def_rec++)
3390 struct df_ref *def = *def_rec;
3391 int result = see_handle_relevant_defs (def, insn);
3392 if (result == -1)
3393 return -1;
3394 num_relevant_refs += result;
3399 return num_relevant_refs;
3403 /* Initialized the use_entry field for REF in INSN at INDEX with ET. */
3405 static void
3406 see_update_uses_relevancy (rtx insn, struct df_ref *ref,
3407 enum entry_type et, unsigned int index)
3409 struct see_entry_extra_info *curr_entry_extra_info;
3411 if (dump_file)
3413 rtx reg = DF_REF_REAL_REG (ref);
3414 fprintf (dump_file, "u%i insn %i reg %i ",
3415 index, (insn ? INSN_UID (insn) : -1), REGNO (reg));
3416 if (et == NOT_RELEVANT)
3417 fprintf (dump_file, "NOT RELEVANT \n");
3418 else
3419 fprintf (dump_file, "RELEVANT USE \n");
3422 DF_REF_ID (ref) = index;
3423 curr_entry_extra_info = XNEW (struct see_entry_extra_info);
3424 curr_entry_extra_info->relevancy = et;
3425 curr_entry_extra_info->local_relevancy = et;
3426 use_entry[index].extra_info = curr_entry_extra_info;
3427 use_entry[index].reg = NULL;
3428 use_entry[index].pred = NULL;
3432 /* A definition in a candidate for this optimization only if its pattern is
3433 recognized as relevant in this function.
3434 INSN is the instruction to be recognized.
3436 - If this is the pattern of a common sign extension after definition:
3437 PREV_INSN (INSN): def (reg:NARROWmode r)
3438 INSN: set ((reg:WIDEmode r')
3439 (sign_extend:WIDEmode (reg:NARROWmode r)))
3440 return SIGN_EXTENDED_DEF and set SOURCE_MODE to NARROWmode.
3442 - If this is the pattern of a common zero extension after definition:
3443 PREV_INSN (INSN): def (reg:NARROWmode r)
3444 INSN: set ((reg:WIDEmode r')
3445 (zero_extend:WIDEmode (reg:NARROWmode r)))
3446 return ZERO_EXTENDED_DEF and set SOURCE_MODE to NARROWmode.
3448 - Otherwise,
3450 For the pattern:
3451 INSN: set ((reg:WIDEmode r) (sign_extend:WIDEmode (...expr...)))
3452 return EXTENDED_DEF and set SOURCE_MODE to the mode of expr.
3454 For the pattern:
3455 INSN: set ((reg:WIDEmode r) (zero_extend:WIDEmode (...expr...)))
3456 return EXTENDED_DEF and set SOURCE_MODE_UNSIGNED to the mode of expr.
3458 For the pattern:
3459 INSN: set ((reg:WIDEmode r) (CONST_INT (...)))
3460 return EXTENDED_DEF and set SOURCE_MODE(_UNSIGNED) to the narrowest mode that
3461 is implicitly sign(zero) extended to WIDEmode in the INSN.
3463 - FIXME: Extensions that are not adjacent to their definition and EXTENDED_DEF
3464 that is part of a PARALLEL instruction are not handled.
3465 These restriction can be relaxed. */
3467 static enum entry_type
3468 see_analyze_one_def (rtx insn, enum machine_mode *source_mode,
3469 enum machine_mode *source_mode_unsigned)
3471 enum rtx_code extension_code;
3472 rtx rhs = NULL;
3473 rtx lhs = NULL;
3474 rtx set = NULL;
3475 rtx source_register = NULL;
3476 rtx prev_insn = NULL;
3477 rtx next_insn = NULL;
3478 enum machine_mode mode;
3479 enum machine_mode next_source_mode;
3480 HOST_WIDE_INT val = 0;
3481 HOST_WIDE_INT val2 = 0;
3482 int i = 0;
3484 *source_mode = MAX_MACHINE_MODE;
3485 *source_mode_unsigned = MAX_MACHINE_MODE;
3487 extension_code = see_get_extension_data (insn, source_mode);
3488 switch (extension_code)
3490 case SIGN_EXTEND:
3491 case ZERO_EXTEND:
3492 source_register = see_get_extension_reg (insn, 0);
3493 /* FIXME: This restriction can be relaxed. The only thing that is
3494 important is that the reference would be inside the same basic block
3495 as the extension. */
3496 prev_insn = PREV_INSN (insn);
3497 if (!prev_insn || !INSN_P (prev_insn))
3498 return NOT_RELEVANT;
3500 if (!reg_set_between_p (source_register, PREV_INSN (prev_insn), insn))
3501 return NOT_RELEVANT;
3503 /* If we can't use copy_rtx on the reference it can't be a reference. */
3504 if (GET_CODE (PATTERN (prev_insn)) == PARALLEL
3505 && asm_noperands (PATTERN (prev_insn)) >= 0)
3506 return NOT_RELEVANT;
3508 /* Now, check if this extension is a reference itself. If so, it is not
3509 relevant. Handling this extension as relevant would make things much
3510 more complicated. */
3511 next_insn = NEXT_INSN (insn);
3512 if (next_insn
3513 && INSN_P (next_insn)
3514 && (see_get_extension_data (next_insn, &next_source_mode) !=
3515 NOT_RELEVANT))
3517 rtx curr_dest_register = see_get_extension_reg (insn, 1);
3518 rtx next_source_register = see_get_extension_reg (next_insn, 0);
3520 if (REGNO (curr_dest_register) == REGNO (next_source_register))
3521 return NOT_RELEVANT;
3524 if (extension_code == SIGN_EXTEND)
3525 return SIGN_EXTENDED_DEF;
3526 else
3527 return ZERO_EXTENDED_DEF;
3529 case UNKNOWN:
3530 /* This may still be an EXTENDED_DEF. */
3532 /* FIXME: This restriction can be relaxed. It is possible to handle
3533 PARALLEL insns too. */
3534 set = single_set (insn);
3535 if (!set)
3536 return NOT_RELEVANT;
3537 rhs = SET_SRC (set);
3538 lhs = SET_DEST (set);
3540 /* Don't handle extensions to something other then register or
3541 subregister. */
3542 if (!REG_P (lhs) && GET_CODE (lhs) != SUBREG)
3543 return NOT_RELEVANT;
3545 switch (GET_CODE (rhs))
3547 case SIGN_EXTEND:
3548 *source_mode = GET_MODE (XEXP (rhs, 0));
3549 *source_mode_unsigned = MAX_MACHINE_MODE;
3550 return EXTENDED_DEF;
3551 case ZERO_EXTEND:
3552 *source_mode = MAX_MACHINE_MODE;
3553 *source_mode_unsigned = GET_MODE (XEXP (rhs, 0));
3554 return EXTENDED_DEF;
3555 case CONST_INT:
3557 val = INTVAL (rhs);
3559 /* Find the narrowest mode, val could fit into. */
3560 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT), i = 0;
3561 GET_MODE_BITSIZE (mode) < BITS_PER_WORD;
3562 mode = GET_MODE_WIDER_MODE (mode), i++)
3564 val2 = trunc_int_for_mode (val, mode);
3565 if (val2 == val && *source_mode == MAX_MACHINE_MODE)
3566 *source_mode = mode;
3567 if (val == (val & (HOST_WIDE_INT)GET_MODE_MASK (mode))
3568 && *source_mode_unsigned == MAX_MACHINE_MODE)
3569 *source_mode_unsigned = mode;
3570 if (*source_mode != MAX_MACHINE_MODE
3571 && *source_mode_unsigned !=MAX_MACHINE_MODE)
3572 return EXTENDED_DEF;
3574 if (*source_mode != MAX_MACHINE_MODE
3575 || *source_mode_unsigned !=MAX_MACHINE_MODE)
3576 return EXTENDED_DEF;
3577 return NOT_RELEVANT;
3578 default:
3579 return NOT_RELEVANT;
3581 default:
3582 gcc_unreachable ();
3587 /* Initialized the def_entry field for REF in INSN at INDEX with ET. */
3589 static void
3590 see_update_defs_relevancy (rtx insn, struct df_ref *ref,
3591 enum entry_type et,
3592 enum machine_mode source_mode,
3593 enum machine_mode source_mode_unsigned,
3594 unsigned int index)
3596 struct see_entry_extra_info *curr_entry_extra_info
3597 = XNEW (struct see_entry_extra_info);
3598 curr_entry_extra_info->relevancy = et;
3599 curr_entry_extra_info->local_relevancy = et;
3601 DF_REF_ID (ref) = index;
3603 if (et != EXTENDED_DEF)
3605 curr_entry_extra_info->source_mode = source_mode;
3606 curr_entry_extra_info->local_source_mode = source_mode;
3608 else
3610 curr_entry_extra_info->source_mode_signed = source_mode;
3611 curr_entry_extra_info->source_mode_unsigned = source_mode_unsigned;
3613 def_entry[index].extra_info = curr_entry_extra_info;
3614 def_entry[index].reg = NULL;
3615 def_entry[index].pred = NULL;
3617 if (dump_file)
3619 rtx reg = DF_REF_REAL_REG (ref);
3620 if (et == NOT_RELEVANT)
3622 fprintf (dump_file, "d%i insn %i reg %i ",
3623 index, (insn ? INSN_UID (insn) : -1), REGNO (reg));
3624 fprintf (dump_file, "NOT RELEVANT \n");
3626 else
3628 fprintf (dump_file, "d%i insn %i reg %i ",
3629 index, INSN_UID (insn), REGNO (reg));
3630 fprintf (dump_file, "RELEVANT - ");
3631 switch (et)
3633 case SIGN_EXTENDED_DEF :
3634 fprintf (dump_file, "SIGN_EXTENDED_DEF, source_mode = %s\n",
3635 GET_MODE_NAME (source_mode));
3636 break;
3637 case ZERO_EXTENDED_DEF :
3638 fprintf (dump_file, "ZERO_EXTENDED_DEF, source_mode = %s\n",
3639 GET_MODE_NAME (source_mode));
3640 break;
3641 case EXTENDED_DEF :
3642 fprintf (dump_file, "EXTENDED_DEF, ");
3643 if (source_mode != MAX_MACHINE_MODE
3644 && source_mode_unsigned != MAX_MACHINE_MODE)
3646 fprintf (dump_file, "positive const, ");
3647 fprintf (dump_file, "source_mode_signed = %s, ",
3648 GET_MODE_NAME (source_mode));
3649 fprintf (dump_file, "source_mode_unsigned = %s\n",
3650 GET_MODE_NAME (source_mode_unsigned));
3652 else if (source_mode != MAX_MACHINE_MODE)
3653 fprintf (dump_file, "source_mode_signed = %s\n",
3654 GET_MODE_NAME (source_mode));
3655 else
3656 fprintf (dump_file, "source_mode_unsigned = %s\n",
3657 GET_MODE_NAME (source_mode_unsigned));
3658 break;
3659 default :
3660 gcc_unreachable ();
3667 /* Updates the relevancy of all the uses and all defs.
3669 The information of the u'th use is stored in use_entry[u] and the
3670 information of the d'th definition is stored in def_entry[d].
3672 Currently all the uses are relevant for the optimization except for
3673 uses that are in LIBCALL or RETVAL instructions. */
3675 static void
3676 see_update_relevancy (void)
3678 unsigned int d = 0;
3679 unsigned int u = 0;
3680 enum entry_type et;
3681 enum machine_mode source_mode;
3682 enum machine_mode source_mode_unsigned;
3683 basic_block bb;
3685 if (!def_entry)
3686 return;
3688 FOR_ALL_BB (bb)
3690 struct df_ref **use_rec;
3691 struct df_ref **def_rec;
3692 rtx insn;
3693 FOR_BB_INSNS (bb, insn)
3695 unsigned int uid = INSN_UID (insn);
3696 if (INSN_P (insn))
3698 et = RELEVANT_USE;
3700 for (use_rec = DF_INSN_UID_USES (uid); *use_rec; use_rec++)
3702 struct df_ref *use = *use_rec;
3703 see_update_uses_relevancy (insn, use, et, u);
3704 u++;
3707 for (use_rec = DF_INSN_UID_EQ_USES (uid); *use_rec; use_rec++)
3709 struct df_ref *use = *use_rec;
3710 see_update_uses_relevancy (insn, use, et, u);
3711 u++;
3714 et = see_analyze_one_def (insn, &source_mode, &source_mode_unsigned);
3715 for (def_rec = DF_INSN_UID_DEFS (uid); *def_rec; def_rec++)
3717 struct df_ref *def = *def_rec;
3718 see_update_defs_relevancy (insn, def, et, source_mode,
3719 source_mode_unsigned, d);
3720 d++;
3725 for (use_rec = df_get_artificial_uses (bb->index); *use_rec; use_rec++)
3727 struct df_ref *use = *use_rec;
3728 see_update_uses_relevancy (NULL, use, NOT_RELEVANT, u);
3729 u++;
3732 for (def_rec = df_get_artificial_defs (bb->index); *def_rec; def_rec++)
3734 struct df_ref *def = *def_rec;
3735 see_update_defs_relevancy (NULL, def, NOT_RELEVANT,
3736 MAX_MACHINE_MODE, MAX_MACHINE_MODE, d);
3737 d++;
3743 /* Phase 1 top level function.
3744 In this phase the relevancy of all the definitions and uses are checked,
3745 later the webs are produces and the extensions are generated.
3746 These extensions are not emitted yet into the insns stream.
3748 returns true if at list one relevant web was found and there were no
3749 problems, otherwise return false. */
3751 static bool
3752 see_propagate_extensions_to_uses (void)
3754 int num_relevant_refs;
3755 basic_block bb;
3757 if (dump_file)
3758 fprintf (dump_file,
3759 "* Phase 1: Propagate extensions to uses. *\n");
3761 /* Update the relevancy of references using the DF object. */
3762 see_update_relevancy ();
3764 /* Produce the webs and update the extra_info of the root.
3765 In general, a web is relevant if all its definitions and uses are relevant
3766 and there is at least one definition that was marked as SIGN_EXTENDED_DEF
3767 or ZERO_EXTENDED_DEF. */
3768 FOR_ALL_BB (bb)
3770 rtx insn;
3771 struct df_ref **use_rec;
3773 FOR_BB_INSNS (bb, insn)
3775 unsigned int uid = INSN_UID (insn);
3776 if (INSN_P (insn))
3778 for (use_rec = DF_INSN_UID_USES (uid); *use_rec; use_rec++)
3780 struct df_ref *use = *use_rec;
3781 union_defs (use, def_entry, use_entry, see_update_leader_extra_info);
3784 for (use_rec = DF_INSN_UID_EQ_USES (uid); *use_rec; use_rec++)
3786 struct df_ref *use = *use_rec;
3787 union_defs (use, def_entry, use_entry, see_update_leader_extra_info);
3792 for (use_rec = df_get_artificial_uses (bb->index); *use_rec; use_rec++)
3794 struct df_ref *use = *use_rec;
3795 union_defs (use, def_entry, use_entry, see_update_leader_extra_info);
3799 /* Generate use extensions for references and insert these
3800 references to see_bb_splay_ar data structure. */
3801 num_relevant_refs = see_handle_relevant_refs ();
3803 return num_relevant_refs > 0;
3807 /* Main entry point for the sign extension elimination optimization. */
3809 static void
3810 see_main (void)
3812 bool cont = false;
3813 int i = 0;
3815 /* Initialize global data structures. */
3816 see_initialize_data_structures ();
3818 /* Phase 1: Propagate extensions to uses. */
3819 cont = see_propagate_extensions_to_uses ();
3821 if (cont)
3823 init_recog ();
3825 /* Phase 2: Merge and eliminate locally redundant extensions. */
3826 see_merge_and_eliminate_extensions ();
3828 /* Phase 3: Eliminate globally redundant extensions. */
3829 see_execute_LCM ();
3831 /* Phase 4: Commit changes to the insn stream. */
3832 see_commit_changes ();
3834 if (dump_file)
3836 /* For debug purpose only. */
3837 fprintf (dump_file, "see_pre_extension_hash:\n");
3838 htab_traverse (see_pre_extension_hash, see_print_pre_extension_expr,
3839 NULL);
3841 for (i = 0; i < last_bb; i++)
3843 if (see_bb_hash_ar[i])
3844 /* Traverse over all the references in the basic block in
3845 forward order. */
3847 fprintf (dump_file,
3848 "Searching register properties in bb %d\n", i);
3849 htab_traverse (see_bb_hash_ar[i],
3850 see_print_register_properties, NULL);
3856 /* Free global data structures. */
3857 see_free_data_structures ();
3861 static bool
3862 gate_handle_see (void)
3864 return optimize > 1 && flag_see;
3867 static unsigned int
3868 rest_of_handle_see (void)
3870 see_main ();
3871 df_clear_flags (DF_DEFER_INSN_RESCAN);
3872 df_process_deferred_rescans ();
3873 run_fast_dce ();
3874 return 0;
3877 struct rtl_opt_pass pass_see =
3880 RTL_PASS,
3881 "see", /* name */
3882 gate_handle_see, /* gate */
3883 rest_of_handle_see, /* execute */
3884 NULL, /* sub */
3885 NULL, /* next */
3886 0, /* static_pass_number */
3887 TV_SEE, /* tv_id */
3888 0, /* properties_required */
3889 0, /* properties_provided */
3890 0, /* properties_destroyed */
3891 0, /* todo_flags_start */
3892 TODO_df_verify |
3893 TODO_df_finish | TODO_verify_rtl_sharing |
3894 TODO_dump_func /* todo_flags_finish */