re PR fortran/32046 (wrong code with -O2 for gfortran.dg/interface_12.f90 & result_in...
[official-gcc.git] / gcc / see.c
blobf5cacdee93f904a60e345d99afe100447c4a6872
1 /* Sign extension elimination optimization for GNU compiler.
2 Copyright (C) 2005, 2006, 2007 Free Software Foundation, Inc.
3 Contributed by Leehod Baruch <leehod@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 -Software Foundation; either version 2, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA.
22 Problem description:
23 --------------------
24 In order to support 32bit computations on a 64bit machine, sign
25 extension instructions are generated to ensure the correctness of
26 the computation.
27 A possible policy (as currently implemented) is to generate a sign
28 extension right after each 32bit computation.
29 Depending on the instruction set of the architecture, some of these
30 sign extension instructions may be redundant.
31 There are two cases in which the extension may be redundant:
33 Case1:
34 The instruction that uses the 64bit operands that are sign
35 extended has a dual mode that works with 32bit operands.
36 For example:
38 int32 a, b;
40 a = .... --> a = ....
41 a = sign extend a -->
42 b = .... --> b = ....
43 b = sign extend a -->
44 -->
45 cmpd a, b --> cmpw a, b //half word compare
47 Case2:
48 The instruction that defines the 64bit operand (which is later sign
49 extended) has a dual mode that defines and sign-extends simultaneously
50 a 32bit operand. For example:
52 int32 a;
54 ld a --> lwa a // load half word and sign extend
55 a = sign extend a -->
56 -->
57 return a --> return a
60 General idea for solution:
61 --------------------------
62 First, try to merge the sign extension with the instruction that
63 defines the source of the extension and (separately) with the
64 instructions that uses the extended result. By doing this, both cases
65 of redundancies (as described above) will be eliminated.
67 Then, use partial redundancy elimination to place the non redundant
68 ones at optimal placements.
71 Implementation by example:
72 --------------------------
73 Note: The instruction stream is not changed till the last phase.
75 Phase 0: Initial code, as currently generated by gcc.
77 def1 def3
78 se1 def2 se3
79 | \ | / |
80 | \ | / |
81 | \ | / |
82 | \ | / |
83 | \ | / |
84 | \|/ |
85 use1 use2 use3
86 use4
87 def1 + se1:
88 set ((reg:SI 10) (..def1rhs..))
89 set ((reg:DI 100) (sign_extend:DI (reg:SI 10)))
91 def2:
92 set ((reg:DI 100) (const_int 7))
94 def3 + se3:
95 set ((reg:SI 20) (..def3rhs..))
96 set ((reg:DI 100) (sign_extend:DI (reg:SI 20)))
98 use1:
99 set ((reg:CC...) (compare:CC (reg:DI 100) (...)))
101 use2, use3, use4:
102 set ((...) (reg:DI 100))
104 Phase 1: Propagate extensions to uses.
106 def1 def3
107 se1 def2 se3
108 | \ | / |
109 | \ | / |
110 | \ | / |
111 | \ | / |
112 | \ | / |
113 | \|/ |
114 se se se
115 use1 use2 use3
117 use4
119 From here, all of the subregs are lowpart !
121 def1, def2, def3: No change.
123 use1:
124 set ((reg:DI 100) (sign_extend:DI ((subreg:SI (reg:DI 100)))))
125 set ((reg:CC...) (compare:CC (reg:DI 100) (...)))
127 use2, use3, use4:
128 set ((reg:DI 100) (sign_extend:DI ((subreg:SI (reg:DI 100)))))
129 set ((...) (reg:DI 100))
132 Phase 2: Merge and eliminate locally redundant extensions.
135 *def1 def2 *def3
136 [se removed] se se3
137 | \ | / |
138 | \ | / |
139 | \ | / |
140 | \ | / |
141 | \ | / |
142 | \|/ |
143 [se removed] se se
144 *use1 use2 use3
145 [se removed]
146 use4
148 The instructions that were changed at this phase are marked with
149 asterisk.
151 *def1: Merge failed.
152 Remove the sign extension instruction, modify def1 and
153 insert a move instruction to assure to correctness of the code.
154 set ((subreg:SI (reg:DI 100)) (..def1rhs..))
155 set ((reg:SI 10) (subreg:SI (reg:DI 100)))
157 def2 + se: There is no need for merge.
158 Def2 is not changed but a sign extension instruction is
159 created.
160 set ((reg:DI 100) (const_int 7))
161 set ((reg:DI 100) (sign_extend:DI ((subreg:SI (reg:DI 100)))))
163 *def3 + se3: Merge succeeded.
164 set ((reg:DI 100) (sign_extend:DI (..def3rhs..)))
165 set ((reg:SI 20) (reg:DI 100))
166 set ((reg:DI 100) (sign_extend:DI (reg:SI 20)))
167 (The extension instruction is the original one).
169 *use1: Merge succeeded. Remove the sign extension instruction.
170 set ((reg:CC...)
171 (compare:CC (subreg:SI (reg:DI 100)) (...)))
173 use2, use3: Merge failed. No change.
175 use4: The extension is locally redundant, therefore it is eliminated
176 at this point.
179 Phase 3: Eliminate globally redundant extensions.
181 Following the LCM output:
183 def1 def2 def3
184 se se3
185 | \ | / |
186 | \ | / |
187 | se | / |
188 | \ | / |
189 | \ | / |
190 | \|/ |
191 [ses removed]
192 use1 use2 use3
193 use4
196 set ((reg:DI 100) (sign_extend:DI ((subreg:SI (reg:DI 100)))))
198 se3:
199 set ((reg:DI 100) (sign_extend:DI (reg:SI 20)))
202 Phase 4: Commit changes to the insn stream.
205 def1 def3 *def1 def2 *def3
206 se1 def2 se3 [se removed] [se removed]
207 | \ | / | | \ | / |
208 | \ | / | ------> | \ | / |
209 | \ | / | ------> | se | / |
210 | \ | / | | \ | / |
211 | \ | / | | \ | / |
212 | \|/ | | \|/ |
213 use1 use2 use3 *use1 use2 use3
214 use4 use4
216 The instructions that were changed during the whole optimization are
217 marked with asterisk.
219 The result:
221 def1 + se1:
222 [ set ((reg:SI 10) (..def1rhs..)) ] - Deleted
223 [ set ((reg:DI 100) (sign_extend:DI (reg:SI 10))) ] - Deleted
224 set ((subreg:SI (reg:DI 100)) (..def3rhs..)) - Inserted
225 set ((reg:SI 10) (subreg:SI (reg:DI 100))) - Inserted
227 def2:
228 set ((reg:DI 100) (const_int 7)) - No change
230 def3 + se3:
231 [ set ((reg:SI 20) (..def3rhs..)) ] - Deleted
232 [ set ((reg:DI 100) (sign_extend:DI (reg:SI 20))) ] - Deleted
233 set ((reg:DI 100) (sign_extend:DI (..def3rhs..))) - Inserted
234 set ((reg:SI 20) (reg:DI 100)) - Inserted
236 use1:
237 [ set ((reg:CC...) (compare:CC (reg:DI 100) (...))) ] - Deleted
238 set ((reg:CC...) - Inserted
239 (compare:CC (subreg:SI (reg:DI 100)) (...)))
241 use2, use3, use4:
242 set ((...) (reg:DI 100)) - No change
244 se: - Inserted
245 set ((reg:DI 100) (sign_extend:DI ((subreg:SI (reg:DI 100)))))
247 Note: Most of the simple move instructions that were inserted will be
248 trivially dead and therefore eliminated.
250 The implementation outline:
251 ---------------------------
252 Some definitions:
253 A web is RELEVANT if at the end of phase 1, his leader's
254 relevancy is {ZERO, SIGN}_EXTENDED_DEF. The source_mode of
255 the web is the source_mode of his leader.
256 A definition is a candidate for the optimization if it is part
257 of a RELEVANT web and his local source_mode is not narrower
258 then the source_mode of its web.
259 A use is a candidate for the optimization if it is part of a
260 RELEVANT web.
261 A simple explicit extension is a single set instruction that
262 extends a register (or a subregister) to a register (or
263 subregister).
264 A complex explicit extension is an explicit extension instruction
265 that is not simple.
266 A def extension is a simple explicit extension that is
267 also a candidate for the optimization. This extension is part
268 of the instruction stream, it is not generated by this
269 optimization.
270 A use extension is a simple explicit extension that is generated
271 and stored for candidate use during this optimization. It is
272 not emitted to the instruction stream till the last phase of
273 the optimization.
274 A reference is an instruction that satisfy at least on of these
275 criteria:
276 - It contains a definition with EXTENDED_DEF relevancy in a RELEVANT web.
277 - It is followed by a def extension.
278 - It contains a candidate use.
280 Phase 1: Propagate extensions to uses.
281 In this phase, we find candidate extensions for the optimization
282 and we generate (but not emit) proper extensions "right before the
283 uses".
285 a. Build a DF object.
286 b. Traverse over all the instructions that contains a definition
287 and set their local relevancy and local source_mode like this:
288 - If the instruction is a simple explicit extension instruction,
289 mark it as {ZERO, SIGN}_EXTENDED_DEF according to the extension
290 type and mark its source_mode to be the mode of the quantity
291 that is been extended.
292 - Otherwise, If the instruction has an implicit extension,
293 which means that its high part is an extension of its low part,
294 or if it is a complicated explicit extension, mark it as
295 EXTENDED_DEF and set its source_mode to be the narrowest
296 mode that is been extended in the instruction.
297 c. Traverse over all the instructions that contains a use and set
298 their local relevancy to RELEVANT_USE (except for few corner
299 cases).
300 d. Produce the web. During union of two entries, update the
301 relevancy and source_mode of the leader. There are two major
302 guide lines for this update:
303 - If one of the entries is NOT_RELEVANT, mark the leader
304 NOT_RELEVANT.
305 - If one is ZERO_EXTENDED_DEF and the other is SIGN_EXTENDED_DEF
306 (or vice versa) mark the leader as NOT_RELEVANT. We don't
307 handle this kind of mixed webs.
308 (For more details about this update process,
309 see see_update_leader_extra_info ()).
310 e. Generate uses extensions according to the relevancy and
311 source_mode of the webs.
313 Phase 2: Merge and eliminate locally redundant extensions.
314 In this phase, we try to merge def extensions and use
315 extensions with their references, and eliminate redundant extensions
316 in the same basic block.
318 Traverse over all the references. Do this in basic block number and
319 luid number forward order.
320 For each reference do:
321 a. Peephole optimization - try to merge it with all its
322 def extensions and use extensions in the following
323 order:
324 - Try to merge only the def extensions, one by one.
325 - Try to merge only the use extensions, one by one.
326 - Try to merge any couple of use extensions simultaneously.
327 - Try to merge any def extension with one or two uses
328 extensions simultaneously.
329 b. Handle each EXTENDED_DEF in it as if it was already merged with
330 an extension.
332 During the merge process we save the following data for each
333 register in each basic block:
334 a. The first instruction that defines the register in the basic
335 block.
336 b. The last instruction that defines the register in the basic
337 block.
338 c. The first extension of this register before the first
339 instruction that defines it in the basic block.
340 c. The first extension of this register after the last
341 instruction that defines it in the basic block.
342 This data will help us eliminate (or more precisely, not generate)
343 locally redundant extensions, and will be useful in the next stage.
345 While merging extensions with their reference there are 4 possible
346 situations:
347 a. A use extension was merged with the reference:
348 Delete the extension instruction and save the merged reference
349 for phase 4. (For details, see see_use_extension_merged ())
350 b. A use extension failed to be merged with the reference:
351 If there is already such an extension in the same basic block
352 and it is not dead at this point, delete the unmerged extension
353 (it is locally redundant), otherwise properly update the above
354 basic block data.
355 (For details, see see_merge_one_use_extension ())
356 c. A def extension was merged with the reference:
357 Mark this extension as a merged_def extension and properly
358 update the above basic block data.
359 (For details, see see_merge_one_def_extension ())
360 d. A def extension failed to be merged with the reference:
361 Replace the definition of the NARROWmode register in the
362 reference with the proper subreg of WIDEmode register and save
363 the result as a merged reference. Also, properly update the
364 the above basic block data.
365 (For details, see see_def_extension_not_merged ())
367 Phase 3: Eliminate globally redundant extensions.
368 In this phase, we set the bit vectors input of the edge based LCM
369 using the recorded data on the registers in each basic block.
370 We also save pointers for all the anticipatable and available
371 occurrences of the relevant extensions. Then we run the LCM.
373 a. Initialize the comp, antloc, kill bit vectors to zero and the
374 transp bit vector to ones.
376 b. Traverse over all the references. Do this in basic block number
377 and luid number forward order. For each reference:
378 - Go over all its use extensions. For each such extension -
379 If it is not dead from the beginning of the basic block SET
380 the antloc bit of the current extension in the current
381 basic block bits.
382 If it is not dead till the end of the basic block SET the
383 comp bit of the current extension in the current basic
384 block bits.
385 - Go over all its def extensions that were merged with
386 it. For each such extension -
387 If it is not dead till the end of the basic block SET the
388 comp bit of the current extension in the current basic
389 block bits.
390 RESET the proper transp and kill bits.
391 - Go over all its def extensions that were not merged
392 with it. For each such extension -
393 RESET the transp bit and SET the kill bit of the current
394 extension in the current basic block bits.
396 c. Run the edge based LCM.
398 Phase 4: Commit changes to the insn stream.
399 This is the only phase that actually changes the instruction stream.
400 Up to this point the optimization could be aborted at any time.
401 Here we insert extensions at their best placements and delete the
402 redundant ones according to the output of the LCM. We also replace
403 some of the instructions according to the second phase merges results.
405 a. Use the pre_delete_map (from the output of the LCM) in order to
406 delete redundant extensions. This will prevent them from been
407 emitted in the first place.
409 b. Insert extensions on edges where needed according to
410 pre_insert_map and edge_list (from the output of the LCM).
412 c. For each reference do-
413 - Emit all the uses extensions that were not deleted until now,
414 right before the reference.
415 - Delete all the merged and unmerged def extensions from
416 the instruction stream.
417 - Replace the reference with the merged one, if exist.
419 The implementation consists of four data structures:
420 - Data structure I
421 Purpose: To handle the relevancy of the uses, definitions and webs.
422 Relevant structures: web_entry (from df.h), see_entry_extra_info.
423 Details: This is a disjoint-set data structure. Most of its functions are
424 implemented in web.c. Each definition and use in the code are
425 elements. A web_entry structure is allocated for each element to
426 hold the element's relevancy and source_mode. The union rules are
427 defined in see_update_leader_extra_info ().
428 - Data structure II
429 Purpose: To store references and their extensions (uses and defs)
430 and to enable traverse over these references according to basic
431 block order.
432 Relevant structure: see_ref_s.
433 Details: This data structure consists of an array of splay trees. One splay
434 tree for each basic block. The splay tree nodes are references and
435 the keys are the luids of the references.
436 A see_ref_s structure is allocated for each reference. It holds the
437 reference itself, its def and uses extensions and later the merged
438 version of the reference.
439 Using this data structure we can traverse over all the references of
440 a basic block and their extensions in forward order.
441 - Data structure III.
442 Purpose: To store local properties of registers for each basic block.
443 This data will later help us build the LCM sbitmap_vectors
444 input.
445 Relevant structure: see_register_properties.
446 Details: This data structure consists of an array of hash tables. One hash
447 for each basic block. The hash node are a register properties
448 and the keys are the numbers of the registers.
449 A see_register_properties structure is allocated for each register
450 that we might be interested in its properties.
451 Using this data structure we can easily find the properties of a
452 register in a specific basic block. This is necessary for locally
453 redundancy elimination and for setting up the LCM input.
454 - Data structure IV.
455 Purpose: To store the extensions that are candidate for PRE and their
456 anticipatable and available occurrences.
457 Relevant structure: see_occr, see_pre_extension_expr.
458 Details: This data structure is a hash tables. Its nodes are the extensions
459 that are candidate for PRE.
460 A see_pre_extension_expr structure is allocated for each candidate
461 extension. It holds a copy of the extension and a linked list of all
462 the anticipatable and available occurrences of it.
463 We use this data structure when we read the output of the LCM. */
465 #include "config.h"
466 #include "system.h"
467 #include "coretypes.h"
468 #include "tm.h"
470 #include "obstack.h"
471 #include "rtl.h"
472 #include "output.h"
473 #include "df.h"
474 #include "insn-config.h"
475 #include "recog.h"
476 #include "expr.h"
477 #include "splay-tree.h"
478 #include "hashtab.h"
479 #include "regs.h"
480 #include "timevar.h"
481 #include "tree-pass.h"
482 #include "dce.h"
484 /* Used to classify defs and uses according to relevancy. */
485 enum entry_type {
486 NOT_RELEVANT,
487 SIGN_EXTENDED_DEF,
488 ZERO_EXTENDED_DEF,
489 EXTENDED_DEF,
490 RELEVANT_USE
493 /* Used to classify extensions in relevant webs. */
494 enum extension_type {
495 DEF_EXTENSION,
496 EXPLICIT_DEF_EXTENSION,
497 IMPLICIT_DEF_EXTENSION,
498 USE_EXTENSION
501 /* Global data structures and flags. */
503 /* This structure will be assigned for each web_entry structure (defined
504 in df.h). It is placed in the extra_info field of a web_entry and holds the
505 relevancy and source mode of the web_entry. */
507 struct see_entry_extra_info
509 /* The relevancy of the ref. */
510 enum entry_type relevancy;
511 /* The relevancy of the ref.
512 This field is updated only once - when this structure is created. */
513 enum entry_type local_relevancy;
514 /* The source register mode. */
515 enum machine_mode source_mode;
516 /* This field is used only if the relevancy is ZERO/SIGN_EXTENDED_DEF.
517 It is updated only once when this structure is created. */
518 enum machine_mode local_source_mode;
519 /* This field is used only if the relevancy is EXTENDED_DEF.
520 It holds the narrowest mode that is sign extended. */
521 enum machine_mode source_mode_signed;
522 /* This field is used only if the relevancy is EXTENDED_DEF.
523 It holds the narrowest mode that is zero extended. */
524 enum machine_mode source_mode_unsigned;
527 /* There is one such structure for every reference. It stores the reference
528 itself as well as its extensions (uses and definitions).
529 Used as the value in splay_tree see_bb_splay_ar[]. */
530 struct see_ref_s
532 /* The luid of the insn. */
533 unsigned int luid;
534 /* The insn of the ref. */
535 rtx insn;
536 /* The merged insn that was formed from the reference's insn and extensions.
537 If all merges failed, it remains NULL. */
538 rtx merged_insn;
539 /* The def extensions of the reference that were not merged with
540 it. */
541 htab_t unmerged_def_se_hash;
542 /* The def extensions of the reference that were merged with
543 it. Implicit extensions of the reference will be stored here too. */
544 htab_t merged_def_se_hash;
545 /* The uses extensions of reference. */
546 htab_t use_se_hash;
549 /* There is one such structure for every relevant extended register in a
550 specific basic block. This data will help us build the LCM sbitmap_vectors
551 input. */
552 struct see_register_properties
554 /* The register number. */
555 unsigned int regno;
556 /* The last luid of the reference that defines this register in this basic
557 block. */
558 int last_def;
559 /* The luid of the reference that has the first extension of this register
560 that appears before any definition in this basic block. */
561 int first_se_before_any_def;
562 /* The luid of the reference that has the first extension of this register
563 that appears after the last definition in this basic block. */
564 int first_se_after_last_def;
567 /* Occurrence of an expression.
568 There must be at most one available occurrence and at most one anticipatable
569 occurrence per basic block. */
570 struct see_occr
572 /* Next occurrence of this expression. */
573 struct see_occr *next;
574 /* The insn that computes the expression. */
575 rtx insn;
576 int block_num;
579 /* There is one such structure for every relevant extension expression.
580 It holds a copy of this extension instruction as well as a linked lists of
581 pointers to all the antic and avail occurrences of it. */
582 struct see_pre_extension_expr
584 /* A copy of the extension instruction. */
585 rtx se_insn;
586 /* Index in the available expression bitmaps. */
587 int bitmap_index;
588 /* List of anticipatable occurrences in basic blocks in the function.
589 An "anticipatable occurrence" is the first occurrence in the basic block,
590 the operands are not modified in the basic block prior to the occurrence
591 and the output is not used between the start of the block and the
592 occurrence. */
593 struct see_occr *antic_occr;
594 /* List of available occurrence in basic blocks in the function.
595 An "available occurrence" is the last occurrence in the basic block and
596 the operands are not modified by following statements in the basic block
597 [including this insn]. */
598 struct see_occr *avail_occr;
601 /* Helper structure for the note_uses and see_replace_src functions. */
602 struct see_replace_data
604 rtx from;
605 rtx to;
608 /* Helper structure for the note_uses and see_mentioned_reg functions. */
609 struct see_mentioned_reg_data
611 rtx reg;
612 bool mentioned;
615 /* An array of web_entries. The i'th definition in the df object is associated
616 with def_entry[i] */
617 static struct web_entry *def_entry = NULL;
618 /* An array of web_entries. The i'th use in the df object is associated with
619 use_entry[i] */
620 static struct web_entry *use_entry = NULL;
621 /* Array of splay_trees.
622 see_bb_splay_ar[i] refers to the splay tree of the i'th basic block.
623 The splay tree will hold see_ref_s structures. The key is the luid
624 of the insn. This way we can traverse over the references of each basic
625 block in forward or backward order. */
626 static splay_tree *see_bb_splay_ar = NULL;
627 /* Array of hashes.
628 see_bb_hash_ar[i] refers to the hash of the i'th basic block.
629 The hash will hold see_register_properties structure. The key is regno. */
630 static htab_t *see_bb_hash_ar = NULL;
631 /* Hash table that holds a copy of all the extensions. The key is the right
632 hand side of the se_insn field. */
633 static htab_t see_pre_extension_hash = NULL;
635 /* Local LCM properties of expressions. */
636 /* Nonzero for expressions that are transparent in the block. */
637 static sbitmap *transp = NULL;
638 /* Nonzero for expressions that are computed (available) in the block. */
639 static sbitmap *comp = NULL;
640 /* Nonzero for expressions that are locally anticipatable in the block. */
641 static sbitmap *antloc = NULL;
642 /* Nonzero for expressions that are locally killed in the block. */
643 static sbitmap *ae_kill = NULL;
644 /* Nonzero for expressions which should be inserted on a specific edge. */
645 static sbitmap *pre_insert_map = NULL;
646 /* Nonzero for expressions which should be deleted in a specific block. */
647 static sbitmap *pre_delete_map = NULL;
648 /* Contains the edge_list returned by pre_edge_lcm. */
649 static struct edge_list *edge_list = NULL;
650 /* Records the last basic block at the beginning of the optimization. */
651 static int last_bb;
652 /* Records the number of uses at the beginning of the optimization. */
653 static unsigned int uses_num;
654 /* Records the number of definitions at the beginning of the optimization. */
655 static unsigned int defs_num;
657 #define ENTRY_EI(ENTRY) ((struct see_entry_extra_info *) (ENTRY)->extra_info)
659 /* Functions implementation. */
661 /* Verifies that EXTENSION's pattern is this:
663 set (reg/subreg reg1) (sign/zero_extend:WIDEmode (reg/subreg reg2))
665 If it doesn't have the expected pattern return NULL.
666 Otherwise, if RETURN_DEST_REG is set, return reg1 else return reg2. */
668 static rtx
669 see_get_extension_reg (rtx extension, bool return_dest_reg)
671 rtx set, rhs, lhs;
672 rtx reg1 = NULL;
673 rtx reg2 = NULL;
675 /* Parallel pattern for extension not supported for the moment. */
676 if (GET_CODE (PATTERN (extension)) == PARALLEL)
677 return NULL;
679 set = single_set (extension);
680 if (!set)
681 return NULL;
682 lhs = SET_DEST (set);
683 rhs = SET_SRC (set);
685 if (REG_P (lhs))
686 reg1 = lhs;
687 else if (REG_P (SUBREG_REG (lhs)))
688 reg1 = SUBREG_REG (lhs);
689 else
690 return NULL;
692 if (GET_CODE (rhs) != SIGN_EXTEND && GET_CODE (rhs) != ZERO_EXTEND)
693 return NULL;
695 rhs = XEXP (rhs, 0);
696 if (REG_P (rhs))
697 reg2 = rhs;
698 else if (REG_P (SUBREG_REG (rhs)))
699 reg2 = SUBREG_REG (rhs);
700 else
701 return NULL;
703 if (return_dest_reg)
704 return reg1;
705 return reg2;
708 /* Verifies that EXTENSION's pattern is this:
710 set (reg/subreg reg1) (sign/zero_extend: (...expr...)
712 If it doesn't have the expected pattern return UNKNOWN.
713 Otherwise, set SOURCE_MODE to be the mode of the extended expr and return
714 the rtx code of the extension. */
716 static enum rtx_code
717 see_get_extension_data (rtx extension, enum machine_mode *source_mode)
719 rtx rhs, lhs, set;
721 if (!extension || !INSN_P (extension))
722 return UNKNOWN;
724 /* Parallel pattern for extension not supported for the moment. */
725 if (GET_CODE (PATTERN (extension)) == PARALLEL)
726 return NOT_RELEVANT;
728 set = single_set (extension);
729 if (!set)
730 return NOT_RELEVANT;
731 rhs = SET_SRC (set);
732 lhs = SET_DEST (set);
734 /* Don't handle extensions to something other then register or
735 subregister. */
736 if (!REG_P (lhs) && !SUBREG_REG (lhs))
737 return UNKNOWN;
739 if (GET_CODE (rhs) != SIGN_EXTEND && GET_CODE (rhs) != ZERO_EXTEND)
740 return UNKNOWN;
742 if (!REG_P (XEXP (rhs, 0))
743 && !(GET_CODE (XEXP (rhs, 0)) == SUBREG
744 && REG_P (SUBREG_REG (XEXP (rhs, 0)))))
745 return UNKNOWN;
747 *source_mode = GET_MODE (XEXP (rhs, 0));
749 if (GET_CODE (rhs) == SIGN_EXTEND)
750 return SIGN_EXTEND;
751 return ZERO_EXTEND;
755 /* Generate instruction with the pattern:
756 set ((reg r) (sign/zero_extend (subreg:mode (reg r))))
757 (the register r on both sides of the set is the same register).
758 And recognize it.
759 If the recognition failed, this is very bad, return NULL (This will abort
760 the entire optimization).
761 Otherwise, return the generated instruction. */
763 static rtx
764 see_gen_normalized_extension (rtx reg, enum rtx_code extension_code,
765 enum machine_mode mode)
767 rtx subreg, insn;
768 rtx extension = NULL;
770 if (!reg
771 || !REG_P (reg)
772 || (extension_code != SIGN_EXTEND && extension_code != ZERO_EXTEND))
773 return NULL;
775 subreg = gen_lowpart_SUBREG (mode, reg);
776 if (extension_code == SIGN_EXTEND)
777 extension = gen_rtx_SIGN_EXTEND (GET_MODE (reg), subreg);
778 else
779 extension = gen_rtx_ZERO_EXTEND (GET_MODE (reg), subreg);
781 start_sequence ();
782 emit_insn (gen_rtx_SET (VOIDmode, reg, extension));
783 insn = get_insns ();
784 end_sequence ();
786 if (insn_invalid_p (insn))
787 /* Recognition failed, this is very bad for this optimization.
788 Abort the optimization. */
789 return NULL;
790 return insn;
793 /* Hashes and splay_trees related functions implementation. */
795 /* Helper functions for the pre_extension hash.
796 This kind of hash will hold see_pre_extension_expr structures.
798 The key is the right hand side of the se_insn field.
799 Note that the se_insn is an expression that looks like:
801 set ((reg:WIDEmode r1) (sign_extend:WIDEmode
802 (subreg:NARROWmode (reg:WIDEmode r2)))) */
804 /* Return TRUE if P1 has the same value in its rhs as P2.
805 Otherwise, return FALSE.
806 P1 and P2 are see_pre_extension_expr structures. */
808 static int
809 eq_descriptor_pre_extension (const void *p1, const void *p2)
811 const struct see_pre_extension_expr *extension1 = p1;
812 const struct see_pre_extension_expr *extension2 = p2;
813 rtx set1 = single_set (extension1->se_insn);
814 rtx set2 = single_set (extension2->se_insn);
815 rtx rhs1, rhs2;
817 gcc_assert (set1 && set2);
818 rhs1 = SET_SRC (set1);
819 rhs2 = SET_SRC (set2);
821 return rtx_equal_p (rhs1, rhs2);
825 /* P is a see_pre_extension_expr struct, use the RHS of the se_insn field.
826 Note that the RHS is an expression that looks like this:
827 (sign_extend:WIDEmode (subreg:NARROWmode (reg:WIDEmode r))) */
829 static hashval_t
830 hash_descriptor_pre_extension (const void *p)
832 const struct see_pre_extension_expr *extension = p;
833 rtx set = single_set (extension->se_insn);
834 rtx rhs;
836 gcc_assert (set);
837 rhs = SET_SRC (set);
839 return hash_rtx (rhs, GET_MODE (rhs), 0, NULL, 0);
843 /* Free the allocated memory of the current see_pre_extension_expr struct.
845 It frees the two linked list of the occurrences structures. */
847 static void
848 hash_del_pre_extension (void *p)
850 struct see_pre_extension_expr *extension = p;
851 struct see_occr *curr_occr = extension->antic_occr;
852 struct see_occr *next_occr = NULL;
854 /* Free the linked list of the anticipatable occurrences. */
855 while (curr_occr)
857 next_occr = curr_occr->next;
858 free (curr_occr);
859 curr_occr = next_occr;
862 /* Free the linked list of the available occurrences. */
863 curr_occr = extension->avail_occr;
864 while (curr_occr)
866 next_occr = curr_occr->next;
867 free (curr_occr);
868 curr_occr = next_occr;
871 /* Free the see_pre_extension_expr structure itself. */
872 free (extension);
876 /* Helper functions for the register_properties hash.
877 This kind of hash will hold see_register_properties structures.
879 The value of the key is the regno field of the structure. */
881 /* Return TRUE if P1 has the same value in the regno field as P2.
882 Otherwise, return FALSE.
883 Where P1 and P2 are see_register_properties structures. */
885 static int
886 eq_descriptor_properties (const void *p1, const void *p2)
888 const struct see_register_properties *curr_prop1 = p1;
889 const struct see_register_properties *curr_prop2 = p2;
891 return curr_prop1->regno == curr_prop2->regno;
895 /* P is a see_register_properties struct, use the register number in the
896 regno field. */
898 static hashval_t
899 hash_descriptor_properties (const void *p)
901 const struct see_register_properties *curr_prop = p;
902 return curr_prop->regno;
906 /* Free the allocated memory of the current see_register_properties struct. */
907 static void
908 hash_del_properties (void *p)
910 struct see_register_properties *curr_prop = p;
911 free (curr_prop);
915 /* Helper functions for an extension hash.
916 This kind of hash will hold insns that look like:
918 set ((reg:WIDEmode r1) (sign_extend:WIDEmode
919 (subreg:NARROWmode (reg:WIDEmode r2))))
921 set ((reg:WIDEmode r1) (sign_extend:WIDEmode (reg:NARROWmode r2)))
923 The value of the key is (REGNO (reg:WIDEmode r1))
924 It is possible to search this hash in two ways:
925 1. By a register rtx. The Value that is been compared to the keys is the
926 REGNO of it.
927 2. By an insn with the above pattern. The Value that is been compared to
928 the keys is the REGNO of the reg on the lhs. */
930 /* Return TRUE if P1 has the same value as P2. Otherwise, return FALSE.
931 Where P1 is an insn and P2 is an insn or a register. */
933 static int
934 eq_descriptor_extension (const void *p1, const void *p2)
936 const rtx insn = (rtx) p1;
937 const rtx element = (rtx) p2;
938 rtx set1 = single_set (insn);
939 rtx dest_reg1;
940 rtx set2 = NULL;
941 rtx dest_reg2 = NULL;
943 gcc_assert (set1 && element && (REG_P (element) || INSN_P (element)));
945 dest_reg1 = SET_DEST (set1);
947 if (INSN_P (element))
949 set2 = single_set (element);
950 dest_reg2 = SET_DEST (set2);
952 else
953 dest_reg2 = element;
955 return REGNO (dest_reg1) == REGNO (dest_reg2);
959 /* If P is an insn, use the register number of its lhs
960 otherwise, P is a register, use its number. */
962 static hashval_t
963 hash_descriptor_extension (const void *p)
965 const rtx r = (rtx) p;
966 rtx set, lhs;
968 if (r && REG_P (r))
969 return REGNO (r);
971 gcc_assert (r && INSN_P (r));
972 set = single_set (r);
973 gcc_assert (set);
974 lhs = SET_DEST (set);
975 return REGNO (lhs);
979 /* Helper function for a see_bb_splay_ar[i] splay tree.
980 It frees all the allocated memory of a struct see_ref_s pointer.
982 VALUE is the value of a splay tree node. */
984 static void
985 see_free_ref_s (splay_tree_value value)
987 struct see_ref_s *ref_s = (struct see_ref_s *)value;
989 if (ref_s->unmerged_def_se_hash)
990 htab_delete (ref_s->unmerged_def_se_hash);
991 if (ref_s->merged_def_se_hash)
992 htab_delete (ref_s->merged_def_se_hash);
993 if (ref_s->use_se_hash)
994 htab_delete (ref_s->use_se_hash);
995 free (ref_s);
999 /* Rest of the implementation. */
1001 /* Search the extension hash for a suitable entry for EXTENSION.
1002 TYPE is the type of EXTENSION (USE_EXTENSION or DEF_EXTENSION).
1004 If TYPE is DEF_EXTENSION we need to normalize EXTENSION before searching the
1005 extension hash.
1007 If a suitable entry was found, return the slot. Otherwise, store EXTENSION
1008 in the hash and return NULL. */
1010 static struct see_pre_extension_expr *
1011 see_seek_pre_extension_expr (rtx extension, enum extension_type type)
1013 struct see_pre_extension_expr **slot_pre_exp, temp_pre_exp;
1014 rtx dest_extension_reg = see_get_extension_reg (extension, 1);
1015 enum rtx_code extension_code;
1016 enum machine_mode source_extension_mode;
1018 if (type == DEF_EXTENSION)
1020 extension_code = see_get_extension_data (extension,
1021 &source_extension_mode);
1022 gcc_assert (extension_code != UNKNOWN);
1023 extension =
1024 see_gen_normalized_extension (dest_extension_reg, extension_code,
1025 source_extension_mode);
1027 temp_pre_exp.se_insn = extension;
1028 slot_pre_exp =
1029 (struct see_pre_extension_expr **) htab_find_slot (see_pre_extension_hash,
1030 &temp_pre_exp, INSERT);
1031 if (*slot_pre_exp == NULL)
1032 /* This is the first time this extension instruction is encountered. Store
1033 it in the hash. */
1035 (*slot_pre_exp) = xmalloc (sizeof (struct see_pre_extension_expr));
1036 (*slot_pre_exp)->se_insn = extension;
1037 (*slot_pre_exp)->bitmap_index =
1038 (htab_elements (see_pre_extension_hash) - 1);
1039 (*slot_pre_exp)->antic_occr = NULL;
1040 (*slot_pre_exp)->avail_occr = NULL;
1041 return NULL;
1043 return *slot_pre_exp;
1047 /* This function defines how to update the extra_info of the web_entry.
1049 FIRST is the pointer of the extra_info of the first web_entry.
1050 SECOND is the pointer of the extra_info of the second web_entry.
1051 The first web_entry will be the predecessor (leader) of the second web_entry
1052 after the union.
1054 Return true if FIRST and SECOND points to the same web entry structure and
1055 nothing is done. Otherwise, return false. */
1057 static bool
1058 see_update_leader_extra_info (struct web_entry *first, struct web_entry *second)
1060 struct see_entry_extra_info *first_ei, *second_ei;
1062 first = unionfind_root (first);
1063 second = unionfind_root (second);
1065 if (unionfind_union (first, second))
1066 return true;
1068 first_ei = (struct see_entry_extra_info *) first->extra_info;
1069 second_ei = (struct see_entry_extra_info *) second->extra_info;
1071 gcc_assert (first_ei && second_ei);
1073 if (second_ei->relevancy == NOT_RELEVANT)
1075 first_ei->relevancy = NOT_RELEVANT;
1076 return false;
1078 switch (first_ei->relevancy)
1080 case NOT_RELEVANT:
1081 break;
1082 case RELEVANT_USE:
1083 switch (second_ei->relevancy)
1085 case RELEVANT_USE:
1086 break;
1087 case EXTENDED_DEF:
1088 first_ei->relevancy = second_ei->relevancy;
1089 first_ei->source_mode_signed = second_ei->source_mode_signed;
1090 first_ei->source_mode_unsigned = second_ei->source_mode_unsigned;
1091 break;
1092 case SIGN_EXTENDED_DEF:
1093 case ZERO_EXTENDED_DEF:
1094 first_ei->relevancy = second_ei->relevancy;
1095 first_ei->source_mode = second_ei->source_mode;
1096 break;
1097 default:
1098 gcc_unreachable ();
1100 break;
1101 case SIGN_EXTENDED_DEF:
1102 switch (second_ei->relevancy)
1104 case SIGN_EXTENDED_DEF:
1105 /* The mode of the root should be the wider one in this case. */
1106 first_ei->source_mode =
1107 (first_ei->source_mode > second_ei->source_mode) ?
1108 first_ei->source_mode : second_ei->source_mode;
1109 break;
1110 case RELEVANT_USE:
1111 break;
1112 case ZERO_EXTENDED_DEF:
1113 /* Don't mix webs with zero extend and sign extend. */
1114 first_ei->relevancy = NOT_RELEVANT;
1115 break;
1116 case EXTENDED_DEF:
1117 if (second_ei->source_mode_signed == MAX_MACHINE_MODE)
1118 first_ei->relevancy = NOT_RELEVANT;
1119 else
1120 /* The mode of the root should be the wider one in this case. */
1121 first_ei->source_mode =
1122 (first_ei->source_mode > second_ei->source_mode_signed) ?
1123 first_ei->source_mode : second_ei->source_mode_signed;
1124 break;
1125 default:
1126 gcc_unreachable ();
1128 break;
1129 /* This case is similar to the previous one, with little changes. */
1130 case ZERO_EXTENDED_DEF:
1131 switch (second_ei->relevancy)
1133 case SIGN_EXTENDED_DEF:
1134 /* Don't mix webs with zero extend and sign extend. */
1135 first_ei->relevancy = NOT_RELEVANT;
1136 break;
1137 case RELEVANT_USE:
1138 break;
1139 case ZERO_EXTENDED_DEF:
1140 /* The mode of the root should be the wider one in this case. */
1141 first_ei->source_mode =
1142 (first_ei->source_mode > second_ei->source_mode) ?
1143 first_ei->source_mode : second_ei->source_mode;
1144 break;
1145 case EXTENDED_DEF:
1146 if (second_ei->source_mode_unsigned == MAX_MACHINE_MODE)
1147 first_ei->relevancy = NOT_RELEVANT;
1148 else
1149 /* The mode of the root should be the wider one in this case. */
1150 first_ei->source_mode =
1151 (first_ei->source_mode > second_ei->source_mode_unsigned) ?
1152 first_ei->source_mode : second_ei->source_mode_unsigned;
1153 break;
1154 default:
1155 gcc_unreachable ();
1157 break;
1158 case EXTENDED_DEF:
1159 if (first_ei->source_mode_signed != MAX_MACHINE_MODE
1160 && first_ei->source_mode_unsigned != MAX_MACHINE_MODE)
1162 switch (second_ei->relevancy)
1164 case SIGN_EXTENDED_DEF:
1165 first_ei->relevancy = SIGN_EXTENDED_DEF;
1166 first_ei->source_mode =
1167 (first_ei->source_mode_signed > second_ei->source_mode) ?
1168 first_ei->source_mode_signed : second_ei->source_mode;
1169 break;
1170 case RELEVANT_USE:
1171 break;
1172 case ZERO_EXTENDED_DEF:
1173 first_ei->relevancy = ZERO_EXTENDED_DEF;
1174 first_ei->source_mode =
1175 (first_ei->source_mode_unsigned > second_ei->source_mode) ?
1176 first_ei->source_mode_unsigned : second_ei->source_mode;
1177 break;
1178 case EXTENDED_DEF:
1179 if (second_ei->source_mode_unsigned != MAX_MACHINE_MODE)
1180 first_ei->source_mode_unsigned =
1181 (first_ei->source_mode_unsigned >
1182 second_ei->source_mode_unsigned) ?
1183 first_ei->source_mode_unsigned :
1184 second_ei->source_mode_unsigned;
1185 if (second_ei->source_mode_signed != MAX_MACHINE_MODE)
1186 first_ei->source_mode_signed =
1187 (first_ei->source_mode_signed >
1188 second_ei->source_mode_signed) ?
1189 first_ei->source_mode_signed : second_ei->source_mode_signed;
1190 break;
1191 default:
1192 gcc_unreachable ();
1195 else if (first_ei->source_mode_signed == MAX_MACHINE_MODE)
1197 gcc_assert (first_ei->source_mode_unsigned != MAX_MACHINE_MODE);
1198 switch (second_ei->relevancy)
1200 case SIGN_EXTENDED_DEF:
1201 first_ei->relevancy = NOT_RELEVANT;
1202 break;
1203 case RELEVANT_USE:
1204 break;
1205 case ZERO_EXTENDED_DEF:
1206 first_ei->relevancy = ZERO_EXTENDED_DEF;
1207 first_ei->source_mode =
1208 (first_ei->source_mode_unsigned > second_ei->source_mode) ?
1209 first_ei->source_mode_unsigned : second_ei->source_mode;
1210 break;
1211 case EXTENDED_DEF:
1212 if (second_ei->source_mode_unsigned == MAX_MACHINE_MODE)
1213 first_ei->relevancy = NOT_RELEVANT;
1214 else
1215 first_ei->source_mode_unsigned =
1216 (first_ei->source_mode_unsigned >
1217 second_ei->source_mode_unsigned) ?
1218 first_ei->source_mode_unsigned :
1219 second_ei->source_mode_unsigned;
1220 break;
1221 default:
1222 gcc_unreachable ();
1225 else
1227 gcc_assert (first_ei->source_mode_unsigned == MAX_MACHINE_MODE);
1228 gcc_assert (first_ei->source_mode_signed != MAX_MACHINE_MODE);
1229 switch (second_ei->relevancy)
1231 case SIGN_EXTENDED_DEF:
1232 first_ei->relevancy = SIGN_EXTENDED_DEF;
1233 first_ei->source_mode =
1234 (first_ei->source_mode_signed > second_ei->source_mode) ?
1235 first_ei->source_mode_signed : second_ei->source_mode;
1236 break;
1237 case RELEVANT_USE:
1238 break;
1239 case ZERO_EXTENDED_DEF:
1240 first_ei->relevancy = NOT_RELEVANT;
1241 break;
1242 case EXTENDED_DEF:
1243 if (second_ei->source_mode_signed == MAX_MACHINE_MODE)
1244 first_ei->relevancy = NOT_RELEVANT;
1245 else
1246 first_ei->source_mode_signed =
1247 (first_ei->source_mode_signed >
1248 second_ei->source_mode_signed) ?
1249 first_ei->source_mode_signed : second_ei->source_mode_signed;
1250 break;
1251 default:
1252 gcc_unreachable ();
1255 break;
1256 default:
1257 /* Unknown patern type. */
1258 gcc_unreachable ();
1261 return false;
1265 /* Free global data structures. */
1267 static void
1268 see_free_data_structures (void)
1270 int i;
1271 unsigned int j;
1273 /* Free the bitmap vectors. */
1274 if (transp)
1276 sbitmap_vector_free (transp);
1277 transp = NULL;
1278 sbitmap_vector_free (comp);
1279 comp = NULL;
1280 sbitmap_vector_free (antloc);
1281 antloc = NULL;
1282 sbitmap_vector_free (ae_kill);
1283 ae_kill = NULL;
1285 if (pre_insert_map)
1287 sbitmap_vector_free (pre_insert_map);
1288 pre_insert_map = NULL;
1290 if (pre_delete_map)
1292 sbitmap_vector_free (pre_delete_map);
1293 pre_delete_map = NULL;
1295 if (edge_list)
1297 free_edge_list (edge_list);
1298 edge_list = NULL;
1301 /* Free the extension hash. */
1302 htab_delete (see_pre_extension_hash);
1304 /* Free the array of hashes. */
1305 for (i = 0; i < last_bb; i++)
1306 if (see_bb_hash_ar[i])
1307 htab_delete (see_bb_hash_ar[i]);
1308 free (see_bb_hash_ar);
1310 /* Free the array of splay trees. */
1311 for (i = 0; i < last_bb; i++)
1312 if (see_bb_splay_ar[i])
1313 splay_tree_delete (see_bb_splay_ar[i]);
1314 free (see_bb_splay_ar);
1316 /* Free the array of web entries and their extra info field. */
1317 for (j = 0; j < defs_num; j++)
1318 free (def_entry[j].extra_info);
1319 free (def_entry);
1320 for (j = 0; j < uses_num; j++)
1321 free (use_entry[j].extra_info);
1322 free (use_entry);
1326 /* Initialize global data structures and variables. */
1328 static void
1329 see_initialize_data_structures (void)
1331 unsigned int max_reg = max_reg_num ();
1332 unsigned int i;
1334 /* Build the df object. */
1335 df_set_flags (DF_EQ_NOTES);
1336 df_chain_add_problem (DF_DU_CHAIN + DF_UD_CHAIN);
1337 df_analyze ();
1338 df_set_flags (DF_DEFER_INSN_RESCAN);
1340 if (dump_file)
1341 df_dump (dump_file);
1343 /* Record the last basic block at the beginning of the optimization. */
1344 last_bb = last_basic_block;
1346 /* Record the number of uses and defs at the beginning of the optimization. */
1347 uses_num = 0;
1348 defs_num = 0;
1349 for (i = 0; i < max_reg; i++)
1351 uses_num += DF_REG_USE_COUNT (i) + DF_REG_EQ_USE_COUNT (i);
1352 defs_num += DF_REG_DEF_COUNT (i);
1355 /* Allocate web entries array for the union-find data structure. */
1356 def_entry = xcalloc (defs_num, sizeof (struct web_entry));
1357 use_entry = xcalloc (uses_num, sizeof (struct web_entry));
1359 /* Allocate an array of splay trees.
1360 One splay tree for each basic block. */
1361 see_bb_splay_ar = xcalloc (last_bb, sizeof (splay_tree));
1363 /* Allocate an array of hashes.
1364 One hash for each basic block. */
1365 see_bb_hash_ar = xcalloc (last_bb, sizeof (htab_t));
1367 /* Allocate the extension hash. It will hold the extensions that we want
1368 to PRE. */
1369 see_pre_extension_hash = htab_create (10,
1370 hash_descriptor_pre_extension,
1371 eq_descriptor_pre_extension,
1372 hash_del_pre_extension);
1376 /* Function called by note_uses to check if a register is used in a
1377 subexpressions.
1379 X is a pointer to the subexpression and DATA is a pointer to a
1380 see_mentioned_reg_data structure that contains the register to look for and
1381 a place for the result. */
1383 static void
1384 see_mentioned_reg (rtx *x, void *data)
1386 struct see_mentioned_reg_data *d
1387 = (struct see_mentioned_reg_data *) data;
1389 if (reg_mentioned_p (d->reg, *x))
1390 d->mentioned = true;
1394 /* We don't want to merge a use extension with a reference if the extended
1395 register is used only in a simple move instruction. We also don't want to
1396 merge a def extension with a reference if the source register of the
1397 extension is defined only in a simple move in the reference.
1399 REF is the reference instruction.
1400 EXTENSION is the use extension or def extension instruction.
1401 TYPE is the type of the extension (use or def).
1403 Return true if the reference is complicated enough, so we would like to merge
1404 it with the extension. Otherwise, return false. */
1406 static bool
1407 see_want_to_be_merged_with_extension (rtx ref, rtx extension,
1408 enum extension_type type)
1410 rtx pat;
1411 rtx dest_extension_reg = see_get_extension_reg (extension, 1);
1412 rtx source_extension_reg = see_get_extension_reg (extension, 0);
1413 enum rtx_code code;
1414 struct see_mentioned_reg_data d;
1415 int i;
1417 pat = PATTERN (ref);
1418 code = GET_CODE (pat);
1420 if (code == PARALLEL)
1422 for (i = 0; i < XVECLEN (pat, 0); i++)
1424 rtx sub = XVECEXP (pat, 0, i);
1426 if (GET_CODE (sub) == SET
1427 && (REG_P (SET_DEST (sub))
1428 || (GET_CODE (SET_DEST (sub)) == SUBREG
1429 && REG_P (SUBREG_REG (SET_DEST (sub)))))
1430 && (REG_P (SET_SRC (sub))
1431 || (GET_CODE (SET_SRC (sub)) == SUBREG
1432 && REG_P (SUBREG_REG (SET_SRC (sub))))))
1434 /* This is a simple move SET. */
1435 if (type == DEF_EXTENSION
1436 && reg_mentioned_p (source_extension_reg, SET_DEST (sub)))
1437 return false;
1439 else
1441 /* This is not a simple move SET.
1442 Check if it uses the source of the extension. */
1443 if (type == USE_EXTENSION)
1445 d.reg = dest_extension_reg;
1446 d.mentioned = false;
1447 note_uses (&sub, see_mentioned_reg, &d);
1448 if (d.mentioned)
1449 return true;
1453 if (type == USE_EXTENSION)
1454 return false;
1456 else
1458 if (code == SET
1459 && (REG_P (SET_DEST (pat))
1460 || (GET_CODE (SET_DEST (pat)) == SUBREG
1461 && REG_P (SUBREG_REG (SET_DEST (pat)))))
1462 && (REG_P (SET_SRC (pat))
1463 || (GET_CODE (SET_SRC (pat)) == SUBREG
1464 && REG_P (SUBREG_REG (SET_SRC (pat))))))
1465 /* This is a simple move SET. */
1466 return false;
1469 return true;
1473 /* Print the register number of the current see_register_properties
1474 structure.
1476 This is a subroutine of see_main called via htab_traverse.
1477 SLOT contains the current see_register_properties structure pointer. */
1479 static int
1480 see_print_register_properties (void **slot, void *b ATTRIBUTE_UNUSED)
1482 struct see_register_properties *prop = *slot;
1484 gcc_assert (prop);
1485 fprintf (dump_file, "Property found for register %d\n", prop->regno);
1486 return 1;
1490 /* Print the extension instruction of the current see_register_properties
1491 structure.
1493 This is a subroutine of see_main called via htab_traverse.
1494 SLOT contains the current see_pre_extension_expr structure pointer. */
1496 static int
1497 see_print_pre_extension_expr (void **slot, void *b ATTRIBUTE_UNUSED)
1499 struct see_pre_extension_expr *pre_extension = *slot;
1501 gcc_assert (pre_extension
1502 && pre_extension->se_insn
1503 && INSN_P (pre_extension->se_insn));
1505 fprintf (dump_file, "Index %d for:\n", pre_extension->bitmap_index);
1506 print_rtl_single (dump_file, pre_extension->se_insn);
1508 return 1;
1512 /* Phase 4 implementation: Commit changes to the insn stream. */
1514 /* Delete the merged def extension.
1516 This is a subroutine of see_commit_ref_changes called via htab_traverse.
1518 SLOT contains the current def extension instruction.
1519 B is the see_ref_s structure pointer. */
1521 static int
1522 see_delete_merged_def_extension (void **slot, void *b ATTRIBUTE_UNUSED)
1524 rtx def_se = *slot;
1526 if (dump_file)
1528 fprintf (dump_file, "Deleting merged def extension:\n");
1529 print_rtl_single (dump_file, def_se);
1532 if (INSN_DELETED_P (def_se))
1533 /* This def extension is an implicit one. No need to delete it since
1534 it is not in the insn stream. */
1535 return 1;
1537 delete_insn (def_se);
1538 return 1;
1542 /* Delete the unmerged def extension.
1544 This is a subroutine of see_commit_ref_changes called via htab_traverse.
1546 SLOT contains the current def extension instruction.
1547 B is the see_ref_s structure pointer. */
1549 static int
1550 see_delete_unmerged_def_extension (void **slot, void *b ATTRIBUTE_UNUSED)
1552 rtx def_se = *slot;
1554 if (dump_file)
1556 fprintf (dump_file, "Deleting unmerged def extension:\n");
1557 print_rtl_single (dump_file, def_se);
1560 delete_insn (def_se);
1561 return 1;
1565 /* Emit the non-redundant use extension to the instruction stream.
1567 This is a subroutine of see_commit_ref_changes called via htab_traverse.
1569 SLOT contains the current use extension instruction.
1570 B is the see_ref_s structure pointer. */
1572 static int
1573 see_emit_use_extension (void **slot, void *b)
1575 rtx use_se = *slot;
1576 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b;
1578 if (INSN_DELETED_P (use_se))
1579 /* This use extension was previously removed according to the lcm
1580 output. */
1581 return 1;
1583 if (dump_file)
1585 fprintf (dump_file, "Inserting use extension:\n");
1586 print_rtl_single (dump_file, use_se);
1589 add_insn_before (use_se, curr_ref_s->insn, NULL);
1591 return 1;
1595 /* For each relevant reference:
1596 a. Emit the non-redundant use extensions.
1597 b. Delete the def extensions.
1598 c. Replace the original reference with the merged one (if exists) and add the
1599 move instructions that were generated.
1601 This is a subroutine of see_commit_changes called via splay_tree_foreach.
1603 STN is the current node in the see_bb_splay_ar[i] splay tree. It holds a
1604 see_ref_s structure. */
1606 static int
1607 see_commit_ref_changes (splay_tree_node stn,
1608 void *data ATTRIBUTE_UNUSED)
1610 htab_t use_se_hash = ((struct see_ref_s *) (stn->value))->use_se_hash;
1611 htab_t unmerged_def_se_hash =
1612 ((struct see_ref_s *) (stn->value))->unmerged_def_se_hash;
1613 htab_t merged_def_se_hash =
1614 ((struct see_ref_s *) (stn->value))->merged_def_se_hash;
1615 rtx ref = ((struct see_ref_s *) (stn->value))->insn;
1616 rtx merged_ref = ((struct see_ref_s *) (stn->value))->merged_insn;
1618 /* Emit the non-redundant use extensions. */
1619 if (use_se_hash)
1620 htab_traverse_noresize (use_se_hash, see_emit_use_extension,
1621 (PTR) (stn->value));
1623 /* Delete the def extensions. */
1624 if (unmerged_def_se_hash)
1625 htab_traverse (unmerged_def_se_hash, see_delete_unmerged_def_extension,
1626 (PTR) (stn->value));
1628 if (merged_def_se_hash)
1629 htab_traverse (merged_def_se_hash, see_delete_merged_def_extension,
1630 (PTR) (stn->value));
1632 /* Replace the original reference with the merged one (if exists) and add the
1633 move instructions that were generated. */
1634 if (merged_ref && !INSN_DELETED_P (ref))
1636 if (dump_file)
1638 fprintf (dump_file, "Replacing orig reference:\n");
1639 print_rtl_single (dump_file, ref);
1640 fprintf (dump_file, "With merged reference:\n");
1641 print_rtl_single (dump_file, merged_ref);
1643 emit_insn_after (merged_ref, ref);
1644 delete_insn (ref);
1647 /* Continue to the next reference. */
1648 return 0;
1652 /* Insert partially redundant expressions on edges to make the expressions fully
1653 redundant.
1655 INDEX_MAP is a mapping of an index to an expression.
1656 Return true if an instruction was inserted on an edge.
1657 Otherwise, return false. */
1659 static bool
1660 see_pre_insert_extensions (struct see_pre_extension_expr **index_map)
1662 int num_edges = NUM_EDGES (edge_list);
1663 int set_size = pre_insert_map[0]->size;
1664 size_t pre_extension_num = htab_elements (see_pre_extension_hash);
1666 int did_insert = 0;
1667 int e;
1668 int i;
1669 int j;
1671 for (e = 0; e < num_edges; e++)
1673 int indx;
1674 basic_block bb = INDEX_EDGE_PRED_BB (edge_list, e);
1676 for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS)
1678 SBITMAP_ELT_TYPE insert = pre_insert_map[e]->elms[i];
1680 for (j = indx; insert && j < (int) pre_extension_num;
1681 j++, insert >>= 1)
1682 if (insert & 1)
1684 struct see_pre_extension_expr *expr = index_map[j];
1685 int idx = expr->bitmap_index;
1686 rtx se_insn = NULL;
1687 edge eg = INDEX_EDGE (edge_list, e);
1689 start_sequence ();
1690 emit_insn (PATTERN (expr->se_insn));
1691 se_insn = get_insns ();
1692 end_sequence ();
1694 if (eg->flags & EDGE_ABNORMAL)
1696 rtx new_insn = NULL;
1698 new_insn = insert_insn_end_bb_new (se_insn, bb);
1699 gcc_assert (new_insn && INSN_P (new_insn));
1701 if (dump_file)
1703 fprintf (dump_file,
1704 "PRE: end of bb %d, insn %d, ",
1705 bb->index, INSN_UID (new_insn));
1706 fprintf (dump_file,
1707 "inserting expression %d\n", idx);
1710 else
1712 insert_insn_on_edge (se_insn, eg);
1714 if (dump_file)
1716 fprintf (dump_file, "PRE: edge (%d,%d), ",
1717 bb->index,
1718 INDEX_EDGE_SUCC_BB (edge_list, e)->index);
1719 fprintf (dump_file, "inserting expression %d\n", idx);
1722 did_insert = true;
1726 return did_insert;
1730 /* Since all the redundant extensions must be anticipatable, they must be a use
1731 extensions. Mark them as deleted. This will prevent them from been emitted
1732 in the first place.
1734 This is a subroutine of see_commit_changes called via htab_traverse.
1736 SLOT contains the current see_pre_extension_expr structure pointer. */
1738 static int
1739 see_pre_delete_extension (void **slot, void *b ATTRIBUTE_UNUSED)
1741 struct see_pre_extension_expr *expr = *slot;
1742 struct see_occr *occr;
1743 int indx = expr->bitmap_index;
1745 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
1747 if (TEST_BIT (pre_delete_map[occr->block_num], indx))
1749 /* Mark as deleted. */
1750 INSN_DELETED_P (occr->insn) = 1;
1751 if (dump_file)
1753 fprintf (dump_file,"Redundant extension deleted:\n");
1754 print_rtl_single (dump_file, occr->insn);
1758 return 1;
1762 /* Create the index_map mapping of an index to an expression.
1764 This is a subroutine of see_commit_changes called via htab_traverse.
1766 SLOT contains the current see_pre_extension_expr structure pointer.
1767 B a pointer to see_pre_extension_expr structure pointer. */
1769 static int
1770 see_map_extension (void **slot, void *b)
1772 struct see_pre_extension_expr *expr = *slot;
1773 struct see_pre_extension_expr **index_map =
1774 (struct see_pre_extension_expr **) b;
1776 index_map[expr->bitmap_index] = expr;
1778 return 1;
1782 /* Phase 4 top level function.
1783 In this phase we finally change the instruction stream.
1784 Here we insert extensions at their best placements and delete the
1785 redundant ones according to the output of the LCM. We also replace
1786 some of the instructions according to phase 2 merges results. */
1788 static void
1789 see_commit_changes (void)
1791 struct see_pre_extension_expr **index_map;
1792 size_t pre_extension_num = htab_elements (see_pre_extension_hash);
1793 bool did_insert = false;
1794 int i;
1796 index_map = xcalloc (pre_extension_num,
1797 sizeof (struct see_pre_extension_expr *));
1799 if (dump_file)
1800 fprintf (dump_file,
1801 "* Phase 4: Commit changes to the insn stream. *\n");
1803 /* Produce a mapping of all the pre_extensions. */
1804 htab_traverse (see_pre_extension_hash, see_map_extension, (PTR) index_map);
1806 /* Delete redundant extension. This will prevent them from been emitted in
1807 the first place. */
1808 htab_traverse (see_pre_extension_hash, see_pre_delete_extension, NULL);
1810 /* Insert extensions on edges, according to the LCM result. */
1811 did_insert = see_pre_insert_extensions (index_map);
1813 if (did_insert)
1814 commit_edge_insertions ();
1816 /* Commit the rest of the changes. */
1817 for (i = 0; i < last_bb; i++)
1819 if (see_bb_splay_ar[i])
1821 /* Traverse over all the references in the basic block in forward
1822 order. */
1823 splay_tree_foreach (see_bb_splay_ar[i],
1824 see_commit_ref_changes, NULL);
1828 free (index_map);
1832 /* Phase 3 implementation: Eliminate globally redundant extensions. */
1834 /* Analyze the properties of a merged def extension for the LCM and record avail
1835 occurrences.
1837 This is a subroutine of see_analyze_ref_local_prop called
1838 via htab_traverse.
1840 SLOT contains the current def extension instruction.
1841 B is the see_ref_s structure pointer. */
1843 static int
1844 see_analyze_merged_def_local_prop (void **slot, void *b)
1846 rtx def_se = *slot;
1847 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b;
1848 rtx ref = curr_ref_s->insn;
1849 struct see_pre_extension_expr *extension_expr;
1850 int indx;
1851 int bb_num = BLOCK_NUM (ref);
1852 htab_t curr_bb_hash;
1853 struct see_register_properties *curr_prop, **slot_prop;
1854 struct see_register_properties temp_prop;
1855 rtx dest_extension_reg = see_get_extension_reg (def_se, 1);
1856 struct see_occr *curr_occr = NULL;
1857 struct see_occr *tmp_occr = NULL;
1859 extension_expr = see_seek_pre_extension_expr (def_se, DEF_EXTENSION);
1860 /* The extension_expr must be found. */
1861 gcc_assert (extension_expr);
1863 curr_bb_hash = see_bb_hash_ar[bb_num];
1864 gcc_assert (curr_bb_hash);
1865 temp_prop.regno = REGNO (dest_extension_reg);
1866 slot_prop =
1867 (struct see_register_properties **) htab_find_slot (curr_bb_hash,
1868 &temp_prop, INSERT);
1869 curr_prop = *slot_prop;
1870 gcc_assert (curr_prop);
1872 indx = extension_expr->bitmap_index;
1874 /* Reset the transparency bit. */
1875 RESET_BIT (transp[bb_num], indx);
1876 /* Reset the killed bit. */
1877 RESET_BIT (ae_kill[bb_num], indx);
1879 if (curr_prop->first_se_after_last_def == DF_INSN_LUID (ref))
1881 /* Set the available bit. */
1882 SET_BIT (comp[bb_num], indx);
1883 /* Record the available occurrence. */
1884 curr_occr = xmalloc (sizeof (struct see_occr));
1885 curr_occr->next = NULL;
1886 curr_occr->insn = def_se;
1887 curr_occr->block_num = bb_num;
1888 tmp_occr = extension_expr->avail_occr;
1889 if (!tmp_occr)
1890 extension_expr->avail_occr = curr_occr;
1891 else
1893 while (tmp_occr->next)
1894 tmp_occr = tmp_occr->next;
1895 tmp_occr->next = curr_occr;
1899 return 1;
1903 /* Analyze the properties of a unmerged def extension for the LCM.
1905 This is a subroutine of see_analyze_ref_local_prop called
1906 via htab_traverse.
1908 SLOT contains the current def extension instruction.
1909 B is the see_ref_s structure pointer. */
1911 static int
1912 see_analyze_unmerged_def_local_prop (void **slot, void *b)
1914 rtx def_se = *slot;
1915 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b;
1916 rtx ref = curr_ref_s->insn;
1917 struct see_pre_extension_expr *extension_expr;
1918 int indx;
1919 int bb_num = BLOCK_NUM (ref);
1920 htab_t curr_bb_hash;
1921 struct see_register_properties *curr_prop, **slot_prop;
1922 struct see_register_properties temp_prop;
1923 rtx dest_extension_reg = see_get_extension_reg (def_se, 1);
1925 extension_expr = see_seek_pre_extension_expr (def_se, DEF_EXTENSION);
1926 /* The extension_expr must be found. */
1927 gcc_assert (extension_expr);
1929 curr_bb_hash = see_bb_hash_ar[bb_num];
1930 gcc_assert (curr_bb_hash);
1931 temp_prop.regno = REGNO (dest_extension_reg);
1932 slot_prop =
1933 (struct see_register_properties **) htab_find_slot (curr_bb_hash,
1934 &temp_prop, INSERT);
1935 curr_prop = *slot_prop;
1936 gcc_assert (curr_prop);
1938 indx = extension_expr->bitmap_index;
1940 /* Reset the transparency bit. */
1941 RESET_BIT (transp[bb_num], indx);
1942 /* Set the killed bit. */
1943 SET_BIT (ae_kill[bb_num], indx);
1945 return 1;
1949 /* Analyze the properties of a use extension for the LCM and record anic and
1950 avail occurrences.
1952 This is a subroutine of see_analyze_ref_local_prop called
1953 via htab_traverse.
1955 SLOT contains the current use extension instruction.
1956 B is the see_ref_s structure pointer. */
1958 static int
1959 see_analyze_use_local_prop (void **slot, void *b)
1961 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b;
1962 rtx use_se = *slot;
1963 rtx ref = curr_ref_s->insn;
1964 rtx dest_extension_reg = see_get_extension_reg (use_se, 1);
1965 struct see_pre_extension_expr *extension_expr;
1966 struct see_register_properties *curr_prop, **slot_prop;
1967 struct see_register_properties temp_prop;
1968 struct see_occr *curr_occr = NULL;
1969 struct see_occr *tmp_occr = NULL;
1970 htab_t curr_bb_hash;
1971 int indx;
1972 int bb_num = BLOCK_NUM (ref);
1974 extension_expr = see_seek_pre_extension_expr (use_se, USE_EXTENSION);
1975 /* The extension_expr must be found. */
1976 gcc_assert (extension_expr);
1978 curr_bb_hash = see_bb_hash_ar[bb_num];
1979 gcc_assert (curr_bb_hash);
1980 temp_prop.regno = REGNO (dest_extension_reg);
1981 slot_prop =
1982 (struct see_register_properties **) htab_find_slot (curr_bb_hash,
1983 &temp_prop, INSERT);
1984 curr_prop = *slot_prop;
1985 gcc_assert (curr_prop);
1987 indx = extension_expr->bitmap_index;
1989 if (curr_prop->first_se_before_any_def == DF_INSN_LUID (ref))
1991 /* Set the anticipatable bit. */
1992 SET_BIT (antloc[bb_num], indx);
1993 /* Record the anticipatable occurrence. */
1994 curr_occr = xmalloc (sizeof (struct see_occr));
1995 curr_occr->next = NULL;
1996 curr_occr->insn = use_se;
1997 curr_occr->block_num = bb_num;
1998 tmp_occr = extension_expr->antic_occr;
1999 if (!tmp_occr)
2000 extension_expr->antic_occr = curr_occr;
2001 else
2003 while (tmp_occr->next)
2004 tmp_occr = tmp_occr->next;
2005 tmp_occr->next = curr_occr;
2007 if (curr_prop->last_def < 0)
2009 /* Set the available bit. */
2010 SET_BIT (comp[bb_num], indx);
2011 /* Record the available occurrence. */
2012 curr_occr = xmalloc (sizeof (struct see_occr));
2013 curr_occr->next = NULL;
2014 curr_occr->insn = use_se;
2015 curr_occr->block_num = bb_num;
2016 tmp_occr = extension_expr->avail_occr;
2017 if (!tmp_occr)
2018 extension_expr->avail_occr = curr_occr;
2019 else
2021 while (tmp_occr->next)
2022 tmp_occr = tmp_occr->next;
2023 tmp_occr->next = curr_occr;
2026 /* Note: there is no need to reset the killed bit since it must be zero at
2027 this point. */
2029 else if (curr_prop->first_se_after_last_def == DF_INSN_LUID (ref))
2031 /* Set the available bit. */
2032 SET_BIT (comp[bb_num], indx);
2033 /* Reset the killed bit. */
2034 RESET_BIT (ae_kill[bb_num], indx);
2035 /* Record the available occurrence. */
2036 curr_occr = xmalloc (sizeof (struct see_occr));
2037 curr_occr->next = NULL;
2038 curr_occr->insn = use_se;
2039 curr_occr->block_num = bb_num;
2040 tmp_occr = extension_expr->avail_occr;
2041 if (!tmp_occr)
2042 extension_expr->avail_occr = curr_occr;
2043 else
2045 while (tmp_occr->next)
2046 tmp_occr = tmp_occr->next;
2047 tmp_occr->next = curr_occr;
2050 return 1;
2054 /* Here we traverse over all the merged and unmerged extensions of the reference
2055 and analyze their properties for the LCM.
2057 This is a subroutine of see_execute_LCM called via splay_tree_foreach.
2059 STN is the current node in the see_bb_splay_ar[i] splay tree. It holds a
2060 see_ref_s structure. */
2062 static int
2063 see_analyze_ref_local_prop (splay_tree_node stn,
2064 void *data ATTRIBUTE_UNUSED)
2066 htab_t use_se_hash = ((struct see_ref_s *) (stn->value))->use_se_hash;
2067 htab_t unmerged_def_se_hash =
2068 ((struct see_ref_s *) (stn->value))->unmerged_def_se_hash;
2069 htab_t merged_def_se_hash =
2070 ((struct see_ref_s *) (stn->value))->merged_def_se_hash;
2072 /* Analyze use extensions that were not merged with the reference. */
2073 if (use_se_hash)
2074 htab_traverse_noresize (use_se_hash, see_analyze_use_local_prop,
2075 (PTR) (stn->value));
2077 /* Analyze def extensions that were not merged with the reference. */
2078 if (unmerged_def_se_hash)
2079 htab_traverse (unmerged_def_se_hash, see_analyze_unmerged_def_local_prop,
2080 (PTR) (stn->value));
2082 /* Analyze def extensions that were merged with the reference. */
2083 if (merged_def_se_hash)
2084 htab_traverse (merged_def_se_hash, see_analyze_merged_def_local_prop,
2085 (PTR) (stn->value));
2087 /* Continue to the next definition. */
2088 return 0;
2092 /* Phase 3 top level function.
2093 In this phase, we set the input bit vectors of the LCM according to data
2094 gathered in phase 2.
2095 Then we run the edge based LCM. */
2097 static void
2098 see_execute_LCM (void)
2100 size_t pre_extension_num = htab_elements (see_pre_extension_hash);
2101 int i = 0;
2103 if (dump_file)
2104 fprintf (dump_file,
2105 "* Phase 3: Eliminate globally redundant extensions. *\n");
2107 /* Initialize the global sbitmap vectors. */
2108 transp = sbitmap_vector_alloc (last_bb, pre_extension_num);
2109 comp = sbitmap_vector_alloc (last_bb, pre_extension_num);
2110 antloc = sbitmap_vector_alloc (last_bb, pre_extension_num);
2111 ae_kill = sbitmap_vector_alloc (last_bb, pre_extension_num);
2112 sbitmap_vector_ones (transp, last_bb);
2113 sbitmap_vector_zero (comp, last_bb);
2114 sbitmap_vector_zero (antloc, last_bb);
2115 sbitmap_vector_zero (ae_kill, last_bb);
2117 /* Traverse over all the splay trees of the basic blocks. */
2118 for (i = 0; i < last_bb; i++)
2120 if (see_bb_splay_ar[i])
2122 /* Traverse over all the references in the basic block in forward
2123 order. */
2124 splay_tree_foreach (see_bb_splay_ar[i],
2125 see_analyze_ref_local_prop, NULL);
2129 /* Add fake exit edges before running the lcm. */
2130 add_noreturn_fake_exit_edges ();
2132 /* Run the LCM. */
2133 edge_list = pre_edge_lcm (pre_extension_num, transp, comp, antloc,
2134 ae_kill, &pre_insert_map, &pre_delete_map);
2136 /* Remove the fake edges. */
2137 remove_fake_exit_edges ();
2141 /* Phase 2 implementation: Merge and eliminate locally redundant extensions. */
2143 /* In this function we set the register properties for the register that is
2144 defined and extended in the reference.
2145 The properties are defined in see_register_properties structure which is
2146 allocated per basic block and per register.
2147 Later the extension is inserted into the see_pre_extension_hash for the next
2148 phase of the optimization.
2150 This is a subroutine of see_handle_extensions_for_one_ref called
2151 via htab_traverse.
2153 SLOT contains the current def extension instruction.
2154 B is the see_ref_s structure pointer. */
2156 static int
2157 see_set_prop_merged_def (void **slot, void *b)
2159 rtx def_se = *slot;
2160 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b;
2161 rtx insn = curr_ref_s->insn;
2162 rtx dest_extension_reg = see_get_extension_reg (def_se, 1);
2163 htab_t curr_bb_hash;
2164 struct see_register_properties *curr_prop = NULL;
2165 struct see_register_properties **slot_prop;
2166 struct see_register_properties temp_prop;
2167 int ref_luid = DF_INSN_LUID (insn);
2169 curr_bb_hash = see_bb_hash_ar[BLOCK_NUM (curr_ref_s->insn)];
2170 if (!curr_bb_hash)
2172 /* The hash doesn't exist yet. Create it. */
2173 curr_bb_hash = htab_create (10,
2174 hash_descriptor_properties,
2175 eq_descriptor_properties,
2176 hash_del_properties);
2177 see_bb_hash_ar[BLOCK_NUM (curr_ref_s->insn)] = curr_bb_hash;
2180 /* Find the right register properties in the right basic block. */
2181 temp_prop.regno = REGNO (dest_extension_reg);
2182 slot_prop =
2183 (struct see_register_properties **) htab_find_slot (curr_bb_hash,
2184 &temp_prop, INSERT);
2186 if (slot_prop && *slot_prop != NULL)
2188 /* Property already exists. */
2189 curr_prop = *slot_prop;
2190 gcc_assert (curr_prop->regno == REGNO (dest_extension_reg));
2192 curr_prop->last_def = ref_luid;
2193 curr_prop->first_se_after_last_def = ref_luid;
2195 else
2197 /* Property doesn't exist yet. */
2198 curr_prop = xmalloc (sizeof (struct see_register_properties));
2199 curr_prop->regno = REGNO (dest_extension_reg);
2200 curr_prop->last_def = ref_luid;
2201 curr_prop->first_se_before_any_def = -1;
2202 curr_prop->first_se_after_last_def = ref_luid;
2203 *slot_prop = curr_prop;
2206 /* Insert the def_se into see_pre_extension_hash if it isn't already
2207 there. */
2208 see_seek_pre_extension_expr (def_se, DEF_EXTENSION);
2210 return 1;
2214 /* In this function we set the register properties for the register that is
2215 defined but not extended in the reference.
2216 The properties are defined in see_register_properties structure which is
2217 allocated per basic block and per register.
2218 Later the extension is inserted into the see_pre_extension_hash for the next
2219 phase of the optimization.
2221 This is a subroutine of see_handle_extensions_for_one_ref called
2222 via htab_traverse.
2224 SLOT contains the current def extension instruction.
2225 B is the see_ref_s structure pointer. */
2227 static int
2228 see_set_prop_unmerged_def (void **slot, void *b)
2230 rtx def_se = *slot;
2231 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b;
2232 rtx insn = curr_ref_s->insn;
2233 rtx dest_extension_reg = see_get_extension_reg (def_se, 1);
2234 htab_t curr_bb_hash;
2235 struct see_register_properties *curr_prop = NULL;
2236 struct see_register_properties **slot_prop;
2237 struct see_register_properties temp_prop;
2238 int ref_luid = DF_INSN_LUID (insn);
2240 curr_bb_hash = see_bb_hash_ar[BLOCK_NUM (curr_ref_s->insn)];
2241 if (!curr_bb_hash)
2243 /* The hash doesn't exist yet. Create it. */
2244 curr_bb_hash = htab_create (10,
2245 hash_descriptor_properties,
2246 eq_descriptor_properties,
2247 hash_del_properties);
2248 see_bb_hash_ar[BLOCK_NUM (curr_ref_s->insn)] = curr_bb_hash;
2251 /* Find the right register properties in the right basic block. */
2252 temp_prop.regno = REGNO (dest_extension_reg);
2253 slot_prop =
2254 (struct see_register_properties **) htab_find_slot (curr_bb_hash,
2255 &temp_prop, INSERT);
2257 if (slot_prop && *slot_prop != NULL)
2259 /* Property already exists. */
2260 curr_prop = *slot_prop;
2261 gcc_assert (curr_prop->regno == REGNO (dest_extension_reg));
2263 curr_prop->last_def = ref_luid;
2264 curr_prop->first_se_after_last_def = -1;
2266 else
2268 /* Property doesn't exist yet. */
2269 curr_prop = xmalloc (sizeof (struct see_register_properties));
2270 curr_prop->regno = REGNO (dest_extension_reg);
2271 curr_prop->last_def = ref_luid;
2272 curr_prop->first_se_before_any_def = -1;
2273 curr_prop->first_se_after_last_def = -1;
2274 *slot_prop = curr_prop;
2277 /* Insert the def_se into see_pre_extension_hash if it isn't already
2278 there. */
2279 see_seek_pre_extension_expr (def_se, DEF_EXTENSION);
2281 return 1;
2285 /* In this function we set the register properties for the register that is used
2286 in the reference.
2287 The properties are defined in see_register_properties structure which is
2288 allocated per basic block and per register.
2289 When a redundant use extension is found it is removed from the hash of the
2290 reference.
2291 If the extension is non redundant it is inserted into the
2292 see_pre_extension_hash for the next phase of the optimization.
2294 This is a subroutine of see_handle_extensions_for_one_ref called
2295 via htab_traverse.
2297 SLOT contains the current use extension instruction.
2298 B is the see_ref_s structure pointer. */
2300 static int
2301 see_set_prop_unmerged_use (void **slot, void *b)
2303 rtx use_se = *slot;
2304 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b;
2305 rtx insn = curr_ref_s->insn;
2306 rtx dest_extension_reg = see_get_extension_reg (use_se, 1);
2307 htab_t curr_bb_hash;
2308 struct see_register_properties *curr_prop = NULL;
2309 struct see_register_properties **slot_prop;
2310 struct see_register_properties temp_prop;
2311 bool locally_redundant = false;
2312 int ref_luid = DF_INSN_LUID (insn);
2314 curr_bb_hash = see_bb_hash_ar[BLOCK_NUM (curr_ref_s->insn)];
2315 if (!curr_bb_hash)
2317 /* The hash doesn't exist yet. Create it. */
2318 curr_bb_hash = htab_create (10,
2319 hash_descriptor_properties,
2320 eq_descriptor_properties,
2321 hash_del_properties);
2322 see_bb_hash_ar[BLOCK_NUM (curr_ref_s->insn)] = curr_bb_hash;
2325 /* Find the right register properties in the right basic block. */
2326 temp_prop.regno = REGNO (dest_extension_reg);
2327 slot_prop =
2328 (struct see_register_properties **) htab_find_slot (curr_bb_hash,
2329 &temp_prop, INSERT);
2331 if (slot_prop && *slot_prop != NULL)
2333 /* Property already exists. */
2334 curr_prop = *slot_prop;
2335 gcc_assert (curr_prop->regno == REGNO (dest_extension_reg));
2338 if (curr_prop->last_def < 0 && curr_prop->first_se_before_any_def < 0)
2339 curr_prop->first_se_before_any_def = ref_luid;
2340 else if (curr_prop->last_def < 0
2341 && curr_prop->first_se_before_any_def >= 0)
2343 /* In this case the extension is locally redundant. */
2344 htab_clear_slot (curr_ref_s->use_se_hash, (PTR *)slot);
2345 locally_redundant = true;
2347 else if (curr_prop->last_def >= 0
2348 && curr_prop->first_se_after_last_def < 0)
2349 curr_prop->first_se_after_last_def = ref_luid;
2350 else if (curr_prop->last_def >= 0
2351 && curr_prop->first_se_after_last_def >= 0)
2353 /* In this case the extension is locally redundant. */
2354 htab_clear_slot (curr_ref_s->use_se_hash, (PTR *)slot);
2355 locally_redundant = true;
2357 else
2358 gcc_unreachable ();
2360 else
2362 /* Property doesn't exist yet. Create a new one. */
2363 curr_prop = xmalloc (sizeof (struct see_register_properties));
2364 curr_prop->regno = REGNO (dest_extension_reg);
2365 curr_prop->last_def = -1;
2366 curr_prop->first_se_before_any_def = ref_luid;
2367 curr_prop->first_se_after_last_def = -1;
2368 *slot_prop = curr_prop;
2371 /* Insert the use_se into see_pre_extension_hash if it isn't already
2372 there. */
2373 if (!locally_redundant)
2374 see_seek_pre_extension_expr (use_se, USE_EXTENSION);
2375 if (locally_redundant && dump_file)
2377 fprintf (dump_file, "Locally redundant extension:\n");
2378 print_rtl_single (dump_file, use_se);
2380 return 1;
2384 /* Print an extension instruction.
2386 This is a subroutine of see_handle_extensions_for_one_ref called
2387 via htab_traverse.
2388 SLOT contains the extension instruction. */
2390 static int
2391 see_print_one_extension (void **slot, void *b ATTRIBUTE_UNUSED)
2393 rtx def_se = *slot;
2395 gcc_assert (def_se && INSN_P (def_se));
2396 print_rtl_single (dump_file, def_se);
2398 return 1;
2401 /* Function called by note_uses to replace used subexpressions.
2403 X is a pointer to the subexpression and DATA is a pointer to a
2404 see_replace_data structure that contains the data for the replacement. */
2406 static void
2407 see_replace_src (rtx *x, void *data)
2409 struct see_replace_data *d
2410 = (struct see_replace_data *) data;
2412 *x = replace_rtx (*x, d->from, d->to);
2416 /* At this point the pattern is expected to be:
2418 ref: set (dest_reg) (rhs)
2419 def_se: set (dest_extension_reg) (sign/zero_extend (source_extension_reg))
2421 The merge of these two instructions didn't succeed.
2423 We try to generate the pattern:
2424 set (subreg (dest_extension_reg)) (rhs)
2426 We do this in 4 steps:
2427 a. Replace every use of dest_reg with a new pseudo register.
2428 b. Replace every instance of dest_reg with the subreg.
2429 c. Replace every use of the new pseudo register back to dest_reg.
2430 d. Try to recognize and simplify.
2432 If the manipulation failed, leave the original ref but try to generate and
2433 recognize a simple move instruction:
2434 set (subreg (dest_extension_reg)) (dest_reg)
2435 This move instruction will be emitted right after the ref to the instruction
2436 stream and assure the correctness of the code after def_se will be removed.
2438 CURR_REF_S is the current reference.
2439 DEF_SE is the extension that couldn't be merged. */
2441 static void
2442 see_def_extension_not_merged (struct see_ref_s *curr_ref_s, rtx def_se)
2444 struct see_replace_data d;
2445 /* If the original insn was already merged with an extension before,
2446 take the merged one. */
2447 rtx ref = (curr_ref_s->merged_insn) ? curr_ref_s->merged_insn :
2448 curr_ref_s->insn;
2449 rtx merged_ref_next = (curr_ref_s->merged_insn) ?
2450 NEXT_INSN (curr_ref_s->merged_insn): NULL_RTX;
2451 rtx ref_copy = copy_rtx (ref);
2452 rtx source_extension_reg = see_get_extension_reg (def_se, 0);
2453 rtx dest_extension_reg = see_get_extension_reg (def_se, 1);
2454 rtx move_insn = NULL;
2455 rtx set, rhs;
2456 rtx dest_reg, dest_real_reg;
2457 rtx new_pseudo_reg, subreg;
2458 enum machine_mode source_extension_mode = GET_MODE (source_extension_reg);
2459 enum machine_mode dest_mode;
2461 set = single_set (def_se);
2462 gcc_assert (set);
2463 rhs = SET_SRC (set);
2464 gcc_assert (GET_CODE (rhs) == SIGN_EXTEND
2465 || GET_CODE (rhs) == ZERO_EXTEND);
2466 dest_reg = XEXP (rhs, 0);
2467 gcc_assert (REG_P (dest_reg)
2468 || (GET_CODE (dest_reg) == SUBREG
2469 && REG_P (SUBREG_REG (dest_reg))));
2470 dest_real_reg = REG_P (dest_reg) ? dest_reg : SUBREG_REG (dest_reg);
2471 dest_mode = GET_MODE (dest_reg);
2473 subreg = gen_lowpart_SUBREG (dest_mode, dest_extension_reg);
2474 new_pseudo_reg = gen_reg_rtx (source_extension_mode);
2476 /* Step a: Replace every use of dest_real_reg with a new pseudo register. */
2477 d.from = dest_real_reg;
2478 d.to = new_pseudo_reg;
2479 note_uses (&PATTERN (ref_copy), see_replace_src, &d);
2480 /* Step b: Replace every instance of dest_reg with the subreg. */
2481 ref_copy = replace_rtx (ref_copy, dest_reg, subreg);
2483 /* Step c: Replace every use of the new pseudo register back to
2484 dest_real_reg. */
2485 d.from = new_pseudo_reg;
2486 d.to = dest_real_reg;
2487 note_uses (&PATTERN (ref_copy), see_replace_src, &d);
2489 if (rtx_equal_p (PATTERN (ref), PATTERN (ref_copy))
2490 || insn_invalid_p (ref_copy))
2492 /* The manipulation failed. */
2494 /* Create a new copy. */
2495 ref_copy = copy_rtx (ref);
2497 /* Create a simple move instruction that will replace the def_se. */
2498 start_sequence ();
2499 emit_move_insn (subreg, dest_reg);
2500 move_insn = get_insns ();
2501 end_sequence ();
2503 /* Link the manipulated instruction to the newly created move instruction
2504 and to the former created move instructions. */
2505 PREV_INSN (ref_copy) = NULL_RTX;
2506 NEXT_INSN (ref_copy) = move_insn;
2507 PREV_INSN (move_insn) = ref_copy;
2508 NEXT_INSN (move_insn) = merged_ref_next;
2509 if (merged_ref_next != NULL_RTX)
2510 PREV_INSN (merged_ref_next) = move_insn;
2511 curr_ref_s->merged_insn = ref_copy;
2513 if (dump_file)
2515 fprintf (dump_file, "Following def merge failure a move ");
2516 fprintf (dump_file, "insn was added after the ref.\n");
2517 fprintf (dump_file, "Original ref:\n");
2518 print_rtl_single (dump_file, ref);
2519 fprintf (dump_file, "Move insn that was added:\n");
2520 print_rtl_single (dump_file, move_insn);
2522 return;
2525 /* The manipulation succeeded. Store the new manipulated reference. */
2527 /* Try to simplify the new manipulated insn. */
2528 validate_simplify_insn (ref_copy);
2530 /* Create a simple move instruction to assure the correctness of the code. */
2531 start_sequence ();
2532 emit_move_insn (dest_reg, subreg);
2533 move_insn = get_insns ();
2534 end_sequence ();
2536 /* Link the manipulated instruction to the newly created move instruction and
2537 to the former created move instructions. */
2538 PREV_INSN (ref_copy) = NULL_RTX;
2539 NEXT_INSN (ref_copy) = move_insn;
2540 PREV_INSN (move_insn) = ref_copy;
2541 NEXT_INSN (move_insn) = merged_ref_next;
2542 if (merged_ref_next != NULL_RTX)
2543 PREV_INSN (merged_ref_next) = move_insn;
2544 curr_ref_s->merged_insn = ref_copy;
2546 if (dump_file)
2548 fprintf (dump_file, "Following merge failure the ref was transformed!\n");
2549 fprintf (dump_file, "Original ref:\n");
2550 print_rtl_single (dump_file, ref);
2551 fprintf (dump_file, "Transformed ref:\n");
2552 print_rtl_single (dump_file, ref_copy);
2553 fprintf (dump_file, "Move insn that was added:\n");
2554 print_rtl_single (dump_file, move_insn);
2559 /* Merge the reference instruction (ref) with the current use extension.
2561 use_se extends a NARROWmode register to a WIDEmode register.
2562 ref uses the WIDEmode register.
2564 The pattern we try to merge is this:
2565 use_se: set (dest_extension_reg) (sign/zero_extend (source_extension_reg))
2566 ref: use (dest_extension_reg)
2568 where dest_extension_reg and source_extension_reg can be subregs.
2570 The merge is done by generating, simplifying and recognizing the pattern:
2571 use (sign/zero_extend (source_extension_reg))
2573 If ref is too simple (according to see_want_to_be_merged_with_extension ())
2574 we don't try to merge it with use_se and we continue as if the merge failed.
2576 This is a subroutine of see_handle_extensions_for_one_ref called
2577 via htab_traverse.
2578 SLOT contains the current use extension instruction.
2579 B is the see_ref_s structure pointer. */
2581 static int
2582 see_merge_one_use_extension (void **slot, void *b)
2584 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b;
2585 rtx use_se = *slot;
2586 rtx ref = (curr_ref_s->merged_insn) ? curr_ref_s->merged_insn :
2587 curr_ref_s->insn;
2588 rtx merged_ref_next = (curr_ref_s->merged_insn) ?
2589 NEXT_INSN (curr_ref_s->merged_insn): NULL_RTX;
2590 rtx ref_copy = copy_rtx (ref);
2591 rtx extension_set = single_set (use_se);
2592 rtx extension_rhs = NULL;
2593 rtx dest_extension_reg = see_get_extension_reg (use_se, 1);
2594 rtx note = NULL;
2595 rtx simplified_note = NULL;
2597 gcc_assert (use_se && curr_ref_s && extension_set);
2599 extension_rhs = SET_SRC (extension_set);
2601 /* In REG_EQUIV and REG_EQUAL notes that mention the register we need to
2602 replace the uses of the dest_extension_reg with the rhs of the extension
2603 instruction. This is necessary since there might not be an extension in
2604 the path between the definition and the note when this optimization is
2605 over. */
2606 note = find_reg_equal_equiv_note (ref_copy);
2607 if (note)
2609 simplified_note = simplify_replace_rtx (XEXP (note, 0),
2610 dest_extension_reg,
2611 extension_rhs);
2612 if (rtx_equal_p (XEXP (note, 0), simplified_note))
2613 /* Replacement failed. Remove the note. */
2614 remove_note (ref_copy, note);
2615 else
2616 set_unique_reg_note (ref_copy, REG_NOTE_KIND (note),
2617 simplified_note);
2620 if (!see_want_to_be_merged_with_extension (ref, use_se, USE_EXTENSION))
2622 /* The use in the reference is too simple. Don't try to merge. */
2623 if (dump_file)
2625 fprintf (dump_file, "Use merge skipped!\n");
2626 fprintf (dump_file, "Original instructions:\n");
2627 print_rtl_single (dump_file, use_se);
2628 print_rtl_single (dump_file, ref);
2630 /* Don't remove the current use_se from the use_se_hash and continue to
2631 the next extension. */
2632 return 1;
2635 validate_replace_src_group (dest_extension_reg, extension_rhs, ref_copy);
2637 if (!num_changes_pending ())
2638 /* In this case this is not a real use (the only use is/was in the notes
2639 list). Remove the use extension from the hash. This will prevent it
2640 from been emitted in the first place. */
2642 if (dump_file)
2644 fprintf (dump_file, "Use extension not necessary before:\n");
2645 print_rtl_single (dump_file, ref);
2647 htab_clear_slot (curr_ref_s->use_se_hash, (PTR *)slot);
2648 PREV_INSN (ref_copy) = NULL_RTX;
2649 NEXT_INSN (ref_copy) = merged_ref_next;
2650 if (merged_ref_next != NULL_RTX)
2651 PREV_INSN (merged_ref_next) = ref_copy;
2652 curr_ref_s->merged_insn = ref_copy;
2653 return 1;
2656 if (!apply_change_group ())
2658 /* The merge failed. */
2659 if (dump_file)
2661 fprintf (dump_file, "Use merge failed!\n");
2662 fprintf (dump_file, "Original instructions:\n");
2663 print_rtl_single (dump_file, use_se);
2664 print_rtl_single (dump_file, ref);
2666 /* Don't remove the current use_se from the use_se_hash and continue to
2667 the next extension. */
2668 return 1;
2671 /* The merge succeeded! */
2673 /* Try to simplify the new merged insn. */
2674 validate_simplify_insn (ref_copy);
2676 PREV_INSN (ref_copy) = NULL_RTX;
2677 NEXT_INSN (ref_copy) = merged_ref_next;
2678 if (merged_ref_next != NULL_RTX)
2679 PREV_INSN (merged_ref_next) = ref_copy;
2680 curr_ref_s->merged_insn = ref_copy;
2682 if (dump_file)
2684 fprintf (dump_file, "Use merge succeeded!\n");
2685 fprintf (dump_file, "Original instructions:\n");
2686 print_rtl_single (dump_file, use_se);
2687 print_rtl_single (dump_file, ref);
2688 fprintf (dump_file, "Merged instruction:\n");
2689 print_rtl_single (dump_file, ref_copy);
2692 /* Remove the current use_se from the use_se_hash. This will prevent it from
2693 been emitted in the first place. */
2694 htab_clear_slot (curr_ref_s->use_se_hash, (PTR *)slot);
2695 return 1;
2699 /* Merge the reference instruction (ref) with the extension that follows it
2700 in the same basic block (def_se).
2701 ref sets a NARROWmode register and def_se extends it to WIDEmode register.
2703 The pattern we try to merge is this:
2704 ref: set (dest_reg) (rhs)
2705 def_se: set (dest_extension_reg) (sign/zero_extend (source_extension_reg))
2707 where dest_reg and source_extension_reg can both be subregs (together)
2708 and (REGNO (dest_reg) == REGNO (source_extension_reg))
2710 The merge is done by generating, simplifying and recognizing the pattern:
2711 set (dest_extension_reg) (sign/zero_extend (rhs))
2712 If ref is a parallel instruction we just replace the relevant set in it.
2714 If ref is too simple (according to see_want_to_be_merged_with_extension ())
2715 we don't try to merge it with def_se and we continue as if the merge failed.
2717 This is a subroutine of see_handle_extensions_for_one_ref called
2718 via htab_traverse.
2720 SLOT contains the current def extension instruction.
2721 B is the see_ref_s structure pointer. */
2723 static int
2724 see_merge_one_def_extension (void **slot, void *b)
2726 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b;
2727 rtx def_se = *slot;
2728 /* If the original insn was already merged with an extension before,
2729 take the merged one. */
2730 rtx ref = (curr_ref_s->merged_insn) ? curr_ref_s->merged_insn :
2731 curr_ref_s->insn;
2732 rtx merged_ref_next = (curr_ref_s->merged_insn) ?
2733 NEXT_INSN (curr_ref_s->merged_insn): NULL_RTX;
2734 rtx ref_copy = copy_rtx (ref);
2735 rtx new_set = NULL;
2736 rtx source_extension_reg = see_get_extension_reg (def_se, 0);
2737 rtx dest_extension_reg = see_get_extension_reg (def_se, 1);
2738 rtx move_insn, *rtx_slot, subreg;
2739 rtx temp_extension = NULL;
2740 rtx simplified_temp_extension = NULL;
2741 rtx *pat;
2742 enum rtx_code code;
2743 enum rtx_code extension_code;
2744 enum machine_mode source_extension_mode;
2745 enum machine_mode source_mode;
2746 enum machine_mode dest_extension_mode;
2747 bool merge_success = false;
2748 int i;
2750 gcc_assert (def_se
2751 && INSN_P (def_se)
2752 && curr_ref_s
2753 && ref
2754 && INSN_P (ref));
2756 if (!see_want_to_be_merged_with_extension (ref, def_se, DEF_EXTENSION))
2758 /* The definition in the reference is too simple. Don't try to merge. */
2759 if (dump_file)
2761 fprintf (dump_file, "Def merge skipped!\n");
2762 fprintf (dump_file, "Original instructions:\n");
2763 print_rtl_single (dump_file, ref);
2764 print_rtl_single (dump_file, def_se);
2767 see_def_extension_not_merged (curr_ref_s, def_se);
2768 /* Continue to the next extension. */
2769 return 1;
2772 extension_code = see_get_extension_data (def_se, &source_mode);
2774 /* Try to merge and simplify the extension. */
2775 source_extension_mode = GET_MODE (source_extension_reg);
2776 dest_extension_mode = GET_MODE (dest_extension_reg);
2778 pat = &PATTERN (ref_copy);
2779 code = GET_CODE (*pat);
2781 if (code == PARALLEL)
2783 bool need_to_apply_change = false;
2785 for (i = 0; i < XVECLEN (*pat, 0); i++)
2787 rtx *sub = &XVECEXP (*pat, 0, i);
2789 if (GET_CODE (*sub) == SET
2790 && GET_MODE (SET_SRC (*sub)) != VOIDmode
2791 && GET_MODE (SET_DEST (*sub)) == source_mode
2792 && ((REG_P (SET_DEST (*sub))
2793 && REGNO (SET_DEST (*sub)) == REGNO (source_extension_reg))
2794 || (GET_CODE (SET_DEST (*sub)) == SUBREG
2795 && REG_P (SUBREG_REG (SET_DEST (*sub)))
2796 && (REGNO (SUBREG_REG (SET_DEST (*sub))) ==
2797 REGNO (source_extension_reg)))))
2799 rtx orig_src = SET_SRC (*sub);
2801 if (extension_code == SIGN_EXTEND)
2802 temp_extension = gen_rtx_SIGN_EXTEND (dest_extension_mode,
2803 orig_src);
2804 else
2805 temp_extension = gen_rtx_ZERO_EXTEND (dest_extension_mode,
2806 orig_src);
2807 simplified_temp_extension = simplify_rtx (temp_extension);
2808 temp_extension =
2809 (simplified_temp_extension) ? simplified_temp_extension :
2810 temp_extension;
2811 new_set = gen_rtx_SET (VOIDmode, dest_extension_reg,
2812 temp_extension);
2813 validate_change (ref_copy, sub, new_set, 1);
2814 need_to_apply_change = true;
2817 if (need_to_apply_change)
2818 if (apply_change_group ())
2819 merge_success = true;
2821 else if (code == SET
2822 && GET_MODE (SET_SRC (*pat)) != VOIDmode
2823 && GET_MODE (SET_DEST (*pat)) == source_mode
2824 && ((REG_P (SET_DEST (*pat))
2825 && REGNO (SET_DEST (*pat)) == REGNO (source_extension_reg))
2826 || (GET_CODE (SET_DEST (*pat)) == SUBREG
2827 && REG_P (SUBREG_REG (SET_DEST (*pat)))
2828 && (REGNO (SUBREG_REG (SET_DEST (*pat))) ==
2829 REGNO (source_extension_reg)))))
2831 rtx orig_src = SET_SRC (*pat);
2833 if (extension_code == SIGN_EXTEND)
2834 temp_extension = gen_rtx_SIGN_EXTEND (dest_extension_mode, orig_src);
2835 else
2836 temp_extension = gen_rtx_ZERO_EXTEND (dest_extension_mode, orig_src);
2837 simplified_temp_extension = simplify_rtx (temp_extension);
2838 temp_extension = (simplified_temp_extension) ? simplified_temp_extension :
2839 temp_extension;
2840 new_set = gen_rtx_SET (VOIDmode, dest_extension_reg, temp_extension);
2841 if (validate_change (ref_copy, pat, new_set, 0))
2842 merge_success = true;
2844 if (!merge_success)
2846 /* The merge failed. */
2847 if (dump_file)
2849 fprintf (dump_file, "Def merge failed!\n");
2850 fprintf (dump_file, "Original instructions:\n");
2851 print_rtl_single (dump_file, ref);
2852 print_rtl_single (dump_file, def_se);
2855 see_def_extension_not_merged (curr_ref_s, def_se);
2856 /* Continue to the next extension. */
2857 return 1;
2860 /* The merge succeeded! */
2862 /* Create a simple move instruction to assure the correctness of the code. */
2863 subreg = gen_lowpart_SUBREG (source_extension_mode, dest_extension_reg);
2864 start_sequence ();
2865 emit_move_insn (source_extension_reg, subreg);
2866 move_insn = get_insns ();
2867 end_sequence ();
2869 /* Link the merged instruction to the newly created move instruction and
2870 to the former created move instructions. */
2871 PREV_INSN (ref_copy) = NULL_RTX;
2872 NEXT_INSN (ref_copy) = move_insn;
2873 PREV_INSN (move_insn) = ref_copy;
2874 NEXT_INSN (move_insn) = merged_ref_next;
2875 if (merged_ref_next != NULL_RTX)
2876 PREV_INSN (merged_ref_next) = move_insn;
2877 curr_ref_s->merged_insn = ref_copy;
2879 if (dump_file)
2881 fprintf (dump_file, "Def merge succeeded!\n");
2882 fprintf (dump_file, "Original instructions:\n");
2883 print_rtl_single (dump_file, ref);
2884 print_rtl_single (dump_file, def_se);
2885 fprintf (dump_file, "Merged instruction:\n");
2886 print_rtl_single (dump_file, ref_copy);
2887 fprintf (dump_file, "Move instruction that was added:\n");
2888 print_rtl_single (dump_file, move_insn);
2891 /* Remove the current def_se from the unmerged_def_se_hash and insert it to
2892 the merged_def_se_hash. */
2893 htab_clear_slot (curr_ref_s->unmerged_def_se_hash, (PTR *)slot);
2894 if (!curr_ref_s->merged_def_se_hash)
2895 curr_ref_s->merged_def_se_hash = htab_create (10,
2896 hash_descriptor_extension,
2897 eq_descriptor_extension,
2898 NULL);
2899 rtx_slot = (rtx *) htab_find_slot (curr_ref_s->merged_def_se_hash,
2900 dest_extension_reg, INSERT);
2901 gcc_assert (*rtx_slot == NULL);
2902 *rtx_slot = def_se;
2904 return 1;
2908 /* Try to eliminate extensions in this order:
2909 a. Try to merge only the def extensions, one by one.
2910 b. Try to merge only the use extensions, one by one.
2912 TODO:
2913 Try to merge any couple of use extensions simultaneously.
2914 Try to merge any def extension with one or two uses extensions
2915 simultaneously.
2917 After all the merges are done, update the register properties for the basic
2918 block and eliminate locally redundant use extensions.
2920 This is a subroutine of see_merge_and_eliminate_extensions called
2921 via splay_tree_foreach.
2922 STN is the current node in the see_bb_splay_ar[i] splay tree. It holds a
2923 see_ref_s structure. */
2925 static int
2926 see_handle_extensions_for_one_ref (splay_tree_node stn,
2927 void *data ATTRIBUTE_UNUSED)
2929 htab_t use_se_hash = ((struct see_ref_s *) (stn->value))->use_se_hash;
2930 htab_t unmerged_def_se_hash =
2931 ((struct see_ref_s *) (stn->value))->unmerged_def_se_hash;
2932 htab_t merged_def_se_hash;
2933 rtx ref = ((struct see_ref_s *) (stn->value))->insn;
2935 if (dump_file)
2937 fprintf (dump_file, "Handling ref:\n");
2938 print_rtl_single (dump_file, ref);
2941 /* a. Try to eliminate only def extensions, one by one. */
2942 if (unmerged_def_se_hash)
2943 htab_traverse_noresize (unmerged_def_se_hash, see_merge_one_def_extension,
2944 (PTR) (stn->value));
2946 if (use_se_hash)
2947 /* b. Try to eliminate only use extensions, one by one. */
2948 htab_traverse_noresize (use_se_hash, see_merge_one_use_extension,
2949 (PTR) (stn->value));
2951 merged_def_se_hash = ((struct see_ref_s *) (stn->value))->merged_def_se_hash;
2953 if (dump_file)
2955 fprintf (dump_file, "The hashes of the current reference:\n");
2956 if (unmerged_def_se_hash)
2958 fprintf (dump_file, "unmerged_def_se_hash:\n");
2959 htab_traverse (unmerged_def_se_hash, see_print_one_extension, NULL);
2961 if (merged_def_se_hash)
2963 fprintf (dump_file, "merged_def_se_hash:\n");
2964 htab_traverse (merged_def_se_hash, see_print_one_extension, NULL);
2966 if (use_se_hash)
2968 fprintf (dump_file, "use_se_hash:\n");
2969 htab_traverse (use_se_hash, see_print_one_extension, NULL);
2973 /* Now that all the merges are done, update the register properties of the
2974 basic block and eliminate locally redundant extensions.
2975 It is important that we first traverse the use extensions hash and
2976 afterwards the def extensions hashes. */
2978 if (use_se_hash)
2979 htab_traverse_noresize (use_se_hash, see_set_prop_unmerged_use,
2980 (PTR) (stn->value));
2982 if (unmerged_def_se_hash)
2983 htab_traverse (unmerged_def_se_hash, see_set_prop_unmerged_def,
2984 (PTR) (stn->value));
2986 if (merged_def_se_hash)
2987 htab_traverse (merged_def_se_hash, see_set_prop_merged_def,
2988 (PTR) (stn->value));
2990 /* Continue to the next definition. */
2991 return 0;
2995 /* Phase 2 top level function.
2996 In this phase, we try to merge def extensions and use extensions with their
2997 references, and eliminate redundant extensions in the same basic block.
2998 We also gather information for the next phases. */
3000 static void
3001 see_merge_and_eliminate_extensions (void)
3003 int i = 0;
3005 if (dump_file)
3006 fprintf (dump_file,
3007 "* Phase 2: Merge and eliminate locally redundant extensions. *\n");
3009 /* Traverse over all the splay trees of the basic blocks. */
3010 for (i = 0; i < last_bb; i++)
3012 if (see_bb_splay_ar[i])
3014 if (dump_file)
3015 fprintf (dump_file, "Handling references for bb %d\n", i);
3016 /* Traverse over all the references in the basic block in forward
3017 order. */
3018 splay_tree_foreach (see_bb_splay_ar[i],
3019 see_handle_extensions_for_one_ref, NULL);
3025 /* Phase 1 implementation: Propagate extensions to uses. */
3027 /* Insert REF_INSN into the splay tree of its basic block.
3028 SE_INSN is the extension to store in the proper hash according to TYPE.
3030 Return true if everything went well.
3031 Otherwise, return false (this will cause the optimization to be aborted). */
3033 static bool
3034 see_store_reference_and_extension (rtx ref_insn, rtx se_insn,
3035 enum extension_type type)
3037 rtx *rtx_slot;
3038 int curr_bb_num;
3039 splay_tree_node stn = NULL;
3040 htab_t se_hash = NULL;
3041 struct see_ref_s *ref_s = NULL;
3043 /* Check the arguments. */
3044 gcc_assert (ref_insn && se_insn);
3045 if (!see_bb_splay_ar)
3046 return false;
3048 curr_bb_num = BLOCK_NUM (ref_insn);
3049 gcc_assert (curr_bb_num < last_bb && curr_bb_num >= 0);
3051 /* Insert the reference to the splay tree of its basic block. */
3052 if (!see_bb_splay_ar[curr_bb_num])
3053 /* The splay tree for this block doesn't exist yet, create it. */
3054 see_bb_splay_ar[curr_bb_num] = splay_tree_new (splay_tree_compare_ints,
3055 NULL, see_free_ref_s);
3056 else
3057 /* Splay tree already exists, check if the current reference is already
3058 in it. */
3060 stn = splay_tree_lookup (see_bb_splay_ar[curr_bb_num],
3061 DF_INSN_LUID (ref_insn));
3062 if (stn)
3063 switch (type)
3065 case EXPLICIT_DEF_EXTENSION:
3066 se_hash =
3067 ((struct see_ref_s *) (stn->value))->unmerged_def_se_hash;
3068 if (!se_hash)
3070 se_hash = htab_create (10,
3071 hash_descriptor_extension,
3072 eq_descriptor_extension,
3073 NULL);
3074 ((struct see_ref_s *) (stn->value))->unmerged_def_se_hash =
3075 se_hash;
3077 break;
3078 case IMPLICIT_DEF_EXTENSION:
3079 se_hash = ((struct see_ref_s *) (stn->value))->merged_def_se_hash;
3080 if (!se_hash)
3082 se_hash = htab_create (10,
3083 hash_descriptor_extension,
3084 eq_descriptor_extension,
3085 NULL);
3086 ((struct see_ref_s *) (stn->value))->merged_def_se_hash =
3087 se_hash;
3089 break;
3090 case USE_EXTENSION:
3091 se_hash = ((struct see_ref_s *) (stn->value))->use_se_hash;
3092 if (!se_hash)
3094 se_hash = htab_create (10,
3095 hash_descriptor_extension,
3096 eq_descriptor_extension,
3097 NULL);
3098 ((struct see_ref_s *) (stn->value))->use_se_hash = se_hash;
3100 break;
3101 default:
3102 gcc_unreachable ();
3106 /* Initialize a new see_ref_s structure and insert it to the splay
3107 tree. */
3108 if (!stn)
3110 ref_s = xmalloc (sizeof (struct see_ref_s));
3111 ref_s->luid = DF_INSN_LUID (ref_insn);
3112 ref_s->insn = ref_insn;
3113 ref_s->merged_insn = NULL;
3115 /* Initialize the hashes. */
3116 switch (type)
3118 case EXPLICIT_DEF_EXTENSION:
3119 ref_s->unmerged_def_se_hash = htab_create (10,
3120 hash_descriptor_extension,
3121 eq_descriptor_extension,
3122 NULL);
3123 se_hash = ref_s->unmerged_def_se_hash;
3124 ref_s->merged_def_se_hash = NULL;
3125 ref_s->use_se_hash = NULL;
3126 break;
3127 case IMPLICIT_DEF_EXTENSION:
3128 ref_s->merged_def_se_hash = htab_create (10,
3129 hash_descriptor_extension,
3130 eq_descriptor_extension,
3131 NULL);
3132 se_hash = ref_s->merged_def_se_hash;
3133 ref_s->unmerged_def_se_hash = NULL;
3134 ref_s->use_se_hash = NULL;
3135 break;
3136 case USE_EXTENSION:
3137 ref_s->use_se_hash = htab_create (10,
3138 hash_descriptor_extension,
3139 eq_descriptor_extension,
3140 NULL);
3141 se_hash = ref_s->use_se_hash;
3142 ref_s->unmerged_def_se_hash = NULL;
3143 ref_s->merged_def_se_hash = NULL;
3144 break;
3145 default:
3146 gcc_unreachable ();
3150 /* Insert the new extension instruction into the correct se_hash of the
3151 current reference. */
3152 rtx_slot = (rtx *) htab_find_slot (se_hash, se_insn, INSERT);
3153 if (*rtx_slot != NULL)
3155 gcc_assert (type == USE_EXTENSION);
3156 gcc_assert (rtx_equal_p (PATTERN (*rtx_slot), PATTERN (se_insn)));
3158 else
3159 *rtx_slot = se_insn;
3161 /* If this is a new reference, insert it into the splay_tree. */
3162 if (!stn)
3163 splay_tree_insert (see_bb_splay_ar[curr_bb_num],
3164 DF_INSN_LUID (ref_insn), (splay_tree_value) ref_s);
3165 return true;
3169 /* Go over all the defs, for each relevant definition (defined below) store its
3170 instruction as a reference.
3172 A definition is relevant if its root has
3173 ((entry_type == SIGN_EXTENDED_DEF) || (entry_type == ZERO_EXTENDED_DEF)) and
3174 his source_mode is not narrower then the roots source_mode.
3176 Return the number of relevant defs or negative number if something bad had
3177 happened and the optimization should be aborted. */
3179 static int
3180 see_handle_relevant_defs (struct df_ref *ref, rtx insn)
3182 struct web_entry *root_entry = NULL;
3183 rtx se_insn = NULL;
3184 enum rtx_code extension_code;
3185 rtx reg = DF_REF_REAL_REG (ref);
3186 rtx ref_insn = NULL;
3187 unsigned int i = DF_REF_ID (ref);
3189 root_entry = unionfind_root (&def_entry[DF_REF_ID (ref)]);
3191 if (ENTRY_EI (root_entry)->relevancy != SIGN_EXTENDED_DEF
3192 && ENTRY_EI (root_entry)->relevancy != ZERO_EXTENDED_DEF)
3193 /* The current web is not relevant. Continue to the next def. */
3194 return 0;
3196 if (root_entry->reg)
3197 /* It isn't possible to have two different register for the same
3198 web. */
3199 gcc_assert (rtx_equal_p (root_entry->reg, reg));
3200 else
3201 root_entry->reg = reg;
3203 /* The current definition is an EXTENDED_DEF or a definition that its
3204 source_mode is narrower then its web's source_mode.
3205 This means that we need to generate the implicit extension explicitly
3206 and store it in the current reference's merged_def_se_hash. */
3207 if (ENTRY_EI (&def_entry[i])->local_relevancy == EXTENDED_DEF
3208 || (ENTRY_EI (&def_entry[i])->local_source_mode <
3209 ENTRY_EI (root_entry)->source_mode))
3212 if (ENTRY_EI (root_entry)->relevancy == SIGN_EXTENDED_DEF)
3213 extension_code = SIGN_EXTEND;
3214 else
3215 extension_code = ZERO_EXTEND;
3217 se_insn =
3218 see_gen_normalized_extension (reg, extension_code,
3219 ENTRY_EI (root_entry)->source_mode);
3221 /* This is a dummy extension, mark it as deleted. */
3222 INSN_DELETED_P (se_insn) = 1;
3224 if (!see_store_reference_and_extension (insn, se_insn,
3225 IMPLICIT_DEF_EXTENSION))
3226 /* Something bad happened. Abort the optimization. */
3227 return -1;
3228 return 1;
3231 ref_insn = PREV_INSN (insn);
3232 gcc_assert (BLOCK_NUM (ref_insn) == BLOCK_NUM (insn));
3234 if (!see_store_reference_and_extension (ref_insn, insn,
3235 EXPLICIT_DEF_EXTENSION))
3236 /* Something bad happened. Abort the optimization. */
3237 return -1;
3239 return 0;
3242 /* Go over all the uses, for each use in relevant web store its instruction as
3243 a reference and generate an extension before it.
3245 Return the number of relevant uses or negative number if something bad had
3246 happened and the optimization should be aborted. */
3248 static int
3249 see_handle_relevant_uses (struct df_ref *ref, rtx insn)
3251 struct web_entry *root_entry = NULL;
3252 rtx se_insn = NULL;
3253 enum rtx_code extension_code;
3254 rtx reg = DF_REF_REAL_REG (ref);
3256 root_entry = unionfind_root (&use_entry[DF_REF_ID (ref)]);
3258 if (ENTRY_EI (root_entry)->relevancy != SIGN_EXTENDED_DEF
3259 && ENTRY_EI (root_entry)->relevancy != ZERO_EXTENDED_DEF)
3260 /* The current web is not relevant. Continue to the next use. */
3261 return 0;
3263 if (root_entry->reg)
3264 /* It isn't possible to have two different register for the same
3265 web. */
3266 gcc_assert (rtx_equal_p (root_entry->reg, reg));
3267 else
3268 root_entry->reg = reg;
3270 /* Generate the use extension. */
3271 if (ENTRY_EI (root_entry)->relevancy == SIGN_EXTENDED_DEF)
3272 extension_code = SIGN_EXTEND;
3273 else
3274 extension_code = ZERO_EXTEND;
3276 se_insn =
3277 see_gen_normalized_extension (reg, extension_code,
3278 ENTRY_EI (root_entry)->source_mode);
3279 if (!se_insn)
3280 /* This is very bad, abort the transformation. */
3281 return -1;
3283 if (!see_store_reference_and_extension (insn, se_insn,
3284 USE_EXTENSION))
3285 /* Something bad happened. Abort the optimization. */
3286 return -1;
3287 return 1;
3290 static int
3291 see_handle_relevant_refs (void)
3293 int num_relevant_refs = 0;
3294 basic_block bb;
3296 FOR_ALL_BB (bb)
3298 rtx insn;
3299 FOR_BB_INSNS (bb, insn)
3301 unsigned int uid = INSN_UID (insn);
3303 if (INSN_P (insn))
3305 struct df_ref **use_rec;
3306 struct df_ref **def_rec;
3308 for (use_rec = DF_INSN_UID_USES (uid); *use_rec; use_rec++)
3310 struct df_ref *use = *use_rec;
3311 int result = see_handle_relevant_uses (use, insn);
3312 if (result == -1)
3313 return -1;
3314 num_relevant_refs += result;
3316 for (use_rec = DF_INSN_UID_EQ_USES (uid); *use_rec; use_rec++)
3318 struct df_ref *use = *use_rec;
3319 int result = see_handle_relevant_uses (use, insn);
3320 if (result == -1)
3321 return -1;
3322 num_relevant_refs += result;
3324 for (def_rec = DF_INSN_UID_DEFS (uid); *def_rec; def_rec++)
3326 struct df_ref *def = *def_rec;
3327 int result = see_handle_relevant_defs (def, insn);
3328 if (result == -1)
3329 return -1;
3330 num_relevant_refs += result;
3335 return num_relevant_refs;
3339 /* Initialized the use_entry field for REF in INSN at INDEX with ET. */
3341 static void
3342 see_update_uses_relevancy (rtx insn, struct df_ref *ref,
3343 enum entry_type et, unsigned int index)
3345 struct see_entry_extra_info *curr_entry_extra_info;
3347 if (dump_file)
3349 rtx reg = DF_REF_REAL_REG (ref);
3350 fprintf (dump_file, "u%i insn %i reg %i ",
3351 index, (insn ? INSN_UID (insn) : -1), REGNO (reg));
3352 if (et == NOT_RELEVANT)
3353 fprintf (dump_file, "NOT RELEVANT \n");
3354 else
3355 fprintf (dump_file, "RELEVANT USE \n");
3358 DF_REF_ID (ref) = index;
3359 curr_entry_extra_info = xmalloc (sizeof (struct see_entry_extra_info));
3360 curr_entry_extra_info->relevancy = et;
3361 curr_entry_extra_info->local_relevancy = et;
3362 use_entry[index].extra_info = curr_entry_extra_info;
3363 use_entry[index].reg = NULL;
3364 use_entry[index].pred = NULL;
3368 /* A definition in a candidate for this optimization only if its pattern is
3369 recognized as relevant in this function.
3370 INSN is the instruction to be recognized.
3372 - If this is the pattern of a common sign extension after definition:
3373 PREV_INSN (INSN): def (reg:NARROWmode r)
3374 INSN: set ((reg:WIDEmode r')
3375 (sign_extend:WIDEmode (reg:NARROWmode r)))
3376 return SIGN_EXTENDED_DEF and set SOURCE_MODE to NARROWmode.
3378 - If this is the pattern of a common zero extension after definition:
3379 PREV_INSN (INSN): def (reg:NARROWmode r)
3380 INSN: set ((reg:WIDEmode r')
3381 (zero_extend:WIDEmode (reg:NARROWmode r)))
3382 return ZERO_EXTENDED_DEF and set SOURCE_MODE to NARROWmode.
3384 - Otherwise,
3386 For the pattern:
3387 INSN: set ((reg:WIDEmode r) (sign_extend:WIDEmode (...expr...)))
3388 return EXTENDED_DEF and set SOURCE_MODE to the mode of expr.
3390 For the pattern:
3391 INSN: set ((reg:WIDEmode r) (zero_extend:WIDEmode (...expr...)))
3392 return EXTENDED_DEF and set SOURCE_MODE_UNSIGNED to the mode of expr.
3394 For the pattern:
3395 INSN: set ((reg:WIDEmode r) (CONST_INT (...)))
3396 return EXTENDED_DEF and set SOURCE_MODE(_UNSIGNED) to the narrowest mode that
3397 is implicitly sign(zero) extended to WIDEmode in the INSN.
3399 - FIXME: Extensions that are not adjacent to their definition and EXTENDED_DEF
3400 that is part of a PARALLEL instruction are not handled.
3401 These restriction can be relaxed. */
3403 static enum entry_type
3404 see_analyze_one_def (rtx insn, enum machine_mode *source_mode,
3405 enum machine_mode *source_mode_unsigned)
3407 enum rtx_code extension_code;
3408 rtx rhs = NULL;
3409 rtx lhs = NULL;
3410 rtx set = NULL;
3411 rtx source_register = NULL;
3412 rtx prev_insn = NULL;
3413 rtx next_insn = NULL;
3414 enum machine_mode mode;
3415 enum machine_mode next_source_mode;
3416 HOST_WIDE_INT val = 0;
3417 HOST_WIDE_INT val2 = 0;
3418 int i = 0;
3420 *source_mode = MAX_MACHINE_MODE;
3421 *source_mode_unsigned = MAX_MACHINE_MODE;
3423 extension_code = see_get_extension_data (insn, source_mode);
3424 switch (extension_code)
3426 case SIGN_EXTEND:
3427 case ZERO_EXTEND:
3428 source_register = see_get_extension_reg (insn, 0);
3429 /* FIXME: This restriction can be relaxed. The only thing that is
3430 important is that the reference would be inside the same basic block
3431 as the extension. */
3432 prev_insn = PREV_INSN (insn);
3433 if (!prev_insn || !INSN_P (prev_insn))
3434 return NOT_RELEVANT;
3436 if (!reg_set_between_p (source_register, PREV_INSN (prev_insn), insn))
3437 return NOT_RELEVANT;
3439 if (find_reg_note (prev_insn, REG_LIBCALL, NULL_RTX))
3440 return NOT_RELEVANT;
3442 if (find_reg_note (prev_insn, REG_RETVAL, NULL_RTX))
3443 return NOT_RELEVANT;
3445 /* If we can't use copy_rtx on the reference it can't be a reference. */
3446 if (GET_CODE (PATTERN (prev_insn)) == PARALLEL
3447 && asm_noperands (PATTERN (prev_insn)) >= 0)
3448 return NOT_RELEVANT;
3450 /* Now, check if this extension is a reference itself. If so, it is not
3451 relevant. Handling this extension as relevant would make things much
3452 more complicated. */
3453 next_insn = NEXT_INSN (insn);
3454 if (next_insn
3455 && INSN_P (next_insn)
3456 && (see_get_extension_data (next_insn, &next_source_mode) !=
3457 NOT_RELEVANT))
3459 rtx curr_dest_register = see_get_extension_reg (insn, 1);
3460 rtx next_source_register = see_get_extension_reg (next_insn, 0);
3462 if (REGNO (curr_dest_register) == REGNO (next_source_register))
3463 return NOT_RELEVANT;
3466 if (extension_code == SIGN_EXTEND)
3467 return SIGN_EXTENDED_DEF;
3468 else
3469 return ZERO_EXTENDED_DEF;
3471 case UNKNOWN:
3472 /* This may still be an EXTENDED_DEF. */
3474 /* FIXME: This restriction can be relaxed. It is possible to handle
3475 PARALLEL insns too. */
3476 set = single_set (insn);
3477 if (!set)
3478 return NOT_RELEVANT;
3479 rhs = SET_SRC (set);
3480 lhs = SET_DEST (set);
3482 /* Don't handle extensions to something other then register or
3483 subregister. */
3484 if (!REG_P (lhs) && !SUBREG_REG (lhs))
3485 return NOT_RELEVANT;
3487 switch (GET_CODE (rhs))
3489 case SIGN_EXTEND:
3490 *source_mode = GET_MODE (XEXP (rhs, 0));
3491 *source_mode_unsigned = MAX_MACHINE_MODE;
3492 return EXTENDED_DEF;
3493 case ZERO_EXTEND:
3494 *source_mode = MAX_MACHINE_MODE;
3495 *source_mode_unsigned = GET_MODE (XEXP (rhs, 0));
3496 return EXTENDED_DEF;
3497 case CONST_INT:
3499 val = INTVAL (rhs);
3501 /* Find the narrowest mode, val could fit into. */
3502 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT), i = 0;
3503 GET_MODE_BITSIZE (mode) < BITS_PER_WORD;
3504 mode = GET_MODE_WIDER_MODE (mode), i++)
3506 val2 = trunc_int_for_mode (val, mode);
3507 if (val2 == val && *source_mode == MAX_MACHINE_MODE)
3508 *source_mode = mode;
3509 if (val == (val & (HOST_WIDE_INT)GET_MODE_MASK (mode))
3510 && *source_mode_unsigned == MAX_MACHINE_MODE)
3511 *source_mode_unsigned = mode;
3512 if (*source_mode != MAX_MACHINE_MODE
3513 && *source_mode_unsigned !=MAX_MACHINE_MODE)
3514 return EXTENDED_DEF;
3516 if (*source_mode != MAX_MACHINE_MODE
3517 || *source_mode_unsigned !=MAX_MACHINE_MODE)
3518 return EXTENDED_DEF;
3519 return NOT_RELEVANT;
3520 default:
3521 return NOT_RELEVANT;
3523 default:
3524 gcc_unreachable ();
3529 /* Initialized the def_entry field for REF in INSN at INDEX with ET. */
3531 static void
3532 see_update_defs_relevancy (rtx insn, struct df_ref *ref,
3533 enum entry_type et,
3534 enum machine_mode source_mode,
3535 enum machine_mode source_mode_unsigned,
3536 unsigned int index)
3538 struct see_entry_extra_info *curr_entry_extra_info
3539 = xmalloc (sizeof (struct see_entry_extra_info));
3540 curr_entry_extra_info->relevancy = et;
3541 curr_entry_extra_info->local_relevancy = et;
3543 DF_REF_ID (ref) = index;
3545 if (et != EXTENDED_DEF)
3547 curr_entry_extra_info->source_mode = source_mode;
3548 curr_entry_extra_info->local_source_mode = source_mode;
3550 else
3552 curr_entry_extra_info->source_mode_signed = source_mode;
3553 curr_entry_extra_info->source_mode_unsigned = source_mode_unsigned;
3555 def_entry[index].extra_info = curr_entry_extra_info;
3556 def_entry[index].reg = NULL;
3557 def_entry[index].pred = NULL;
3559 if (dump_file)
3561 rtx reg = DF_REF_REAL_REG (ref);
3562 if (et == NOT_RELEVANT)
3564 fprintf (dump_file, "d%i insn %i reg %i ",
3565 index, (insn ? INSN_UID (insn) : -1), REGNO (reg));
3566 fprintf (dump_file, "NOT RELEVANT \n");
3568 else
3570 fprintf (dump_file, "d%i insn %i reg %i ",
3571 index, INSN_UID (insn), REGNO (reg));
3572 fprintf (dump_file, "RELEVANT - ");
3573 switch (et)
3575 case SIGN_EXTENDED_DEF :
3576 fprintf (dump_file, "SIGN_EXTENDED_DEF, source_mode = %s\n",
3577 GET_MODE_NAME (source_mode));
3578 break;
3579 case ZERO_EXTENDED_DEF :
3580 fprintf (dump_file, "ZERO_EXTENDED_DEF, source_mode = %s\n",
3581 GET_MODE_NAME (source_mode));
3582 break;
3583 case EXTENDED_DEF :
3584 fprintf (dump_file, "EXTENDED_DEF, ");
3585 if (source_mode != MAX_MACHINE_MODE
3586 && source_mode_unsigned != MAX_MACHINE_MODE)
3588 fprintf (dump_file, "positive const, ");
3589 fprintf (dump_file, "source_mode_signed = %s, ",
3590 GET_MODE_NAME (source_mode));
3591 fprintf (dump_file, "source_mode_unsigned = %s\n",
3592 GET_MODE_NAME (source_mode_unsigned));
3594 else if (source_mode != MAX_MACHINE_MODE)
3595 fprintf (dump_file, "source_mode_signed = %s\n",
3596 GET_MODE_NAME (source_mode));
3597 else
3598 fprintf (dump_file, "source_mode_unsigned = %s\n",
3599 GET_MODE_NAME (source_mode_unsigned));
3600 break;
3601 default :
3602 gcc_unreachable ();
3609 /* Updates the relevancy of all the uses and all defs.
3611 The information of the u'th use is stored in use_entry[u] and the
3612 information of the d'th definition is stored in def_entry[d].
3614 Currently all the uses are relevant for the optimization except for
3615 uses that are in LIBCALL instructions. */
3617 static void
3618 see_update_relevancy (void)
3620 unsigned int d = 0;
3621 unsigned int u = 0;
3622 enum entry_type et;
3623 enum machine_mode source_mode;
3624 enum machine_mode source_mode_unsigned;
3625 basic_block bb;
3627 if (!def_entry)
3628 return;
3630 FOR_ALL_BB (bb)
3632 struct df_ref **use_rec;
3633 struct df_ref **def_rec;
3634 rtx insn;
3635 FOR_BB_INSNS (bb, insn)
3637 unsigned int uid = INSN_UID (insn);
3638 if (INSN_P (insn))
3641 /* If this is an insn in a libcall, do not touch the uses. */
3642 if (find_reg_note (insn, REG_LIBCALL_ID, NULL_RTX))
3643 et = NOT_RELEVANT;
3644 else
3645 et = RELEVANT_USE;
3647 for (use_rec = DF_INSN_UID_USES (uid); *use_rec; use_rec++)
3649 struct df_ref *use = *use_rec;
3650 see_update_uses_relevancy (insn, use, et, u);
3651 u++;
3654 for (use_rec = DF_INSN_UID_EQ_USES (uid); *use_rec; use_rec++)
3656 struct df_ref *use = *use_rec;
3657 see_update_uses_relevancy (insn, use, et, u);
3658 u++;
3661 et = see_analyze_one_def (insn, &source_mode, &source_mode_unsigned);
3662 for (def_rec = DF_INSN_UID_DEFS (uid); *def_rec; def_rec++)
3664 struct df_ref *def = *def_rec;
3665 see_update_defs_relevancy (insn, def, et, source_mode,
3666 source_mode_unsigned, d);
3667 d++;
3672 for (use_rec = df_get_artificial_uses (bb->index); *use_rec; use_rec++)
3674 struct df_ref *use = *use_rec;
3675 see_update_uses_relevancy (NULL, use, NOT_RELEVANT, u);
3676 u++;
3679 for (def_rec = df_get_artificial_defs (bb->index); *def_rec; def_rec++)
3681 struct df_ref *def = *def_rec;
3682 see_update_defs_relevancy (NULL, def, NOT_RELEVANT,
3683 MAX_MACHINE_MODE, MAX_MACHINE_MODE, d);
3684 d++;
3690 /* Phase 1 top level function.
3691 In this phase the relevancy of all the definitions and uses are checked,
3692 later the webs are produces and the extensions are generated.
3693 These extensions are not emitted yet into the insns stream.
3695 returns true if at list one relevant web was found and there were no
3696 problems, otherwise return false. */
3698 static bool
3699 see_propagate_extensions_to_uses (void)
3701 int num_relevant_refs;
3702 basic_block bb;
3704 if (dump_file)
3705 fprintf (dump_file,
3706 "* Phase 1: Propagate extensions to uses. *\n");
3708 /* Update the relevancy of references using the DF object. */
3709 see_update_relevancy ();
3711 /* Produce the webs and update the extra_info of the root.
3712 In general, a web is relevant if all its definitions and uses are relevant
3713 and there is at least one definition that was marked as SIGN_EXTENDED_DEF
3714 or ZERO_EXTENDED_DEF. */
3715 FOR_ALL_BB (bb)
3717 rtx insn;
3718 struct df_ref **use_rec;
3720 FOR_BB_INSNS (bb, insn)
3722 unsigned int uid = INSN_UID (insn);
3723 if (INSN_P (insn))
3725 for (use_rec = DF_INSN_UID_USES (uid); *use_rec; use_rec++)
3727 struct df_ref *use = *use_rec;
3728 union_defs (use, def_entry, use_entry, see_update_leader_extra_info);
3731 for (use_rec = DF_INSN_UID_EQ_USES (uid); *use_rec; use_rec++)
3733 struct df_ref *use = *use_rec;
3734 union_defs (use, def_entry, use_entry, see_update_leader_extra_info);
3739 for (use_rec = df_get_artificial_uses (bb->index); *use_rec; use_rec++)
3741 struct df_ref *use = *use_rec;
3742 union_defs (use, def_entry, use_entry, see_update_leader_extra_info);
3746 /* Generate use extensions for references and insert these
3747 references to see_bb_splay_ar data structure. */
3748 num_relevant_refs = see_handle_relevant_refs ();
3750 return num_relevant_refs > 0;
3754 /* Main entry point for the sign extension elimination optimization. */
3756 static void
3757 see_main (void)
3759 bool cont = false;
3760 int i = 0;
3762 /* Initialize global data structures. */
3763 see_initialize_data_structures ();
3765 /* Phase 1: Propagate extensions to uses. */
3766 cont = see_propagate_extensions_to_uses ();
3768 if (cont)
3770 init_recog ();
3772 /* Phase 2: Merge and eliminate locally redundant extensions. */
3773 see_merge_and_eliminate_extensions ();
3775 /* Phase 3: Eliminate globally redundant extensions. */
3776 see_execute_LCM ();
3778 /* Phase 4: Commit changes to the insn stream. */
3779 see_commit_changes ();
3781 if (dump_file)
3783 /* For debug purpose only. */
3784 fprintf (dump_file, "see_pre_extension_hash:\n");
3785 htab_traverse (see_pre_extension_hash, see_print_pre_extension_expr,
3786 NULL);
3788 for (i = 0; i < last_bb; i++)
3790 if (see_bb_hash_ar[i])
3791 /* Traverse over all the references in the basic block in
3792 forward order. */
3794 fprintf (dump_file,
3795 "Searching register properties in bb %d\n", i);
3796 htab_traverse (see_bb_hash_ar[i],
3797 see_print_register_properties, NULL);
3803 /* Free global data structures. */
3804 see_free_data_structures ();
3808 static bool
3809 gate_handle_see (void)
3811 return optimize > 1 && flag_see;
3814 static unsigned int
3815 rest_of_handle_see (void)
3817 int no_new_pseudos_bcp = no_new_pseudos;
3819 no_new_pseudos = 0;
3820 see_main ();
3821 no_new_pseudos = no_new_pseudos_bcp;
3823 run_fast_dce ();
3824 return 0;
3827 struct tree_opt_pass pass_see =
3829 "see", /* name */
3830 gate_handle_see, /* gate */
3831 rest_of_handle_see, /* execute */
3832 NULL, /* sub */
3833 NULL, /* next */
3834 0, /* static_pass_number */
3835 TV_SEE, /* tv_id */
3836 0, /* properties_required */
3837 0, /* properties_provided */
3838 0, /* properties_destroyed */
3839 0, /* todo_flags_start */
3840 TODO_df_finish |
3841 TODO_dump_func, /* todo_flags_finish */
3842 'u' /* letter */