2008-05-20 Kai Tietz <kai.tietz@onevision.com>
[official-gcc.git] / gcc / see.c
blob6e5260b995aa2ffaf62ac5cb5c42baefddb9f99a
1 /* Sign extension elimination optimization for GNU compiler.
2 Copyright (C) 2005, 2006, 2007 Free Software Foundation, Inc.
3 Contributed by Leehod Baruch <leehod@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 -Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>.
21 Problem description:
22 --------------------
23 In order to support 32bit computations on a 64bit machine, sign
24 extension instructions are generated to ensure the correctness of
25 the computation.
26 A possible policy (as currently implemented) is to generate a sign
27 extension right after each 32bit computation.
28 Depending on the instruction set of the architecture, some of these
29 sign extension instructions may be redundant.
30 There are two cases in which the extension may be redundant:
32 Case1:
33 The instruction that uses the 64bit operands that are sign
34 extended has a dual mode that works with 32bit operands.
35 For example:
37 int32 a, b;
39 a = .... --> a = ....
40 a = sign extend a -->
41 b = .... --> b = ....
42 b = sign extend a -->
43 -->
44 cmpd a, b --> cmpw a, b //half word compare
46 Case2:
47 The instruction that defines the 64bit operand (which is later sign
48 extended) has a dual mode that defines and sign-extends simultaneously
49 a 32bit operand. For example:
51 int32 a;
53 ld a --> lwa a // load half word and sign extend
54 a = sign extend a -->
55 -->
56 return a --> return a
59 General idea for solution:
60 --------------------------
61 First, try to merge the sign extension with the instruction that
62 defines the source of the extension and (separately) with the
63 instructions that uses the extended result. By doing this, both cases
64 of redundancies (as described above) will be eliminated.
66 Then, use partial redundancy elimination to place the non redundant
67 ones at optimal placements.
70 Implementation by example:
71 --------------------------
72 Note: The instruction stream is not changed till the last phase.
74 Phase 0: Initial code, as currently generated by gcc.
76 def1 def3
77 se1 def2 se3
78 | \ | / |
79 | \ | / |
80 | \ | / |
81 | \ | / |
82 | \ | / |
83 | \|/ |
84 use1 use2 use3
85 use4
86 def1 + se1:
87 set ((reg:SI 10) (..def1rhs..))
88 set ((reg:DI 100) (sign_extend:DI (reg:SI 10)))
90 def2:
91 set ((reg:DI 100) (const_int 7))
93 def3 + se3:
94 set ((reg:SI 20) (..def3rhs..))
95 set ((reg:DI 100) (sign_extend:DI (reg:SI 20)))
97 use1:
98 set ((reg:CC...) (compare:CC (reg:DI 100) (...)))
100 use2, use3, use4:
101 set ((...) (reg:DI 100))
103 Phase 1: Propagate extensions to uses.
105 def1 def3
106 se1 def2 se3
107 | \ | / |
108 | \ | / |
109 | \ | / |
110 | \ | / |
111 | \ | / |
112 | \|/ |
113 se se se
114 use1 use2 use3
116 use4
118 From here, all of the subregs are lowpart !
120 def1, def2, def3: No change.
122 use1:
123 set ((reg:DI 100) (sign_extend:DI ((subreg:SI (reg:DI 100)))))
124 set ((reg:CC...) (compare:CC (reg:DI 100) (...)))
126 use2, use3, use4:
127 set ((reg:DI 100) (sign_extend:DI ((subreg:SI (reg:DI 100)))))
128 set ((...) (reg:DI 100))
131 Phase 2: Merge and eliminate locally redundant extensions.
134 *def1 def2 *def3
135 [se removed] se se3
136 | \ | / |
137 | \ | / |
138 | \ | / |
139 | \ | / |
140 | \ | / |
141 | \|/ |
142 [se removed] se se
143 *use1 use2 use3
144 [se removed]
145 use4
147 The instructions that were changed at this phase are marked with
148 asterisk.
150 *def1: Merge failed.
151 Remove the sign extension instruction, modify def1 and
152 insert a move instruction to assure to correctness of the code.
153 set ((subreg:SI (reg:DI 100)) (..def1rhs..))
154 set ((reg:SI 10) (subreg:SI (reg:DI 100)))
156 def2 + se: There is no need for merge.
157 Def2 is not changed but a sign extension instruction is
158 created.
159 set ((reg:DI 100) (const_int 7))
160 set ((reg:DI 100) (sign_extend:DI ((subreg:SI (reg:DI 100)))))
162 *def3 + se3: Merge succeeded.
163 set ((reg:DI 100) (sign_extend:DI (..def3rhs..)))
164 set ((reg:SI 20) (reg:DI 100))
165 set ((reg:DI 100) (sign_extend:DI (reg:SI 20)))
166 (The extension instruction is the original one).
168 *use1: Merge succeeded. Remove the sign extension instruction.
169 set ((reg:CC...)
170 (compare:CC (subreg:SI (reg:DI 100)) (...)))
172 use2, use3: Merge failed. No change.
174 use4: The extension is locally redundant, therefore it is eliminated
175 at this point.
178 Phase 3: Eliminate globally redundant extensions.
180 Following the LCM output:
182 def1 def2 def3
183 se se3
184 | \ | / |
185 | \ | / |
186 | se | / |
187 | \ | / |
188 | \ | / |
189 | \|/ |
190 [ses removed]
191 use1 use2 use3
192 use4
195 set ((reg:DI 100) (sign_extend:DI ((subreg:SI (reg:DI 100)))))
197 se3:
198 set ((reg:DI 100) (sign_extend:DI (reg:SI 20)))
201 Phase 4: Commit changes to the insn stream.
204 def1 def3 *def1 def2 *def3
205 se1 def2 se3 [se removed] [se removed]
206 | \ | / | | \ | / |
207 | \ | / | ------> | \ | / |
208 | \ | / | ------> | se | / |
209 | \ | / | | \ | / |
210 | \ | / | | \ | / |
211 | \|/ | | \|/ |
212 use1 use2 use3 *use1 use2 use3
213 use4 use4
215 The instructions that were changed during the whole optimization are
216 marked with asterisk.
218 The result:
220 def1 + se1:
221 [ set ((reg:SI 10) (..def1rhs..)) ] - Deleted
222 [ set ((reg:DI 100) (sign_extend:DI (reg:SI 10))) ] - Deleted
223 set ((subreg:SI (reg:DI 100)) (..def3rhs..)) - Inserted
224 set ((reg:SI 10) (subreg:SI (reg:DI 100))) - Inserted
226 def2:
227 set ((reg:DI 100) (const_int 7)) - No change
229 def3 + se3:
230 [ set ((reg:SI 20) (..def3rhs..)) ] - Deleted
231 [ set ((reg:DI 100) (sign_extend:DI (reg:SI 20))) ] - Deleted
232 set ((reg:DI 100) (sign_extend:DI (..def3rhs..))) - Inserted
233 set ((reg:SI 20) (reg:DI 100)) - Inserted
235 use1:
236 [ set ((reg:CC...) (compare:CC (reg:DI 100) (...))) ] - Deleted
237 set ((reg:CC...) - Inserted
238 (compare:CC (subreg:SI (reg:DI 100)) (...)))
240 use2, use3, use4:
241 set ((...) (reg:DI 100)) - No change
243 se: - Inserted
244 set ((reg:DI 100) (sign_extend:DI ((subreg:SI (reg:DI 100)))))
246 Note: Most of the simple move instructions that were inserted will be
247 trivially dead and therefore eliminated.
249 The implementation outline:
250 ---------------------------
251 Some definitions:
252 A web is RELEVANT if at the end of phase 1, his leader's
253 relevancy is {ZERO, SIGN}_EXTENDED_DEF. The source_mode of
254 the web is the source_mode of his leader.
255 A definition is a candidate for the optimization if it is part
256 of a RELEVANT web and his local source_mode is not narrower
257 then the source_mode of its web.
258 A use is a candidate for the optimization if it is part of a
259 RELEVANT web.
260 A simple explicit extension is a single set instruction that
261 extends a register (or a subregister) to a register (or
262 subregister).
263 A complex explicit extension is an explicit extension instruction
264 that is not simple.
265 A def extension is a simple explicit extension that is
266 also a candidate for the optimization. This extension is part
267 of the instruction stream, it is not generated by this
268 optimization.
269 A use extension is a simple explicit extension that is generated
270 and stored for candidate use during this optimization. It is
271 not emitted to the instruction stream till the last phase of
272 the optimization.
273 A reference is an instruction that satisfy at least on of these
274 criteria:
275 - It contains a definition with EXTENDED_DEF relevancy in a RELEVANT web.
276 - It is followed by a def extension.
277 - It contains a candidate use.
279 Phase 1: Propagate extensions to uses.
280 In this phase, we find candidate extensions for the optimization
281 and we generate (but not emit) proper extensions "right before the
282 uses".
284 a. Build a DF object.
285 b. Traverse over all the instructions that contains a definition
286 and set their local relevancy and local source_mode like this:
287 - If the instruction is a simple explicit extension instruction,
288 mark it as {ZERO, SIGN}_EXTENDED_DEF according to the extension
289 type and mark its source_mode to be the mode of the quantity
290 that is been extended.
291 - Otherwise, If the instruction has an implicit extension,
292 which means that its high part is an extension of its low part,
293 or if it is a complicated explicit extension, mark it as
294 EXTENDED_DEF and set its source_mode to be the narrowest
295 mode that is been extended in the instruction.
296 c. Traverse over all the instructions that contains a use and set
297 their local relevancy to RELEVANT_USE (except for few corner
298 cases).
299 d. Produce the web. During union of two entries, update the
300 relevancy and source_mode of the leader. There are two major
301 guide lines for this update:
302 - If one of the entries is NOT_RELEVANT, mark the leader
303 NOT_RELEVANT.
304 - If one is ZERO_EXTENDED_DEF and the other is SIGN_EXTENDED_DEF
305 (or vice versa) mark the leader as NOT_RELEVANT. We don't
306 handle this kind of mixed webs.
307 (For more details about this update process,
308 see see_update_leader_extra_info ()).
309 e. Generate uses extensions according to the relevancy and
310 source_mode of the webs.
312 Phase 2: Merge and eliminate locally redundant extensions.
313 In this phase, we try to merge def extensions and use
314 extensions with their references, and eliminate redundant extensions
315 in the same basic block.
317 Traverse over all the references. Do this in basic block number and
318 luid number forward order.
319 For each reference do:
320 a. Peephole optimization - try to merge it with all its
321 def extensions and use extensions in the following
322 order:
323 - Try to merge only the def extensions, one by one.
324 - Try to merge only the use extensions, one by one.
325 - Try to merge any couple of use extensions simultaneously.
326 - Try to merge any def extension with one or two uses
327 extensions simultaneously.
328 b. Handle each EXTENDED_DEF in it as if it was already merged with
329 an extension.
331 During the merge process we save the following data for each
332 register in each basic block:
333 a. The first instruction that defines the register in the basic
334 block.
335 b. The last instruction that defines the register in the basic
336 block.
337 c. The first extension of this register before the first
338 instruction that defines it in the basic block.
339 c. The first extension of this register after the last
340 instruction that defines it in the basic block.
341 This data will help us eliminate (or more precisely, not generate)
342 locally redundant extensions, and will be useful in the next stage.
344 While merging extensions with their reference there are 4 possible
345 situations:
346 a. A use extension was merged with the reference:
347 Delete the extension instruction and save the merged reference
348 for phase 4. (For details, see see_use_extension_merged ())
349 b. A use extension failed to be merged with the reference:
350 If there is already such an extension in the same basic block
351 and it is not dead at this point, delete the unmerged extension
352 (it is locally redundant), otherwise properly update the above
353 basic block data.
354 (For details, see see_merge_one_use_extension ())
355 c. A def extension was merged with the reference:
356 Mark this extension as a merged_def extension and properly
357 update the above basic block data.
358 (For details, see see_merge_one_def_extension ())
359 d. A def extension failed to be merged with the reference:
360 Replace the definition of the NARROWmode register in the
361 reference with the proper subreg of WIDEmode register and save
362 the result as a merged reference. Also, properly update the
363 the above basic block data.
364 (For details, see see_def_extension_not_merged ())
366 Phase 3: Eliminate globally redundant extensions.
367 In this phase, we set the bit vectors input of the edge based LCM
368 using the recorded data on the registers in each basic block.
369 We also save pointers for all the anticipatable and available
370 occurrences of the relevant extensions. Then we run the LCM.
372 a. Initialize the comp, antloc, kill bit vectors to zero and the
373 transp bit vector to ones.
375 b. Traverse over all the references. Do this in basic block number
376 and luid number forward order. For each reference:
377 - Go over all its use extensions. For each such extension -
378 If it is not dead from the beginning of the basic block SET
379 the antloc bit of the current extension in the current
380 basic block bits.
381 If it is not dead till the end of the basic block SET the
382 comp bit of the current extension in the current basic
383 block bits.
384 - Go over all its def extensions that were merged with
385 it. For each such extension -
386 If it is not dead till the end of the basic block SET the
387 comp bit of the current extension in the current basic
388 block bits.
389 RESET the proper transp and kill bits.
390 - Go over all its def extensions that were not merged
391 with it. For each such extension -
392 RESET the transp bit and SET the kill bit of the current
393 extension in the current basic block bits.
395 c. Run the edge based LCM.
397 Phase 4: Commit changes to the insn stream.
398 This is the only phase that actually changes the instruction stream.
399 Up to this point the optimization could be aborted at any time.
400 Here we insert extensions at their best placements and delete the
401 redundant ones according to the output of the LCM. We also replace
402 some of the instructions according to the second phase merges results.
404 a. Use the pre_delete_map (from the output of the LCM) in order to
405 delete redundant extensions. This will prevent them from been
406 emitted in the first place.
408 b. Insert extensions on edges where needed according to
409 pre_insert_map and edge_list (from the output of the LCM).
411 c. For each reference do-
412 - Emit all the uses extensions that were not deleted until now,
413 right before the reference.
414 - Delete all the merged and unmerged def extensions from
415 the instruction stream.
416 - Replace the reference with the merged one, if exist.
418 The implementation consists of four data structures:
419 - Data structure I
420 Purpose: To handle the relevancy of the uses, definitions and webs.
421 Relevant structures: web_entry (from df.h), see_entry_extra_info.
422 Details: This is a disjoint-set data structure. Most of its functions are
423 implemented in web.c. Each definition and use in the code are
424 elements. A web_entry structure is allocated for each element to
425 hold the element's relevancy and source_mode. The union rules are
426 defined in see_update_leader_extra_info ().
427 - Data structure II
428 Purpose: To store references and their extensions (uses and defs)
429 and to enable traverse over these references according to basic
430 block order.
431 Relevant structure: see_ref_s.
432 Details: This data structure consists of an array of splay trees. One splay
433 tree for each basic block. The splay tree nodes are references and
434 the keys are the luids of the references.
435 A see_ref_s structure is allocated for each reference. It holds the
436 reference itself, its def and uses extensions and later the merged
437 version of the reference.
438 Using this data structure we can traverse over all the references of
439 a basic block and their extensions in forward order.
440 - Data structure III.
441 Purpose: To store local properties of registers for each basic block.
442 This data will later help us build the LCM sbitmap_vectors
443 input.
444 Relevant structure: see_register_properties.
445 Details: This data structure consists of an array of hash tables. One hash
446 for each basic block. The hash node are a register properties
447 and the keys are the numbers of the registers.
448 A see_register_properties structure is allocated for each register
449 that we might be interested in its properties.
450 Using this data structure we can easily find the properties of a
451 register in a specific basic block. This is necessary for locally
452 redundancy elimination and for setting up the LCM input.
453 - Data structure IV.
454 Purpose: To store the extensions that are candidate for PRE and their
455 anticipatable and available occurrences.
456 Relevant structure: see_occr, see_pre_extension_expr.
457 Details: This data structure is a hash tables. Its nodes are the extensions
458 that are candidate for PRE.
459 A see_pre_extension_expr structure is allocated for each candidate
460 extension. It holds a copy of the extension and a linked list of all
461 the anticipatable and available occurrences of it.
462 We use this data structure when we read the output of the LCM. */
464 #include "config.h"
465 #include "system.h"
466 #include "coretypes.h"
467 #include "tm.h"
469 #include "obstack.h"
470 #include "rtl.h"
471 #include "output.h"
472 #include "df.h"
473 #include "insn-config.h"
474 #include "recog.h"
475 #include "expr.h"
476 #include "splay-tree.h"
477 #include "hashtab.h"
478 #include "regs.h"
479 #include "timevar.h"
480 #include "tree-pass.h"
481 #include "dce.h"
483 /* Used to classify defs and uses according to relevancy. */
484 enum entry_type {
485 NOT_RELEVANT,
486 SIGN_EXTENDED_DEF,
487 ZERO_EXTENDED_DEF,
488 EXTENDED_DEF,
489 RELEVANT_USE
492 /* Used to classify extensions in relevant webs. */
493 enum extension_type {
494 DEF_EXTENSION,
495 EXPLICIT_DEF_EXTENSION,
496 IMPLICIT_DEF_EXTENSION,
497 USE_EXTENSION
500 /* Global data structures and flags. */
502 /* This structure will be assigned for each web_entry structure (defined
503 in df.h). It is placed in the extra_info field of a web_entry and holds the
504 relevancy and source mode of the web_entry. */
506 struct see_entry_extra_info
508 /* The relevancy of the ref. */
509 enum entry_type relevancy;
510 /* The relevancy of the ref.
511 This field is updated only once - when this structure is created. */
512 enum entry_type local_relevancy;
513 /* The source register mode. */
514 enum machine_mode source_mode;
515 /* This field is used only if the relevancy is ZERO/SIGN_EXTENDED_DEF.
516 It is updated only once when this structure is created. */
517 enum machine_mode local_source_mode;
518 /* This field is used only if the relevancy is EXTENDED_DEF.
519 It holds the narrowest mode that is sign extended. */
520 enum machine_mode source_mode_signed;
521 /* This field is used only if the relevancy is EXTENDED_DEF.
522 It holds the narrowest mode that is zero extended. */
523 enum machine_mode source_mode_unsigned;
526 /* There is one such structure for every reference. It stores the reference
527 itself as well as its extensions (uses and definitions).
528 Used as the value in splay_tree see_bb_splay_ar[]. */
529 struct see_ref_s
531 /* The luid of the insn. */
532 unsigned int luid;
533 /* The insn of the ref. */
534 rtx insn;
535 /* The merged insn that was formed from the reference's insn and extensions.
536 If all merges failed, it remains NULL. */
537 rtx merged_insn;
538 /* The def extensions of the reference that were not merged with
539 it. */
540 htab_t unmerged_def_se_hash;
541 /* The def extensions of the reference that were merged with
542 it. Implicit extensions of the reference will be stored here too. */
543 htab_t merged_def_se_hash;
544 /* The uses extensions of reference. */
545 htab_t use_se_hash;
548 /* There is one such structure for every relevant extended register in a
549 specific basic block. This data will help us build the LCM sbitmap_vectors
550 input. */
551 struct see_register_properties
553 /* The register number. */
554 unsigned int regno;
555 /* The last luid of the reference that defines this register in this basic
556 block. */
557 int last_def;
558 /* The luid of the reference that has the first extension of this register
559 that appears before any definition in this basic block. */
560 int first_se_before_any_def;
561 /* The luid of the reference that has the first extension of this register
562 that appears after the last definition in this basic block. */
563 int first_se_after_last_def;
566 /* Occurrence of an expression.
567 There must be at most one available occurrence and at most one anticipatable
568 occurrence per basic block. */
569 struct see_occr
571 /* Next occurrence of this expression. */
572 struct see_occr *next;
573 /* The insn that computes the expression. */
574 rtx insn;
575 int block_num;
578 /* There is one such structure for every relevant extension expression.
579 It holds a copy of this extension instruction as well as a linked lists of
580 pointers to all the antic and avail occurrences of it. */
581 struct see_pre_extension_expr
583 /* A copy of the extension instruction. */
584 rtx se_insn;
585 /* Index in the available expression bitmaps. */
586 int bitmap_index;
587 /* List of anticipatable occurrences in basic blocks in the function.
588 An "anticipatable occurrence" is the first occurrence in the basic block,
589 the operands are not modified in the basic block prior to the occurrence
590 and the output is not used between the start of the block and the
591 occurrence. */
592 struct see_occr *antic_occr;
593 /* List of available occurrence in basic blocks in the function.
594 An "available occurrence" is the last occurrence in the basic block and
595 the operands are not modified by following statements in the basic block
596 [including this insn]. */
597 struct see_occr *avail_occr;
600 /* Helper structure for the note_uses and see_replace_src functions. */
601 struct see_replace_data
603 rtx from;
604 rtx to;
607 /* Helper structure for the note_uses and see_mentioned_reg functions. */
608 struct see_mentioned_reg_data
610 rtx reg;
611 bool mentioned;
614 /* An array of web_entries. The i'th definition in the df object is associated
615 with def_entry[i] */
616 static struct web_entry *def_entry = NULL;
617 /* An array of web_entries. The i'th use in the df object is associated with
618 use_entry[i] */
619 static struct web_entry *use_entry = NULL;
620 /* Array of splay_trees.
621 see_bb_splay_ar[i] refers to the splay tree of the i'th basic block.
622 The splay tree will hold see_ref_s structures. The key is the luid
623 of the insn. This way we can traverse over the references of each basic
624 block in forward or backward order. */
625 static splay_tree *see_bb_splay_ar = NULL;
626 /* Array of hashes.
627 see_bb_hash_ar[i] refers to the hash of the i'th basic block.
628 The hash will hold see_register_properties structure. The key is regno. */
629 static htab_t *see_bb_hash_ar = NULL;
630 /* Hash table that holds a copy of all the extensions. The key is the right
631 hand side of the se_insn field. */
632 static htab_t see_pre_extension_hash = NULL;
634 /* Local LCM properties of expressions. */
635 /* Nonzero for expressions that are transparent in the block. */
636 static sbitmap *transp = NULL;
637 /* Nonzero for expressions that are computed (available) in the block. */
638 static sbitmap *comp = NULL;
639 /* Nonzero for expressions that are locally anticipatable in the block. */
640 static sbitmap *antloc = NULL;
641 /* Nonzero for expressions that are locally killed in the block. */
642 static sbitmap *ae_kill = NULL;
643 /* Nonzero for expressions which should be inserted on a specific edge. */
644 static sbitmap *pre_insert_map = NULL;
645 /* Nonzero for expressions which should be deleted in a specific block. */
646 static sbitmap *pre_delete_map = NULL;
647 /* Contains the edge_list returned by pre_edge_lcm. */
648 static struct edge_list *edge_list = NULL;
649 /* Records the last basic block at the beginning of the optimization. */
650 static int last_bb;
651 /* Records the number of uses at the beginning of the optimization. */
652 static unsigned int uses_num;
653 /* Records the number of definitions at the beginning of the optimization. */
654 static unsigned int defs_num;
656 #define ENTRY_EI(ENTRY) ((struct see_entry_extra_info *) (ENTRY)->extra_info)
658 /* Functions implementation. */
660 /* Verifies that EXTENSION's pattern is this:
662 set (reg/subreg reg1) (sign/zero_extend:WIDEmode (reg/subreg reg2))
664 If it doesn't have the expected pattern return NULL.
665 Otherwise, if RETURN_DEST_REG is set, return reg1 else return reg2. */
667 static rtx
668 see_get_extension_reg (rtx extension, bool return_dest_reg)
670 rtx set, rhs, lhs;
671 rtx reg1 = NULL;
672 rtx reg2 = NULL;
674 /* Parallel pattern for extension not supported for the moment. */
675 if (GET_CODE (PATTERN (extension)) == PARALLEL)
676 return NULL;
678 set = single_set (extension);
679 if (!set)
680 return NULL;
681 lhs = SET_DEST (set);
682 rhs = SET_SRC (set);
684 if (REG_P (lhs))
685 reg1 = lhs;
686 else if (REG_P (SUBREG_REG (lhs)))
687 reg1 = SUBREG_REG (lhs);
688 else
689 return NULL;
691 if (GET_CODE (rhs) != SIGN_EXTEND && GET_CODE (rhs) != ZERO_EXTEND)
692 return NULL;
694 rhs = XEXP (rhs, 0);
695 if (REG_P (rhs))
696 reg2 = rhs;
697 else if (REG_P (SUBREG_REG (rhs)))
698 reg2 = SUBREG_REG (rhs);
699 else
700 return NULL;
702 if (return_dest_reg)
703 return reg1;
704 return reg2;
707 /* Verifies that EXTENSION's pattern is this:
709 set (reg/subreg reg1) (sign/zero_extend: (...expr...)
711 If it doesn't have the expected pattern return UNKNOWN.
712 Otherwise, set SOURCE_MODE to be the mode of the extended expr and return
713 the rtx code of the extension. */
715 static enum rtx_code
716 see_get_extension_data (rtx extension, enum machine_mode *source_mode)
718 rtx rhs, lhs, set;
720 if (!extension || !INSN_P (extension))
721 return UNKNOWN;
723 /* Parallel pattern for extension not supported for the moment. */
724 if (GET_CODE (PATTERN (extension)) == PARALLEL)
725 return NOT_RELEVANT;
727 set = single_set (extension);
728 if (!set)
729 return NOT_RELEVANT;
730 rhs = SET_SRC (set);
731 lhs = SET_DEST (set);
733 /* Don't handle extensions to something other then register or
734 subregister. */
735 if (!REG_P (lhs) && !SUBREG_REG (lhs))
736 return UNKNOWN;
738 if (GET_CODE (rhs) != SIGN_EXTEND && GET_CODE (rhs) != ZERO_EXTEND)
739 return UNKNOWN;
741 if (!REG_P (XEXP (rhs, 0))
742 && !(GET_CODE (XEXP (rhs, 0)) == SUBREG
743 && REG_P (SUBREG_REG (XEXP (rhs, 0)))))
744 return UNKNOWN;
746 *source_mode = GET_MODE (XEXP (rhs, 0));
748 if (GET_CODE (rhs) == SIGN_EXTEND)
749 return SIGN_EXTEND;
750 return ZERO_EXTEND;
754 /* Generate instruction with the pattern:
755 set ((reg r) (sign/zero_extend (subreg:mode (reg r))))
756 (the register r on both sides of the set is the same register).
757 And recognize it.
758 If the recognition failed, this is very bad, return NULL (This will abort
759 the entire optimization).
760 Otherwise, return the generated instruction. */
762 static rtx
763 see_gen_normalized_extension (rtx reg, enum rtx_code extension_code,
764 enum machine_mode mode)
766 rtx subreg, insn;
767 rtx extension = NULL;
769 if (!reg
770 || !REG_P (reg)
771 || (extension_code != SIGN_EXTEND && extension_code != ZERO_EXTEND))
772 return NULL;
774 subreg = gen_lowpart_SUBREG (mode, reg);
775 if (extension_code == SIGN_EXTEND)
776 extension = gen_rtx_SIGN_EXTEND (GET_MODE (reg), subreg);
777 else
778 extension = gen_rtx_ZERO_EXTEND (GET_MODE (reg), subreg);
780 start_sequence ();
781 emit_insn (gen_rtx_SET (VOIDmode, reg, extension));
782 insn = get_insns ();
783 end_sequence ();
785 if (insn_invalid_p (insn))
786 /* Recognition failed, this is very bad for this optimization.
787 Abort the optimization. */
788 return NULL;
789 return insn;
792 /* Hashes and splay_trees related functions implementation. */
794 /* Helper functions for the pre_extension hash.
795 This kind of hash will hold see_pre_extension_expr structures.
797 The key is the right hand side of the se_insn field.
798 Note that the se_insn is an expression that looks like:
800 set ((reg:WIDEmode r1) (sign_extend:WIDEmode
801 (subreg:NARROWmode (reg:WIDEmode r2)))) */
803 /* Return TRUE if P1 has the same value in its rhs as P2.
804 Otherwise, return FALSE.
805 P1 and P2 are see_pre_extension_expr structures. */
807 static int
808 eq_descriptor_pre_extension (const void *p1, const void *p2)
810 const struct see_pre_extension_expr *extension1 = p1;
811 const struct see_pre_extension_expr *extension2 = p2;
812 rtx set1 = single_set (extension1->se_insn);
813 rtx set2 = single_set (extension2->se_insn);
814 rtx rhs1, rhs2;
816 gcc_assert (set1 && set2);
817 rhs1 = SET_SRC (set1);
818 rhs2 = SET_SRC (set2);
820 return rtx_equal_p (rhs1, rhs2);
824 /* P is a see_pre_extension_expr struct, use the RHS of the se_insn field.
825 Note that the RHS is an expression that looks like this:
826 (sign_extend:WIDEmode (subreg:NARROWmode (reg:WIDEmode r))) */
828 static hashval_t
829 hash_descriptor_pre_extension (const void *p)
831 const struct see_pre_extension_expr *extension = p;
832 rtx set = single_set (extension->se_insn);
833 rtx rhs;
835 gcc_assert (set);
836 rhs = SET_SRC (set);
838 return hash_rtx (rhs, GET_MODE (rhs), 0, NULL, 0);
842 /* Free the allocated memory of the current see_pre_extension_expr struct.
844 It frees the two linked list of the occurrences structures. */
846 static void
847 hash_del_pre_extension (void *p)
849 struct see_pre_extension_expr *extension = p;
850 struct see_occr *curr_occr = extension->antic_occr;
851 struct see_occr *next_occr = NULL;
853 /* Free the linked list of the anticipatable occurrences. */
854 while (curr_occr)
856 next_occr = curr_occr->next;
857 free (curr_occr);
858 curr_occr = next_occr;
861 /* Free the linked list of the available occurrences. */
862 curr_occr = extension->avail_occr;
863 while (curr_occr)
865 next_occr = curr_occr->next;
866 free (curr_occr);
867 curr_occr = next_occr;
870 /* Free the see_pre_extension_expr structure itself. */
871 free (extension);
875 /* Helper functions for the register_properties hash.
876 This kind of hash will hold see_register_properties structures.
878 The value of the key is the regno field of the structure. */
880 /* Return TRUE if P1 has the same value in the regno field as P2.
881 Otherwise, return FALSE.
882 Where P1 and P2 are see_register_properties structures. */
884 static int
885 eq_descriptor_properties (const void *p1, const void *p2)
887 const struct see_register_properties *curr_prop1 = p1;
888 const struct see_register_properties *curr_prop2 = p2;
890 return curr_prop1->regno == curr_prop2->regno;
894 /* P is a see_register_properties struct, use the register number in the
895 regno field. */
897 static hashval_t
898 hash_descriptor_properties (const void *p)
900 const struct see_register_properties *curr_prop = p;
901 return curr_prop->regno;
905 /* Free the allocated memory of the current see_register_properties struct. */
906 static void
907 hash_del_properties (void *p)
909 struct see_register_properties *curr_prop = p;
910 free (curr_prop);
914 /* Helper functions for an extension hash.
915 This kind of hash will hold insns that look like:
917 set ((reg:WIDEmode r1) (sign_extend:WIDEmode
918 (subreg:NARROWmode (reg:WIDEmode r2))))
920 set ((reg:WIDEmode r1) (sign_extend:WIDEmode (reg:NARROWmode r2)))
922 The value of the key is (REGNO (reg:WIDEmode r1))
923 It is possible to search this hash in two ways:
924 1. By a register rtx. The Value that is been compared to the keys is the
925 REGNO of it.
926 2. By an insn with the above pattern. The Value that is been compared to
927 the keys is the REGNO of the reg on the lhs. */
929 /* Return TRUE if P1 has the same value as P2. Otherwise, return FALSE.
930 Where P1 is an insn and P2 is an insn or a register. */
932 static int
933 eq_descriptor_extension (const void *p1, const void *p2)
935 const_rtx const insn = (const_rtx) p1;
936 const_rtx const element = (const_rtx) p2;
937 rtx set1 = single_set (insn);
938 rtx dest_reg1;
939 rtx set2 = NULL;
940 const_rtx dest_reg2 = NULL;
942 gcc_assert (set1 && element && (REG_P (element) || INSN_P (element)));
944 dest_reg1 = SET_DEST (set1);
946 if (INSN_P (element))
948 set2 = single_set (element);
949 dest_reg2 = SET_DEST (set2);
951 else
952 dest_reg2 = element;
954 return REGNO (dest_reg1) == REGNO (dest_reg2);
958 /* If P is an insn, use the register number of its lhs
959 otherwise, P is a register, use its number. */
961 static hashval_t
962 hash_descriptor_extension (const void *p)
964 const_rtx const r = (const_rtx) p;
965 rtx set, lhs;
967 if (r && REG_P (r))
968 return REGNO (r);
970 gcc_assert (r && INSN_P (r));
971 set = single_set (r);
972 gcc_assert (set);
973 lhs = SET_DEST (set);
974 return REGNO (lhs);
978 /* Helper function for a see_bb_splay_ar[i] splay tree.
979 It frees all the allocated memory of a struct see_ref_s pointer.
981 VALUE is the value of a splay tree node. */
983 static void
984 see_free_ref_s (splay_tree_value value)
986 struct see_ref_s *ref_s = (struct see_ref_s *)value;
988 if (ref_s->unmerged_def_se_hash)
989 htab_delete (ref_s->unmerged_def_se_hash);
990 if (ref_s->merged_def_se_hash)
991 htab_delete (ref_s->merged_def_se_hash);
992 if (ref_s->use_se_hash)
993 htab_delete (ref_s->use_se_hash);
994 free (ref_s);
998 /* Rest of the implementation. */
1000 /* Search the extension hash for a suitable entry for EXTENSION.
1001 TYPE is the type of EXTENSION (USE_EXTENSION or DEF_EXTENSION).
1003 If TYPE is DEF_EXTENSION we need to normalize EXTENSION before searching the
1004 extension hash.
1006 If a suitable entry was found, return the slot. Otherwise, store EXTENSION
1007 in the hash and return NULL. */
1009 static struct see_pre_extension_expr *
1010 see_seek_pre_extension_expr (rtx extension, enum extension_type type)
1012 struct see_pre_extension_expr **slot_pre_exp, temp_pre_exp;
1013 rtx dest_extension_reg = see_get_extension_reg (extension, 1);
1014 enum rtx_code extension_code;
1015 enum machine_mode source_extension_mode;
1017 if (type == DEF_EXTENSION)
1019 extension_code = see_get_extension_data (extension,
1020 &source_extension_mode);
1021 gcc_assert (extension_code != UNKNOWN);
1022 extension =
1023 see_gen_normalized_extension (dest_extension_reg, extension_code,
1024 source_extension_mode);
1026 temp_pre_exp.se_insn = extension;
1027 slot_pre_exp =
1028 (struct see_pre_extension_expr **) htab_find_slot (see_pre_extension_hash,
1029 &temp_pre_exp, INSERT);
1030 if (*slot_pre_exp == NULL)
1031 /* This is the first time this extension instruction is encountered. Store
1032 it in the hash. */
1034 (*slot_pre_exp) = xmalloc (sizeof (struct see_pre_extension_expr));
1035 (*slot_pre_exp)->se_insn = extension;
1036 (*slot_pre_exp)->bitmap_index =
1037 (htab_elements (see_pre_extension_hash) - 1);
1038 (*slot_pre_exp)->antic_occr = NULL;
1039 (*slot_pre_exp)->avail_occr = NULL;
1040 return NULL;
1042 return *slot_pre_exp;
1046 /* This function defines how to update the extra_info of the web_entry.
1048 FIRST is the pointer of the extra_info of the first web_entry.
1049 SECOND is the pointer of the extra_info of the second web_entry.
1050 The first web_entry will be the predecessor (leader) of the second web_entry
1051 after the union.
1053 Return true if FIRST and SECOND points to the same web entry structure and
1054 nothing is done. Otherwise, return false. */
1056 static bool
1057 see_update_leader_extra_info (struct web_entry *first, struct web_entry *second)
1059 struct see_entry_extra_info *first_ei, *second_ei;
1061 first = unionfind_root (first);
1062 second = unionfind_root (second);
1064 if (unionfind_union (first, second))
1065 return true;
1067 first_ei = (struct see_entry_extra_info *) first->extra_info;
1068 second_ei = (struct see_entry_extra_info *) second->extra_info;
1070 gcc_assert (first_ei && second_ei);
1072 if (second_ei->relevancy == NOT_RELEVANT)
1074 first_ei->relevancy = NOT_RELEVANT;
1075 return false;
1077 switch (first_ei->relevancy)
1079 case NOT_RELEVANT:
1080 break;
1081 case RELEVANT_USE:
1082 switch (second_ei->relevancy)
1084 case RELEVANT_USE:
1085 break;
1086 case EXTENDED_DEF:
1087 first_ei->relevancy = second_ei->relevancy;
1088 first_ei->source_mode_signed = second_ei->source_mode_signed;
1089 first_ei->source_mode_unsigned = second_ei->source_mode_unsigned;
1090 break;
1091 case SIGN_EXTENDED_DEF:
1092 case ZERO_EXTENDED_DEF:
1093 first_ei->relevancy = second_ei->relevancy;
1094 first_ei->source_mode = second_ei->source_mode;
1095 break;
1096 default:
1097 gcc_unreachable ();
1099 break;
1100 case SIGN_EXTENDED_DEF:
1101 switch (second_ei->relevancy)
1103 case SIGN_EXTENDED_DEF:
1104 /* The mode of the root should be the wider one in this case. */
1105 first_ei->source_mode =
1106 (first_ei->source_mode > second_ei->source_mode) ?
1107 first_ei->source_mode : second_ei->source_mode;
1108 break;
1109 case RELEVANT_USE:
1110 break;
1111 case ZERO_EXTENDED_DEF:
1112 /* Don't mix webs with zero extend and sign extend. */
1113 first_ei->relevancy = NOT_RELEVANT;
1114 break;
1115 case EXTENDED_DEF:
1116 if (second_ei->source_mode_signed == MAX_MACHINE_MODE)
1117 first_ei->relevancy = NOT_RELEVANT;
1118 else
1119 /* The mode of the root should be the wider one in this case. */
1120 first_ei->source_mode =
1121 (first_ei->source_mode > second_ei->source_mode_signed) ?
1122 first_ei->source_mode : second_ei->source_mode_signed;
1123 break;
1124 default:
1125 gcc_unreachable ();
1127 break;
1128 /* This case is similar to the previous one, with little changes. */
1129 case ZERO_EXTENDED_DEF:
1130 switch (second_ei->relevancy)
1132 case SIGN_EXTENDED_DEF:
1133 /* Don't mix webs with zero extend and sign extend. */
1134 first_ei->relevancy = NOT_RELEVANT;
1135 break;
1136 case RELEVANT_USE:
1137 break;
1138 case ZERO_EXTENDED_DEF:
1139 /* The mode of the root should be the wider one in this case. */
1140 first_ei->source_mode =
1141 (first_ei->source_mode > second_ei->source_mode) ?
1142 first_ei->source_mode : second_ei->source_mode;
1143 break;
1144 case EXTENDED_DEF:
1145 if (second_ei->source_mode_unsigned == MAX_MACHINE_MODE)
1146 first_ei->relevancy = NOT_RELEVANT;
1147 else
1148 /* The mode of the root should be the wider one in this case. */
1149 first_ei->source_mode =
1150 (first_ei->source_mode > second_ei->source_mode_unsigned) ?
1151 first_ei->source_mode : second_ei->source_mode_unsigned;
1152 break;
1153 default:
1154 gcc_unreachable ();
1156 break;
1157 case EXTENDED_DEF:
1158 if (first_ei->source_mode_signed != MAX_MACHINE_MODE
1159 && first_ei->source_mode_unsigned != MAX_MACHINE_MODE)
1161 switch (second_ei->relevancy)
1163 case SIGN_EXTENDED_DEF:
1164 first_ei->relevancy = SIGN_EXTENDED_DEF;
1165 first_ei->source_mode =
1166 (first_ei->source_mode_signed > second_ei->source_mode) ?
1167 first_ei->source_mode_signed : second_ei->source_mode;
1168 break;
1169 case RELEVANT_USE:
1170 break;
1171 case ZERO_EXTENDED_DEF:
1172 first_ei->relevancy = ZERO_EXTENDED_DEF;
1173 first_ei->source_mode =
1174 (first_ei->source_mode_unsigned > second_ei->source_mode) ?
1175 first_ei->source_mode_unsigned : second_ei->source_mode;
1176 break;
1177 case EXTENDED_DEF:
1178 if (second_ei->source_mode_unsigned != MAX_MACHINE_MODE)
1179 first_ei->source_mode_unsigned =
1180 (first_ei->source_mode_unsigned >
1181 second_ei->source_mode_unsigned) ?
1182 first_ei->source_mode_unsigned :
1183 second_ei->source_mode_unsigned;
1184 if (second_ei->source_mode_signed != MAX_MACHINE_MODE)
1185 first_ei->source_mode_signed =
1186 (first_ei->source_mode_signed >
1187 second_ei->source_mode_signed) ?
1188 first_ei->source_mode_signed : second_ei->source_mode_signed;
1189 break;
1190 default:
1191 gcc_unreachable ();
1194 else if (first_ei->source_mode_signed == MAX_MACHINE_MODE)
1196 gcc_assert (first_ei->source_mode_unsigned != MAX_MACHINE_MODE);
1197 switch (second_ei->relevancy)
1199 case SIGN_EXTENDED_DEF:
1200 first_ei->relevancy = NOT_RELEVANT;
1201 break;
1202 case RELEVANT_USE:
1203 break;
1204 case ZERO_EXTENDED_DEF:
1205 first_ei->relevancy = ZERO_EXTENDED_DEF;
1206 first_ei->source_mode =
1207 (first_ei->source_mode_unsigned > second_ei->source_mode) ?
1208 first_ei->source_mode_unsigned : second_ei->source_mode;
1209 break;
1210 case EXTENDED_DEF:
1211 if (second_ei->source_mode_unsigned == MAX_MACHINE_MODE)
1212 first_ei->relevancy = NOT_RELEVANT;
1213 else
1214 first_ei->source_mode_unsigned =
1215 (first_ei->source_mode_unsigned >
1216 second_ei->source_mode_unsigned) ?
1217 first_ei->source_mode_unsigned :
1218 second_ei->source_mode_unsigned;
1219 break;
1220 default:
1221 gcc_unreachable ();
1224 else
1226 gcc_assert (first_ei->source_mode_unsigned == MAX_MACHINE_MODE);
1227 gcc_assert (first_ei->source_mode_signed != MAX_MACHINE_MODE);
1228 switch (second_ei->relevancy)
1230 case SIGN_EXTENDED_DEF:
1231 first_ei->relevancy = SIGN_EXTENDED_DEF;
1232 first_ei->source_mode =
1233 (first_ei->source_mode_signed > second_ei->source_mode) ?
1234 first_ei->source_mode_signed : second_ei->source_mode;
1235 break;
1236 case RELEVANT_USE:
1237 break;
1238 case ZERO_EXTENDED_DEF:
1239 first_ei->relevancy = NOT_RELEVANT;
1240 break;
1241 case EXTENDED_DEF:
1242 if (second_ei->source_mode_signed == MAX_MACHINE_MODE)
1243 first_ei->relevancy = NOT_RELEVANT;
1244 else
1245 first_ei->source_mode_signed =
1246 (first_ei->source_mode_signed >
1247 second_ei->source_mode_signed) ?
1248 first_ei->source_mode_signed : second_ei->source_mode_signed;
1249 break;
1250 default:
1251 gcc_unreachable ();
1254 break;
1255 default:
1256 /* Unknown patern type. */
1257 gcc_unreachable ();
1260 return false;
1264 /* Free global data structures. */
1266 static void
1267 see_free_data_structures (void)
1269 int i;
1270 unsigned int j;
1272 /* Free the bitmap vectors. */
1273 if (transp)
1275 sbitmap_vector_free (transp);
1276 transp = NULL;
1277 sbitmap_vector_free (comp);
1278 comp = NULL;
1279 sbitmap_vector_free (antloc);
1280 antloc = NULL;
1281 sbitmap_vector_free (ae_kill);
1282 ae_kill = NULL;
1284 if (pre_insert_map)
1286 sbitmap_vector_free (pre_insert_map);
1287 pre_insert_map = NULL;
1289 if (pre_delete_map)
1291 sbitmap_vector_free (pre_delete_map);
1292 pre_delete_map = NULL;
1294 if (edge_list)
1296 free_edge_list (edge_list);
1297 edge_list = NULL;
1300 /* Free the extension hash. */
1301 htab_delete (see_pre_extension_hash);
1303 /* Free the array of hashes. */
1304 for (i = 0; i < last_bb; i++)
1305 if (see_bb_hash_ar[i])
1306 htab_delete (see_bb_hash_ar[i]);
1307 free (see_bb_hash_ar);
1309 /* Free the array of splay trees. */
1310 for (i = 0; i < last_bb; i++)
1311 if (see_bb_splay_ar[i])
1312 splay_tree_delete (see_bb_splay_ar[i]);
1313 free (see_bb_splay_ar);
1315 /* Free the array of web entries and their extra info field. */
1316 for (j = 0; j < defs_num; j++)
1317 free (def_entry[j].extra_info);
1318 free (def_entry);
1319 for (j = 0; j < uses_num; j++)
1320 free (use_entry[j].extra_info);
1321 free (use_entry);
1325 /* Initialize global data structures and variables. */
1327 static void
1328 see_initialize_data_structures (void)
1330 unsigned int max_reg = max_reg_num ();
1331 unsigned int i;
1333 /* Build the df object. */
1334 df_set_flags (DF_EQ_NOTES);
1335 df_chain_add_problem (DF_DU_CHAIN + DF_UD_CHAIN);
1336 df_analyze ();
1337 df_set_flags (DF_DEFER_INSN_RESCAN);
1339 if (dump_file)
1340 df_dump (dump_file);
1342 /* Record the last basic block at the beginning of the optimization. */
1343 last_bb = last_basic_block;
1345 /* Record the number of uses and defs at the beginning of the optimization. */
1346 uses_num = 0;
1347 defs_num = 0;
1348 for (i = 0; i < max_reg; i++)
1350 uses_num += DF_REG_USE_COUNT (i) + DF_REG_EQ_USE_COUNT (i);
1351 defs_num += DF_REG_DEF_COUNT (i);
1354 /* Allocate web entries array for the union-find data structure. */
1355 def_entry = xcalloc (defs_num, sizeof (struct web_entry));
1356 use_entry = xcalloc (uses_num, sizeof (struct web_entry));
1358 /* Allocate an array of splay trees.
1359 One splay tree for each basic block. */
1360 see_bb_splay_ar = xcalloc (last_bb, sizeof (splay_tree));
1362 /* Allocate an array of hashes.
1363 One hash for each basic block. */
1364 see_bb_hash_ar = xcalloc (last_bb, sizeof (htab_t));
1366 /* Allocate the extension hash. It will hold the extensions that we want
1367 to PRE. */
1368 see_pre_extension_hash = htab_create (10,
1369 hash_descriptor_pre_extension,
1370 eq_descriptor_pre_extension,
1371 hash_del_pre_extension);
1375 /* Function called by note_uses to check if a register is used in a
1376 subexpressions.
1378 X is a pointer to the subexpression and DATA is a pointer to a
1379 see_mentioned_reg_data structure that contains the register to look for and
1380 a place for the result. */
1382 static void
1383 see_mentioned_reg (rtx *x, void *data)
1385 struct see_mentioned_reg_data *d
1386 = (struct see_mentioned_reg_data *) data;
1388 if (reg_mentioned_p (d->reg, *x))
1389 d->mentioned = true;
1393 /* We don't want to merge a use extension with a reference if the extended
1394 register is used only in a simple move instruction. We also don't want to
1395 merge a def extension with a reference if the source register of the
1396 extension is defined only in a simple move in the reference.
1398 REF is the reference instruction.
1399 EXTENSION is the use extension or def extension instruction.
1400 TYPE is the type of the extension (use or def).
1402 Return true if the reference is complicated enough, so we would like to merge
1403 it with the extension. Otherwise, return false. */
1405 static bool
1406 see_want_to_be_merged_with_extension (rtx ref, rtx extension,
1407 enum extension_type type)
1409 rtx pat;
1410 rtx dest_extension_reg = see_get_extension_reg (extension, 1);
1411 rtx source_extension_reg = see_get_extension_reg (extension, 0);
1412 enum rtx_code code;
1413 struct see_mentioned_reg_data d;
1414 int i;
1416 pat = PATTERN (ref);
1417 code = GET_CODE (pat);
1419 if (code == PARALLEL)
1421 for (i = 0; i < XVECLEN (pat, 0); i++)
1423 rtx sub = XVECEXP (pat, 0, i);
1425 if (GET_CODE (sub) == SET
1426 && (REG_P (SET_DEST (sub))
1427 || (GET_CODE (SET_DEST (sub)) == SUBREG
1428 && REG_P (SUBREG_REG (SET_DEST (sub)))))
1429 && (REG_P (SET_SRC (sub))
1430 || (GET_CODE (SET_SRC (sub)) == SUBREG
1431 && REG_P (SUBREG_REG (SET_SRC (sub))))))
1433 /* This is a simple move SET. */
1434 if (type == DEF_EXTENSION
1435 && reg_mentioned_p (source_extension_reg, SET_DEST (sub)))
1436 return false;
1438 else
1440 /* This is not a simple move SET.
1441 Check if it uses the source of the extension. */
1442 if (type == USE_EXTENSION)
1444 d.reg = dest_extension_reg;
1445 d.mentioned = false;
1446 note_uses (&sub, see_mentioned_reg, &d);
1447 if (d.mentioned)
1448 return true;
1452 if (type == USE_EXTENSION)
1453 return false;
1455 else
1457 if (code == SET
1458 && (REG_P (SET_DEST (pat))
1459 || (GET_CODE (SET_DEST (pat)) == SUBREG
1460 && REG_P (SUBREG_REG (SET_DEST (pat)))))
1461 && (REG_P (SET_SRC (pat))
1462 || (GET_CODE (SET_SRC (pat)) == SUBREG
1463 && REG_P (SUBREG_REG (SET_SRC (pat))))))
1464 /* This is a simple move SET. */
1465 return false;
1468 return true;
1472 /* Print the register number of the current see_register_properties
1473 structure.
1475 This is a subroutine of see_main called via htab_traverse.
1476 SLOT contains the current see_register_properties structure pointer. */
1478 static int
1479 see_print_register_properties (void **slot, void *b ATTRIBUTE_UNUSED)
1481 struct see_register_properties *prop = *slot;
1483 gcc_assert (prop);
1484 fprintf (dump_file, "Property found for register %d\n", prop->regno);
1485 return 1;
1489 /* Print the extension instruction of the current see_register_properties
1490 structure.
1492 This is a subroutine of see_main called via htab_traverse.
1493 SLOT contains the current see_pre_extension_expr structure pointer. */
1495 static int
1496 see_print_pre_extension_expr (void **slot, void *b ATTRIBUTE_UNUSED)
1498 struct see_pre_extension_expr *pre_extension = *slot;
1500 gcc_assert (pre_extension
1501 && pre_extension->se_insn
1502 && INSN_P (pre_extension->se_insn));
1504 fprintf (dump_file, "Index %d for:\n", pre_extension->bitmap_index);
1505 print_rtl_single (dump_file, pre_extension->se_insn);
1507 return 1;
1511 /* Phase 4 implementation: Commit changes to the insn stream. */
1513 /* Delete the merged def extension.
1515 This is a subroutine of see_commit_ref_changes called via htab_traverse.
1517 SLOT contains the current def extension instruction.
1518 B is the see_ref_s structure pointer. */
1520 static int
1521 see_delete_merged_def_extension (void **slot, void *b ATTRIBUTE_UNUSED)
1523 rtx def_se = *slot;
1525 if (dump_file)
1527 fprintf (dump_file, "Deleting merged def extension:\n");
1528 print_rtl_single (dump_file, def_se);
1531 if (INSN_DELETED_P (def_se))
1532 /* This def extension is an implicit one. No need to delete it since
1533 it is not in the insn stream. */
1534 return 1;
1536 delete_insn (def_se);
1537 return 1;
1541 /* Delete the unmerged def extension.
1543 This is a subroutine of see_commit_ref_changes called via htab_traverse.
1545 SLOT contains the current def extension instruction.
1546 B is the see_ref_s structure pointer. */
1548 static int
1549 see_delete_unmerged_def_extension (void **slot, void *b ATTRIBUTE_UNUSED)
1551 rtx def_se = *slot;
1553 if (dump_file)
1555 fprintf (dump_file, "Deleting unmerged def extension:\n");
1556 print_rtl_single (dump_file, def_se);
1559 delete_insn (def_se);
1560 return 1;
1564 /* Emit the non-redundant use extension to the instruction stream.
1566 This is a subroutine of see_commit_ref_changes called via htab_traverse.
1568 SLOT contains the current use extension instruction.
1569 B is the see_ref_s structure pointer. */
1571 static int
1572 see_emit_use_extension (void **slot, void *b)
1574 rtx use_se = *slot;
1575 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b;
1577 if (INSN_DELETED_P (use_se))
1578 /* This use extension was previously removed according to the lcm
1579 output. */
1580 return 1;
1582 if (dump_file)
1584 fprintf (dump_file, "Inserting use extension:\n");
1585 print_rtl_single (dump_file, use_se);
1588 add_insn_before (use_se, curr_ref_s->insn, NULL);
1590 return 1;
1594 /* For each relevant reference:
1595 a. Emit the non-redundant use extensions.
1596 b. Delete the def extensions.
1597 c. Replace the original reference with the merged one (if exists) and add the
1598 move instructions that were generated.
1600 This is a subroutine of see_commit_changes called via splay_tree_foreach.
1602 STN is the current node in the see_bb_splay_ar[i] splay tree. It holds a
1603 see_ref_s structure. */
1605 static int
1606 see_commit_ref_changes (splay_tree_node stn,
1607 void *data ATTRIBUTE_UNUSED)
1609 htab_t use_se_hash = ((struct see_ref_s *) (stn->value))->use_se_hash;
1610 htab_t unmerged_def_se_hash =
1611 ((struct see_ref_s *) (stn->value))->unmerged_def_se_hash;
1612 htab_t merged_def_se_hash =
1613 ((struct see_ref_s *) (stn->value))->merged_def_se_hash;
1614 rtx ref = ((struct see_ref_s *) (stn->value))->insn;
1615 rtx merged_ref = ((struct see_ref_s *) (stn->value))->merged_insn;
1617 /* Emit the non-redundant use extensions. */
1618 if (use_se_hash)
1619 htab_traverse_noresize (use_se_hash, see_emit_use_extension,
1620 (PTR) (stn->value));
1622 /* Delete the def extensions. */
1623 if (unmerged_def_se_hash)
1624 htab_traverse (unmerged_def_se_hash, see_delete_unmerged_def_extension,
1625 (PTR) (stn->value));
1627 if (merged_def_se_hash)
1628 htab_traverse (merged_def_se_hash, see_delete_merged_def_extension,
1629 (PTR) (stn->value));
1631 /* Replace the original reference with the merged one (if exists) and add the
1632 move instructions that were generated. */
1633 if (merged_ref && !INSN_DELETED_P (ref))
1635 if (dump_file)
1637 fprintf (dump_file, "Replacing orig reference:\n");
1638 print_rtl_single (dump_file, ref);
1639 fprintf (dump_file, "With merged reference:\n");
1640 print_rtl_single (dump_file, merged_ref);
1642 emit_insn_after (merged_ref, ref);
1643 delete_insn (ref);
1646 /* Continue to the next reference. */
1647 return 0;
1651 /* Insert partially redundant expressions on edges to make the expressions fully
1652 redundant.
1654 INDEX_MAP is a mapping of an index to an expression.
1655 Return true if an instruction was inserted on an edge.
1656 Otherwise, return false. */
1658 static bool
1659 see_pre_insert_extensions (struct see_pre_extension_expr **index_map)
1661 int num_edges = NUM_EDGES (edge_list);
1662 int set_size = pre_insert_map[0]->size;
1663 size_t pre_extension_num = htab_elements (see_pre_extension_hash);
1665 int did_insert = 0;
1666 int e;
1667 int i;
1668 int j;
1670 for (e = 0; e < num_edges; e++)
1672 int indx;
1673 basic_block bb = INDEX_EDGE_PRED_BB (edge_list, e);
1675 for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS)
1677 SBITMAP_ELT_TYPE insert = pre_insert_map[e]->elms[i];
1679 for (j = indx; insert && j < (int) pre_extension_num;
1680 j++, insert >>= 1)
1681 if (insert & 1)
1683 struct see_pre_extension_expr *expr = index_map[j];
1684 int idx = expr->bitmap_index;
1685 rtx se_insn = NULL;
1686 edge eg = INDEX_EDGE (edge_list, e);
1688 start_sequence ();
1689 emit_insn (PATTERN (expr->se_insn));
1690 se_insn = get_insns ();
1691 end_sequence ();
1693 if (eg->flags & EDGE_ABNORMAL)
1695 rtx new_insn = NULL;
1697 new_insn = insert_insn_end_bb_new (se_insn, bb);
1698 gcc_assert (new_insn && INSN_P (new_insn));
1700 if (dump_file)
1702 fprintf (dump_file,
1703 "PRE: end of bb %d, insn %d, ",
1704 bb->index, INSN_UID (new_insn));
1705 fprintf (dump_file,
1706 "inserting expression %d\n", idx);
1709 else
1711 insert_insn_on_edge (se_insn, eg);
1713 if (dump_file)
1715 fprintf (dump_file, "PRE: edge (%d,%d), ",
1716 bb->index,
1717 INDEX_EDGE_SUCC_BB (edge_list, e)->index);
1718 fprintf (dump_file, "inserting expression %d\n", idx);
1721 did_insert = true;
1725 return did_insert;
1729 /* Since all the redundant extensions must be anticipatable, they must be a use
1730 extensions. Mark them as deleted. This will prevent them from been emitted
1731 in the first place.
1733 This is a subroutine of see_commit_changes called via htab_traverse.
1735 SLOT contains the current see_pre_extension_expr structure pointer. */
1737 static int
1738 see_pre_delete_extension (void **slot, void *b ATTRIBUTE_UNUSED)
1740 struct see_pre_extension_expr *expr = *slot;
1741 struct see_occr *occr;
1742 int indx = expr->bitmap_index;
1744 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
1746 if (TEST_BIT (pre_delete_map[occr->block_num], indx))
1748 /* Mark as deleted. */
1749 INSN_DELETED_P (occr->insn) = 1;
1750 if (dump_file)
1752 fprintf (dump_file,"Redundant extension deleted:\n");
1753 print_rtl_single (dump_file, occr->insn);
1757 return 1;
1761 /* Create the index_map mapping of an index to an expression.
1763 This is a subroutine of see_commit_changes called via htab_traverse.
1765 SLOT contains the current see_pre_extension_expr structure pointer.
1766 B a pointer to see_pre_extension_expr structure pointer. */
1768 static int
1769 see_map_extension (void **slot, void *b)
1771 struct see_pre_extension_expr *expr = *slot;
1772 struct see_pre_extension_expr **index_map =
1773 (struct see_pre_extension_expr **) b;
1775 index_map[expr->bitmap_index] = expr;
1777 return 1;
1781 /* Phase 4 top level function.
1782 In this phase we finally change the instruction stream.
1783 Here we insert extensions at their best placements and delete the
1784 redundant ones according to the output of the LCM. We also replace
1785 some of the instructions according to phase 2 merges results. */
1787 static void
1788 see_commit_changes (void)
1790 struct see_pre_extension_expr **index_map;
1791 size_t pre_extension_num = htab_elements (see_pre_extension_hash);
1792 bool did_insert = false;
1793 int i;
1795 index_map = xcalloc (pre_extension_num,
1796 sizeof (struct see_pre_extension_expr *));
1798 if (dump_file)
1799 fprintf (dump_file,
1800 "* Phase 4: Commit changes to the insn stream. *\n");
1802 /* Produce a mapping of all the pre_extensions. */
1803 htab_traverse (see_pre_extension_hash, see_map_extension, (PTR) index_map);
1805 /* Delete redundant extension. This will prevent them from been emitted in
1806 the first place. */
1807 htab_traverse (see_pre_extension_hash, see_pre_delete_extension, NULL);
1809 /* Insert extensions on edges, according to the LCM result. */
1810 did_insert = see_pre_insert_extensions (index_map);
1812 if (did_insert)
1813 commit_edge_insertions ();
1815 /* Commit the rest of the changes. */
1816 for (i = 0; i < last_bb; i++)
1818 if (see_bb_splay_ar[i])
1820 /* Traverse over all the references in the basic block in forward
1821 order. */
1822 splay_tree_foreach (see_bb_splay_ar[i],
1823 see_commit_ref_changes, NULL);
1827 free (index_map);
1831 /* Phase 3 implementation: Eliminate globally redundant extensions. */
1833 /* Analyze the properties of a merged def extension for the LCM and record avail
1834 occurrences.
1836 This is a subroutine of see_analyze_ref_local_prop called
1837 via htab_traverse.
1839 SLOT contains the current def extension instruction.
1840 B is the see_ref_s structure pointer. */
1842 static int
1843 see_analyze_merged_def_local_prop (void **slot, void *b)
1845 rtx def_se = *slot;
1846 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b;
1847 rtx ref = curr_ref_s->insn;
1848 struct see_pre_extension_expr *extension_expr;
1849 int indx;
1850 int bb_num = BLOCK_NUM (ref);
1851 htab_t curr_bb_hash;
1852 struct see_register_properties *curr_prop, **slot_prop;
1853 struct see_register_properties temp_prop;
1854 rtx dest_extension_reg = see_get_extension_reg (def_se, 1);
1855 struct see_occr *curr_occr = NULL;
1856 struct see_occr *tmp_occr = NULL;
1858 extension_expr = see_seek_pre_extension_expr (def_se, DEF_EXTENSION);
1859 /* The extension_expr must be found. */
1860 gcc_assert (extension_expr);
1862 curr_bb_hash = see_bb_hash_ar[bb_num];
1863 gcc_assert (curr_bb_hash);
1864 temp_prop.regno = REGNO (dest_extension_reg);
1865 slot_prop =
1866 (struct see_register_properties **) htab_find_slot (curr_bb_hash,
1867 &temp_prop, INSERT);
1868 curr_prop = *slot_prop;
1869 gcc_assert (curr_prop);
1871 indx = extension_expr->bitmap_index;
1873 /* Reset the transparency bit. */
1874 RESET_BIT (transp[bb_num], indx);
1875 /* Reset the killed bit. */
1876 RESET_BIT (ae_kill[bb_num], indx);
1878 if (curr_prop->first_se_after_last_def == DF_INSN_LUID (ref))
1880 /* Set the available bit. */
1881 SET_BIT (comp[bb_num], indx);
1882 /* Record the available occurrence. */
1883 curr_occr = xmalloc (sizeof (struct see_occr));
1884 curr_occr->next = NULL;
1885 curr_occr->insn = def_se;
1886 curr_occr->block_num = bb_num;
1887 tmp_occr = extension_expr->avail_occr;
1888 if (!tmp_occr)
1889 extension_expr->avail_occr = curr_occr;
1890 else
1892 while (tmp_occr->next)
1893 tmp_occr = tmp_occr->next;
1894 tmp_occr->next = curr_occr;
1898 return 1;
1902 /* Analyze the properties of a unmerged def extension for the LCM.
1904 This is a subroutine of see_analyze_ref_local_prop called
1905 via htab_traverse.
1907 SLOT contains the current def extension instruction.
1908 B is the see_ref_s structure pointer. */
1910 static int
1911 see_analyze_unmerged_def_local_prop (void **slot, void *b)
1913 rtx def_se = *slot;
1914 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b;
1915 rtx ref = curr_ref_s->insn;
1916 struct see_pre_extension_expr *extension_expr;
1917 int indx;
1918 int bb_num = BLOCK_NUM (ref);
1919 htab_t curr_bb_hash;
1920 struct see_register_properties *curr_prop, **slot_prop;
1921 struct see_register_properties temp_prop;
1922 rtx dest_extension_reg = see_get_extension_reg (def_se, 1);
1924 extension_expr = see_seek_pre_extension_expr (def_se, DEF_EXTENSION);
1925 /* The extension_expr must be found. */
1926 gcc_assert (extension_expr);
1928 curr_bb_hash = see_bb_hash_ar[bb_num];
1929 gcc_assert (curr_bb_hash);
1930 temp_prop.regno = REGNO (dest_extension_reg);
1931 slot_prop =
1932 (struct see_register_properties **) htab_find_slot (curr_bb_hash,
1933 &temp_prop, INSERT);
1934 curr_prop = *slot_prop;
1935 gcc_assert (curr_prop);
1937 indx = extension_expr->bitmap_index;
1939 /* Reset the transparency bit. */
1940 RESET_BIT (transp[bb_num], indx);
1941 /* Set the killed bit. */
1942 SET_BIT (ae_kill[bb_num], indx);
1944 return 1;
1948 /* Analyze the properties of a use extension for the LCM and record anic and
1949 avail occurrences.
1951 This is a subroutine of see_analyze_ref_local_prop called
1952 via htab_traverse.
1954 SLOT contains the current use extension instruction.
1955 B is the see_ref_s structure pointer. */
1957 static int
1958 see_analyze_use_local_prop (void **slot, void *b)
1960 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b;
1961 rtx use_se = *slot;
1962 rtx ref = curr_ref_s->insn;
1963 rtx dest_extension_reg = see_get_extension_reg (use_se, 1);
1964 struct see_pre_extension_expr *extension_expr;
1965 struct see_register_properties *curr_prop, **slot_prop;
1966 struct see_register_properties temp_prop;
1967 struct see_occr *curr_occr = NULL;
1968 struct see_occr *tmp_occr = NULL;
1969 htab_t curr_bb_hash;
1970 int indx;
1971 int bb_num = BLOCK_NUM (ref);
1973 extension_expr = see_seek_pre_extension_expr (use_se, USE_EXTENSION);
1974 /* The extension_expr must be found. */
1975 gcc_assert (extension_expr);
1977 curr_bb_hash = see_bb_hash_ar[bb_num];
1978 gcc_assert (curr_bb_hash);
1979 temp_prop.regno = REGNO (dest_extension_reg);
1980 slot_prop =
1981 (struct see_register_properties **) htab_find_slot (curr_bb_hash,
1982 &temp_prop, INSERT);
1983 curr_prop = *slot_prop;
1984 gcc_assert (curr_prop);
1986 indx = extension_expr->bitmap_index;
1988 if (curr_prop->first_se_before_any_def == DF_INSN_LUID (ref))
1990 /* Set the anticipatable bit. */
1991 SET_BIT (antloc[bb_num], indx);
1992 /* Record the anticipatable occurrence. */
1993 curr_occr = xmalloc (sizeof (struct see_occr));
1994 curr_occr->next = NULL;
1995 curr_occr->insn = use_se;
1996 curr_occr->block_num = bb_num;
1997 tmp_occr = extension_expr->antic_occr;
1998 if (!tmp_occr)
1999 extension_expr->antic_occr = curr_occr;
2000 else
2002 while (tmp_occr->next)
2003 tmp_occr = tmp_occr->next;
2004 tmp_occr->next = curr_occr;
2006 if (curr_prop->last_def < 0)
2008 /* Set the available bit. */
2009 SET_BIT (comp[bb_num], indx);
2010 /* Record the available occurrence. */
2011 curr_occr = xmalloc (sizeof (struct see_occr));
2012 curr_occr->next = NULL;
2013 curr_occr->insn = use_se;
2014 curr_occr->block_num = bb_num;
2015 tmp_occr = extension_expr->avail_occr;
2016 if (!tmp_occr)
2017 extension_expr->avail_occr = curr_occr;
2018 else
2020 while (tmp_occr->next)
2021 tmp_occr = tmp_occr->next;
2022 tmp_occr->next = curr_occr;
2025 /* Note: there is no need to reset the killed bit since it must be zero at
2026 this point. */
2028 else if (curr_prop->first_se_after_last_def == DF_INSN_LUID (ref))
2030 /* Set the available bit. */
2031 SET_BIT (comp[bb_num], indx);
2032 /* Reset the killed bit. */
2033 RESET_BIT (ae_kill[bb_num], indx);
2034 /* Record the available occurrence. */
2035 curr_occr = xmalloc (sizeof (struct see_occr));
2036 curr_occr->next = NULL;
2037 curr_occr->insn = use_se;
2038 curr_occr->block_num = bb_num;
2039 tmp_occr = extension_expr->avail_occr;
2040 if (!tmp_occr)
2041 extension_expr->avail_occr = curr_occr;
2042 else
2044 while (tmp_occr->next)
2045 tmp_occr = tmp_occr->next;
2046 tmp_occr->next = curr_occr;
2049 return 1;
2053 /* Here we traverse over all the merged and unmerged extensions of the reference
2054 and analyze their properties for the LCM.
2056 This is a subroutine of see_execute_LCM called via splay_tree_foreach.
2058 STN is the current node in the see_bb_splay_ar[i] splay tree. It holds a
2059 see_ref_s structure. */
2061 static int
2062 see_analyze_ref_local_prop (splay_tree_node stn,
2063 void *data ATTRIBUTE_UNUSED)
2065 htab_t use_se_hash = ((struct see_ref_s *) (stn->value))->use_se_hash;
2066 htab_t unmerged_def_se_hash =
2067 ((struct see_ref_s *) (stn->value))->unmerged_def_se_hash;
2068 htab_t merged_def_se_hash =
2069 ((struct see_ref_s *) (stn->value))->merged_def_se_hash;
2071 /* Analyze use extensions that were not merged with the reference. */
2072 if (use_se_hash)
2073 htab_traverse_noresize (use_se_hash, see_analyze_use_local_prop,
2074 (PTR) (stn->value));
2076 /* Analyze def extensions that were not merged with the reference. */
2077 if (unmerged_def_se_hash)
2078 htab_traverse (unmerged_def_se_hash, see_analyze_unmerged_def_local_prop,
2079 (PTR) (stn->value));
2081 /* Analyze def extensions that were merged with the reference. */
2082 if (merged_def_se_hash)
2083 htab_traverse (merged_def_se_hash, see_analyze_merged_def_local_prop,
2084 (PTR) (stn->value));
2086 /* Continue to the next definition. */
2087 return 0;
2091 /* Phase 3 top level function.
2092 In this phase, we set the input bit vectors of the LCM according to data
2093 gathered in phase 2.
2094 Then we run the edge based LCM. */
2096 static void
2097 see_execute_LCM (void)
2099 size_t pre_extension_num = htab_elements (see_pre_extension_hash);
2100 int i = 0;
2102 if (dump_file)
2103 fprintf (dump_file,
2104 "* Phase 3: Eliminate globally redundant extensions. *\n");
2106 /* Initialize the global sbitmap vectors. */
2107 transp = sbitmap_vector_alloc (last_bb, pre_extension_num);
2108 comp = sbitmap_vector_alloc (last_bb, pre_extension_num);
2109 antloc = sbitmap_vector_alloc (last_bb, pre_extension_num);
2110 ae_kill = sbitmap_vector_alloc (last_bb, pre_extension_num);
2111 sbitmap_vector_ones (transp, last_bb);
2112 sbitmap_vector_zero (comp, last_bb);
2113 sbitmap_vector_zero (antloc, last_bb);
2114 sbitmap_vector_zero (ae_kill, last_bb);
2116 /* Traverse over all the splay trees of the basic blocks. */
2117 for (i = 0; i < last_bb; i++)
2119 if (see_bb_splay_ar[i])
2121 /* Traverse over all the references in the basic block in forward
2122 order. */
2123 splay_tree_foreach (see_bb_splay_ar[i],
2124 see_analyze_ref_local_prop, NULL);
2128 /* Add fake exit edges before running the lcm. */
2129 add_noreturn_fake_exit_edges ();
2131 /* Run the LCM. */
2132 edge_list = pre_edge_lcm (pre_extension_num, transp, comp, antloc,
2133 ae_kill, &pre_insert_map, &pre_delete_map);
2135 /* Remove the fake edges. */
2136 remove_fake_exit_edges ();
2140 /* Phase 2 implementation: Merge and eliminate locally redundant extensions. */
2142 /* In this function we set the register properties for the register that is
2143 defined and extended in the reference.
2144 The properties are defined in see_register_properties structure which is
2145 allocated per basic block and per register.
2146 Later the extension is inserted into the see_pre_extension_hash for the next
2147 phase of the optimization.
2149 This is a subroutine of see_handle_extensions_for_one_ref called
2150 via htab_traverse.
2152 SLOT contains the current def extension instruction.
2153 B is the see_ref_s structure pointer. */
2155 static int
2156 see_set_prop_merged_def (void **slot, void *b)
2158 rtx def_se = *slot;
2159 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b;
2160 rtx insn = curr_ref_s->insn;
2161 rtx dest_extension_reg = see_get_extension_reg (def_se, 1);
2162 htab_t curr_bb_hash;
2163 struct see_register_properties *curr_prop = NULL;
2164 struct see_register_properties **slot_prop;
2165 struct see_register_properties temp_prop;
2166 int ref_luid = DF_INSN_LUID (insn);
2168 curr_bb_hash = see_bb_hash_ar[BLOCK_NUM (curr_ref_s->insn)];
2169 if (!curr_bb_hash)
2171 /* The hash doesn't exist yet. Create it. */
2172 curr_bb_hash = htab_create (10,
2173 hash_descriptor_properties,
2174 eq_descriptor_properties,
2175 hash_del_properties);
2176 see_bb_hash_ar[BLOCK_NUM (curr_ref_s->insn)] = curr_bb_hash;
2179 /* Find the right register properties in the right basic block. */
2180 temp_prop.regno = REGNO (dest_extension_reg);
2181 slot_prop =
2182 (struct see_register_properties **) htab_find_slot (curr_bb_hash,
2183 &temp_prop, INSERT);
2185 if (slot_prop && *slot_prop != NULL)
2187 /* Property already exists. */
2188 curr_prop = *slot_prop;
2189 gcc_assert (curr_prop->regno == REGNO (dest_extension_reg));
2191 curr_prop->last_def = ref_luid;
2192 curr_prop->first_se_after_last_def = ref_luid;
2194 else
2196 /* Property doesn't exist yet. */
2197 curr_prop = xmalloc (sizeof (struct see_register_properties));
2198 curr_prop->regno = REGNO (dest_extension_reg);
2199 curr_prop->last_def = ref_luid;
2200 curr_prop->first_se_before_any_def = -1;
2201 curr_prop->first_se_after_last_def = ref_luid;
2202 *slot_prop = curr_prop;
2205 /* Insert the def_se into see_pre_extension_hash if it isn't already
2206 there. */
2207 see_seek_pre_extension_expr (def_se, DEF_EXTENSION);
2209 return 1;
2213 /* In this function we set the register properties for the register that is
2214 defined but not extended in the reference.
2215 The properties are defined in see_register_properties structure which is
2216 allocated per basic block and per register.
2217 Later the extension is inserted into the see_pre_extension_hash for the next
2218 phase of the optimization.
2220 This is a subroutine of see_handle_extensions_for_one_ref called
2221 via htab_traverse.
2223 SLOT contains the current def extension instruction.
2224 B is the see_ref_s structure pointer. */
2226 static int
2227 see_set_prop_unmerged_def (void **slot, void *b)
2229 rtx def_se = *slot;
2230 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b;
2231 rtx insn = curr_ref_s->insn;
2232 rtx dest_extension_reg = see_get_extension_reg (def_se, 1);
2233 htab_t curr_bb_hash;
2234 struct see_register_properties *curr_prop = NULL;
2235 struct see_register_properties **slot_prop;
2236 struct see_register_properties temp_prop;
2237 int ref_luid = DF_INSN_LUID (insn);
2239 curr_bb_hash = see_bb_hash_ar[BLOCK_NUM (curr_ref_s->insn)];
2240 if (!curr_bb_hash)
2242 /* The hash doesn't exist yet. Create it. */
2243 curr_bb_hash = htab_create (10,
2244 hash_descriptor_properties,
2245 eq_descriptor_properties,
2246 hash_del_properties);
2247 see_bb_hash_ar[BLOCK_NUM (curr_ref_s->insn)] = curr_bb_hash;
2250 /* Find the right register properties in the right basic block. */
2251 temp_prop.regno = REGNO (dest_extension_reg);
2252 slot_prop =
2253 (struct see_register_properties **) htab_find_slot (curr_bb_hash,
2254 &temp_prop, INSERT);
2256 if (slot_prop && *slot_prop != NULL)
2258 /* Property already exists. */
2259 curr_prop = *slot_prop;
2260 gcc_assert (curr_prop->regno == REGNO (dest_extension_reg));
2262 curr_prop->last_def = ref_luid;
2263 curr_prop->first_se_after_last_def = -1;
2265 else
2267 /* Property doesn't exist yet. */
2268 curr_prop = xmalloc (sizeof (struct see_register_properties));
2269 curr_prop->regno = REGNO (dest_extension_reg);
2270 curr_prop->last_def = ref_luid;
2271 curr_prop->first_se_before_any_def = -1;
2272 curr_prop->first_se_after_last_def = -1;
2273 *slot_prop = curr_prop;
2276 /* Insert the def_se into see_pre_extension_hash if it isn't already
2277 there. */
2278 see_seek_pre_extension_expr (def_se, DEF_EXTENSION);
2280 return 1;
2284 /* In this function we set the register properties for the register that is used
2285 in the reference.
2286 The properties are defined in see_register_properties structure which is
2287 allocated per basic block and per register.
2288 When a redundant use extension is found it is removed from the hash of the
2289 reference.
2290 If the extension is non redundant it is inserted into the
2291 see_pre_extension_hash for the next phase of the optimization.
2293 This is a subroutine of see_handle_extensions_for_one_ref called
2294 via htab_traverse.
2296 SLOT contains the current use extension instruction.
2297 B is the see_ref_s structure pointer. */
2299 static int
2300 see_set_prop_unmerged_use (void **slot, void *b)
2302 rtx use_se = *slot;
2303 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b;
2304 rtx insn = curr_ref_s->insn;
2305 rtx dest_extension_reg = see_get_extension_reg (use_se, 1);
2306 htab_t curr_bb_hash;
2307 struct see_register_properties *curr_prop = NULL;
2308 struct see_register_properties **slot_prop;
2309 struct see_register_properties temp_prop;
2310 bool locally_redundant = false;
2311 int ref_luid = DF_INSN_LUID (insn);
2313 curr_bb_hash = see_bb_hash_ar[BLOCK_NUM (curr_ref_s->insn)];
2314 if (!curr_bb_hash)
2316 /* The hash doesn't exist yet. Create it. */
2317 curr_bb_hash = htab_create (10,
2318 hash_descriptor_properties,
2319 eq_descriptor_properties,
2320 hash_del_properties);
2321 see_bb_hash_ar[BLOCK_NUM (curr_ref_s->insn)] = curr_bb_hash;
2324 /* Find the right register properties in the right basic block. */
2325 temp_prop.regno = REGNO (dest_extension_reg);
2326 slot_prop =
2327 (struct see_register_properties **) htab_find_slot (curr_bb_hash,
2328 &temp_prop, INSERT);
2330 if (slot_prop && *slot_prop != NULL)
2332 /* Property already exists. */
2333 curr_prop = *slot_prop;
2334 gcc_assert (curr_prop->regno == REGNO (dest_extension_reg));
2337 if (curr_prop->last_def < 0 && curr_prop->first_se_before_any_def < 0)
2338 curr_prop->first_se_before_any_def = ref_luid;
2339 else if (curr_prop->last_def < 0
2340 && curr_prop->first_se_before_any_def >= 0)
2342 /* In this case the extension is locally redundant. */
2343 htab_clear_slot (curr_ref_s->use_se_hash, (PTR *)slot);
2344 locally_redundant = true;
2346 else if (curr_prop->last_def >= 0
2347 && curr_prop->first_se_after_last_def < 0)
2348 curr_prop->first_se_after_last_def = ref_luid;
2349 else if (curr_prop->last_def >= 0
2350 && curr_prop->first_se_after_last_def >= 0)
2352 /* In this case the extension is locally redundant. */
2353 htab_clear_slot (curr_ref_s->use_se_hash, (PTR *)slot);
2354 locally_redundant = true;
2356 else
2357 gcc_unreachable ();
2359 else
2361 /* Property doesn't exist yet. Create a new one. */
2362 curr_prop = xmalloc (sizeof (struct see_register_properties));
2363 curr_prop->regno = REGNO (dest_extension_reg);
2364 curr_prop->last_def = -1;
2365 curr_prop->first_se_before_any_def = ref_luid;
2366 curr_prop->first_se_after_last_def = -1;
2367 *slot_prop = curr_prop;
2370 /* Insert the use_se into see_pre_extension_hash if it isn't already
2371 there. */
2372 if (!locally_redundant)
2373 see_seek_pre_extension_expr (use_se, USE_EXTENSION);
2374 if (locally_redundant && dump_file)
2376 fprintf (dump_file, "Locally redundant extension:\n");
2377 print_rtl_single (dump_file, use_se);
2379 return 1;
2383 /* Print an extension instruction.
2385 This is a subroutine of see_handle_extensions_for_one_ref called
2386 via htab_traverse.
2387 SLOT contains the extension instruction. */
2389 static int
2390 see_print_one_extension (void **slot, void *b ATTRIBUTE_UNUSED)
2392 rtx def_se = *slot;
2394 gcc_assert (def_se && INSN_P (def_se));
2395 print_rtl_single (dump_file, def_se);
2397 return 1;
2400 /* Function called by note_uses to replace used subexpressions.
2402 X is a pointer to the subexpression and DATA is a pointer to a
2403 see_replace_data structure that contains the data for the replacement. */
2405 static void
2406 see_replace_src (rtx *x, void *data)
2408 struct see_replace_data *d
2409 = (struct see_replace_data *) data;
2411 *x = replace_rtx (*x, d->from, d->to);
2415 static rtx
2416 see_copy_insn (rtx insn)
2418 rtx pat = copy_insn (PATTERN (insn)), ret;
2420 if (NONJUMP_INSN_P (insn))
2421 ret = make_insn_raw (pat);
2422 else if (JUMP_P (insn))
2423 ret = make_jump_insn_raw (pat);
2424 else if (CALL_P (insn))
2426 start_sequence ();
2427 ret = emit_call_insn (pat);
2428 end_sequence ();
2429 if (CALL_INSN_FUNCTION_USAGE (insn))
2430 CALL_INSN_FUNCTION_USAGE (ret)
2431 = copy_rtx (CALL_INSN_FUNCTION_USAGE (insn));
2432 SIBLING_CALL_P (ret) = SIBLING_CALL_P (insn);
2433 RTL_CONST_CALL_P (ret) = RTL_CONST_CALL_P (insn);
2434 RTL_PURE_CALL_P (ret) = RTL_PURE_CALL_P (insn);
2435 RTL_LOOPING_CONST_OR_PURE_CALL_P (ret)
2436 = RTL_LOOPING_CONST_OR_PURE_CALL_P (insn);
2438 else
2439 gcc_unreachable ();
2440 if (REG_NOTES (insn))
2441 REG_NOTES (ret) = copy_rtx (REG_NOTES (insn));
2442 INSN_LOCATOR (ret) = INSN_LOCATOR (insn);
2443 RTX_FRAME_RELATED_P (ret) = RTX_FRAME_RELATED_P (insn);
2444 PREV_INSN (ret) = NULL_RTX;
2445 NEXT_INSN (ret) = NULL_RTX;
2446 return ret;
2450 /* At this point the pattern is expected to be:
2452 ref: set (dest_reg) (rhs)
2453 def_se: set (dest_extension_reg) (sign/zero_extend (source_extension_reg))
2455 The merge of these two instructions didn't succeed.
2457 We try to generate the pattern:
2458 set (subreg (dest_extension_reg)) (rhs)
2460 We do this in 4 steps:
2461 a. Replace every use of dest_reg with a new pseudo register.
2462 b. Replace every instance of dest_reg with the subreg.
2463 c. Replace every use of the new pseudo register back to dest_reg.
2464 d. Try to recognize and simplify.
2466 If the manipulation failed, leave the original ref but try to generate and
2467 recognize a simple move instruction:
2468 set (subreg (dest_extension_reg)) (dest_reg)
2469 This move instruction will be emitted right after the ref to the instruction
2470 stream and assure the correctness of the code after def_se will be removed.
2472 CURR_REF_S is the current reference.
2473 DEF_SE is the extension that couldn't be merged. */
2475 static void
2476 see_def_extension_not_merged (struct see_ref_s *curr_ref_s, rtx def_se)
2478 struct see_replace_data d;
2479 /* If the original insn was already merged with an extension before,
2480 take the merged one. */
2481 rtx ref = curr_ref_s->merged_insn
2482 ? curr_ref_s->merged_insn : curr_ref_s->insn;
2483 rtx merged_ref_next = curr_ref_s->merged_insn
2484 ? NEXT_INSN (curr_ref_s->merged_insn) : NULL_RTX;
2485 rtx ref_copy = see_copy_insn (ref);
2486 rtx source_extension_reg = see_get_extension_reg (def_se, 0);
2487 rtx dest_extension_reg = see_get_extension_reg (def_se, 1);
2488 rtx set, rhs;
2489 rtx dest_reg, dest_real_reg;
2490 rtx new_pseudo_reg, subreg;
2491 enum machine_mode source_extension_mode = GET_MODE (source_extension_reg);
2492 enum machine_mode dest_mode;
2494 set = single_set (def_se);
2495 gcc_assert (set);
2496 rhs = SET_SRC (set);
2497 gcc_assert (GET_CODE (rhs) == SIGN_EXTEND
2498 || GET_CODE (rhs) == ZERO_EXTEND);
2499 dest_reg = XEXP (rhs, 0);
2500 gcc_assert (REG_P (dest_reg)
2501 || (GET_CODE (dest_reg) == SUBREG
2502 && REG_P (SUBREG_REG (dest_reg))));
2503 dest_real_reg = REG_P (dest_reg) ? dest_reg : SUBREG_REG (dest_reg);
2504 dest_mode = GET_MODE (dest_reg);
2506 subreg = gen_lowpart_SUBREG (dest_mode, dest_extension_reg);
2507 new_pseudo_reg = gen_reg_rtx (source_extension_mode);
2509 /* Step a: Replace every use of dest_real_reg with a new pseudo register. */
2510 d.from = dest_real_reg;
2511 d.to = new_pseudo_reg;
2512 note_uses (&PATTERN (ref_copy), see_replace_src, &d);
2513 /* Step b: Replace every instance of dest_reg with the subreg. */
2514 ref_copy = replace_rtx (ref_copy, dest_reg, copy_rtx (subreg));
2516 /* Step c: Replace every use of the new pseudo register back to
2517 dest_real_reg. */
2518 d.from = new_pseudo_reg;
2519 d.to = dest_real_reg;
2520 note_uses (&PATTERN (ref_copy), see_replace_src, &d);
2522 if (rtx_equal_p (PATTERN (ref), PATTERN (ref_copy))
2523 || insn_invalid_p (ref_copy))
2525 /* The manipulation failed. */
2526 df_insn_delete (NULL, INSN_UID (ref_copy));
2528 /* Create a new copy. */
2529 ref_copy = see_copy_insn (ref);
2531 if (curr_ref_s->merged_insn)
2532 df_insn_delete (NULL, INSN_UID (curr_ref_s->merged_insn));
2534 /* Create a simple move instruction that will replace the def_se. */
2535 start_sequence ();
2536 emit_insn (ref_copy);
2537 emit_move_insn (subreg, dest_reg);
2538 if (merged_ref_next != NULL_RTX)
2539 emit_insn (merged_ref_next);
2540 curr_ref_s->merged_insn = get_insns ();
2541 end_sequence ();
2543 if (dump_file)
2545 fprintf (dump_file, "Following def merge failure a move ");
2546 fprintf (dump_file, "insn was added after the ref.\n");
2547 fprintf (dump_file, "Original ref:\n");
2548 print_rtl_single (dump_file, ref);
2549 fprintf (dump_file, "Move insn that was added:\n");
2550 print_rtl_single (dump_file, NEXT_INSN (curr_ref_s->merged_insn));
2552 return;
2555 /* The manipulation succeeded. Store the new manipulated reference. */
2557 /* Try to simplify the new manipulated insn. */
2558 validate_simplify_insn (ref_copy);
2560 if (curr_ref_s->merged_insn)
2561 df_insn_delete (NULL, INSN_UID (curr_ref_s->merged_insn));
2563 /* Create a simple move instruction to assure the correctness of the code. */
2564 start_sequence ();
2565 emit_insn (ref_copy);
2566 emit_move_insn (dest_reg, subreg);
2567 if (merged_ref_next != NULL_RTX)
2568 emit_insn (merged_ref_next);
2569 curr_ref_s->merged_insn = get_insns ();
2570 end_sequence ();
2572 if (dump_file)
2574 fprintf (dump_file, "Following merge failure the ref was transformed!\n");
2575 fprintf (dump_file, "Original ref:\n");
2576 print_rtl_single (dump_file, ref);
2577 fprintf (dump_file, "Transformed ref:\n");
2578 print_rtl_single (dump_file, curr_ref_s->merged_insn);
2579 fprintf (dump_file, "Move insn that was added:\n");
2580 print_rtl_single (dump_file, NEXT_INSN (curr_ref_s->merged_insn));
2585 /* Merge the reference instruction (ref) with the current use extension.
2587 use_se extends a NARROWmode register to a WIDEmode register.
2588 ref uses the WIDEmode register.
2590 The pattern we try to merge is this:
2591 use_se: set (dest_extension_reg) (sign/zero_extend (source_extension_reg))
2592 ref: use (dest_extension_reg)
2594 where dest_extension_reg and source_extension_reg can be subregs.
2596 The merge is done by generating, simplifying and recognizing the pattern:
2597 use (sign/zero_extend (source_extension_reg))
2599 If ref is too simple (according to see_want_to_be_merged_with_extension ())
2600 we don't try to merge it with use_se and we continue as if the merge failed.
2602 This is a subroutine of see_handle_extensions_for_one_ref called
2603 via htab_traverse.
2604 SLOT contains the current use extension instruction.
2605 B is the see_ref_s structure pointer. */
2607 static int
2608 see_merge_one_use_extension (void **slot, void *b)
2610 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b;
2611 rtx use_se = *slot;
2612 rtx ref = curr_ref_s->merged_insn
2613 ? curr_ref_s->merged_insn : curr_ref_s->insn;
2614 rtx merged_ref_next = curr_ref_s->merged_insn
2615 ? NEXT_INSN (curr_ref_s->merged_insn) : NULL_RTX;
2616 rtx ref_copy = see_copy_insn (ref);
2617 rtx extension_set = single_set (use_se);
2618 rtx extension_rhs = NULL;
2619 rtx dest_extension_reg = see_get_extension_reg (use_se, 1);
2620 rtx note = NULL;
2621 rtx simplified_note = NULL;
2623 gcc_assert (use_se && curr_ref_s && extension_set);
2625 extension_rhs = SET_SRC (extension_set);
2627 /* In REG_EQUIV and REG_EQUAL notes that mention the register we need to
2628 replace the uses of the dest_extension_reg with the rhs of the extension
2629 instruction. This is necessary since there might not be an extension in
2630 the path between the definition and the note when this optimization is
2631 over. */
2632 note = find_reg_equal_equiv_note (ref_copy);
2633 if (note)
2635 simplified_note = simplify_replace_rtx (XEXP (note, 0),
2636 dest_extension_reg,
2637 extension_rhs);
2638 if (rtx_equal_p (XEXP (note, 0), simplified_note))
2639 /* Replacement failed. Remove the note. */
2640 remove_note (ref_copy, note);
2641 else
2642 set_unique_reg_note (ref_copy, REG_NOTE_KIND (note),
2643 simplified_note);
2646 if (!see_want_to_be_merged_with_extension (ref, use_se, USE_EXTENSION))
2648 /* The use in the reference is too simple. Don't try to merge. */
2649 if (dump_file)
2651 fprintf (dump_file, "Use merge skipped!\n");
2652 fprintf (dump_file, "Original instructions:\n");
2653 print_rtl_single (dump_file, use_se);
2654 print_rtl_single (dump_file, ref);
2656 df_insn_delete (NULL, INSN_UID (ref_copy));
2657 /* Don't remove the current use_se from the use_se_hash and continue to
2658 the next extension. */
2659 return 1;
2662 validate_replace_src_group (dest_extension_reg, extension_rhs, ref_copy);
2664 if (!num_changes_pending ())
2665 /* In this case this is not a real use (the only use is/was in the notes
2666 list). Remove the use extension from the hash. This will prevent it
2667 from been emitted in the first place. */
2669 if (dump_file)
2671 fprintf (dump_file, "Use extension not necessary before:\n");
2672 print_rtl_single (dump_file, ref);
2674 htab_clear_slot (curr_ref_s->use_se_hash, (PTR *)slot);
2676 if (curr_ref_s->merged_insn)
2677 df_insn_delete (NULL, INSN_UID (curr_ref_s->merged_insn));
2679 if (merged_ref_next != NULL_RTX)
2681 start_sequence ();
2682 emit_insn (ref_copy);
2683 emit_insn (merged_ref_next);
2684 curr_ref_s->merged_insn = get_insns ();
2685 end_sequence ();
2687 else
2688 curr_ref_s->merged_insn = ref_copy;
2689 return 1;
2692 if (!apply_change_group ())
2694 /* The merge failed. */
2695 if (dump_file)
2697 fprintf (dump_file, "Use merge failed!\n");
2698 fprintf (dump_file, "Original instructions:\n");
2699 print_rtl_single (dump_file, use_se);
2700 print_rtl_single (dump_file, ref);
2702 df_insn_delete (NULL, INSN_UID (ref_copy));
2703 /* Don't remove the current use_se from the use_se_hash and continue to
2704 the next extension. */
2705 return 1;
2708 /* The merge succeeded! */
2710 /* Try to simplify the new merged insn. */
2711 validate_simplify_insn (ref_copy);
2713 if (curr_ref_s->merged_insn)
2714 df_insn_delete (NULL, INSN_UID (curr_ref_s->merged_insn));
2716 if (merged_ref_next != NULL_RTX)
2718 start_sequence ();
2719 emit_insn (ref_copy);
2720 emit_insn (merged_ref_next);
2721 curr_ref_s->merged_insn = get_insns ();
2722 end_sequence ();
2724 else
2725 curr_ref_s->merged_insn = ref_copy;
2727 if (dump_file)
2729 fprintf (dump_file, "Use merge succeeded!\n");
2730 fprintf (dump_file, "Original instructions:\n");
2731 print_rtl_single (dump_file, use_se);
2732 print_rtl_single (dump_file, ref);
2733 fprintf (dump_file, "Merged instruction:\n");
2734 print_rtl_single (dump_file, curr_ref_s->merged_insn);
2737 /* Remove the current use_se from the use_se_hash. This will prevent it from
2738 been emitted in the first place. */
2739 htab_clear_slot (curr_ref_s->use_se_hash, (PTR *)slot);
2740 return 1;
2744 /* Merge the reference instruction (ref) with the extension that follows it
2745 in the same basic block (def_se).
2746 ref sets a NARROWmode register and def_se extends it to WIDEmode register.
2748 The pattern we try to merge is this:
2749 ref: set (dest_reg) (rhs)
2750 def_se: set (dest_extension_reg) (sign/zero_extend (source_extension_reg))
2752 where dest_reg and source_extension_reg can both be subregs (together)
2753 and (REGNO (dest_reg) == REGNO (source_extension_reg))
2755 The merge is done by generating, simplifying and recognizing the pattern:
2756 set (dest_extension_reg) (sign/zero_extend (rhs))
2757 If ref is a parallel instruction we just replace the relevant set in it.
2759 If ref is too simple (according to see_want_to_be_merged_with_extension ())
2760 we don't try to merge it with def_se and we continue as if the merge failed.
2762 This is a subroutine of see_handle_extensions_for_one_ref called
2763 via htab_traverse.
2765 SLOT contains the current def extension instruction.
2766 B is the see_ref_s structure pointer. */
2768 static int
2769 see_merge_one_def_extension (void **slot, void *b)
2771 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b;
2772 rtx def_se = *slot;
2773 /* If the original insn was already merged with an extension before,
2774 take the merged one. */
2775 rtx ref = curr_ref_s->merged_insn
2776 ? curr_ref_s->merged_insn : curr_ref_s->insn;
2777 rtx merged_ref_next = curr_ref_s->merged_insn
2778 ? NEXT_INSN (curr_ref_s->merged_insn) : NULL_RTX;
2779 rtx ref_copy = see_copy_insn (ref);
2780 rtx new_set = NULL;
2781 rtx source_extension_reg = see_get_extension_reg (def_se, 0);
2782 rtx dest_extension_reg = see_get_extension_reg (def_se, 1);
2783 rtx *rtx_slot, subreg;
2784 rtx temp_extension = NULL;
2785 rtx simplified_temp_extension = NULL;
2786 rtx *pat;
2787 enum rtx_code code;
2788 enum rtx_code extension_code;
2789 enum machine_mode source_extension_mode;
2790 enum machine_mode source_mode;
2791 enum machine_mode dest_extension_mode;
2792 bool merge_success = false;
2793 int i;
2795 gcc_assert (def_se
2796 && INSN_P (def_se)
2797 && curr_ref_s
2798 && ref
2799 && INSN_P (ref));
2801 if (!see_want_to_be_merged_with_extension (ref, def_se, DEF_EXTENSION))
2803 /* The definition in the reference is too simple. Don't try to merge. */
2804 if (dump_file)
2806 fprintf (dump_file, "Def merge skipped!\n");
2807 fprintf (dump_file, "Original instructions:\n");
2808 print_rtl_single (dump_file, ref);
2809 print_rtl_single (dump_file, def_se);
2812 df_insn_delete (NULL, INSN_UID (ref_copy));
2813 see_def_extension_not_merged (curr_ref_s, def_se);
2814 /* Continue to the next extension. */
2815 return 1;
2818 extension_code = see_get_extension_data (def_se, &source_mode);
2820 /* Try to merge and simplify the extension. */
2821 source_extension_mode = GET_MODE (source_extension_reg);
2822 dest_extension_mode = GET_MODE (dest_extension_reg);
2824 pat = &PATTERN (ref_copy);
2825 code = GET_CODE (*pat);
2827 if (code == PARALLEL)
2829 bool need_to_apply_change = false;
2831 for (i = 0; i < XVECLEN (*pat, 0); i++)
2833 rtx *sub = &XVECEXP (*pat, 0, i);
2835 if (GET_CODE (*sub) == SET
2836 && GET_MODE (SET_SRC (*sub)) != VOIDmode
2837 && GET_MODE (SET_DEST (*sub)) == source_mode
2838 && ((REG_P (SET_DEST (*sub))
2839 && REGNO (SET_DEST (*sub)) == REGNO (source_extension_reg))
2840 || (GET_CODE (SET_DEST (*sub)) == SUBREG
2841 && REG_P (SUBREG_REG (SET_DEST (*sub)))
2842 && (REGNO (SUBREG_REG (SET_DEST (*sub))) ==
2843 REGNO (source_extension_reg)))))
2845 rtx orig_src = SET_SRC (*sub);
2847 if (extension_code == SIGN_EXTEND)
2848 temp_extension = gen_rtx_SIGN_EXTEND (dest_extension_mode,
2849 orig_src);
2850 else
2851 temp_extension = gen_rtx_ZERO_EXTEND (dest_extension_mode,
2852 orig_src);
2853 simplified_temp_extension = simplify_rtx (temp_extension);
2854 temp_extension =
2855 (simplified_temp_extension) ? simplified_temp_extension :
2856 temp_extension;
2857 new_set = gen_rtx_SET (VOIDmode, dest_extension_reg,
2858 temp_extension);
2859 validate_change (ref_copy, sub, new_set, 1);
2860 need_to_apply_change = true;
2863 if (need_to_apply_change)
2864 if (apply_change_group ())
2865 merge_success = true;
2867 else if (code == SET
2868 && GET_MODE (SET_SRC (*pat)) != VOIDmode
2869 && GET_MODE (SET_DEST (*pat)) == source_mode
2870 && ((REG_P (SET_DEST (*pat))
2871 && REGNO (SET_DEST (*pat)) == REGNO (source_extension_reg))
2872 || (GET_CODE (SET_DEST (*pat)) == SUBREG
2873 && REG_P (SUBREG_REG (SET_DEST (*pat)))
2874 && (REGNO (SUBREG_REG (SET_DEST (*pat))) ==
2875 REGNO (source_extension_reg)))))
2877 rtx orig_src = SET_SRC (*pat);
2879 if (extension_code == SIGN_EXTEND)
2880 temp_extension = gen_rtx_SIGN_EXTEND (dest_extension_mode, orig_src);
2881 else
2882 temp_extension = gen_rtx_ZERO_EXTEND (dest_extension_mode, orig_src);
2883 simplified_temp_extension = simplify_rtx (temp_extension);
2884 temp_extension = (simplified_temp_extension) ? simplified_temp_extension :
2885 temp_extension;
2886 new_set = gen_rtx_SET (VOIDmode, dest_extension_reg, temp_extension);
2887 if (validate_change (ref_copy, pat, new_set, 0))
2888 merge_success = true;
2890 if (!merge_success)
2892 /* The merge failed. */
2893 if (dump_file)
2895 fprintf (dump_file, "Def merge failed!\n");
2896 fprintf (dump_file, "Original instructions:\n");
2897 print_rtl_single (dump_file, ref);
2898 print_rtl_single (dump_file, def_se);
2901 df_insn_delete (NULL, INSN_UID (ref_copy));
2902 see_def_extension_not_merged (curr_ref_s, def_se);
2903 /* Continue to the next extension. */
2904 return 1;
2907 /* The merge succeeded! */
2908 if (curr_ref_s->merged_insn)
2909 df_insn_delete (NULL, INSN_UID (curr_ref_s->merged_insn));
2911 /* Create a simple move instruction to assure the correctness of the code. */
2912 subreg = gen_lowpart_SUBREG (source_extension_mode, dest_extension_reg);
2913 start_sequence ();
2914 emit_insn (ref_copy);
2915 emit_move_insn (source_extension_reg, subreg);
2916 if (merged_ref_next != NULL_RTX)
2917 emit_insn (merged_ref_next);
2918 curr_ref_s->merged_insn = get_insns ();
2919 end_sequence ();
2921 if (dump_file)
2923 fprintf (dump_file, "Def merge succeeded!\n");
2924 fprintf (dump_file, "Original instructions:\n");
2925 print_rtl_single (dump_file, ref);
2926 print_rtl_single (dump_file, def_se);
2927 fprintf (dump_file, "Merged instruction:\n");
2928 print_rtl_single (dump_file, curr_ref_s->merged_insn);
2929 fprintf (dump_file, "Move instruction that was added:\n");
2930 print_rtl_single (dump_file, NEXT_INSN (curr_ref_s->merged_insn));
2933 /* Remove the current def_se from the unmerged_def_se_hash and insert it to
2934 the merged_def_se_hash. */
2935 htab_clear_slot (curr_ref_s->unmerged_def_se_hash, (PTR *)slot);
2936 if (!curr_ref_s->merged_def_se_hash)
2937 curr_ref_s->merged_def_se_hash = htab_create (10,
2938 hash_descriptor_extension,
2939 eq_descriptor_extension,
2940 NULL);
2941 rtx_slot = (rtx *) htab_find_slot (curr_ref_s->merged_def_se_hash,
2942 dest_extension_reg, INSERT);
2943 gcc_assert (*rtx_slot == NULL);
2944 *rtx_slot = def_se;
2946 return 1;
2950 /* Try to eliminate extensions in this order:
2951 a. Try to merge only the def extensions, one by one.
2952 b. Try to merge only the use extensions, one by one.
2954 TODO:
2955 Try to merge any couple of use extensions simultaneously.
2956 Try to merge any def extension with one or two uses extensions
2957 simultaneously.
2959 After all the merges are done, update the register properties for the basic
2960 block and eliminate locally redundant use extensions.
2962 This is a subroutine of see_merge_and_eliminate_extensions called
2963 via splay_tree_foreach.
2964 STN is the current node in the see_bb_splay_ar[i] splay tree. It holds a
2965 see_ref_s structure. */
2967 static int
2968 see_handle_extensions_for_one_ref (splay_tree_node stn,
2969 void *data ATTRIBUTE_UNUSED)
2971 htab_t use_se_hash = ((struct see_ref_s *) (stn->value))->use_se_hash;
2972 htab_t unmerged_def_se_hash =
2973 ((struct see_ref_s *) (stn->value))->unmerged_def_se_hash;
2974 htab_t merged_def_se_hash;
2975 rtx ref = ((struct see_ref_s *) (stn->value))->insn;
2977 if (dump_file)
2979 fprintf (dump_file, "Handling ref:\n");
2980 print_rtl_single (dump_file, ref);
2983 /* a. Try to eliminate only def extensions, one by one. */
2984 if (unmerged_def_se_hash)
2985 htab_traverse_noresize (unmerged_def_se_hash, see_merge_one_def_extension,
2986 (PTR) (stn->value));
2988 if (use_se_hash)
2989 /* b. Try to eliminate only use extensions, one by one. */
2990 htab_traverse_noresize (use_se_hash, see_merge_one_use_extension,
2991 (PTR) (stn->value));
2993 merged_def_se_hash = ((struct see_ref_s *) (stn->value))->merged_def_se_hash;
2995 if (dump_file)
2997 fprintf (dump_file, "The hashes of the current reference:\n");
2998 if (unmerged_def_se_hash)
3000 fprintf (dump_file, "unmerged_def_se_hash:\n");
3001 htab_traverse (unmerged_def_se_hash, see_print_one_extension, NULL);
3003 if (merged_def_se_hash)
3005 fprintf (dump_file, "merged_def_se_hash:\n");
3006 htab_traverse (merged_def_se_hash, see_print_one_extension, NULL);
3008 if (use_se_hash)
3010 fprintf (dump_file, "use_se_hash:\n");
3011 htab_traverse (use_se_hash, see_print_one_extension, NULL);
3015 /* Now that all the merges are done, update the register properties of the
3016 basic block and eliminate locally redundant extensions.
3017 It is important that we first traverse the use extensions hash and
3018 afterwards the def extensions hashes. */
3020 if (use_se_hash)
3021 htab_traverse_noresize (use_se_hash, see_set_prop_unmerged_use,
3022 (PTR) (stn->value));
3024 if (unmerged_def_se_hash)
3025 htab_traverse (unmerged_def_se_hash, see_set_prop_unmerged_def,
3026 (PTR) (stn->value));
3028 if (merged_def_se_hash)
3029 htab_traverse (merged_def_se_hash, see_set_prop_merged_def,
3030 (PTR) (stn->value));
3032 /* Continue to the next definition. */
3033 return 0;
3037 /* Phase 2 top level function.
3038 In this phase, we try to merge def extensions and use extensions with their
3039 references, and eliminate redundant extensions in the same basic block.
3040 We also gather information for the next phases. */
3042 static void
3043 see_merge_and_eliminate_extensions (void)
3045 int i = 0;
3047 if (dump_file)
3048 fprintf (dump_file,
3049 "* Phase 2: Merge and eliminate locally redundant extensions. *\n");
3051 /* Traverse over all the splay trees of the basic blocks. */
3052 for (i = 0; i < last_bb; i++)
3054 if (see_bb_splay_ar[i])
3056 if (dump_file)
3057 fprintf (dump_file, "Handling references for bb %d\n", i);
3058 /* Traverse over all the references in the basic block in forward
3059 order. */
3060 splay_tree_foreach (see_bb_splay_ar[i],
3061 see_handle_extensions_for_one_ref, NULL);
3067 /* Phase 1 implementation: Propagate extensions to uses. */
3069 /* Insert REF_INSN into the splay tree of its basic block.
3070 SE_INSN is the extension to store in the proper hash according to TYPE.
3072 Return true if everything went well.
3073 Otherwise, return false (this will cause the optimization to be aborted). */
3075 static bool
3076 see_store_reference_and_extension (rtx ref_insn, rtx se_insn,
3077 enum extension_type type)
3079 rtx *rtx_slot;
3080 int curr_bb_num;
3081 splay_tree_node stn = NULL;
3082 htab_t se_hash = NULL;
3083 struct see_ref_s *ref_s = NULL;
3085 /* Check the arguments. */
3086 gcc_assert (ref_insn && se_insn);
3087 if (!see_bb_splay_ar)
3088 return false;
3090 curr_bb_num = BLOCK_NUM (ref_insn);
3091 gcc_assert (curr_bb_num < last_bb && curr_bb_num >= 0);
3093 /* Insert the reference to the splay tree of its basic block. */
3094 if (!see_bb_splay_ar[curr_bb_num])
3095 /* The splay tree for this block doesn't exist yet, create it. */
3096 see_bb_splay_ar[curr_bb_num] = splay_tree_new (splay_tree_compare_ints,
3097 NULL, see_free_ref_s);
3098 else
3099 /* Splay tree already exists, check if the current reference is already
3100 in it. */
3102 stn = splay_tree_lookup (see_bb_splay_ar[curr_bb_num],
3103 DF_INSN_LUID (ref_insn));
3104 if (stn)
3105 switch (type)
3107 case EXPLICIT_DEF_EXTENSION:
3108 se_hash =
3109 ((struct see_ref_s *) (stn->value))->unmerged_def_se_hash;
3110 if (!se_hash)
3112 se_hash = htab_create (10,
3113 hash_descriptor_extension,
3114 eq_descriptor_extension,
3115 NULL);
3116 ((struct see_ref_s *) (stn->value))->unmerged_def_se_hash =
3117 se_hash;
3119 break;
3120 case IMPLICIT_DEF_EXTENSION:
3121 se_hash = ((struct see_ref_s *) (stn->value))->merged_def_se_hash;
3122 if (!se_hash)
3124 se_hash = htab_create (10,
3125 hash_descriptor_extension,
3126 eq_descriptor_extension,
3127 NULL);
3128 ((struct see_ref_s *) (stn->value))->merged_def_se_hash =
3129 se_hash;
3131 break;
3132 case USE_EXTENSION:
3133 se_hash = ((struct see_ref_s *) (stn->value))->use_se_hash;
3134 if (!se_hash)
3136 se_hash = htab_create (10,
3137 hash_descriptor_extension,
3138 eq_descriptor_extension,
3139 NULL);
3140 ((struct see_ref_s *) (stn->value))->use_se_hash = se_hash;
3142 break;
3143 default:
3144 gcc_unreachable ();
3148 /* Initialize a new see_ref_s structure and insert it to the splay
3149 tree. */
3150 if (!stn)
3152 ref_s = xmalloc (sizeof (struct see_ref_s));
3153 ref_s->luid = DF_INSN_LUID (ref_insn);
3154 ref_s->insn = ref_insn;
3155 ref_s->merged_insn = NULL;
3157 /* Initialize the hashes. */
3158 switch (type)
3160 case EXPLICIT_DEF_EXTENSION:
3161 ref_s->unmerged_def_se_hash = htab_create (10,
3162 hash_descriptor_extension,
3163 eq_descriptor_extension,
3164 NULL);
3165 se_hash = ref_s->unmerged_def_se_hash;
3166 ref_s->merged_def_se_hash = NULL;
3167 ref_s->use_se_hash = NULL;
3168 break;
3169 case IMPLICIT_DEF_EXTENSION:
3170 ref_s->merged_def_se_hash = htab_create (10,
3171 hash_descriptor_extension,
3172 eq_descriptor_extension,
3173 NULL);
3174 se_hash = ref_s->merged_def_se_hash;
3175 ref_s->unmerged_def_se_hash = NULL;
3176 ref_s->use_se_hash = NULL;
3177 break;
3178 case USE_EXTENSION:
3179 ref_s->use_se_hash = htab_create (10,
3180 hash_descriptor_extension,
3181 eq_descriptor_extension,
3182 NULL);
3183 se_hash = ref_s->use_se_hash;
3184 ref_s->unmerged_def_se_hash = NULL;
3185 ref_s->merged_def_se_hash = NULL;
3186 break;
3187 default:
3188 gcc_unreachable ();
3192 /* Insert the new extension instruction into the correct se_hash of the
3193 current reference. */
3194 rtx_slot = (rtx *) htab_find_slot (se_hash, se_insn, INSERT);
3195 if (*rtx_slot != NULL)
3197 gcc_assert (type == USE_EXTENSION);
3198 gcc_assert (rtx_equal_p (PATTERN (*rtx_slot), PATTERN (se_insn)));
3200 else
3201 *rtx_slot = se_insn;
3203 /* If this is a new reference, insert it into the splay_tree. */
3204 if (!stn)
3205 splay_tree_insert (see_bb_splay_ar[curr_bb_num],
3206 DF_INSN_LUID (ref_insn), (splay_tree_value) ref_s);
3207 return true;
3211 /* Go over all the defs, for each relevant definition (defined below) store its
3212 instruction as a reference.
3214 A definition is relevant if its root has
3215 ((entry_type == SIGN_EXTENDED_DEF) || (entry_type == ZERO_EXTENDED_DEF)) and
3216 his source_mode is not narrower then the roots source_mode.
3218 Return the number of relevant defs or negative number if something bad had
3219 happened and the optimization should be aborted. */
3221 static int
3222 see_handle_relevant_defs (struct df_ref *ref, rtx insn)
3224 struct web_entry *root_entry = NULL;
3225 rtx se_insn = NULL;
3226 enum rtx_code extension_code;
3227 rtx reg = DF_REF_REAL_REG (ref);
3228 rtx ref_insn = NULL;
3229 unsigned int i = DF_REF_ID (ref);
3231 root_entry = unionfind_root (&def_entry[DF_REF_ID (ref)]);
3233 if (ENTRY_EI (root_entry)->relevancy != SIGN_EXTENDED_DEF
3234 && ENTRY_EI (root_entry)->relevancy != ZERO_EXTENDED_DEF)
3235 /* The current web is not relevant. Continue to the next def. */
3236 return 0;
3238 if (root_entry->reg)
3239 /* It isn't possible to have two different register for the same
3240 web. */
3241 gcc_assert (rtx_equal_p (root_entry->reg, reg));
3242 else
3243 root_entry->reg = reg;
3245 /* The current definition is an EXTENDED_DEF or a definition that its
3246 source_mode is narrower then its web's source_mode.
3247 This means that we need to generate the implicit extension explicitly
3248 and store it in the current reference's merged_def_se_hash. */
3249 if (ENTRY_EI (&def_entry[i])->local_relevancy == EXTENDED_DEF
3250 || (ENTRY_EI (&def_entry[i])->local_source_mode <
3251 ENTRY_EI (root_entry)->source_mode))
3254 if (ENTRY_EI (root_entry)->relevancy == SIGN_EXTENDED_DEF)
3255 extension_code = SIGN_EXTEND;
3256 else
3257 extension_code = ZERO_EXTEND;
3259 se_insn =
3260 see_gen_normalized_extension (reg, extension_code,
3261 ENTRY_EI (root_entry)->source_mode);
3263 /* This is a dummy extension, mark it as deleted. */
3264 INSN_DELETED_P (se_insn) = 1;
3266 if (!see_store_reference_and_extension (insn, se_insn,
3267 IMPLICIT_DEF_EXTENSION))
3268 /* Something bad happened. Abort the optimization. */
3269 return -1;
3270 return 1;
3273 ref_insn = PREV_INSN (insn);
3274 gcc_assert (BLOCK_NUM (ref_insn) == BLOCK_NUM (insn));
3276 if (!see_store_reference_and_extension (ref_insn, insn,
3277 EXPLICIT_DEF_EXTENSION))
3278 /* Something bad happened. Abort the optimization. */
3279 return -1;
3281 return 0;
3284 /* Go over all the uses, for each use in relevant web store its instruction as
3285 a reference and generate an extension before it.
3287 Return the number of relevant uses or negative number if something bad had
3288 happened and the optimization should be aborted. */
3290 static int
3291 see_handle_relevant_uses (struct df_ref *ref, rtx insn)
3293 struct web_entry *root_entry = NULL;
3294 rtx se_insn = NULL;
3295 enum rtx_code extension_code;
3296 rtx reg = DF_REF_REAL_REG (ref);
3298 root_entry = unionfind_root (&use_entry[DF_REF_ID (ref)]);
3300 if (ENTRY_EI (root_entry)->relevancy != SIGN_EXTENDED_DEF
3301 && ENTRY_EI (root_entry)->relevancy != ZERO_EXTENDED_DEF)
3302 /* The current web is not relevant. Continue to the next use. */
3303 return 0;
3305 if (root_entry->reg)
3306 /* It isn't possible to have two different register for the same
3307 web. */
3308 gcc_assert (rtx_equal_p (root_entry->reg, reg));
3309 else
3310 root_entry->reg = reg;
3312 /* Generate the use extension. */
3313 if (ENTRY_EI (root_entry)->relevancy == SIGN_EXTENDED_DEF)
3314 extension_code = SIGN_EXTEND;
3315 else
3316 extension_code = ZERO_EXTEND;
3318 se_insn =
3319 see_gen_normalized_extension (reg, extension_code,
3320 ENTRY_EI (root_entry)->source_mode);
3321 if (!se_insn)
3322 /* This is very bad, abort the transformation. */
3323 return -1;
3325 if (!see_store_reference_and_extension (insn, se_insn,
3326 USE_EXTENSION))
3327 /* Something bad happened. Abort the optimization. */
3328 return -1;
3329 return 1;
3332 static int
3333 see_handle_relevant_refs (void)
3335 int num_relevant_refs = 0;
3336 basic_block bb;
3338 FOR_ALL_BB (bb)
3340 rtx insn;
3341 FOR_BB_INSNS (bb, insn)
3343 unsigned int uid = INSN_UID (insn);
3345 if (INSN_P (insn))
3347 struct df_ref **use_rec;
3348 struct df_ref **def_rec;
3350 for (use_rec = DF_INSN_UID_USES (uid); *use_rec; use_rec++)
3352 struct df_ref *use = *use_rec;
3353 int result = see_handle_relevant_uses (use, insn);
3354 if (result == -1)
3355 return -1;
3356 num_relevant_refs += result;
3358 for (use_rec = DF_INSN_UID_EQ_USES (uid); *use_rec; use_rec++)
3360 struct df_ref *use = *use_rec;
3361 int result = see_handle_relevant_uses (use, insn);
3362 if (result == -1)
3363 return -1;
3364 num_relevant_refs += result;
3366 for (def_rec = DF_INSN_UID_DEFS (uid); *def_rec; def_rec++)
3368 struct df_ref *def = *def_rec;
3369 int result = see_handle_relevant_defs (def, insn);
3370 if (result == -1)
3371 return -1;
3372 num_relevant_refs += result;
3377 return num_relevant_refs;
3381 /* Initialized the use_entry field for REF in INSN at INDEX with ET. */
3383 static void
3384 see_update_uses_relevancy (rtx insn, struct df_ref *ref,
3385 enum entry_type et, unsigned int index)
3387 struct see_entry_extra_info *curr_entry_extra_info;
3389 if (dump_file)
3391 rtx reg = DF_REF_REAL_REG (ref);
3392 fprintf (dump_file, "u%i insn %i reg %i ",
3393 index, (insn ? INSN_UID (insn) : -1), REGNO (reg));
3394 if (et == NOT_RELEVANT)
3395 fprintf (dump_file, "NOT RELEVANT \n");
3396 else
3397 fprintf (dump_file, "RELEVANT USE \n");
3400 DF_REF_ID (ref) = index;
3401 curr_entry_extra_info = xmalloc (sizeof (struct see_entry_extra_info));
3402 curr_entry_extra_info->relevancy = et;
3403 curr_entry_extra_info->local_relevancy = et;
3404 use_entry[index].extra_info = curr_entry_extra_info;
3405 use_entry[index].reg = NULL;
3406 use_entry[index].pred = NULL;
3410 /* A definition in a candidate for this optimization only if its pattern is
3411 recognized as relevant in this function.
3412 INSN is the instruction to be recognized.
3414 - If this is the pattern of a common sign extension after definition:
3415 PREV_INSN (INSN): def (reg:NARROWmode r)
3416 INSN: set ((reg:WIDEmode r')
3417 (sign_extend:WIDEmode (reg:NARROWmode r)))
3418 return SIGN_EXTENDED_DEF and set SOURCE_MODE to NARROWmode.
3420 - If this is the pattern of a common zero extension after definition:
3421 PREV_INSN (INSN): def (reg:NARROWmode r)
3422 INSN: set ((reg:WIDEmode r')
3423 (zero_extend:WIDEmode (reg:NARROWmode r)))
3424 return ZERO_EXTENDED_DEF and set SOURCE_MODE to NARROWmode.
3426 - Otherwise,
3428 For the pattern:
3429 INSN: set ((reg:WIDEmode r) (sign_extend:WIDEmode (...expr...)))
3430 return EXTENDED_DEF and set SOURCE_MODE to the mode of expr.
3432 For the pattern:
3433 INSN: set ((reg:WIDEmode r) (zero_extend:WIDEmode (...expr...)))
3434 return EXTENDED_DEF and set SOURCE_MODE_UNSIGNED to the mode of expr.
3436 For the pattern:
3437 INSN: set ((reg:WIDEmode r) (CONST_INT (...)))
3438 return EXTENDED_DEF and set SOURCE_MODE(_UNSIGNED) to the narrowest mode that
3439 is implicitly sign(zero) extended to WIDEmode in the INSN.
3441 - FIXME: Extensions that are not adjacent to their definition and EXTENDED_DEF
3442 that is part of a PARALLEL instruction are not handled.
3443 These restriction can be relaxed. */
3445 static enum entry_type
3446 see_analyze_one_def (rtx insn, enum machine_mode *source_mode,
3447 enum machine_mode *source_mode_unsigned)
3449 enum rtx_code extension_code;
3450 rtx rhs = NULL;
3451 rtx lhs = NULL;
3452 rtx set = NULL;
3453 rtx source_register = NULL;
3454 rtx prev_insn = NULL;
3455 rtx next_insn = NULL;
3456 enum machine_mode mode;
3457 enum machine_mode next_source_mode;
3458 HOST_WIDE_INT val = 0;
3459 HOST_WIDE_INT val2 = 0;
3460 int i = 0;
3462 *source_mode = MAX_MACHINE_MODE;
3463 *source_mode_unsigned = MAX_MACHINE_MODE;
3465 extension_code = see_get_extension_data (insn, source_mode);
3466 switch (extension_code)
3468 case SIGN_EXTEND:
3469 case ZERO_EXTEND:
3470 source_register = see_get_extension_reg (insn, 0);
3471 /* FIXME: This restriction can be relaxed. The only thing that is
3472 important is that the reference would be inside the same basic block
3473 as the extension. */
3474 prev_insn = PREV_INSN (insn);
3475 if (!prev_insn || !INSN_P (prev_insn))
3476 return NOT_RELEVANT;
3478 if (!reg_set_between_p (source_register, PREV_INSN (prev_insn), insn))
3479 return NOT_RELEVANT;
3481 if (find_reg_note (prev_insn, REG_LIBCALL, NULL_RTX))
3482 return NOT_RELEVANT;
3484 if (find_reg_note (prev_insn, REG_RETVAL, NULL_RTX))
3485 return NOT_RELEVANT;
3487 /* If we can't use copy_rtx on the reference it can't be a reference. */
3488 if (GET_CODE (PATTERN (prev_insn)) == PARALLEL
3489 && asm_noperands (PATTERN (prev_insn)) >= 0)
3490 return NOT_RELEVANT;
3492 /* Now, check if this extension is a reference itself. If so, it is not
3493 relevant. Handling this extension as relevant would make things much
3494 more complicated. */
3495 next_insn = NEXT_INSN (insn);
3496 if (next_insn
3497 && INSN_P (next_insn)
3498 && (see_get_extension_data (next_insn, &next_source_mode) !=
3499 NOT_RELEVANT))
3501 rtx curr_dest_register = see_get_extension_reg (insn, 1);
3502 rtx next_source_register = see_get_extension_reg (next_insn, 0);
3504 if (REGNO (curr_dest_register) == REGNO (next_source_register))
3505 return NOT_RELEVANT;
3508 if (extension_code == SIGN_EXTEND)
3509 return SIGN_EXTENDED_DEF;
3510 else
3511 return ZERO_EXTENDED_DEF;
3513 case UNKNOWN:
3514 /* This may still be an EXTENDED_DEF. */
3516 /* FIXME: This restriction can be relaxed. It is possible to handle
3517 PARALLEL insns too. */
3518 set = single_set (insn);
3519 if (!set)
3520 return NOT_RELEVANT;
3521 rhs = SET_SRC (set);
3522 lhs = SET_DEST (set);
3524 /* Don't handle extensions to something other then register or
3525 subregister. */
3526 if (!REG_P (lhs) && !SUBREG_REG (lhs))
3527 return NOT_RELEVANT;
3529 switch (GET_CODE (rhs))
3531 case SIGN_EXTEND:
3532 *source_mode = GET_MODE (XEXP (rhs, 0));
3533 *source_mode_unsigned = MAX_MACHINE_MODE;
3534 return EXTENDED_DEF;
3535 case ZERO_EXTEND:
3536 *source_mode = MAX_MACHINE_MODE;
3537 *source_mode_unsigned = GET_MODE (XEXP (rhs, 0));
3538 return EXTENDED_DEF;
3539 case CONST_INT:
3541 val = INTVAL (rhs);
3543 /* Find the narrowest mode, val could fit into. */
3544 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT), i = 0;
3545 GET_MODE_BITSIZE (mode) < BITS_PER_WORD;
3546 mode = GET_MODE_WIDER_MODE (mode), i++)
3548 val2 = trunc_int_for_mode (val, mode);
3549 if (val2 == val && *source_mode == MAX_MACHINE_MODE)
3550 *source_mode = mode;
3551 if (val == (val & (HOST_WIDE_INT)GET_MODE_MASK (mode))
3552 && *source_mode_unsigned == MAX_MACHINE_MODE)
3553 *source_mode_unsigned = mode;
3554 if (*source_mode != MAX_MACHINE_MODE
3555 && *source_mode_unsigned !=MAX_MACHINE_MODE)
3556 return EXTENDED_DEF;
3558 if (*source_mode != MAX_MACHINE_MODE
3559 || *source_mode_unsigned !=MAX_MACHINE_MODE)
3560 return EXTENDED_DEF;
3561 return NOT_RELEVANT;
3562 default:
3563 return NOT_RELEVANT;
3565 default:
3566 gcc_unreachable ();
3571 /* Initialized the def_entry field for REF in INSN at INDEX with ET. */
3573 static void
3574 see_update_defs_relevancy (rtx insn, struct df_ref *ref,
3575 enum entry_type et,
3576 enum machine_mode source_mode,
3577 enum machine_mode source_mode_unsigned,
3578 unsigned int index)
3580 struct see_entry_extra_info *curr_entry_extra_info
3581 = xmalloc (sizeof (struct see_entry_extra_info));
3582 curr_entry_extra_info->relevancy = et;
3583 curr_entry_extra_info->local_relevancy = et;
3585 DF_REF_ID (ref) = index;
3587 if (et != EXTENDED_DEF)
3589 curr_entry_extra_info->source_mode = source_mode;
3590 curr_entry_extra_info->local_source_mode = source_mode;
3592 else
3594 curr_entry_extra_info->source_mode_signed = source_mode;
3595 curr_entry_extra_info->source_mode_unsigned = source_mode_unsigned;
3597 def_entry[index].extra_info = curr_entry_extra_info;
3598 def_entry[index].reg = NULL;
3599 def_entry[index].pred = NULL;
3601 if (dump_file)
3603 rtx reg = DF_REF_REAL_REG (ref);
3604 if (et == NOT_RELEVANT)
3606 fprintf (dump_file, "d%i insn %i reg %i ",
3607 index, (insn ? INSN_UID (insn) : -1), REGNO (reg));
3608 fprintf (dump_file, "NOT RELEVANT \n");
3610 else
3612 fprintf (dump_file, "d%i insn %i reg %i ",
3613 index, INSN_UID (insn), REGNO (reg));
3614 fprintf (dump_file, "RELEVANT - ");
3615 switch (et)
3617 case SIGN_EXTENDED_DEF :
3618 fprintf (dump_file, "SIGN_EXTENDED_DEF, source_mode = %s\n",
3619 GET_MODE_NAME (source_mode));
3620 break;
3621 case ZERO_EXTENDED_DEF :
3622 fprintf (dump_file, "ZERO_EXTENDED_DEF, source_mode = %s\n",
3623 GET_MODE_NAME (source_mode));
3624 break;
3625 case EXTENDED_DEF :
3626 fprintf (dump_file, "EXTENDED_DEF, ");
3627 if (source_mode != MAX_MACHINE_MODE
3628 && source_mode_unsigned != MAX_MACHINE_MODE)
3630 fprintf (dump_file, "positive const, ");
3631 fprintf (dump_file, "source_mode_signed = %s, ",
3632 GET_MODE_NAME (source_mode));
3633 fprintf (dump_file, "source_mode_unsigned = %s\n",
3634 GET_MODE_NAME (source_mode_unsigned));
3636 else if (source_mode != MAX_MACHINE_MODE)
3637 fprintf (dump_file, "source_mode_signed = %s\n",
3638 GET_MODE_NAME (source_mode));
3639 else
3640 fprintf (dump_file, "source_mode_unsigned = %s\n",
3641 GET_MODE_NAME (source_mode_unsigned));
3642 break;
3643 default :
3644 gcc_unreachable ();
3651 /* Updates the relevancy of all the uses and all defs.
3653 The information of the u'th use is stored in use_entry[u] and the
3654 information of the d'th definition is stored in def_entry[d].
3656 Currently all the uses are relevant for the optimization except for
3657 uses that are in LIBCALL or RETVAL instructions. */
3659 static void
3660 see_update_relevancy (void)
3662 unsigned int d = 0;
3663 unsigned int u = 0;
3664 enum entry_type et;
3665 enum machine_mode source_mode;
3666 enum machine_mode source_mode_unsigned;
3667 basic_block bb;
3669 if (!def_entry)
3670 return;
3672 FOR_ALL_BB (bb)
3674 struct df_ref **use_rec;
3675 struct df_ref **def_rec;
3676 rtx insn;
3677 FOR_BB_INSNS (bb, insn)
3679 unsigned int uid = INSN_UID (insn);
3680 if (INSN_P (insn))
3682 if (find_reg_note (insn, REG_LIBCALL, NULL_RTX)
3683 || find_reg_note (insn, REG_RETVAL, NULL_RTX))
3684 et = NOT_RELEVANT;
3685 else
3686 et = RELEVANT_USE;
3688 for (use_rec = DF_INSN_UID_USES (uid); *use_rec; use_rec++)
3690 struct df_ref *use = *use_rec;
3691 see_update_uses_relevancy (insn, use, et, u);
3692 u++;
3695 for (use_rec = DF_INSN_UID_EQ_USES (uid); *use_rec; use_rec++)
3697 struct df_ref *use = *use_rec;
3698 see_update_uses_relevancy (insn, use, et, u);
3699 u++;
3702 et = see_analyze_one_def (insn, &source_mode, &source_mode_unsigned);
3703 for (def_rec = DF_INSN_UID_DEFS (uid); *def_rec; def_rec++)
3705 struct df_ref *def = *def_rec;
3706 see_update_defs_relevancy (insn, def, et, source_mode,
3707 source_mode_unsigned, d);
3708 d++;
3713 for (use_rec = df_get_artificial_uses (bb->index); *use_rec; use_rec++)
3715 struct df_ref *use = *use_rec;
3716 see_update_uses_relevancy (NULL, use, NOT_RELEVANT, u);
3717 u++;
3720 for (def_rec = df_get_artificial_defs (bb->index); *def_rec; def_rec++)
3722 struct df_ref *def = *def_rec;
3723 see_update_defs_relevancy (NULL, def, NOT_RELEVANT,
3724 MAX_MACHINE_MODE, MAX_MACHINE_MODE, d);
3725 d++;
3731 /* Phase 1 top level function.
3732 In this phase the relevancy of all the definitions and uses are checked,
3733 later the webs are produces and the extensions are generated.
3734 These extensions are not emitted yet into the insns stream.
3736 returns true if at list one relevant web was found and there were no
3737 problems, otherwise return false. */
3739 static bool
3740 see_propagate_extensions_to_uses (void)
3742 int num_relevant_refs;
3743 basic_block bb;
3745 if (dump_file)
3746 fprintf (dump_file,
3747 "* Phase 1: Propagate extensions to uses. *\n");
3749 /* Update the relevancy of references using the DF object. */
3750 see_update_relevancy ();
3752 /* Produce the webs and update the extra_info of the root.
3753 In general, a web is relevant if all its definitions and uses are relevant
3754 and there is at least one definition that was marked as SIGN_EXTENDED_DEF
3755 or ZERO_EXTENDED_DEF. */
3756 FOR_ALL_BB (bb)
3758 rtx insn;
3759 struct df_ref **use_rec;
3761 FOR_BB_INSNS (bb, insn)
3763 unsigned int uid = INSN_UID (insn);
3764 if (INSN_P (insn))
3766 for (use_rec = DF_INSN_UID_USES (uid); *use_rec; use_rec++)
3768 struct df_ref *use = *use_rec;
3769 union_defs (use, def_entry, use_entry, see_update_leader_extra_info);
3772 for (use_rec = DF_INSN_UID_EQ_USES (uid); *use_rec; use_rec++)
3774 struct df_ref *use = *use_rec;
3775 union_defs (use, def_entry, use_entry, see_update_leader_extra_info);
3780 for (use_rec = df_get_artificial_uses (bb->index); *use_rec; use_rec++)
3782 struct df_ref *use = *use_rec;
3783 union_defs (use, def_entry, use_entry, see_update_leader_extra_info);
3787 /* Generate use extensions for references and insert these
3788 references to see_bb_splay_ar data structure. */
3789 num_relevant_refs = see_handle_relevant_refs ();
3791 return num_relevant_refs > 0;
3795 /* Main entry point for the sign extension elimination optimization. */
3797 static void
3798 see_main (void)
3800 bool cont = false;
3801 int i = 0;
3803 /* Initialize global data structures. */
3804 see_initialize_data_structures ();
3806 /* Phase 1: Propagate extensions to uses. */
3807 cont = see_propagate_extensions_to_uses ();
3809 if (cont)
3811 init_recog ();
3813 /* Phase 2: Merge and eliminate locally redundant extensions. */
3814 see_merge_and_eliminate_extensions ();
3816 /* Phase 3: Eliminate globally redundant extensions. */
3817 see_execute_LCM ();
3819 /* Phase 4: Commit changes to the insn stream. */
3820 see_commit_changes ();
3822 if (dump_file)
3824 /* For debug purpose only. */
3825 fprintf (dump_file, "see_pre_extension_hash:\n");
3826 htab_traverse (see_pre_extension_hash, see_print_pre_extension_expr,
3827 NULL);
3829 for (i = 0; i < last_bb; i++)
3831 if (see_bb_hash_ar[i])
3832 /* Traverse over all the references in the basic block in
3833 forward order. */
3835 fprintf (dump_file,
3836 "Searching register properties in bb %d\n", i);
3837 htab_traverse (see_bb_hash_ar[i],
3838 see_print_register_properties, NULL);
3844 /* Free global data structures. */
3845 see_free_data_structures ();
3849 static bool
3850 gate_handle_see (void)
3852 return optimize > 1 && flag_see;
3855 static unsigned int
3856 rest_of_handle_see (void)
3858 see_main ();
3859 df_clear_flags (DF_DEFER_INSN_RESCAN);
3860 df_process_deferred_rescans ();
3861 run_fast_dce ();
3862 return 0;
3865 struct rtl_opt_pass pass_see =
3868 RTL_PASS,
3869 "see", /* name */
3870 gate_handle_see, /* gate */
3871 rest_of_handle_see, /* execute */
3872 NULL, /* sub */
3873 NULL, /* next */
3874 0, /* static_pass_number */
3875 TV_SEE, /* tv_id */
3876 0, /* properties_required */
3877 0, /* properties_provided */
3878 0, /* properties_destroyed */
3879 0, /* todo_flags_start */
3880 TODO_df_verify |
3881 TODO_df_finish | TODO_verify_rtl_sharing |
3882 TODO_dump_func /* todo_flags_finish */