r113612@merlin: rearnsha | 2006-05-07 00:19:18 +0100
[official-gcc.git] / gcc / see.c
blob603d72e9e232a092914640a57e46899679679fc3
1 /* Sign extension elimination optimization for GNU compiler.
2 Copyright (C) 2005 Free Software Foundation, Inc.
3 Contributed by Leehod Baruch <leehod@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 -Software Foundation; either version 2, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA.
22 Problem description:
23 --------------------
24 In order to support 32bit computations on a 64bit machine, sign
25 extension instructions are generated to ensure the correctness of
26 the computation.
27 A possible policy (as currently implemented) is to generate a sign
28 extension right after each 32bit computation.
29 Depending on the instruction set of the architecture, some of these
30 sign extension instructions may be redundant.
31 There are two cases in which the extension may be redundant:
33 Case1:
34 The instruction that uses the 64bit operands that are sign
35 extended has a dual mode that works with 32bit operands.
36 For example:
38 int32 a, b;
40 a = .... --> a = ....
41 a = sign extend a -->
42 b = .... --> b = ....
43 b = sign extend a -->
44 -->
45 cmpd a, b --> cmpw a, b //half word compare
47 Case2:
48 The instruction that defines the 64bit operand (which is later sign
49 extended) has a dual mode that defines and sign-extends simultaneously
50 a 32bit operand. For example:
52 int32 a;
54 ld a --> lwa a // load half word and sign extend
55 a = sign extend a -->
56 -->
57 return a --> return a
60 General idea for solution:
61 --------------------------
62 First, try to merge the sign extension with the instruction that
63 defines the source of the extension and (separately) with the
64 instructions that uses the extended result. By doing this, both cases
65 of redundancies (as described above) will be eliminated.
67 Then, use partial redundancy elimination to place the non redundant
68 ones at optimal placements.
71 Implementation by example:
72 --------------------------
73 Note: The instruction stream is not changed till the last phase.
75 Phase 0: Initial code, as currently generated by gcc.
77 def1 def3
78 se1 def2 se3
79 | \ | / |
80 | \ | / |
81 | \ | / |
82 | \ | / |
83 | \ | / |
84 | \|/ |
85 use1 use2 use3
86 use4
87 def1 + se1:
88 set ((reg:SI 10) (..def1rhs..))
89 set ((reg:DI 100) (sign_extend:DI (reg:SI 10)))
91 def2:
92 set ((reg:DI 100) (const_int 7))
94 def3 + se3:
95 set ((reg:SI 20) (..def3rhs..))
96 set ((reg:DI 100) (sign_extend:DI (reg:SI 20)))
98 use1:
99 set ((reg:CC...) (compare:CC (reg:DI 100) (...)))
101 use2, use3, use4:
102 set ((...) (reg:DI 100))
104 Phase 1: Propagate extensions to uses.
106 def1 def3
107 se1 def2 se3
108 | \ | / |
109 | \ | / |
110 | \ | / |
111 | \ | / |
112 | \ | / |
113 | \|/ |
114 se se se
115 use1 use2 use3
117 use4
119 From here, all of the subregs are lowpart !
121 def1, def2, def3: No change.
123 use1:
124 set ((reg:DI 100) (sign_extend:DI ((subreg:SI (reg:DI 100)))))
125 set ((reg:CC...) (compare:CC (reg:DI 100) (...)))
127 use2, use3, use4:
128 set ((reg:DI 100) (sign_extend:DI ((subreg:SI (reg:DI 100)))))
129 set ((...) (reg:DI 100))
132 Phase 2: Merge and eliminate locally redundant extensions.
135 *def1 def2 *def3
136 [se removed] se se3
137 | \ | / |
138 | \ | / |
139 | \ | / |
140 | \ | / |
141 | \ | / |
142 | \|/ |
143 [se removed] se se
144 *use1 use2 use3
145 [se removed]
146 use4
148 The instructions that were changed at this phase are marked with
149 asterisk.
151 *def1: Merge failed.
152 Remove the sign extension instruction, modify def1 and
153 insert a move instruction to assure to correctness of the code.
154 set ((subreg:SI (reg:DI 100)) (..def1rhs..))
155 set ((reg:SI 10) (subreg:SI (reg:DI 100)))
157 def2 + se: There is no need for merge.
158 Def2 is not changed but a sign extension instruction is
159 created.
160 set ((reg:DI 100) (const_int 7))
161 set ((reg:DI 100) (sign_extend:DI ((subreg:SI (reg:DI 100)))))
163 *def3 + se3: Merge succeeded.
164 set ((reg:DI 100) (sign_extend:DI (..def3rhs..)))
165 set ((reg:SI 20) (reg:DI 100))
166 set ((reg:DI 100) (sign_extend:DI (reg:SI 20)))
167 (The extension instruction is the original one).
169 *use1: Merge succeeded. Remove the sign extension instruction.
170 set ((reg:CC...)
171 (compare:CC (subreg:SI (reg:DI 100)) (...)))
173 use2, use3: Merge failed. No change.
175 use4: The extension is locally redundant, therefore it is eliminated
176 at this point.
179 Phase 3: Eliminate globally redundant extensions.
181 Following the LCM output:
183 def1 def2 def3
184 se se3
185 | \ | / |
186 | \ | / |
187 | se | / |
188 | \ | / |
189 | \ | / |
190 | \|/ |
191 [ses removed]
192 use1 use2 use3
193 use4
196 set ((reg:DI 100) (sign_extend:DI ((subreg:SI (reg:DI 100)))))
198 se3:
199 set ((reg:DI 100) (sign_extend:DI (reg:SI 20)))
202 Phase 4: Commit changes to the insn stream.
205 def1 def3 *def1 def2 *def3
206 se1 def2 se3 [se removed] [se removed]
207 | \ | / | | \ | / |
208 | \ | / | ------> | \ | / |
209 | \ | / | ------> | se | / |
210 | \ | / | | \ | / |
211 | \ | / | | \ | / |
212 | \|/ | | \|/ |
213 use1 use2 use3 *use1 use2 use3
214 use4 use4
216 The instructions that were changed during the whole optimization are
217 marked with asterisk.
219 The result:
221 def1 + se1:
222 [ set ((reg:SI 10) (..def1rhs..)) ] - Deleted
223 [ set ((reg:DI 100) (sign_extend:DI (reg:SI 10))) ] - Deleted
224 set ((subreg:SI (reg:DI 100)) (..def3rhs..)) - Inserted
225 set ((reg:SI 10) (subreg:SI (reg:DI 100))) - Inserted
227 def2:
228 set ((reg:DI 100) (const_int 7)) - No change
230 def3 + se3:
231 [ set ((reg:SI 20) (..def3rhs..)) ] - Deleted
232 [ set ((reg:DI 100) (sign_extend:DI (reg:SI 20))) ] - Deleted
233 set ((reg:DI 100) (sign_extend:DI (..def3rhs..))) - Inserted
234 set ((reg:SI 20) (reg:DI 100)) - Inserted
236 use1:
237 [ set ((reg:CC...) (compare:CC (reg:DI 100) (...))) ] - Deleted
238 set ((reg:CC...) - Inserted
239 (compare:CC (subreg:SI (reg:DI 100)) (...)))
241 use2, use3, use4:
242 set ((...) (reg:DI 100)) - No change
244 se: - Inserted
245 set ((reg:DI 100) (sign_extend:DI ((subreg:SI (reg:DI 100)))))
247 Note: Most of the simple move instructions that were inserted will be
248 trivially dead and therefore eliminated.
250 The implementation outline:
251 ---------------------------
252 Some definitions:
253 A web is RELEVANT if at the end of phase 1, his leader's
254 relevancy is {ZERO, SIGN}_EXTENDED_DEF. The source_mode of
255 the web is the source_mode of his leader.
256 A definition is a candidate for the optimization if it is part
257 of a RELEVANT web and his local source_mode is not narrower
258 then the source_mode of its web.
259 A use is a candidate for the optimization if it is part of a
260 RELEVANT web.
261 A simple explicit extension is a single set instruction that
262 extends a register (or a subregister) to a register (or
263 subregister).
264 A complex explicit extension is an explicit extension instruction
265 that is not simple.
266 A def extension is a simple explicit extension that is
267 also a candidate for the optimization. This extension is part
268 of the instruction stream, it is not generated by this
269 optimization.
270 A use extension is a simple explicit extension that is generated
271 and stored for candidate use during this optimization. It is
272 not emitted to the instruction stream till the last phase of
273 the optimization.
274 A reference is an instruction that satisfy at least on of these
275 criteria:
276 - It contains a definition with EXTENDED_DEF relevancy in a RELEVANT web.
277 - It is followed by a def extension.
278 - It contains a candidate use.
280 Phase 1: Propagate extensions to uses.
281 In this phase, we find candidate extensions for the optimization
282 and we generate (but not emit) proper extensions "right before the
283 uses".
285 a. Build a DF object.
286 b. Traverse over all the instructions that contains a definition
287 and set their local relevancy and local source_mode like this:
288 - If the instruction is a simple explicit extension instruction,
289 mark it as {ZERO, SIGN}_EXTENDED_DEF according to the extension
290 type and mark its source_mode to be the mode of the quantity
291 that is been extended.
292 - Otherwise, If the instruction has an implicit extension,
293 which means that its high part is an extension of its low part,
294 or if it is a complicated explicit extension, mark it as
295 EXTENDED_DEF and set its source_mode to be the narrowest
296 mode that is been extended in the instruction.
297 c. Traverse over all the instructions that contains a use and set
298 their local relevancy to RELEVANT_USE (except for few corner
299 cases).
300 d. Produce the web. During union of two entries, update the
301 relevancy and source_mode of the leader. There are two major
302 guide lines for this update:
303 - If one of the entries is NOT_RELEVANT, mark the leader
304 NOT_RELEVANT.
305 - If one is ZERO_EXTENDED_DEF and the other is SIGN_EXTENDED_DEF
306 (or vice versa) mark the leader as NOT_RELEVANT. We don't
307 handle this kind of mixed webs.
308 (For more details about this update process,
309 see see_update_leader_extra_info ()).
310 e. Generate uses extensions according to the relevancy and
311 source_mode of the webs.
313 Phase 2: Merge and eliminate locally redundant extensions.
314 In this phase, we try to merge def extensions and use
315 extensions with their references, and eliminate redundant extensions
316 in the same basic block.
318 Traverse over all the references. Do this in basic block number and
319 luid number forward order.
320 For each reference do:
321 a. Peephole optimization - try to merge it with all its
322 def extensions and use extensions in the following
323 order:
324 - Try to merge only the def extensions, one by one.
325 - Try to merge only the use extensions, one by one.
326 - Try to merge any couple of use extensions simultaneously.
327 - Try to merge any def extension with one or two uses
328 extensions simultaneously.
329 b. Handle each EXTENDED_DEF in it as if it was already merged with
330 an extension.
332 During the merge process we save the following data for each
333 register in each basic block:
334 a. The first instruction that defines the register in the basic
335 block.
336 b. The last instruction that defines the register in the basic
337 block.
338 c. The first extension of this register before the first
339 instruction that defines it in the basic block.
340 c. The first extension of this register after the last
341 instruction that defines it in the basic block.
342 This data will help us eliminate (or more precisely, not generate)
343 locally redundant extensions, and will be useful in the next stage.
345 While merging extensions with their reference there are 4 possible
346 situations:
347 a. A use extension was merged with the reference:
348 Delete the extension instruction and save the merged reference
349 for phase 4. (For details, see see_use_extension_merged ())
350 b. A use extension failed to be merged with the reference:
351 If there is already such an extension in the same basic block
352 and it is not dead at this point, delete the unmerged extension
353 (it is locally redundant), otherwise properly update the above
354 basic block data.
355 (For details, see see_merge_one_use_extension ())
356 c. A def extension was merged with the reference:
357 Mark this extension as a merged_def extension and properly
358 update the above basic block data.
359 (For details, see see_merge_one_def_extension ())
360 d. A def extension failed to be merged with the reference:
361 Replace the definition of the NARROWmode register in the
362 reference with the proper subreg of WIDEmode register and save
363 the result as a merged reference. Also, properly update the
364 the above basic block data.
365 (For details, see see_def_extension_not_merged ())
367 Phase 3: Eliminate globally redundant extensions.
368 In this phase, we set the bit vectors input of the edge based LCM
369 using the recorded data on the registers in each basic block.
370 We also save pointers for all the anticipatable and available
371 occurrences of the relevant extensions. Then we run the LCM.
373 a. Initialize the comp, antloc, kill bit vectors to zero and the
374 transp bit vector to ones.
376 b. Traverse over all the references. Do this in basic block number
377 and luid number forward order. For each reference:
378 - Go over all its use extensions. For each such extension -
379 If it is not dead from the beginning of the basic block SET
380 the antloc bit of the current extension in the current
381 basic block bits.
382 If it is not dead till the end of the basic block SET the
383 comp bit of the current extension in the current basic
384 block bits.
385 - Go over all its def extensions that were merged with
386 it. For each such extension -
387 If it is not dead till the end of the basic block SET the
388 comp bit of the current extension in the current basic
389 block bits.
390 RESET the proper transp and kill bits.
391 - Go over all its def extensions that were not merged
392 with it. For each such extension -
393 RESET the transp bit and SET the kill bit of the current
394 extension in the current basic block bits.
396 c. Run the edge based LCM.
398 Phase 4: Commit changes to the insn stream.
399 This is the only phase that actually changes the instruction stream.
400 Up to this point the optimization could be aborted at any time.
401 Here we insert extensions at their best placements and delete the
402 redundant ones according to the output of the LCM. We also replace
403 some of the instructions according to the second phase merges results.
405 a. Use the pre_delete_map (from the output of the LCM) in order to
406 delete redundant extensions. This will prevent them from been
407 emitted in the first place.
409 b. Insert extensions on edges where needed according to
410 pre_insert_map and edge_list (from the output of the LCM).
412 c. For each reference do-
413 - Emit all the uses extensions that were not deleted until now,
414 right before the reference.
415 - Delete all the merged and unmerged def extensions from
416 the instruction stream.
417 - Replace the reference with the merged one, if exist.
419 The implementation consists of four data structures:
420 - Data structure I
421 Purpose: To handle the relevancy of the uses, definitions and webs.
422 Relevant structures: web_entry (from df.h), see_entry_extra_info.
423 Details: This is a disjoint-set data structure. Most of its functions are
424 implemented in web.c. Each definition and use in the code are
425 elements. A web_entry structure is allocated for each element to
426 hold the element's relevancy and source_mode. The union rules are
427 defined in see_update_leader_extra_info ().
428 - Data structure II
429 Purpose: To store references and their extensions (uses and defs)
430 and to enable traverse over these references according to basic
431 block order.
432 Relevant structure: see_ref_s.
433 Details: This data structure consists of an array of splay trees. One splay
434 tree for each basic block. The splay tree nodes are references and
435 the keys are the luids of the references.
436 A see_ref_s structure is allocated for each reference. It holds the
437 reference itself, its def and uses extensions and later the merged
438 version of the reference.
439 Using this data structure we can traverse over all the references of
440 a basic block and their extensions in forward order.
441 - Data structure III.
442 Purpose: To store local properties of registers for each basic block.
443 This data will later help us build the LCM sbitmap_vectors
444 input.
445 Relevant structure: see_register_properties.
446 Details: This data structure consists of an array of hash tables. One hash
447 for each basic block. The hash node are a register properties
448 and the keys are the numbers of the registers.
449 A see_register_properties structure is allocated for each register
450 that we might be interested in its properties.
451 Using this data structure we can easily find the properties of a
452 register in a specific basic block. This is necessary for locally
453 redundancy elimination and for setting up the LCM input.
454 - Data structure IV.
455 Purpose: To store the extensions that are candidate for PRE and their
456 anticipatable and available occurrences.
457 Relevant structure: see_occr, see_pre_extension_expr.
458 Details: This data structure is a hash tables. Its nodes are the extensions
459 that are candidate for PRE.
460 A see_pre_extension_expr structure is allocated for each candidate
461 extension. It holds a copy of the extension and a linked list of all
462 the anticipatable and available occurrences of it.
463 We use this data structure when we read the output of the LCM. */
465 #include "config.h"
466 #include "system.h"
467 #include "coretypes.h"
468 #include "tm.h"
470 #include "obstack.h"
471 #include "rtl.h"
472 #include "output.h"
473 #include "df.h"
474 #include "insn-config.h"
475 #include "recog.h"
476 #include "expr.h"
477 #include "splay-tree.h"
478 #include "hashtab.h"
479 #include "regs.h"
480 #include "timevar.h"
481 #include "tree-pass.h"
483 void
484 see_main (void);
486 /* Used to classify defs and uses according to relevancy. */
487 enum entry_type {
488 NOT_RELEVANT,
489 SIGN_EXTENDED_DEF,
490 ZERO_EXTENDED_DEF,
491 EXTENDED_DEF,
492 RELEVANT_USE
495 /* Used to classify extensions in relevant webs. */
496 enum extension_type {
497 DEF_EXTENSION,
498 EXPLICIT_DEF_EXTENSION,
499 IMPLICIT_DEF_EXTENSION,
500 USE_EXTENSION
503 /* Global data structures and flags. */
505 /* This structure will be assigned for each web_entry structure (defined
506 in df.h). It is placed in the extra_info field of a web_entry and holds the
507 relevancy and source mode of the web_entry. */
509 struct see_entry_extra_info
511 /* The relevancy of the ref. */
512 enum entry_type relevancy;
513 /* The relevancy of the ref.
514 This field is updated only once - when this structure is created. */
515 enum entry_type local_relevancy;
516 /* The source register mode. */
517 enum machine_mode source_mode;
518 /* This field is used only if the relevancy is ZERO/SIGN_EXTENDED_DEF.
519 It is updated only once when this structure is created. */
520 enum machine_mode local_source_mode;
521 /* This field is used only if the relevancy is EXTENDED_DEF.
522 It holds the narrowest mode that is sign extended. */
523 enum machine_mode source_mode_signed;
524 /* This field is used only if the relevancy is EXTENDED_DEF.
525 It holds the narrowest mode that is zero extended. */
526 enum machine_mode source_mode_unsigned;
529 /* There is one such structure for every reference. It stores the reference
530 itself as well as its extensions (uses and definitions).
531 Used as the value in splay_tree see_bb_splay_ar[]. */
532 struct see_ref_s
534 /* The luid of the insn. */
535 unsigned int luid;
536 /* The insn of the ref. */
537 rtx insn;
538 /* The merged insn that was formed from the reference's insn and extensions.
539 If all merges faile it remains NULL. */
540 rtx merged_insn;
541 /* The def extensions of the reference that were not merged with
542 it. */
543 htab_t unmerged_def_se_hash;
544 /* The def extensions of the reference that were merged with
545 it. Implicit extensions of the reference will be stored here too. */
546 htab_t merged_def_se_hash;
547 /* The uses extensions of reference. */
548 htab_t use_se_hash;
551 /* There is one such structure for every relevant extended register in a
552 specific basic block. This data will help us build the LCM sbitmap_vectors
553 input. */
554 struct see_register_properties
556 /* The register number. */
557 unsigned int regno;
558 /* The last luid of the reference that defines this register in this basic
559 block. */
560 int last_def;
561 /* The luid of the reference that has the first extension of this register
562 that appears before any definition in this basic block. */
563 int first_se_before_any_def;
564 /* The luid of the reference that has the first extension of this register
565 that appears after the last definition in this basic block. */
566 int first_se_after_last_def;
569 /* Occurrence of an expression.
570 There must be at most one available occurance and at most one anticipatable
571 occurrence per basic block. */
572 struct see_occr
574 /* Next occurrence of this expression. */
575 struct see_occr *next;
576 /* The insn that computes the expression. */
577 rtx insn;
578 int block_num;
581 /* There is one such structure for every relevant extension expression.
582 It holds a copy of this extension instruction as well as a linked lists of
583 pointers to all the antic and avail occurrences of it. */
584 struct see_pre_extension_expr
586 /* A copy of the extension instruction. */
587 rtx se_insn;
588 /* Index in the available expression bitmaps. */
589 int bitmap_index;
590 /* List of anticipatable occurrences in basic blocks in the function.
591 An "anticipatable occurrence" is the first occurrence in the basic block,
592 the operands are not modified in the basic block prior to the occurrence
593 and the output is not used between the start of the block and the
594 occurrence. */
595 struct see_occr *antic_occr;
596 /* List of available occurrence in basic blocks in the function.
597 An "available occurrence" is the last occurrence in the basic block and
598 the operands are not modified by following statements in the basic block
599 [including this insn]. */
600 struct see_occr *avail_occr;
603 /* Helper structure for the note_uses and see_replace_src functions. */
604 struct see_replace_data
606 rtx from;
607 rtx to;
610 /* Helper structure for the note_uses and see_mentioned_reg functions. */
611 struct see_mentioned_reg_data
613 rtx reg;
614 bool mentioned;
617 /* A data flow object that will be created once and used throughout the
618 optimization. */
619 static struct df *df = NULL;
620 /* An array of web_entries. The i'th definition in the df object is associated
621 with def_entry[i] */
622 static struct web_entry *def_entry = NULL;
623 /* An array of web_entries. The i'th use in the df object is associated with
624 use_entry[i] */
625 static struct web_entry *use_entry = NULL;
626 /* Array of splay_trees.
627 see_bb_splay_ar[i] refers to the splay tree of the i'th basic block.
628 The splay tree will hold see_ref_s structures. The key is the luid
629 of the insn. This way we can traverse over the references of each basic
630 block in forward or backward order. */
631 static splay_tree *see_bb_splay_ar = NULL;
632 /* Array of hashes.
633 see_bb_hash_ar[i] refers to the hash of the i'th basic block.
634 The hash will hold see_register_properties structure. The key is regno. */
635 static htab_t *see_bb_hash_ar = NULL;
636 /* Hash table that holds a copy of all the extensions. The key is the right
637 hand side of the se_insn field. */
638 static htab_t see_pre_extension_hash = NULL;
640 /* Local LCM properties of expressions. */
641 /* Nonzero for expressions that are transparent in the block. */
642 static sbitmap *transp = NULL;
643 /* Nonzero for expressions that are computed (available) in the block. */
644 static sbitmap *comp = NULL;
645 /* Nonzero for expressions that are locally anticipatable in the block. */
646 static sbitmap *antloc = NULL;
647 /* Nonzero for expressions that are locally killed in the block. */
648 static sbitmap *ae_kill = NULL;
649 /* Nonzero for expressions which should be inserted on a specific edge. */
650 static sbitmap *pre_insert_map = NULL;
651 /* Nonzero for expressions which should be deleted in a specific block. */
652 static sbitmap *pre_delete_map = NULL;
653 /* Contains the edge_list returned by pre_edge_lcm. */
654 static struct edge_list *edge_list = NULL;
655 /* Records the last basic block at the beginning of the optimization. */
656 static int last_bb;
657 /* Records the number of uses at the beginning of the optimization. */
658 static unsigned int uses_num;
659 /* Records the number of definitions at the beginning of the optimization. */
660 static unsigned int defs_num;
662 #define ENTRY_EI(ENTRY) ((struct see_entry_extra_info *)(ENTRY)->extra_info)
664 /* Functions implementation. */
666 /* Verifies that EXTENSION's pattern is this:
668 set (reg/subreg reg1) (sign/zero_extend:WIDEmode (reg/subreg reg2))
670 If it doesn't have the expected pattern return NULL.
671 Otherwise, if RETURN_DEST_REG is set, return reg1 else return reg2. */
673 static rtx
674 see_get_extension_reg (rtx extension, bool return_dest_reg)
676 rtx set = NULL;
677 rtx rhs = NULL;
678 rtx lhs = NULL;
679 rtx reg1 = NULL;
680 rtx reg2 = NULL;
682 set = single_set (extension);
683 if (!set)
684 return NULL;
685 lhs = SET_DEST (set);
686 rhs = SET_SRC (set);
688 if (REG_P (lhs))
689 reg1 = lhs;
690 else if (REG_P (SUBREG_REG (lhs)))
691 reg1 = SUBREG_REG (lhs);
692 else
693 return NULL;
695 if ((GET_CODE (rhs) != SIGN_EXTEND) && (GET_CODE (rhs) != ZERO_EXTEND))
696 return NULL;
698 rhs = XEXP (rhs, 0);
699 if (REG_P (rhs))
700 reg2 = rhs;
701 else if (REG_P (SUBREG_REG (rhs)))
702 reg2 = SUBREG_REG (rhs);
703 else
704 return NULL;
706 if (return_dest_reg)
707 return reg1;
708 return reg2;
711 /* Verifies that EXTENSION's pattern is this:
713 set (reg/subreg reg1) (sign/zero_extend: (...expr...)
715 If it doesn't have the expected pattern return UNKNOWN.
716 Otherwise, set SOURCE_MODE to be the mode of the extended expr and return
717 the rtx code of the extension. */
719 static enum rtx_code
720 see_get_extension_data (rtx extension, enum machine_mode *source_mode)
722 rtx rhs = NULL;
723 rtx lhs = NULL;
724 rtx set = NULL;
726 if (!extension || !INSN_P (extension))
727 return UNKNOWN;
729 set = single_set (extension);
730 if (!set)
731 return NOT_RELEVANT;
732 rhs = SET_SRC (set);
733 lhs = SET_DEST (set);
735 /* Don't handle extensions to something other then register or
736 subregister. */
737 if (!REG_P (lhs) && !SUBREG_REG (lhs))
738 return UNKNOWN;
740 if ((GET_CODE (rhs) != SIGN_EXTEND) && (GET_CODE (rhs) != ZERO_EXTEND))
741 return UNKNOWN;
743 if (!REG_P (XEXP (rhs, 0))
744 && !((GET_CODE (XEXP (rhs, 0)) == SUBREG)
745 && (REG_P (SUBREG_REG (XEXP (rhs, 0))))))
746 return UNKNOWN;
748 *source_mode = GET_MODE (XEXP (rhs, 0));
750 if (GET_CODE (rhs) == SIGN_EXTEND)
751 return SIGN_EXTEND;
752 else
753 return ZERO_EXTEND;
757 /* Generate instruction with the pattern:
758 set ((reg r) (sign/zero_extend (subreg:mode (reg r))))
759 (the register r on both sides of the set is the same register).
760 And recognize it.
761 If the recognition failed, this is very bad, return NULL (This will abort
762 the entier optimization).
763 Otherwise, return the generated instruction. */
765 static rtx
766 see_gen_normalized_extension (rtx reg, enum rtx_code extension_code,
767 enum machine_mode mode)
769 rtx subreg = NULL;
770 rtx extension = NULL;
771 rtx insn = NULL;
773 if (!reg
774 || !REG_P (reg)
775 || ((extension_code != SIGN_EXTEND) && (extension_code != ZERO_EXTEND)))
776 return NULL;
778 subreg = gen_lowpart_SUBREG (mode, reg);
779 if (extension_code == SIGN_EXTEND)
780 extension = gen_rtx_SIGN_EXTEND (GET_MODE (reg), subreg);
781 else
782 extension = gen_rtx_ZERO_EXTEND (GET_MODE (reg), subreg);
784 start_sequence ();
785 emit_insn (gen_rtx_SET (VOIDmode, reg, extension));
786 insn = get_insns ();
787 end_sequence ();
789 if (insn_invalid_p (insn))
790 /* Recognition failed, this is very bad for this optimization.
791 Abort the optimization. */
792 return NULL;
793 return insn;
796 /* Hashes and splay_trees related functions implementation. */
798 /* Helper functions for the pre_extension hash.
799 This kind of hash will hold see_pre_extension_expr structures.
801 The key is the right hand side of the se_insn field.
802 Note that the se_insn is an expression that looks like:
804 set ((reg:WIDEmode r1) (sign_extend:WIDEmode
805 (subreg:NARROWmode (reg:WIDEmode r2)))) */
807 /* Return TRUE if P1 has the same value in its rhs as P2.
808 Otherwise, return FALSE.
809 P1 and P2 are see_pre_extension_expr structures. */
811 static int
812 eq_descriptor_pre_extension (const void *p1, const void *p2)
814 const struct see_pre_extension_expr *extension1 = p1;
815 const struct see_pre_extension_expr *extension2 = p2;
816 rtx set1 = single_set (extension1->se_insn);
817 rtx set2 = single_set (extension2->se_insn);
818 rtx rhs1 = NULL;
819 rtx rhs2 = NULL;
821 gcc_assert (set1 && set2);
822 rhs1 = SET_SRC (set1);
823 rhs2 = SET_SRC (set2);
825 return rtx_equal_p (rhs1, rhs2);
829 /* P is a see_pre_extension_expr struct, use the RHS of the se_insn field.
830 Note that the RHS is an expression that looks like this:
831 (sign_extend:WIDEmode (subreg:NARROWmode (reg:WIDEmode r))) */
833 static hashval_t
834 hash_descriptor_pre_extension (const void *p)
836 const struct see_pre_extension_expr *extension = p;
837 rtx set = single_set (extension->se_insn);
838 rtx rhs = NULL;
840 gcc_assert (set);
841 rhs = SET_SRC (set);
843 return hash_rtx (rhs, GET_MODE (rhs), 0, NULL, 0);
847 /* Free the allocated memory of the current see_pre_extension_expr struct.
849 It frees the two linked list of the occurrences structures. */
851 static void
852 hash_del_pre_extension (void *p)
854 struct see_pre_extension_expr *extension = p;
855 struct see_occr *curr_occr = extension->antic_occr;
856 struct see_occr *next_occr = NULL;
858 /* Free the linked list of the anticipatable occurrences. */
859 while (curr_occr)
861 next_occr = curr_occr->next;
862 free (curr_occr);
863 curr_occr = next_occr;
866 /* Free the linked list of the available occurrences. */
867 curr_occr = extension->avail_occr;
868 while (curr_occr)
870 next_occr = curr_occr->next;
871 free (curr_occr);
872 curr_occr = next_occr;
875 /* Free the see_pre_extension_expr structure itself. */
876 free (extension);
880 /* Helper functions for the register_properties hash.
881 This kind of hash will hold see_register_properties structures.
883 The value of the key is the regno field of the structure. */
885 /* Return TRUE if P1 has the same value in the regno field as P2.
886 Otherwise, return FALSE.
887 Where P1 and P2 are see_register_properties structures. */
889 static int
890 eq_descriptor_properties (const void *p1, const void *p2)
892 const struct see_register_properties *curr_prop1 = p1;
893 const struct see_register_properties *curr_prop2 = p2;
895 return (curr_prop1->regno == curr_prop2->regno);
899 /* P is a see_register_properties struct, use the register number in the
900 regno field. */
902 static hashval_t
903 hash_descriptor_properties (const void *p)
905 const struct see_register_properties *curr_prop = p;
906 return curr_prop->regno;
910 /* Free the allocated memory of the current see_register_properties struct. */
911 static void
912 hash_del_properties (void *p)
914 struct see_register_properties *curr_prop = p;
915 free (curr_prop);
919 /* Helper functions for an extension hash.
920 This kind of hash will hold insns that look like:
922 set ((reg:WIDEmode r1) (sign_extend:WIDEmode
923 (subreg:NARROWmode (reg:WIDEmode r2))))
925 set ((reg:WIDEmode r1) (sign_extend:WIDEmode (reg:NARROWmode r2)))
927 The value of the key is (REGNO (reg:WIDEmode r1))
928 It is posibble to search this hash in two ways:
929 1. By a register rtx. The Value that is been compared to the keys is the
930 REGNO of it.
931 2. By an insn with the above pattern. The Value that is been compared to
932 the keys is the REGNO of the reg on the lhs. */
934 /* Return TRUE if P1 has the same value as P2. Otherwise, return FALSE.
935 Where P1 is an insn and P2 is an insn or a register. */
937 static int
938 eq_descriptor_extension (const void *p1, const void *p2)
940 const rtx insn = (rtx) p1;
941 const rtx element = (rtx) p2;
942 rtx set1 = single_set (insn);
943 rtx set2 = NULL;
944 rtx dest_reg1 = NULL;
945 rtx dest_reg2 = NULL;
947 gcc_assert (set1 && element && (REG_P (element) || INSN_P (element)));
949 dest_reg1 = SET_DEST (set1);
951 if (INSN_P (element))
953 set2 = single_set (element);
954 dest_reg2 = SET_DEST (set2);
956 else
957 dest_reg2 = element;
959 return (REGNO (dest_reg1) == REGNO (dest_reg2));
963 /* If P is an insn, use the register number of its lhs
964 otherwise, P is a register, use its number. */
966 static hashval_t
967 hash_descriptor_extension (const void *p)
969 const rtx r = (rtx) p;
970 rtx set = NULL;
971 rtx lhs = NULL;
973 if (r && REG_P (r))
974 return REGNO (r);
975 else
977 gcc_assert (r && INSN_P (r));
978 set = single_set (r);
979 gcc_assert (set);
980 lhs = SET_DEST (set);
981 return REGNO (lhs);
986 /* Helper function for a see_bb_splay_ar[i] splay tree.
987 It frees all the allocated memory of a struct see_ref_s pointer.
989 VALUE is the value of a splay tree node. */
991 static void
992 see_free_ref_s (splay_tree_value value)
994 struct see_ref_s *ref_s = (struct see_ref_s *)value;
996 if (ref_s->unmerged_def_se_hash)
997 htab_delete (ref_s->unmerged_def_se_hash);
998 if (ref_s->merged_def_se_hash)
999 htab_delete (ref_s->merged_def_se_hash);
1000 if (ref_s->use_se_hash)
1001 htab_delete (ref_s->use_se_hash);
1002 free (ref_s);
1006 /* Rest of the implementation. */
1008 /* Search the extension hash for a suitable entry for EXTENSION.
1009 TYPE is the type of EXTENSION (USE_EXTENSION or DEF_EXTENSION).
1011 If TYPE is DEF_EXTENSION we need to normalize EXTENSION before searching the
1012 extension hash.
1014 If a suitable entry was found, return the slot. Otherwise, store EXTENSION
1015 in the hash and return NULL. */
1017 static struct see_pre_extension_expr *
1018 see_seek_pre_extension_expr (rtx extension, enum extension_type type)
1020 struct see_pre_extension_expr **slot_pre_exp = NULL;
1021 struct see_pre_extension_expr temp_pre_exp;
1022 rtx dest_extension_reg = see_get_extension_reg (extension, 1);
1023 enum rtx_code extension_code;
1024 enum machine_mode source_extension_mode;
1026 if (type == DEF_EXTENSION)
1028 extension_code = see_get_extension_data (extension,
1029 &source_extension_mode);
1030 gcc_assert (extension_code != UNKNOWN);
1031 extension =
1032 see_gen_normalized_extension (dest_extension_reg, extension_code,
1033 source_extension_mode);
1035 temp_pre_exp.se_insn = extension;
1036 slot_pre_exp =
1037 (struct see_pre_extension_expr **) htab_find_slot (see_pre_extension_hash,
1038 &temp_pre_exp, INSERT);
1039 if (*slot_pre_exp == NULL)
1040 /* This is the first time this extension instruction is encountered. Store
1041 it in the hash. */
1043 (*slot_pre_exp) = xmalloc (sizeof (struct see_pre_extension_expr));
1044 (*slot_pre_exp)->se_insn = extension;
1045 (*slot_pre_exp)->bitmap_index =
1046 (htab_elements (see_pre_extension_hash) - 1);
1047 (*slot_pre_exp)->antic_occr = NULL;
1048 (*slot_pre_exp)->avail_occr = NULL;
1049 return NULL;
1051 return *slot_pre_exp;
1055 /* This function defines how to update the extra_info of the web_entry.
1057 FIRST is the pointer of the extra_info of the first web_entry.
1058 SECOND is the pointer of the extra_info of the second web_entry.
1059 The first web_entry will be the predecessor (leader) of the second web_entry
1060 after the union.
1062 Return true if FIRST and SECOND points to the same web entry structure and
1063 nothing is done. Otherwise, return false. */
1065 static bool
1066 see_update_leader_extra_info (struct web_entry *first, struct web_entry *second)
1068 struct see_entry_extra_info *first_ei = NULL;
1069 struct see_entry_extra_info *second_ei = NULL;
1071 first = unionfind_root (first);
1072 second = unionfind_root (second);
1074 if (unionfind_union (first, second))
1075 return true;
1077 first_ei = (struct see_entry_extra_info *)first->extra_info;
1078 second_ei = (struct see_entry_extra_info *)second->extra_info;
1080 gcc_assert (first_ei && second_ei);
1082 if (second_ei->relevancy == NOT_RELEVANT)
1084 first_ei->relevancy = NOT_RELEVANT;
1085 return false;
1087 switch (first_ei->relevancy)
1089 case NOT_RELEVANT:
1090 return false;
1091 case RELEVANT_USE:
1092 switch (second_ei->relevancy)
1094 case RELEVANT_USE:
1095 return false;
1096 case EXTENDED_DEF:
1097 first_ei->relevancy = second_ei->relevancy;
1098 first_ei->source_mode_signed = second_ei->source_mode_signed;
1099 first_ei->source_mode_unsigned = second_ei->source_mode_unsigned;
1100 return false;
1101 case SIGN_EXTENDED_DEF:
1102 case ZERO_EXTENDED_DEF:
1103 first_ei->relevancy = second_ei->relevancy;
1104 first_ei->source_mode = second_ei->source_mode;
1105 return false;
1106 default:
1107 gcc_unreachable ();
1109 case SIGN_EXTENDED_DEF:
1110 switch (second_ei->relevancy)
1112 case SIGN_EXTENDED_DEF:
1113 /* The mode of the root should be the wider one in this case. */
1114 first_ei->source_mode =
1115 (first_ei->source_mode > second_ei->source_mode) ?
1116 first_ei->source_mode : second_ei->source_mode;
1117 return false;
1118 case RELEVANT_USE:
1119 return false;
1120 case ZERO_EXTENDED_DEF:
1121 /* Don't mix webs with zero extend and sign extend. */
1122 first_ei->relevancy = NOT_RELEVANT;
1123 return false;
1124 case EXTENDED_DEF:
1125 if (second_ei->source_mode_signed == MAX_MACHINE_MODE)
1126 first_ei->relevancy = NOT_RELEVANT;
1127 else
1128 /* The mode of the root should be the wider one in this case. */
1129 first_ei->source_mode =
1130 (first_ei->source_mode > second_ei->source_mode_signed) ?
1131 first_ei->source_mode : second_ei->source_mode_signed;
1132 return false;
1133 default:
1134 gcc_unreachable ();
1136 /* This case is similar to the previous one, with little changes. */
1137 case ZERO_EXTENDED_DEF:
1138 switch (second_ei->relevancy)
1140 case SIGN_EXTENDED_DEF:
1141 /* Don't mix webs with zero extend and sign extend. */
1142 first_ei->relevancy = NOT_RELEVANT;
1143 return false;
1144 case RELEVANT_USE:
1145 return false;
1146 case ZERO_EXTENDED_DEF:
1147 /* The mode of the root should be the wider one in this case. */
1148 first_ei->source_mode =
1149 (first_ei->source_mode > second_ei->source_mode) ?
1150 first_ei->source_mode : second_ei->source_mode;
1151 return false;
1152 case EXTENDED_DEF:
1153 if (second_ei->source_mode_unsigned == MAX_MACHINE_MODE)
1154 first_ei->relevancy = NOT_RELEVANT;
1155 else
1156 /* The mode of the root should be the wider one in this case. */
1157 first_ei->source_mode =
1158 (first_ei->source_mode > second_ei->source_mode_unsigned) ?
1159 first_ei->source_mode : second_ei->source_mode_unsigned;
1160 return false;
1161 default:
1162 gcc_unreachable ();
1164 case EXTENDED_DEF:
1165 if ((first_ei->source_mode_signed != MAX_MACHINE_MODE)
1166 && (first_ei->source_mode_unsigned != MAX_MACHINE_MODE))
1168 switch (second_ei->relevancy)
1170 case SIGN_EXTENDED_DEF:
1171 first_ei->relevancy = SIGN_EXTENDED_DEF;
1172 first_ei->source_mode =
1173 (first_ei->source_mode_signed > second_ei->source_mode) ?
1174 first_ei->source_mode_signed : second_ei->source_mode;
1175 return false;
1176 case RELEVANT_USE:
1177 return false;
1178 case ZERO_EXTENDED_DEF:
1179 first_ei->relevancy = ZERO_EXTENDED_DEF;
1180 first_ei->source_mode =
1181 (first_ei->source_mode_unsigned > second_ei->source_mode) ?
1182 first_ei->source_mode_unsigned : second_ei->source_mode;
1183 return false;
1184 case EXTENDED_DEF:
1185 if (second_ei->source_mode_unsigned != MAX_MACHINE_MODE)
1186 first_ei->source_mode_unsigned =
1187 (first_ei->source_mode_unsigned >
1188 second_ei->source_mode_unsigned) ?
1189 first_ei->source_mode_unsigned :
1190 second_ei->source_mode_unsigned;
1191 if (second_ei->source_mode_signed != MAX_MACHINE_MODE)
1192 first_ei->source_mode_signed =
1193 (first_ei->source_mode_signed >
1194 second_ei->source_mode_signed) ?
1195 first_ei->source_mode_signed : second_ei->source_mode_signed;
1196 return false;
1197 default:
1198 gcc_unreachable ();
1201 else if (first_ei->source_mode_signed == MAX_MACHINE_MODE)
1203 gcc_assert (first_ei->source_mode_unsigned != MAX_MACHINE_MODE);
1204 switch (second_ei->relevancy)
1206 case SIGN_EXTENDED_DEF:
1207 first_ei->relevancy = NOT_RELEVANT;
1208 return false;
1209 case RELEVANT_USE:
1210 return false;
1211 case ZERO_EXTENDED_DEF:
1212 first_ei->relevancy = ZERO_EXTENDED_DEF;
1213 first_ei->source_mode =
1214 (first_ei->source_mode_unsigned > second_ei->source_mode) ?
1215 first_ei->source_mode_unsigned : second_ei->source_mode;
1216 return false;
1217 case EXTENDED_DEF:
1218 if (second_ei->source_mode_unsigned == MAX_MACHINE_MODE)
1219 first_ei->relevancy = NOT_RELEVANT;
1220 else
1221 first_ei->source_mode_unsigned =
1222 (first_ei->source_mode_unsigned >
1223 second_ei->source_mode_unsigned) ?
1224 first_ei->source_mode_unsigned :
1225 second_ei->source_mode_unsigned;
1226 return false;
1227 default:
1228 gcc_unreachable ();
1231 else
1233 gcc_assert (first_ei->source_mode_unsigned == MAX_MACHINE_MODE);
1234 gcc_assert (first_ei->source_mode_signed != MAX_MACHINE_MODE);
1235 switch (second_ei->relevancy)
1237 case SIGN_EXTENDED_DEF:
1238 first_ei->relevancy = SIGN_EXTENDED_DEF;
1239 first_ei->source_mode =
1240 (first_ei->source_mode_signed > second_ei->source_mode) ?
1241 first_ei->source_mode_signed : second_ei->source_mode;
1242 return false;
1243 case RELEVANT_USE:
1244 return false;
1245 case ZERO_EXTENDED_DEF:
1246 first_ei->relevancy = NOT_RELEVANT;
1247 return false;
1248 case EXTENDED_DEF:
1249 if (second_ei->source_mode_signed == MAX_MACHINE_MODE)
1250 first_ei->relevancy = NOT_RELEVANT;
1251 else
1252 first_ei->source_mode_signed =
1253 (first_ei->source_mode_signed >
1254 second_ei->source_mode_signed) ?
1255 first_ei->source_mode_signed : second_ei->source_mode_signed;
1256 return false;
1257 default:
1258 gcc_unreachable ();
1261 default:
1262 /* Unknown patern type. */
1263 gcc_unreachable ();
1268 /* Free global data structures. */
1270 static void
1271 see_free_data_structures (void)
1273 int i;
1274 unsigned int j;
1276 /* Free the bitmap vectors. */
1277 if (transp)
1279 sbitmap_vector_free (transp);
1280 transp = NULL;
1281 sbitmap_vector_free (comp);
1282 comp = NULL;
1283 sbitmap_vector_free (antloc);
1284 antloc = NULL;
1285 sbitmap_vector_free (ae_kill);
1286 ae_kill = NULL;
1288 if (pre_insert_map)
1290 sbitmap_vector_free (pre_insert_map);
1291 pre_insert_map = NULL;
1293 if (pre_delete_map)
1295 sbitmap_vector_free (pre_delete_map);
1296 pre_delete_map = NULL;
1298 if (edge_list)
1300 free_edge_list (edge_list);
1301 edge_list = NULL;
1304 /* Free the extension hash. */
1305 htab_delete (see_pre_extension_hash);
1307 /* Free the array of hashes. */
1308 for (i = 0; i < last_bb; i++)
1309 if (see_bb_hash_ar[i])
1310 htab_delete (see_bb_hash_ar[i]);
1311 free (see_bb_hash_ar);
1313 /* Free the array of splay trees. */
1314 for (i = 0; i < last_bb; i++)
1315 if (see_bb_splay_ar[i])
1316 splay_tree_delete (see_bb_splay_ar[i]);
1317 free (see_bb_splay_ar);
1319 /* Free the array of web entries and their extra info field. */
1320 for (j = 0; j < defs_num; j++)
1321 free (def_entry[j].extra_info);
1322 free (def_entry);
1323 for (j = 0; j < uses_num; j++)
1324 free (use_entry[j].extra_info);
1325 free (use_entry);
1329 /* Initialize global data structures and variables. */
1331 static void
1332 see_initialize_data_structures (void)
1334 /* Build the df object. */
1335 df = df_init (DF_HARD_REGS | DF_EQUIV_NOTES | DF_SUBREGS);
1336 df_rd_add_problem (df);
1337 /* df_ru_add_problem (df); */
1338 df_chain_add_problem (df, DF_DU_CHAIN | DF_UD_CHAIN);
1339 df_analyze (df);
1341 if (dump_file)
1342 df_dump (df, dump_file);
1344 /* Record the last basic block at the beginning of the optimization. */
1345 last_bb = last_basic_block;
1346 /* Record the number of uses at the beginning of the optimization. */
1347 uses_num = DF_USES_SIZE (df);
1348 /* Record the number of definitions at the beginning of the optimization. */
1349 defs_num = DF_DEFS_SIZE (df);
1351 /* Allocate web entries array for the union-find data structure. */
1352 def_entry = xcalloc (defs_num, sizeof (struct web_entry));
1353 use_entry = xcalloc (uses_num, sizeof (struct web_entry));
1355 /* Allocate an array of splay trees.
1356 One splay tree for each basic block. */
1357 see_bb_splay_ar = xcalloc (last_bb, sizeof (splay_tree));
1359 /* Allocate an array of hashes.
1360 One hash for each basic block. */
1361 see_bb_hash_ar = xcalloc (last_bb, sizeof (htab_t));
1363 /* Allocate the extension hash. It will hold the extensions that we want
1364 to PRE. */
1365 see_pre_extension_hash =
1366 htab_create (10, hash_descriptor_pre_extension, eq_descriptor_pre_extension,
1367 hash_del_pre_extension);
1371 /* Function called by note_uses to check if a register is used in a
1372 subexpressions.
1374 X is a pointer to the subexpression and DATA is a pointer to a
1375 see_mentioned_reg_data structure that contains the register to look for and
1376 a place for the result. */
1378 static void
1379 see_mentioned_reg (rtx *x, void *data)
1381 struct see_mentioned_reg_data *d
1382 = (struct see_mentioned_reg_data *) data;
1384 if (reg_mentioned_p (d->reg, *x))
1385 d->mentioned = true;
1389 /* We don't want to merge a use extension with a reference if the extended
1390 register is used only in a simple move instruction. We also don't want to
1391 merge a def extension with a reference if the source register of the
1392 extension is defined only in a simple move in the reference.
1394 REF is the reference instruction.
1395 EXTENSION is the use extension or def extension instruction.
1396 TYPE is the type of the extension (use or def).
1398 Return true if the reference is complicated enough, so we would like to merge
1399 it with the extension. Otherwise, return false. */
1401 static bool
1402 see_want_to_be_merged_with_extension (rtx ref, rtx extension,
1403 enum extension_type type)
1405 rtx pat = NULL;
1406 rtx dest_extension_reg = see_get_extension_reg (extension, 1);
1407 rtx source_extension_reg = see_get_extension_reg (extension, 0);
1408 enum rtx_code code;
1409 struct see_mentioned_reg_data d;
1410 int i;
1412 pat = PATTERN (ref);
1413 code = GET_CODE (pat);
1415 if (code == PARALLEL)
1417 for (i = 0; i < XVECLEN (pat, 0); i++)
1419 rtx sub = XVECEXP (pat, 0, i);
1421 if ((GET_CODE (sub) == SET)
1422 && (REG_P (SET_DEST (sub))
1423 || ((GET_CODE (SET_DEST (sub)) == SUBREG)
1424 && (REG_P (SUBREG_REG (SET_DEST (sub))))))
1425 && (REG_P (SET_SRC (sub))
1426 || ((GET_CODE (SET_SRC (sub)) == SUBREG)
1427 && (REG_P (SUBREG_REG (SET_SRC (sub)))))))
1429 /* This is a simple move SET. */
1430 if ((type == DEF_EXTENSION)
1431 && reg_mentioned_p (source_extension_reg, SET_DEST (sub)))
1432 return false;
1434 else
1436 /* This is not a simple move SET.
1437 Check if it uses the source of the extension. */
1438 if (type == USE_EXTENSION)
1440 d.reg = dest_extension_reg;
1441 d.mentioned = false;
1442 note_uses (&sub, see_mentioned_reg, &d);
1443 if (d.mentioned)
1444 return true;
1448 if (type == USE_EXTENSION)
1449 return false;
1451 else
1453 if ((code == SET)
1454 && (REG_P (SET_DEST (pat))
1455 || ((GET_CODE (SET_DEST (pat)) == SUBREG)
1456 && (REG_P (SUBREG_REG (SET_DEST (pat))))))
1457 && (REG_P (SET_SRC (pat))
1458 || ((GET_CODE (SET_SRC (pat)) == SUBREG)
1459 && (REG_P (SUBREG_REG (SET_SRC (pat)))))))
1460 /* This is a simple move SET. */
1461 return false;
1464 return true;
1468 /* Print the register number of the current see_register_properties
1469 structure.
1471 This is a subroutine of see_main called via htab_traverse.
1472 SLOT contains the current see_register_properties structure pointer. */
1474 static int
1475 see_print_register_properties (void **slot, void *b ATTRIBUTE_UNUSED)
1477 struct see_register_properties *prop = *slot;
1479 gcc_assert (prop);
1480 fprintf (dump_file, "Property found for register %d\n", prop->regno);
1481 return 1;
1485 /* Print the extension instruction of the current see_register_properties
1486 structure.
1488 This is a subroutine of see_main called via htab_traverse.
1489 SLOT contains the current see_pre_extension_expr structure pointer. */
1491 static int
1492 see_print_pre_extension_expr (void **slot, void *b ATTRIBUTE_UNUSED)
1494 struct see_pre_extension_expr *pre_extension = *slot;
1496 gcc_assert (pre_extension
1497 && pre_extension->se_insn
1498 && INSN_P (pre_extension->se_insn));
1500 fprintf (dump_file, "Index %d for:\n", pre_extension->bitmap_index);
1501 print_rtl_single (dump_file, pre_extension->se_insn);
1503 return 1;
1507 /* Phase 4 implementation: Commit changes to the insn stream. */
1509 /* Delete the merged def extension.
1511 This is a subroutine of see_commit_ref_changes called via htab_traverse.
1513 SLOT contains the current def extension instruction.
1514 B is the see_ref_s structure pointer. */
1516 static int
1517 see_delete_merged_def_extension (void **slot, void *b ATTRIBUTE_UNUSED)
1519 rtx def_se = *slot;
1521 if (dump_file)
1523 fprintf (dump_file, "Deleting merged def extension:\n");
1524 print_rtl_single (dump_file, def_se);
1527 if (INSN_DELETED_P (def_se))
1528 /* This def extension is an implicit one. No need to delete it since
1529 it is not in the insn stream. */
1530 return 1;
1532 delete_insn (def_se);
1533 return 1;
1537 /* Delete the unmerged def extension.
1539 This is a subroutine of see_commit_ref_changes called via htab_traverse.
1541 SLOT contains the current def extension instruction.
1542 B is the see_ref_s structure pointer. */
1544 static int
1545 see_delete_unmerged_def_extension (void **slot, void *b ATTRIBUTE_UNUSED)
1547 rtx def_se = *slot;
1549 if (dump_file)
1551 fprintf (dump_file, "Deleting unmerged def extension:\n");
1552 print_rtl_single (dump_file, def_se);
1555 delete_insn (def_se);
1556 return 1;
1560 /* Emit the non-redundant use extension to the instruction stream.
1562 This is a subroutine of see_commit_ref_changes called via htab_traverse.
1564 SLOT contains the current use extension instruction.
1565 B is the see_ref_s structure pointer. */
1567 static int
1568 see_emit_use_extension (void **slot, void *b)
1570 rtx use_se = *slot;
1571 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b;
1573 if (INSN_DELETED_P (use_se))
1574 /* This use extension was previously removed according to the lcm
1575 output. */
1576 return 1;
1578 if (dump_file)
1580 fprintf (dump_file, "Inserting use extension:\n");
1581 print_rtl_single (dump_file, use_se);
1584 add_insn_before (use_se, curr_ref_s->insn);
1586 return 1;
1590 /* For each relevant reference:
1591 a. Emit the non-redundant use extensions.
1592 b. Delete the def extensions.
1593 c. Replace the original reference with the merged one (if exists) and add the
1594 move instructions that were generated.
1596 This is a subroutine of see_commit_changes called via splay_tree_foreach.
1598 STN is the current node in the see_bb_splay_ar[i] splay tree. It holds a
1599 see_ref_s structure. */
1601 static int
1602 see_commit_ref_changes (splay_tree_node stn,
1603 void *data ATTRIBUTE_UNUSED)
1605 htab_t use_se_hash = ((struct see_ref_s *) (stn->value))->use_se_hash;
1606 htab_t unmerged_def_se_hash =
1607 ((struct see_ref_s *) (stn->value))->unmerged_def_se_hash;
1608 htab_t merged_def_se_hash =
1609 ((struct see_ref_s *) (stn->value))->merged_def_se_hash;
1610 rtx ref = ((struct see_ref_s *) (stn->value))->insn;
1611 rtx merged_ref = ((struct see_ref_s *) (stn->value))->merged_insn;
1613 /* Emit the non-redundant use extensions. */
1614 if (use_se_hash)
1615 htab_traverse_noresize (use_se_hash, see_emit_use_extension,
1616 (PTR) (stn->value));
1618 /* Delete the def extensions. */
1619 if (unmerged_def_se_hash)
1620 htab_traverse (unmerged_def_se_hash, see_delete_unmerged_def_extension,
1621 (PTR) (stn->value));
1623 if (merged_def_se_hash)
1624 htab_traverse (merged_def_se_hash, see_delete_merged_def_extension,
1625 (PTR) (stn->value));
1627 /* Replace the original reference with the merged one (if exists) and add the
1628 move instructions that were generated. */
1629 if (merged_ref && !INSN_DELETED_P (ref))
1631 if (dump_file)
1633 fprintf (dump_file, "Replacing orig reference:\n");
1634 print_rtl_single (dump_file, ref);
1635 fprintf (dump_file, "With merged reference:\n");
1636 print_rtl_single (dump_file, merged_ref);
1638 emit_insn_after (merged_ref, ref);
1639 delete_insn (ref);
1642 /* Continue to the next reference. */
1643 return 0;
1647 /* Insert partially redundant expressions on edges to make the expressions fully
1648 redundant.
1650 INDEX_MAP is a mapping of an index to an expression.
1651 Return true if an instruction was insertedon an edge.
1652 Otherwise, return false. */
1654 static bool
1655 see_pre_insert_extensions (struct see_pre_extension_expr **index_map)
1657 int num_edges = NUM_EDGES (edge_list);
1658 int set_size = pre_insert_map[0]->size;
1659 size_t pre_extension_num = htab_elements (see_pre_extension_hash);
1661 int did_insert = 0;
1662 int e;
1663 int i;
1664 int j;
1666 for (e = 0; e < num_edges; e++)
1668 int indx;
1669 basic_block bb = INDEX_EDGE_PRED_BB (edge_list, e);
1671 for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS)
1673 SBITMAP_ELT_TYPE insert = pre_insert_map[e]->elms[i];
1675 for (j = indx; insert && j < (int) pre_extension_num;
1676 j++, insert >>= 1)
1677 if (insert & 1)
1679 struct see_pre_extension_expr *expr = index_map[j];
1680 int idx = expr->bitmap_index;
1681 rtx se_insn = NULL;
1682 edge eg = INDEX_EDGE (edge_list, e);
1684 start_sequence ();
1685 emit_insn (PATTERN (expr->se_insn));
1686 se_insn = get_insns ();
1687 end_sequence ();
1689 if (eg->flags & EDGE_ABNORMAL)
1691 rtx new_insn = NULL;
1693 new_insn = insert_insn_end_bb_new (se_insn, bb);
1694 gcc_assert (new_insn && INSN_P (new_insn));
1696 if (dump_file)
1698 fprintf (dump_file,
1699 "PRE: end of bb %d, insn %d, ",
1700 bb->index, INSN_UID (new_insn));
1701 fprintf (dump_file,
1702 "inserting expression %d\n", idx);
1705 else
1707 insert_insn_on_edge (se_insn, eg);
1709 if (dump_file)
1711 fprintf (dump_file, "PRE: edge (%d,%d), ",
1712 bb->index,
1713 INDEX_EDGE_SUCC_BB (edge_list, e)->index);
1714 fprintf (dump_file, "inserting expression %d\n", idx);
1717 did_insert = true;
1721 return did_insert;
1725 /* Since all the redundant extensions must be anticipatable, they must be a use
1726 extensions. Mark them as deleted. This will prevent them from been emitted
1727 in the first place.
1729 This is a subroutine of see_commit_changes called via htab_traverse.
1731 SLOT contains the current see_pre_extension_expr structure pointer. */
1733 static int
1734 see_pre_delete_extension (void **slot, void *b ATTRIBUTE_UNUSED)
1736 struct see_pre_extension_expr *expr = *slot;
1737 struct see_occr *occr;
1738 int indx = expr->bitmap_index;
1740 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
1742 if (TEST_BIT (pre_delete_map[occr->block_num], indx))
1744 /* Mark as deleted. */
1745 INSN_DELETED_P (occr->insn) = 1;
1746 if (dump_file)
1748 fprintf (dump_file,"Redundant extension deleted:\n");
1749 print_rtl_single (dump_file, occr->insn);
1753 return 1;
1757 /* Create the index_map mapping of an index to an expression.
1759 This is a subroutine of see_commit_changes called via htab_traverse.
1761 SLOT contains the current see_pre_extension_expr structure pointer.
1762 B a pointer to see_pre_extension_expr structure pointer. */
1764 static int
1765 see_map_extension (void **slot, void *b)
1767 struct see_pre_extension_expr *expr = *slot;
1768 struct see_pre_extension_expr **index_map =
1769 (struct see_pre_extension_expr **) b;
1771 index_map[expr->bitmap_index] = expr;
1773 return 1;
1777 /* Phase 4 top level function.
1778 In this phase we finally change the instruction stream.
1779 Here we insert extensions at their best placements and delete the
1780 redundant ones according to the output of the LCM. We also replace
1781 some of the instructions according to phase 2 merges results. */
1783 static void
1784 see_commit_changes (void)
1786 struct see_pre_extension_expr **index_map;
1787 size_t pre_extension_num = htab_elements (see_pre_extension_hash);
1788 bool did_insert = false;
1789 int i;
1791 index_map = xcalloc (pre_extension_num,
1792 sizeof (struct see_pre_extension_expr *));
1794 if (dump_file)
1795 fprintf (dump_file,
1796 "* Phase 4: Commit changes to the insn stream. *\n");
1798 /* Produce a mapping of all the pre_extensions. */
1799 htab_traverse (see_pre_extension_hash, see_map_extension, (PTR) index_map);
1801 /* Delete redundant extension. This will prevent them from been emitted in
1802 the first place. */
1803 htab_traverse (see_pre_extension_hash, see_pre_delete_extension, NULL);
1805 /* At this point, we must free the DF object, since the number of basic blocks
1806 may change. */
1807 df_finish (df);
1808 df = NULL;
1810 /* Insert extensions on edges, according to the LCM result. */
1811 did_insert = see_pre_insert_extensions (index_map);
1813 if (did_insert)
1814 commit_edge_insertions ();
1816 /* Commit the rest of the changes. */
1817 for (i = 0; i < last_bb; i++)
1819 if (see_bb_splay_ar[i])
1821 /* Traverse over all the references in the basic block in forward
1822 order. */
1823 splay_tree_foreach (see_bb_splay_ar[i],
1824 see_commit_ref_changes, NULL);
1828 free (index_map);
1832 /* Phase 3 implementation: Eliminate globally redundant extensions. */
1834 /* Analyze the properties of a merged def extension for the LCM and record avail
1835 occurrences.
1837 This is a subroutine of see_analyze_ref_local_prop called
1838 via htab_traverse.
1840 SLOT contains the current def extension instruction.
1841 B is the see_ref_s structure pointer. */
1843 static int
1844 see_analyze_merged_def_local_prop (void **slot, void *b)
1846 rtx def_se = *slot;
1847 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b;
1848 rtx ref = curr_ref_s->insn;
1849 struct see_pre_extension_expr *extension_expr = NULL;
1850 int indx;
1851 int bb_num = BLOCK_NUM (ref);
1852 htab_t curr_bb_hash = NULL;
1853 struct see_register_properties *curr_prop = NULL;
1854 struct see_register_properties **slot_prop = NULL;
1855 struct see_register_properties temp_prop;
1856 rtx dest_extension_reg = see_get_extension_reg (def_se, 1);
1857 struct see_occr *curr_occr = NULL;
1858 struct see_occr *tmp_occr = NULL;
1860 extension_expr = see_seek_pre_extension_expr (def_se, DEF_EXTENSION);
1861 /* The extension_expr must be found. */
1862 gcc_assert (extension_expr);
1864 curr_bb_hash = see_bb_hash_ar[bb_num];
1865 gcc_assert (curr_bb_hash);
1866 temp_prop.regno = REGNO (dest_extension_reg);
1867 slot_prop =
1868 (struct see_register_properties **) htab_find_slot (curr_bb_hash,
1869 &temp_prop, INSERT);
1870 curr_prop = *slot_prop;
1871 gcc_assert (curr_prop);
1873 indx = extension_expr->bitmap_index;
1875 /* Reset the transparency bit. */
1876 RESET_BIT (transp[bb_num], indx);
1877 /* Reset the killed bit. */
1878 RESET_BIT (ae_kill[bb_num], indx);
1880 if (curr_prop->first_se_after_last_def == DF_INSN_LUID (df, ref))
1882 /* Set the available bit. */
1883 SET_BIT (comp[bb_num], indx);
1884 /* Record the available occurrence. */
1885 curr_occr = xmalloc (sizeof (struct see_occr));
1886 curr_occr->next = NULL;
1887 curr_occr->insn = def_se;
1888 curr_occr->block_num = bb_num;
1889 tmp_occr = extension_expr->avail_occr;
1890 if (!tmp_occr)
1891 extension_expr->avail_occr = curr_occr;
1892 else
1894 while (tmp_occr->next)
1895 tmp_occr = tmp_occr->next;
1896 tmp_occr->next = curr_occr;
1900 return 1;
1904 /* Analyze the properties of a unmerged def extension for the LCM.
1906 This is a subroutine of see_analyze_ref_local_prop called
1907 via htab_traverse.
1909 SLOT contains the current def extension instruction.
1910 B is the see_ref_s structure pointer. */
1912 static int
1913 see_analyze_unmerged_def_local_prop (void **slot, void *b)
1915 rtx def_se = *slot;
1916 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b;
1917 rtx ref = curr_ref_s->insn;
1918 struct see_pre_extension_expr *extension_expr = NULL;
1919 int indx;
1920 int bb_num = BLOCK_NUM (ref);
1921 htab_t curr_bb_hash = NULL;
1922 struct see_register_properties *curr_prop = NULL;
1923 struct see_register_properties **slot_prop = NULL;
1924 struct see_register_properties temp_prop;
1925 rtx dest_extension_reg = see_get_extension_reg (def_se, 1);
1927 extension_expr = see_seek_pre_extension_expr (def_se, DEF_EXTENSION);
1928 /* The extension_expr must be found. */
1929 gcc_assert (extension_expr);
1931 curr_bb_hash = see_bb_hash_ar[bb_num];
1932 gcc_assert (curr_bb_hash);
1933 temp_prop.regno = REGNO (dest_extension_reg);
1934 slot_prop =
1935 (struct see_register_properties **) htab_find_slot (curr_bb_hash,
1936 &temp_prop, INSERT);
1937 curr_prop = *slot_prop;
1938 gcc_assert (curr_prop);
1940 indx = extension_expr->bitmap_index;
1942 /* Reset the transparency bit. */
1943 RESET_BIT (transp[bb_num], indx);
1944 /* Set the killed bit. */
1945 SET_BIT (ae_kill[bb_num], indx);
1947 return 1;
1951 /* Analyze the properties of a use extension for the LCM and record anic and
1952 avail occurrences.
1954 This is a subroutine of see_analyze_ref_local_prop called
1955 via htab_traverse.
1957 SLOT contains the current use extension instruction.
1958 B is the see_ref_s structure pointer. */
1960 static int
1961 see_analyze_use_local_prop (void **slot, void *b)
1963 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b;
1964 rtx use_se = *slot;
1965 rtx ref = curr_ref_s->insn;
1966 rtx dest_extension_reg = see_get_extension_reg (use_se, 1);
1967 struct see_pre_extension_expr *extension_expr = NULL;
1968 struct see_register_properties *curr_prop = NULL;
1969 struct see_register_properties **slot_prop = NULL;
1970 struct see_register_properties temp_prop;
1971 struct see_occr *curr_occr = NULL;
1972 struct see_occr *tmp_occr = NULL;
1973 htab_t curr_bb_hash = NULL;
1974 int indx;
1975 int bb_num = BLOCK_NUM (ref);
1977 extension_expr = see_seek_pre_extension_expr (use_se, USE_EXTENSION);
1978 /* The extension_expr must be found. */
1979 gcc_assert (extension_expr);
1981 curr_bb_hash = see_bb_hash_ar[bb_num];
1982 gcc_assert (curr_bb_hash);
1983 temp_prop.regno = REGNO (dest_extension_reg);
1984 slot_prop =
1985 (struct see_register_properties **) htab_find_slot (curr_bb_hash,
1986 &temp_prop, INSERT);
1987 curr_prop = *slot_prop;
1988 gcc_assert (curr_prop);
1990 indx = extension_expr->bitmap_index;
1992 if (curr_prop->first_se_before_any_def == DF_INSN_LUID (df, ref))
1994 /* Set the anticipatable bit. */
1995 SET_BIT (antloc[bb_num], indx);
1996 /* Record the anticipatable occurrence. */
1997 curr_occr = xmalloc (sizeof (struct see_occr));
1998 curr_occr->next = NULL;
1999 curr_occr->insn = use_se;
2000 curr_occr->block_num = bb_num;
2001 tmp_occr = extension_expr->antic_occr;
2002 if (!tmp_occr)
2003 extension_expr->antic_occr = curr_occr;
2004 else
2006 while (tmp_occr->next)
2007 tmp_occr = tmp_occr->next;
2008 tmp_occr->next = curr_occr;
2010 if (curr_prop->last_def < 0)
2012 /* Set the available bit. */
2013 SET_BIT (comp[bb_num], indx);
2014 /* Record the available occurrence. */
2015 curr_occr = xmalloc (sizeof (struct see_occr));
2016 curr_occr->next = NULL;
2017 curr_occr->insn = use_se;
2018 curr_occr->block_num = bb_num;
2019 tmp_occr = extension_expr->avail_occr;
2020 if (!tmp_occr)
2021 extension_expr->avail_occr = curr_occr;
2022 else
2024 while (tmp_occr->next)
2025 tmp_occr = tmp_occr->next;
2026 tmp_occr->next = curr_occr;
2029 /* Note: there is no need to reset the killed bit since it must be zero at
2030 this point. */
2032 else if (curr_prop->first_se_after_last_def == DF_INSN_LUID (df, ref))
2034 /* Set the available bit. */
2035 SET_BIT (comp[bb_num], indx);
2036 /* Reset the killed bit. */
2037 RESET_BIT (ae_kill[bb_num], indx);
2038 /* Record the available occurrence. */
2039 curr_occr = xmalloc (sizeof (struct see_occr));
2040 curr_occr->next = NULL;
2041 curr_occr->insn = use_se;
2042 curr_occr->block_num = bb_num;
2043 tmp_occr = extension_expr->avail_occr;
2044 if (!tmp_occr)
2045 extension_expr->avail_occr = curr_occr;
2046 else
2048 while (tmp_occr->next)
2049 tmp_occr = tmp_occr->next;
2050 tmp_occr->next = curr_occr;
2053 return 1;
2057 /* Here we traverse over all the merged and unmerged extensions of the reference
2058 and analyze their properties for the LCM.
2060 This is a subroutine of see_execute_LCM called via splay_tree_foreach.
2062 STN is the current node in the see_bb_splay_ar[i] splay tree. It holds a
2063 see_ref_s structure. */
2065 static int
2066 see_analyze_ref_local_prop (splay_tree_node stn,
2067 void *data ATTRIBUTE_UNUSED)
2069 htab_t use_se_hash = ((struct see_ref_s *) (stn->value))->use_se_hash;
2070 htab_t unmerged_def_se_hash =
2071 ((struct see_ref_s *) (stn->value))->unmerged_def_se_hash;
2072 htab_t merged_def_se_hash =
2073 ((struct see_ref_s *) (stn->value))->merged_def_se_hash;
2075 /* Analyze use extensions that were not merged with the reference. */
2076 if (use_se_hash)
2077 htab_traverse_noresize (use_se_hash, see_analyze_use_local_prop,
2078 (PTR) (stn->value));
2080 /* Analyze def extensions that were not merged with the reference. */
2081 if (unmerged_def_se_hash)
2082 htab_traverse (unmerged_def_se_hash, see_analyze_unmerged_def_local_prop,
2083 (PTR) (stn->value));
2085 /* Analyze def extensions that were merged with the reference. */
2086 if (merged_def_se_hash)
2087 htab_traverse (merged_def_se_hash, see_analyze_merged_def_local_prop,
2088 (PTR) (stn->value));
2090 /* Continue to the next definition. */
2091 return 0;
2095 /* Phase 3 top level function.
2096 In this phase, we set the input bit vectors of the LCM according to data
2097 gathered in phase 2.
2098 Then we run the edge based LCM. */
2100 static void
2101 see_execute_LCM (void)
2103 size_t pre_extension_num = htab_elements (see_pre_extension_hash);
2104 int i = 0;
2106 if (dump_file)
2107 fprintf (dump_file,
2108 "* Phase 3: Eliminate globally redundant extensions. *\n");
2110 /* Initialize the global sbitmap vectors. */
2111 transp = sbitmap_vector_alloc (last_bb, pre_extension_num);
2112 comp = sbitmap_vector_alloc (last_bb, pre_extension_num);
2113 antloc = sbitmap_vector_alloc (last_bb, pre_extension_num);
2114 ae_kill = sbitmap_vector_alloc (last_bb, pre_extension_num);
2115 sbitmap_vector_ones (transp, last_bb);
2116 sbitmap_vector_zero (comp, last_bb);
2117 sbitmap_vector_zero (antloc, last_bb);
2118 sbitmap_vector_zero (ae_kill, last_bb);
2120 /* Traverse over all the splay trees of the basic blocks. */
2121 for (i = 0; i < last_bb; i++)
2123 if (see_bb_splay_ar[i])
2125 /* Traverse over all the references in the basic block in forward
2126 order. */
2127 splay_tree_foreach (see_bb_splay_ar[i],
2128 see_analyze_ref_local_prop, NULL);
2132 /* Add fake exit edges before running the lcm. */
2133 add_noreturn_fake_exit_edges ();
2135 /* Run the LCM. */
2136 edge_list = pre_edge_lcm (pre_extension_num, transp, comp, antloc,
2137 ae_kill, &pre_insert_map, &pre_delete_map);
2139 /* Remove the fake edges. */
2140 remove_fake_exit_edges ();
2144 /* Phase 2 implementation: Merge and eliminate locally redundant extensions. */
2146 /* In this function we set the register properties for the register that is
2147 defined and extended in the reference.
2148 The properties are defined in see_register_properties structure which is
2149 allocated per basic bloack and per register.
2150 Later the extension is inserted into the see_pre_extension_hash for the next
2151 phase of the optimization.
2153 This is a subroutine of see_handle_extensions_for_one_ref called
2154 via htab_traverse.
2156 SLOT contains the current def extension instruction.
2157 B is the see_ref_s structure pointer. */
2159 static int
2160 see_set_prop_merged_def (void **slot, void *b)
2162 rtx def_se = *slot;
2163 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b;
2164 rtx insn = curr_ref_s->insn;
2165 rtx dest_extension_reg = see_get_extension_reg (def_se, 1);
2166 htab_t curr_bb_hash = NULL;
2167 struct see_register_properties *curr_prop = NULL;
2168 struct see_register_properties **slot_prop = NULL;
2169 struct see_register_properties temp_prop;
2170 int ref_luid = DF_INSN_LUID (df, insn);
2172 curr_bb_hash = see_bb_hash_ar[BLOCK_NUM (curr_ref_s->insn)];
2173 if (!curr_bb_hash)
2175 /* The hash doesn't exist yet. Create it. */
2176 curr_bb_hash =
2177 htab_create (10, hash_descriptor_properties, eq_descriptor_properties,
2178 hash_del_properties);
2179 see_bb_hash_ar[BLOCK_NUM (curr_ref_s->insn)] = curr_bb_hash;
2182 /* Find the right register properties in the right basic block. */
2183 temp_prop.regno = REGNO (dest_extension_reg);
2184 slot_prop =
2185 (struct see_register_properties **) htab_find_slot (curr_bb_hash,
2186 &temp_prop, INSERT);
2188 if (slot_prop && (*slot_prop != NULL))
2190 /* Property already exists. */
2191 curr_prop = *slot_prop;
2192 gcc_assert (curr_prop->regno == REGNO (dest_extension_reg));
2194 curr_prop->last_def = ref_luid;
2195 curr_prop->first_se_after_last_def = ref_luid;
2197 else
2199 /* Property doesn't exist yet. */
2200 curr_prop = xmalloc (sizeof (struct see_register_properties));
2201 curr_prop->regno = REGNO (dest_extension_reg);
2202 curr_prop->last_def = ref_luid;
2203 curr_prop->first_se_before_any_def = -1;
2204 curr_prop->first_se_after_last_def = ref_luid;
2205 *slot_prop = curr_prop;
2208 /* Insert the def_se into see_pre_extension_hash if it isn't already
2209 there. */
2210 see_seek_pre_extension_expr (def_se, DEF_EXTENSION);
2212 return 1;
2216 /* In this function we set the register properties for the register that is
2217 defined but not extended in the reference.
2218 The properties are defined in see_register_properties structure which is
2219 allocated per basic bloack and per register.
2220 Later the extension is inserted into the see_pre_extension_hash for the next
2221 phase of the optimization.
2223 This is a subroutine of see_handle_extensions_for_one_ref called
2224 via htab_traverse.
2226 SLOT contains the current def extension instruction.
2227 B is the see_ref_s structure pointer. */
2229 static int
2230 see_set_prop_unmerged_def (void **slot, void *b)
2232 rtx def_se = *slot;
2233 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b;
2234 rtx insn = curr_ref_s->insn;
2235 rtx dest_extension_reg = see_get_extension_reg (def_se, 1);
2236 htab_t curr_bb_hash = NULL;
2237 struct see_register_properties *curr_prop = NULL;
2238 struct see_register_properties **slot_prop = NULL;
2239 struct see_register_properties temp_prop;
2240 int ref_luid = DF_INSN_LUID (df, insn);
2242 curr_bb_hash = see_bb_hash_ar[BLOCK_NUM (curr_ref_s->insn)];
2243 if (!curr_bb_hash)
2245 /* The hash doesn't exist yet. Create it. */
2246 curr_bb_hash =
2247 htab_create (10, hash_descriptor_properties, eq_descriptor_properties,
2248 hash_del_properties);
2249 see_bb_hash_ar[BLOCK_NUM (curr_ref_s->insn)] = curr_bb_hash;
2252 /* Find the right register properties in the right basic block. */
2253 temp_prop.regno = REGNO (dest_extension_reg);
2254 slot_prop =
2255 (struct see_register_properties **) htab_find_slot (curr_bb_hash,
2256 &temp_prop, INSERT);
2258 if (slot_prop && (*slot_prop != NULL))
2260 /* Property already exists. */
2261 curr_prop = *slot_prop;
2262 gcc_assert (curr_prop->regno == REGNO (dest_extension_reg));
2264 curr_prop->last_def = ref_luid;
2265 curr_prop->first_se_after_last_def = -1;
2267 else
2269 /* Property doesn't exist yet. */
2270 curr_prop = xmalloc (sizeof (struct see_register_properties));
2271 curr_prop->regno = REGNO (dest_extension_reg);
2272 curr_prop->last_def = ref_luid;
2273 curr_prop->first_se_before_any_def = -1;
2274 curr_prop->first_se_after_last_def = -1;
2275 *slot_prop = curr_prop;
2278 /* Insert the def_se into see_pre_extension_hash if it isn't already
2279 there. */
2280 see_seek_pre_extension_expr (def_se, DEF_EXTENSION);
2282 return 1;
2286 /* In this function we set the register properties for the register that is used
2287 in the reference.
2288 The properties are defined in see_register_properties structure which is
2289 allocated per basic bloack and per register.
2290 When a redundant use extension is found it is removed from the hash of the
2291 reference.
2292 If the extension is non redundant it is inserted into the
2293 see_pre_extension_hash for the next phase of the optimization.
2295 This is a subroutine of see_handle_extensions_for_one_ref called
2296 via htab_traverse.
2298 SLOT contains the current use extension instruction.
2299 B is the see_ref_s structure pointer. */
2301 static int
2302 see_set_prop_unmerged_use (void **slot, void *b)
2304 rtx use_se = *slot;
2305 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b;
2306 rtx insn = curr_ref_s->insn;
2307 rtx dest_extension_reg = see_get_extension_reg (use_se, 1);
2308 htab_t curr_bb_hash = NULL;
2309 struct see_register_properties *curr_prop = NULL;
2310 struct see_register_properties **slot_prop = NULL;
2311 struct see_register_properties temp_prop;
2312 bool locally_redundant = false;
2313 int ref_luid = DF_INSN_LUID (df, insn);
2315 curr_bb_hash = see_bb_hash_ar[BLOCK_NUM (curr_ref_s->insn)];
2316 if (!curr_bb_hash)
2318 /* The hash doesn't exist yet. Create it. */
2319 curr_bb_hash =
2320 htab_create (10, hash_descriptor_properties, eq_descriptor_properties,
2321 hash_del_properties);
2322 see_bb_hash_ar[BLOCK_NUM (curr_ref_s->insn)] = curr_bb_hash;
2325 /* Find the right register properties in the right basic block. */
2326 temp_prop.regno = REGNO (dest_extension_reg);
2327 slot_prop =
2328 (struct see_register_properties **) htab_find_slot (curr_bb_hash,
2329 &temp_prop, INSERT);
2331 if (slot_prop && (*slot_prop != NULL))
2333 /* Property already exists. */
2334 curr_prop = *slot_prop;
2335 gcc_assert (curr_prop->regno == REGNO (dest_extension_reg));
2338 if ((curr_prop->last_def < 0) && (curr_prop->first_se_before_any_def < 0))
2339 curr_prop->first_se_before_any_def = ref_luid;
2340 else if ((curr_prop->last_def < 0)
2341 && (curr_prop->first_se_before_any_def >= 0))
2343 /* In this case the extension is localy redundant. */
2344 htab_clear_slot (curr_ref_s->use_se_hash, (PTR *)slot);
2345 locally_redundant = true;
2347 else if ((curr_prop->last_def >= 0)
2348 && (curr_prop->first_se_after_last_def < 0))
2349 curr_prop->first_se_after_last_def = ref_luid;
2350 else if ((curr_prop->last_def >= 0)
2351 && (curr_prop->first_se_after_last_def >= 0))
2353 /* In this case the extension is localy redundant. */
2354 htab_clear_slot (curr_ref_s->use_se_hash, (PTR *)slot);
2355 locally_redundant = true;
2357 else
2358 gcc_unreachable ();
2360 else
2362 /* Property doesn't exist yet. Create a new one. */
2363 curr_prop = xmalloc (sizeof (struct see_register_properties));
2364 curr_prop->regno = REGNO (dest_extension_reg);
2365 curr_prop->last_def = -1;
2366 curr_prop->first_se_before_any_def = ref_luid;
2367 curr_prop->first_se_after_last_def = -1;
2368 *slot_prop = curr_prop;
2371 /* Insert the use_se into see_pre_extension_hash if it isn't already
2372 there. */
2373 if (!locally_redundant)
2374 see_seek_pre_extension_expr (use_se, USE_EXTENSION);
2375 if (locally_redundant && dump_file)
2377 fprintf (dump_file, "Locally redundant extension:\n");
2378 print_rtl_single (dump_file, use_se);
2380 return 1;
2384 /* Print an extension instruction.
2386 This is a subroutine of see_handle_extensions_for_one_ref called
2387 via htab_traverse.
2388 SLOT contains the extension instruction. */
2390 static int
2391 see_print_one_extension (void **slot, void *b ATTRIBUTE_UNUSED)
2393 rtx def_se = *slot;
2395 gcc_assert (def_se && INSN_P (def_se));
2396 print_rtl_single (dump_file, def_se);
2398 return 1;
2401 /* Function called by note_uses to replace used subexpressions.
2403 X is a pointer to the subexpression and DATA is a pointer to a
2404 see_replace_data structure that contains the data for the replacement. */
2406 static void
2407 see_replace_src (rtx *x, void *data)
2409 struct see_replace_data *d
2410 = (struct see_replace_data *) data;
2412 *x = replace_rtx (*x, d->from, d->to);
2416 /* At this point the pattern is expected to be:
2418 ref: set (dest_reg) (rhs)
2419 def_se: set (dest_extension_reg) (sign/zero_extend (source_extension_reg))
2421 The merge of these two instructions didn't succeed.
2423 We try to generate the pattern:
2424 set (subreg (dest_extension_reg)) (rhs)
2426 We do this in 4 steps:
2427 a. Replace every use of dest_reg with a new preudo register.
2428 b. Replace every instance of dest_reg with the subreg.
2429 c. Replace every use of the new pseudo register back to dest_reg.
2430 d. Try to recognize and simplify.
2432 If the manipulation failed, leave the original ref but try to generate and
2433 recognize a simple move instruction:
2434 set (subreg (dest_extension_reg)) (dest_reg)
2435 This move instruction will be emitted right after the ref to the instruction
2436 stream and assure the correctness of the code after def_se will be removed.
2438 CURR_REF_S is the current reference.
2439 DEF_SE is the extension that couldn't be merged. */
2441 static void
2442 see_def_extension_not_merged (struct see_ref_s *curr_ref_s, rtx def_se)
2444 struct see_replace_data d;
2445 /* If the original insn was already merged with an extension before,
2446 take the merged one. */
2447 rtx ref = (curr_ref_s->merged_insn) ? curr_ref_s->merged_insn :
2448 curr_ref_s->insn;
2449 rtx merged_ref_next = (curr_ref_s->merged_insn) ?
2450 NEXT_INSN (curr_ref_s->merged_insn): NULL_RTX;
2451 rtx ref_copy = copy_rtx (ref);
2452 rtx dest_reg = NULL;
2453 rtx dest_real_reg = NULL;
2454 rtx source_extension_reg = see_get_extension_reg (def_se, 0);
2455 rtx dest_extension_reg = see_get_extension_reg (def_se, 1);
2456 rtx new_pseudo_reg = NULL;
2457 rtx subreg = NULL;
2458 rtx move_insn = NULL;
2459 rtx set = NULL;
2460 rtx rhs = NULL;
2461 enum machine_mode source_extension_mode = GET_MODE (source_extension_reg);
2462 enum machine_mode dest_mode;
2464 set = single_set (def_se);
2465 gcc_assert (set);
2466 rhs = SET_SRC (set);
2467 gcc_assert ((GET_CODE (rhs) == SIGN_EXTEND)
2468 || (GET_CODE (rhs) == ZERO_EXTEND));
2469 dest_reg = XEXP (rhs, 0);
2470 gcc_assert (REG_P (dest_reg)
2471 || ((GET_CODE (dest_reg) == SUBREG)
2472 && REG_P (SUBREG_REG (dest_reg))));
2473 dest_real_reg = REG_P (dest_reg) ? dest_reg : SUBREG_REG (dest_reg);
2474 dest_mode = GET_MODE (dest_reg);
2476 subreg = gen_lowpart_SUBREG (dest_mode, dest_extension_reg);
2477 new_pseudo_reg = gen_reg_rtx (source_extension_mode);
2479 /* Step a: Replace every use of dest_real_reg with a new preudo register. */
2480 d.from = dest_real_reg;
2481 d.to = new_pseudo_reg;
2482 note_uses (&PATTERN (ref_copy), see_replace_src, &d);
2483 /* Step b: Replace every instance of dest_reg with the subreg. */
2484 ref_copy = replace_rtx (ref_copy, dest_reg, subreg);
2486 /* Step c: Replace every use of the new pseudo register back to
2487 dest_real_reg. */
2488 d.from = new_pseudo_reg;
2489 d.to = dest_real_reg;
2490 note_uses (&PATTERN (ref_copy), see_replace_src, &d);
2492 if (rtx_equal_p (PATTERN (ref), PATTERN (ref_copy))
2493 || insn_invalid_p (ref_copy))
2495 /* The manipulation failed. */
2497 /* Create a new copy. */
2498 ref_copy = copy_rtx (ref);
2500 /* Create a simple move instruction that will replace the def_se. */
2501 start_sequence ();
2502 emit_move_insn (subreg, dest_reg);
2503 move_insn = get_insns ();
2504 end_sequence ();
2506 /* Link the manipulated instruction to the newly created move instruction
2507 and to the former created move instructions. */
2508 PREV_INSN (ref_copy) = NULL_RTX;
2509 NEXT_INSN (ref_copy) = move_insn;
2510 PREV_INSN (move_insn) = ref_copy;
2511 NEXT_INSN (move_insn) = merged_ref_next;
2512 if (merged_ref_next != NULL_RTX)
2513 PREV_INSN (merged_ref_next) = move_insn;
2514 curr_ref_s->merged_insn = ref_copy;
2516 if (dump_file)
2518 fprintf (dump_file, "Following def merge failure a move ");
2519 fprintf (dump_file, "insn was added after the ref.\n");
2520 fprintf (dump_file, "Original ref:\n");
2521 print_rtl_single (dump_file, ref);
2522 fprintf (dump_file, "Move insn that was added:\n");
2523 print_rtl_single (dump_file, move_insn);
2525 return;
2528 /* The manipulation succeeded. Store the new manipulated reference. */
2530 /* Try to simplify the new manipulated insn. */
2531 validate_simplify_insn (ref_copy);
2533 /* Create a simple move instruction to assure the correctness of the code. */
2534 start_sequence ();
2535 emit_move_insn (dest_reg, subreg);
2536 move_insn = get_insns ();
2537 end_sequence ();
2539 /* Link the manipulated instruction to the newly created move instruction and
2540 to the former created move instructions. */
2541 PREV_INSN (ref_copy) = NULL_RTX;
2542 NEXT_INSN (ref_copy) = move_insn;
2543 PREV_INSN (move_insn) = ref_copy;
2544 NEXT_INSN (move_insn) = merged_ref_next;
2545 if (merged_ref_next != NULL_RTX)
2546 PREV_INSN (merged_ref_next) = move_insn;
2547 curr_ref_s->merged_insn = ref_copy;
2549 if (dump_file)
2551 fprintf (dump_file, "Following merge failure the ref was transformed!\n");
2552 fprintf (dump_file, "Original ref:\n");
2553 print_rtl_single (dump_file, ref);
2554 fprintf (dump_file, "Transformed ref:\n");
2555 print_rtl_single (dump_file, ref_copy);
2556 fprintf (dump_file, "Move insn that was added:\n");
2557 print_rtl_single (dump_file, move_insn);
2562 /* Merge the reference instruction (ref) with the current use extension.
2564 use_se extends a NARROWmode register to a WIDEmode register.
2565 ref uses the WIDEmode register.
2567 The pattern we try to merge is this:
2568 use_se: set (dest_extension_reg) (sign/zero_extend (source_extension_reg))
2569 ref: use (dest_extension_reg)
2571 where dest_extension_reg and source_extension_reg can be subregs.
2573 The merge is done by generating, simplifying and recognizing the pattern:
2574 use (sign/zero_extend (source_extension_reg))
2576 If ref is too simple (according to see_want_to_be_merged_with_extension ())
2577 we don't try to merge it with use_se and we continue as if the merge failed.
2579 This is a subroutine of see_handle_extensions_for_one_ref called
2580 via htab_traverse.
2581 SLOT contains the current use extension instruction.
2582 B is the see_ref_s structure pointer. */
2584 static int
2585 see_merge_one_use_extension (void **slot, void *b)
2587 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b;
2588 rtx use_se = *slot;
2589 rtx ref = (curr_ref_s->merged_insn) ? curr_ref_s->merged_insn :
2590 curr_ref_s->insn;
2591 rtx merged_ref_next = (curr_ref_s->merged_insn) ?
2592 NEXT_INSN (curr_ref_s->merged_insn): NULL_RTX;
2593 rtx ref_copy = copy_rtx (ref);
2594 rtx extension_set = single_set (use_se);
2595 rtx extension_rhs = NULL;
2596 rtx dest_extension_reg = see_get_extension_reg (use_se, 1);
2597 rtx note = NULL;
2598 rtx simplified_note = NULL;
2600 gcc_assert (use_se && curr_ref_s && extension_set);
2602 extension_rhs = SET_SRC (extension_set);
2604 /* In REG_EQUIV and REG_EQUAL notes that mention the register we need to
2605 replace the uses of the dest_extension_reg with the rhs of the extension
2606 instruction. This is necessary since there might not be an extension in
2607 the path between the definition and the note when this optimization is
2608 over. */
2609 note = find_reg_equal_equiv_note (ref_copy);
2610 if (note)
2612 simplified_note = simplify_replace_rtx (XEXP (note, 0),
2613 dest_extension_reg,
2614 extension_rhs);
2615 if (rtx_equal_p (XEXP (note, 0), simplified_note))
2616 /* Replacement failed. Remove the note. */
2617 remove_note (ref_copy, note);
2618 else
2619 XEXP (note, 0) = simplified_note;
2622 if (!see_want_to_be_merged_with_extension (ref, use_se, USE_EXTENSION))
2624 /* The use in the reference is too simple. Don't try to merge. */
2625 if (dump_file)
2627 fprintf (dump_file, "Use merge skipped!\n");
2628 fprintf (dump_file, "Original instructions:\n");
2629 print_rtl_single (dump_file, use_se);
2630 print_rtl_single (dump_file, ref);
2632 /* Don't remove the current use_se from the use_se_hash and continue to
2633 the next extension. */
2634 return 1;
2637 validate_replace_src_group (dest_extension_reg, extension_rhs, ref_copy);
2639 if (!num_changes_pending ())
2640 /* In this case this is not a real use (the only use is/was in the notes
2641 list). Remove the use extension from the hash. This will prevent it
2642 from been emitted in the first place. */
2644 if (dump_file)
2646 fprintf (dump_file, "Use extension not necessary before:\n");
2647 print_rtl_single (dump_file, ref);
2649 htab_clear_slot (curr_ref_s->use_se_hash, (PTR *)slot);
2650 PREV_INSN (ref_copy) = NULL_RTX;
2651 NEXT_INSN (ref_copy) = merged_ref_next;
2652 if (merged_ref_next != NULL_RTX)
2653 PREV_INSN (merged_ref_next) = ref_copy;
2654 curr_ref_s->merged_insn = ref_copy;
2655 return 1;
2658 if (!apply_change_group ())
2660 /* The merge failed. */
2661 if (dump_file)
2663 fprintf (dump_file, "Use merge failed!\n");
2664 fprintf (dump_file, "Original instructions:\n");
2665 print_rtl_single (dump_file, use_se);
2666 print_rtl_single (dump_file, ref);
2668 /* Don't remove the current use_se from the use_se_hash and continue to
2669 the next extension. */
2670 return 1;
2673 /* The merge succeeded! */
2675 /* Try to simplify the new merged insn. */
2676 validate_simplify_insn (ref_copy);
2678 PREV_INSN (ref_copy) = NULL_RTX;
2679 NEXT_INSN (ref_copy) = merged_ref_next;
2680 if (merged_ref_next != NULL_RTX)
2681 PREV_INSN (merged_ref_next) = ref_copy;
2682 curr_ref_s->merged_insn = ref_copy;
2684 if (dump_file)
2686 fprintf (dump_file, "Use merge succeeded!\n");
2687 fprintf (dump_file, "Original instructions:\n");
2688 print_rtl_single (dump_file, use_se);
2689 print_rtl_single (dump_file, ref);
2690 fprintf (dump_file, "Merged instruction:\n");
2691 print_rtl_single (dump_file, ref_copy);
2694 /* Remove the current use_se from the use_se_hash. This will prevent it from
2695 been emitted in the first place. */
2696 htab_clear_slot (curr_ref_s->use_se_hash, (PTR *)slot);
2697 return 1;
2701 /* Merge the reference instruction (ref) with the extension that follows it
2702 in the same basic block (def_se).
2703 ref sets a NARROWmode register and def_se extends it to WIDEmode register.
2705 The pattern we try to merge is this:
2706 ref: set (dest_reg) (rhs)
2707 def_se: set (dest_extension_reg) (sign/zero_extend (source_extension_reg))
2709 where dest_reg and source_extension_reg can both be subregs (togather)
2710 and (REGNO (dest_reg) == REGNO (source_extension_reg))
2712 The merge is done by generating, simplifying and recognizing the pattern:
2713 set (dest_extension_reg) (sign/zero_extend (rhs))
2714 If ref is a parallel instruction we just replace the relevant set in it.
2716 If ref is too simple (according to see_want_to_be_merged_with_extension ())
2717 we don't try to merge it with def_se and we continue as if the merge failed.
2719 This is a subroutine of see_handle_extensions_for_one_ref called
2720 via htab_traverse.
2722 SLOT contains the current def extension instruction.
2723 B is the see_ref_s structure pointer. */
2725 static int
2726 see_merge_one_def_extension (void **slot, void *b)
2728 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b;
2729 rtx def_se = *slot;
2730 /* If the original insn was already merged with an extension before,
2731 take the merged one. */
2732 rtx ref = (curr_ref_s->merged_insn) ? curr_ref_s->merged_insn :
2733 curr_ref_s->insn;
2734 rtx merged_ref_next = (curr_ref_s->merged_insn) ?
2735 NEXT_INSN (curr_ref_s->merged_insn): NULL_RTX;
2736 rtx ref_copy = copy_rtx (ref);
2737 rtx new_set = NULL;
2738 rtx source_extension_reg = see_get_extension_reg (def_se, 0);
2739 rtx dest_extension_reg = see_get_extension_reg (def_se, 1);
2740 rtx move_insn = NULL;
2741 rtx *rtx_slot = NULL;
2742 rtx subreg = NULL;
2743 rtx temp_extension = NULL;
2744 rtx simplified_temp_extension = NULL;
2745 rtx *pat = NULL;
2746 enum rtx_code code;
2747 enum rtx_code extension_code;
2748 enum machine_mode source_extension_mode;
2749 enum machine_mode source_mode;
2750 enum machine_mode dest_extension_mode;
2751 bool merge_success = false;
2752 int i;
2754 gcc_assert (def_se
2755 && INSN_P (def_se)
2756 && curr_ref_s
2757 && ref
2758 && INSN_P (ref));
2760 if (!see_want_to_be_merged_with_extension (ref, def_se, DEF_EXTENSION))
2762 /* The definition in the reference is too simple. Don't try to merge. */
2763 if (dump_file)
2765 fprintf (dump_file, "Def merge skipped!\n");
2766 fprintf (dump_file, "Original instructions:\n");
2767 print_rtl_single (dump_file, ref);
2768 print_rtl_single (dump_file, def_se);
2771 see_def_extension_not_merged (curr_ref_s, def_se);
2772 /* Continue to the next extension. */
2773 return 1;
2776 extension_code = see_get_extension_data (def_se, &source_mode);
2778 /* Try to merge and simplify the extension. */
2779 source_extension_mode = GET_MODE (source_extension_reg);
2780 dest_extension_mode = GET_MODE (dest_extension_reg);
2782 pat = &PATTERN (ref_copy);
2783 code = GET_CODE (*pat);
2785 if (code == PARALLEL)
2787 bool need_to_apply_change = false;
2789 for (i = 0; i < XVECLEN (*pat, 0); i++)
2791 rtx *sub = &XVECEXP (*pat, 0, i);
2793 if ((GET_CODE (*sub) == SET)
2794 && (GET_MODE (SET_SRC (*sub)) != VOIDmode)
2795 && (GET_MODE (SET_DEST (*sub)) == source_mode)
2796 && ((REG_P (SET_DEST (*sub))
2797 && (REGNO (SET_DEST (*sub)) == REGNO (source_extension_reg)))
2798 || ((GET_CODE (SET_DEST (*sub)) == SUBREG)
2799 && (REG_P (SUBREG_REG (SET_DEST (*sub))))
2800 && (REGNO (SUBREG_REG (SET_DEST (*sub))) ==
2801 REGNO (source_extension_reg)))))
2803 rtx orig_src = SET_SRC (*sub);
2805 if (extension_code == SIGN_EXTEND)
2806 temp_extension = gen_rtx_SIGN_EXTEND (dest_extension_mode,
2807 orig_src);
2808 else
2809 temp_extension = gen_rtx_ZERO_EXTEND (dest_extension_mode,
2810 orig_src);
2811 simplified_temp_extension = simplify_rtx (temp_extension);
2812 temp_extension =
2813 (simplified_temp_extension) ? simplified_temp_extension :
2814 temp_extension;
2815 new_set = gen_rtx_SET (VOIDmode, dest_extension_reg,
2816 temp_extension);
2817 validate_change (ref_copy, sub, new_set, 1);
2818 need_to_apply_change = true;
2821 if (need_to_apply_change)
2822 if (apply_change_group ())
2823 merge_success = true;
2825 else if ((code == SET)
2826 && (GET_MODE (SET_SRC (*pat)) != VOIDmode)
2827 && (GET_MODE (SET_DEST (*pat)) == source_mode)
2828 && ((REG_P (SET_DEST (*pat))
2829 && REGNO (SET_DEST (*pat)) == REGNO (source_extension_reg))
2830 || ((GET_CODE (SET_DEST (*pat)) == SUBREG)
2831 && (REG_P (SUBREG_REG (SET_DEST (*pat))))
2832 && (REGNO (SUBREG_REG (SET_DEST (*pat))) ==
2833 REGNO (source_extension_reg)))))
2835 rtx orig_src = SET_SRC (*pat);
2837 if (extension_code == SIGN_EXTEND)
2838 temp_extension = gen_rtx_SIGN_EXTEND (dest_extension_mode, orig_src);
2839 else
2840 temp_extension = gen_rtx_ZERO_EXTEND (dest_extension_mode, orig_src);
2841 simplified_temp_extension = simplify_rtx (temp_extension);
2842 temp_extension = (simplified_temp_extension) ? simplified_temp_extension :
2843 temp_extension;
2844 new_set = gen_rtx_SET (VOIDmode, dest_extension_reg, temp_extension);
2845 if (validate_change (ref_copy, pat, new_set, 0))
2846 merge_success = true;
2848 if (!merge_success)
2850 /* The merge failed. */
2851 if (dump_file)
2853 fprintf (dump_file, "Def merge failed!\n");
2854 fprintf (dump_file, "Original instructions:\n");
2855 print_rtl_single (dump_file, ref);
2856 print_rtl_single (dump_file, def_se);
2859 see_def_extension_not_merged (curr_ref_s, def_se);
2860 /* Continue to the next extension. */
2861 return 1;
2864 /* The merge succeeded! */
2866 /* Create a simple move instruction to assure the correctness of the code. */
2867 subreg = gen_lowpart_SUBREG (source_extension_mode, dest_extension_reg);
2868 start_sequence ();
2869 emit_move_insn (source_extension_reg, subreg);
2870 move_insn = get_insns ();
2871 end_sequence ();
2873 /* Link the merged instruction to the newly created move instruction and
2874 to the former created move instructions. */
2875 PREV_INSN (ref_copy) = NULL_RTX;
2876 NEXT_INSN (ref_copy) = move_insn;
2877 PREV_INSN (move_insn) = ref_copy;
2878 NEXT_INSN (move_insn) = merged_ref_next;
2879 if (merged_ref_next != NULL_RTX)
2880 PREV_INSN (merged_ref_next) = move_insn;
2881 curr_ref_s->merged_insn = ref_copy;
2883 if (dump_file)
2885 fprintf (dump_file, "Def merge succeeded!\n");
2886 fprintf (dump_file, "Original instructions:\n");
2887 print_rtl_single (dump_file, ref);
2888 print_rtl_single (dump_file, def_se);
2889 fprintf (dump_file, "Merged instruction:\n");
2890 print_rtl_single (dump_file, ref_copy);
2891 fprintf (dump_file, "Move instruction that was added:\n");
2892 print_rtl_single (dump_file, move_insn);
2895 /* Remove the current def_se from the unmerged_def_se_hash and insert it to
2896 the merged_def_se_hash. */
2897 htab_clear_slot (curr_ref_s->unmerged_def_se_hash, (PTR *)slot);
2898 if (!curr_ref_s->merged_def_se_hash)
2899 curr_ref_s->merged_def_se_hash =
2900 htab_create (10, hash_descriptor_extension, eq_descriptor_extension,
2901 NULL);
2902 rtx_slot = (rtx *) htab_find_slot (curr_ref_s->merged_def_se_hash,
2903 dest_extension_reg, INSERT);
2904 gcc_assert (*rtx_slot == NULL);
2905 *rtx_slot = def_se;
2907 return 1;
2911 /* Try to eliminate extensions in this order:
2912 a. Try to merge only the def extensions, one by one.
2913 b. Try to merge only the use extensions, one by one.
2915 TODO:
2916 Try to merge any couple of use extensions simultaneously.
2917 Try to merge any def extension with one or two uses extensions
2918 simultaneously.
2920 After all the merges are done, update the register properties for the basic
2921 block and eliminate locally redundant use extensions.
2923 This is a subroutine of see_merge_and_eliminate_extensions called
2924 via splay_tree_foreach.
2925 STN is the current node in the see_bb_splay_ar[i] splay tree. It holds a
2926 see_ref_s structure. */
2928 static int
2929 see_handle_extensions_for_one_ref (splay_tree_node stn,
2930 void *data ATTRIBUTE_UNUSED)
2932 htab_t use_se_hash = ((struct see_ref_s *) (stn->value))->use_se_hash;
2933 htab_t unmerged_def_se_hash =
2934 ((struct see_ref_s *) (stn->value))->unmerged_def_se_hash;
2935 htab_t merged_def_se_hash = NULL;
2936 rtx ref = ((struct see_ref_s *) (stn->value))->insn;
2938 if (dump_file)
2940 fprintf (dump_file, "Handling ref:\n");
2941 print_rtl_single (dump_file, ref);
2944 /* a. Try to eliminate only def extensions, one by one. */
2945 if (unmerged_def_se_hash)
2946 htab_traverse_noresize (unmerged_def_se_hash, see_merge_one_def_extension,
2947 (PTR) (stn->value));
2949 if (use_se_hash)
2950 /* b. Try to eliminate only use extensions, one by one. */
2951 htab_traverse_noresize (use_se_hash, see_merge_one_use_extension,
2952 (PTR) (stn->value));
2954 merged_def_se_hash = ((struct see_ref_s *) (stn->value))->merged_def_se_hash;
2956 if (dump_file)
2958 fprintf (dump_file, "The hashes of the current reference:\n");
2959 if (unmerged_def_se_hash)
2961 fprintf (dump_file, "unmerged_def_se_hash:\n");
2962 htab_traverse (unmerged_def_se_hash, see_print_one_extension, NULL);
2964 if (merged_def_se_hash)
2966 fprintf (dump_file, "merged_def_se_hash:\n");
2967 htab_traverse (merged_def_se_hash, see_print_one_extension, NULL);
2969 if (use_se_hash)
2971 fprintf (dump_file, "use_se_hash:\n");
2972 htab_traverse (use_se_hash, see_print_one_extension, NULL);
2976 /* Now that all the merges are done, update the register properties of the
2977 basic block and eliminate locally redundant extensions.
2978 It is important that we first traverse the use extensions hash and
2979 afterwards the def extensions hashes. */
2981 if (use_se_hash)
2982 htab_traverse_noresize (use_se_hash, see_set_prop_unmerged_use,
2983 (PTR) (stn->value));
2985 if (unmerged_def_se_hash)
2986 htab_traverse (unmerged_def_se_hash, see_set_prop_unmerged_def,
2987 (PTR) (stn->value));
2989 if (merged_def_se_hash)
2990 htab_traverse (merged_def_se_hash, see_set_prop_merged_def,
2991 (PTR) (stn->value));
2993 /* Continue to the next definition. */
2994 return 0;
2998 /* Phase 2 top level function.
2999 In this phase, we try to merge def extensions and use extensions with their
3000 references, and eliminate redundant extensions in the same basic block.
3001 We also gather information for the next phases. */
3003 static void
3004 see_merge_and_eliminate_extensions (void)
3006 int i = 0;
3008 if (dump_file)
3009 fprintf (dump_file,
3010 "* Phase 2: Merge and eliminate locally redundant extensions. *\n");
3012 /* Traverse over all the splay trees of the basic blocks. */
3013 for (i = 0; i < last_bb; i++)
3015 if (see_bb_splay_ar[i])
3017 if (dump_file)
3018 fprintf (dump_file, "Handling references for bb %d\n", i);
3019 /* Traverse over all the references in the basic block in forward
3020 order. */
3021 splay_tree_foreach (see_bb_splay_ar[i],
3022 see_handle_extensions_for_one_ref, NULL);
3028 /* Phase 1 implementation: Propagate extensions to uses. */
3030 /* Insert REF_INSN into the splay tree of its basic block.
3031 SE_INSN is the extension to store in the proper hash according to TYPE.
3033 Return true if everything went well.
3034 Otherwise, return false (this will cause the optimization to be aborted). */
3036 static bool
3037 see_store_reference_and_extension (rtx ref_insn, rtx se_insn,
3038 enum extension_type type)
3040 rtx *rtx_slot = NULL;
3041 int curr_bb_num;
3042 splay_tree_node stn = NULL;
3043 htab_t se_hash = NULL;
3044 struct see_ref_s *ref_s = NULL;
3046 /* Check the arguments. */
3047 gcc_assert (ref_insn && se_insn);
3048 if (!see_bb_splay_ar)
3049 return false;
3051 curr_bb_num = BLOCK_NUM (ref_insn);
3052 gcc_assert ((curr_bb_num < last_bb) && (curr_bb_num >= 0));
3054 /* Insert the reference to the splay tree of its basic block. */
3055 if (!see_bb_splay_ar[curr_bb_num])
3056 /* The splay tree for this block doesn't exist yet, create it. */
3057 see_bb_splay_ar[curr_bb_num] = splay_tree_new (splay_tree_compare_ints,
3058 NULL, see_free_ref_s);
3059 else
3060 /* Splay tree already exists, check if the current reference is already
3061 in it. */
3063 stn = splay_tree_lookup (see_bb_splay_ar[curr_bb_num],
3064 DF_INSN_LUID (df, ref_insn));
3065 if (stn)
3067 switch (type)
3069 case EXPLICIT_DEF_EXTENSION:
3070 se_hash =
3071 ((struct see_ref_s *) (stn->value))->unmerged_def_se_hash;
3072 if (!se_hash)
3074 se_hash = htab_create (10, hash_descriptor_extension,
3075 eq_descriptor_extension, NULL);
3076 ((struct see_ref_s *) (stn->value))->unmerged_def_se_hash =
3077 se_hash;
3079 break;
3080 case IMPLICIT_DEF_EXTENSION:
3081 se_hash = ((struct see_ref_s *) (stn->value))->merged_def_se_hash;
3082 if (!se_hash)
3084 se_hash = htab_create (10, hash_descriptor_extension,
3085 eq_descriptor_extension, NULL);
3086 ((struct see_ref_s *) (stn->value))->merged_def_se_hash =
3087 se_hash;
3089 break;
3090 case USE_EXTENSION:
3091 se_hash = ((struct see_ref_s *) (stn->value))->use_se_hash;
3092 if (!se_hash)
3094 se_hash = htab_create (10, hash_descriptor_extension,
3095 eq_descriptor_extension, NULL);
3096 ((struct see_ref_s *) (stn->value))->use_se_hash = se_hash;
3098 break;
3099 default:
3100 gcc_unreachable ();
3105 /* Initialize a new see_ref_s structure and insert it to the splay
3106 tree. */
3107 if (!stn)
3109 ref_s = xmalloc (sizeof (struct see_ref_s));
3110 ref_s->luid = DF_INSN_LUID (df, ref_insn);
3111 ref_s->insn = ref_insn;
3112 ref_s->merged_insn = NULL;
3114 /* Initialize the hashes. */
3115 switch (type)
3117 case EXPLICIT_DEF_EXTENSION:
3118 ref_s->unmerged_def_se_hash =
3119 htab_create (10, hash_descriptor_extension, eq_descriptor_extension,
3120 NULL);
3121 se_hash = ref_s->unmerged_def_se_hash;
3122 ref_s->merged_def_se_hash = NULL;
3123 ref_s->use_se_hash = NULL;
3124 break;
3125 case IMPLICIT_DEF_EXTENSION:
3126 ref_s->merged_def_se_hash =
3127 htab_create (10, hash_descriptor_extension, eq_descriptor_extension,
3128 NULL);
3129 se_hash = ref_s->merged_def_se_hash;
3130 ref_s->unmerged_def_se_hash = NULL;
3131 ref_s->use_se_hash = NULL;
3132 break;
3133 case USE_EXTENSION:
3134 ref_s->use_se_hash =
3135 htab_create (10, hash_descriptor_extension, eq_descriptor_extension,
3136 NULL);
3137 se_hash = ref_s->use_se_hash;
3138 ref_s->unmerged_def_se_hash = NULL;
3139 ref_s->merged_def_se_hash = NULL;
3140 break;
3141 default:
3142 gcc_unreachable ();
3146 /* Insert the new extension instruction into the correct se_hash of the
3147 current reference. */
3148 rtx_slot = (rtx *) htab_find_slot (se_hash, se_insn, INSERT);
3149 if (*rtx_slot != NULL)
3151 gcc_assert (type == USE_EXTENSION);
3152 gcc_assert (rtx_equal_p (PATTERN (*rtx_slot), PATTERN (se_insn)));
3154 else
3155 *rtx_slot = se_insn;
3157 /* If this is a new reference, insert it into the splay_tree. */
3158 if (!stn)
3159 splay_tree_insert (see_bb_splay_ar[curr_bb_num],
3160 DF_INSN_LUID (df, ref_insn), (splay_tree_value) ref_s);
3161 return true;
3165 /* Go over all the defs, for each relevant definition (defined below) store its
3166 instruction as a reference.
3168 A definition is relevant if its root has
3169 ((entry_type == SIGN_EXTENDED_DEF) || (entry_type == ZERO_EXTENDED_DEF)) and
3170 his source_mode is not narrower then the the roots source_mode.
3172 Return the number of relevant defs or negative number if something bad had
3173 happened and the optimization should be aborted. */
3175 static int
3176 see_handle_relevant_defs (void)
3178 rtx insn = NULL;
3179 rtx se_insn = NULL;
3180 rtx reg = NULL;
3181 rtx ref_insn = NULL;
3182 struct web_entry *root_entry = NULL;
3183 unsigned int i;
3184 int num_relevant_defs = 0;
3185 enum rtx_code extension_code;
3187 for (i = 0; i < defs_num; i++)
3189 insn = DF_REF_INSN (DF_DEFS_GET (df, i));
3190 reg = DF_REF_REAL_REG (DF_DEFS_GET (df, i));
3192 if (!insn)
3193 continue;
3195 if (!INSN_P (insn))
3196 continue;
3198 root_entry = unionfind_root (&def_entry[i]);
3200 if ((ENTRY_EI (root_entry)->relevancy != SIGN_EXTENDED_DEF)
3201 && (ENTRY_EI (root_entry)->relevancy != ZERO_EXTENDED_DEF))
3202 /* The current web is not relevant. Continue to the next def. */
3203 continue;
3205 if (root_entry->reg)
3206 /* It isn't possible to have two different register for the same
3207 web. */
3208 gcc_assert (rtx_equal_p (root_entry->reg, reg));
3209 else
3210 root_entry->reg = reg;
3212 /* The current definition is an EXTENDED_DEF or a definition that its
3213 source_mode is narrower then its web's source_mode.
3214 This means that we need to generate the implicit extension explicitly
3215 and store it in the current reference's merged_def_se_hash. */
3216 if ((ENTRY_EI (&def_entry[i])->local_relevancy == EXTENDED_DEF)
3217 || (ENTRY_EI (&def_entry[i])->local_source_mode <
3218 ENTRY_EI (root_entry)->source_mode))
3220 num_relevant_defs++;
3222 if (ENTRY_EI (root_entry)->relevancy == SIGN_EXTENDED_DEF)
3223 extension_code = SIGN_EXTEND;
3224 else
3225 extension_code = ZERO_EXTEND;
3227 se_insn =
3228 see_gen_normalized_extension (reg, extension_code,
3229 ENTRY_EI (root_entry)->source_mode);
3231 /* This is a dummy extension, mark it as deleted. */
3232 INSN_DELETED_P (se_insn) = 1;
3234 if (!see_store_reference_and_extension (insn, se_insn,
3235 IMPLICIT_DEF_EXTENSION))
3236 /* Something bad happened. Abort the optimization. */
3237 return -1;
3238 continue;
3241 ref_insn = PREV_INSN (insn);
3242 gcc_assert (BLOCK_NUM (ref_insn) == BLOCK_NUM (insn));
3244 num_relevant_defs++;
3246 if (!see_store_reference_and_extension (ref_insn, insn,
3247 EXPLICIT_DEF_EXTENSION))
3248 /* Something bad happened. Abort the optimization. */
3249 return -1;
3251 return num_relevant_defs;
3255 /* Go over all the uses, for each use in relevant web store its instruction as
3256 a reference and generate an extension before it.
3258 Return the number of relevant uses or negative number if something bad had
3259 happened and the optimization should be aborted. */
3261 static int
3262 see_handle_relevant_uses (void)
3264 rtx insn = NULL;
3265 rtx reg = NULL;
3266 struct web_entry *root_entry = NULL;
3267 rtx se_insn = NULL;
3268 unsigned int i;
3269 int num_relevant_uses = 0;
3270 enum rtx_code extension_code;
3272 for (i = 0; i < uses_num; i++)
3274 insn = DF_REF_INSN (DF_USES_GET (df, i));
3275 reg = DF_REF_REAL_REG (DF_USES_GET (df, i));
3277 if (!insn)
3278 continue;
3280 if (!INSN_P (insn))
3281 continue;
3283 root_entry = unionfind_root (&use_entry[i]);
3285 if ((ENTRY_EI (root_entry)->relevancy != SIGN_EXTENDED_DEF)
3286 && (ENTRY_EI (root_entry)->relevancy != ZERO_EXTENDED_DEF))
3287 /* The current web is not relevant. Continue to the next use. */
3288 continue;
3290 if (root_entry->reg)
3291 /* It isn't possible to have two different register for the same
3292 web. */
3293 gcc_assert (rtx_equal_p (root_entry->reg, reg));
3294 else
3295 root_entry->reg = reg;
3297 /* Generate the use extension. */
3298 if (ENTRY_EI (root_entry)->relevancy == SIGN_EXTENDED_DEF)
3299 extension_code = SIGN_EXTEND;
3300 else
3301 extension_code = ZERO_EXTEND;
3303 se_insn =
3304 see_gen_normalized_extension (reg, extension_code,
3305 ENTRY_EI (root_entry)->source_mode);
3306 if (!se_insn)
3307 /* This is very bad, abort the transformation. */
3308 return -1;
3310 num_relevant_uses++;
3312 if (!see_store_reference_and_extension (insn, se_insn,
3313 USE_EXTENSION))
3314 /* Something bad happened. Abort the optimization. */
3315 return -1;
3318 return num_relevant_uses;
3322 /* Updates the relevancy of all the uses.
3323 The information of the i'th use is stored in use_entry[i].
3324 Currently all the uses are relevant for the optimization except for uses that
3325 are in LIBCALL or RETVAL instructions. */
3327 static void
3328 see_update_uses_relevancy (void)
3330 rtx insn = NULL;
3331 rtx reg = NULL;
3332 struct see_entry_extra_info *curr_entry_extra_info;
3333 enum entry_type et;
3334 unsigned int i;
3336 if (!df || !use_entry)
3337 return;
3339 for (i = 0; i < uses_num; i++)
3342 insn = DF_REF_INSN (DF_USES_GET (df, i));
3343 reg = DF_REF_REAL_REG (DF_USES_GET (df, i));
3345 et = RELEVANT_USE;
3347 if (insn)
3349 if (!INSN_P (insn))
3350 et = NOT_RELEVANT;
3351 if (insn && find_reg_note (insn, REG_LIBCALL, NULL_RTX))
3352 et = NOT_RELEVANT;
3353 if (find_reg_note (insn, REG_RETVAL, NULL_RTX))
3354 et = NOT_RELEVANT;
3356 else
3357 et = NOT_RELEVANT;
3359 if (dump_file)
3361 fprintf (dump_file, "u%i insn %i reg %i ",
3362 i, (insn ? INSN_UID (insn) : -1), REGNO (reg));
3363 if (et == NOT_RELEVANT)
3364 fprintf (dump_file, "NOT RELEVANT \n");
3365 else
3366 fprintf (dump_file, "RELEVANT USE \n");
3369 curr_entry_extra_info = xmalloc (sizeof (struct see_entry_extra_info));
3370 curr_entry_extra_info->relevancy = et;
3371 curr_entry_extra_info->local_relevancy = et;
3372 use_entry[i].extra_info = curr_entry_extra_info;
3373 use_entry[i].reg = NULL;
3374 use_entry[i].pred = NULL;
3379 /* A definition in a candidate for this optimization only if its pattern is
3380 recognized as relevant in this function.
3381 INSN is the instruction to be recognized.
3383 - If this is the pattern of a common sign extension after definition:
3384 PREV_INSN (INSN): def (reg:NARROWmode r)
3385 INSN: set ((reg:WIDEmode r')
3386 (sign_extend:WIDEmode (reg:NARROWmode r)))
3387 return SIGN_EXTENDED_DEF and set SOURCE_MODE to NARROWmode.
3389 - If this is the pattern of a common zero extension after definition:
3390 PREV_INSN (INSN): def (reg:NARROWmode r)
3391 INSN: set ((reg:WIDEmode r')
3392 (zero_extend:WIDEmode (reg:NARROWmode r)))
3393 return ZERO_EXTENDED_DEF and set SOURCE_MODE to NARROWmode.
3395 - Otherwise,
3397 For the pattern:
3398 INSN: set ((reg:WIDEmode r) (sign_extend:WIDEmode (...expr...)))
3399 return EXTENDED_DEF and set SOURCE_MODE to the mode of expr.
3401 For the pattern:
3402 INSN: set ((reg:WIDEmode r) (zero_extend:WIDEmode (...expr...)))
3403 return EXTENDED_DEF and set SOURCE_MODE_UNSIGNED to the mode of expr.
3405 For the pattern:
3406 INSN: set ((reg:WIDEmode r) (CONST_INT (...)))
3407 return EXTENDED_DEF and set SOURCE_MODE(_UNSIGNED) to the narrowest mode that
3408 is implicitly sign(zero) extended to WIDEmode in the INSN.
3410 - FIXME: Extensions that are not adjacent to their definition and EXTENDED_DEF
3411 that is part of a PARALLEL instruction are not handled.
3412 These restriction can be relaxed. */
3414 static enum entry_type
3415 see_analyze_one_def (rtx insn, enum machine_mode *source_mode,
3416 enum machine_mode *source_mode_unsigned)
3418 enum rtx_code extension_code;
3419 rtx rhs = NULL;
3420 rtx lhs = NULL;
3421 rtx set = NULL;
3422 rtx source_register = NULL;
3423 rtx prev_insn = NULL;
3424 rtx next_insn = NULL;
3425 enum machine_mode mode;
3426 enum machine_mode next_source_mode;
3427 HOST_WIDE_INT val = 0;
3428 HOST_WIDE_INT val2 = 0;
3429 int i = 0;
3431 *source_mode = MAX_MACHINE_MODE;
3432 *source_mode_unsigned = MAX_MACHINE_MODE;
3434 if (!insn)
3435 return NOT_RELEVANT;
3437 if (!INSN_P (insn))
3438 return NOT_RELEVANT;
3440 extension_code = see_get_extension_data (insn, source_mode);
3441 switch (extension_code)
3443 case SIGN_EXTEND:
3444 case ZERO_EXTEND:
3445 source_register = see_get_extension_reg (insn, 0);
3446 /* FIXME: This restriction can be relaxed. The only thing that is
3447 important is that the reference would be inside the same basic block
3448 as the extension. */
3449 prev_insn = PREV_INSN (insn);
3450 if (!prev_insn || !INSN_P (prev_insn))
3451 return NOT_RELEVANT;
3453 if (!reg_set_between_p (source_register, PREV_INSN (prev_insn), insn))
3454 return NOT_RELEVANT;
3456 if (find_reg_note (prev_insn, REG_LIBCALL, NULL_RTX))
3457 return NOT_RELEVANT;
3459 if (find_reg_note (prev_insn, REG_RETVAL, NULL_RTX))
3460 return NOT_RELEVANT;
3462 /* If we can't use copy_rtx on the reference it can't be a reference. */
3463 if ((GET_CODE (PATTERN (prev_insn)) == PARALLEL)
3464 && (asm_noperands (PATTERN (prev_insn)) >= 0))
3465 return NOT_RELEVANT;
3467 /* Now, check if this extension is a reference itself. If so, it is not
3468 relevant. Handling this extension as relevant would make things much
3469 more complicated. */
3470 next_insn = NEXT_INSN (insn);
3471 if (prev_insn
3472 && INSN_P (prev_insn)
3473 && (see_get_extension_data (next_insn, &next_source_mode) !=
3474 NOT_RELEVANT))
3476 rtx curr_dest_register = see_get_extension_reg (insn, 1);
3477 rtx next_source_register = see_get_extension_reg (next_insn, 0);
3479 if (REGNO (curr_dest_register) == REGNO (next_source_register))
3480 return NOT_RELEVANT;
3483 if (extension_code == SIGN_EXTEND)
3484 return SIGN_EXTENDED_DEF;
3485 else
3486 return ZERO_EXTENDED_DEF;
3488 case UNKNOWN:
3489 /* This may still be an EXTENDED_DEF. */
3491 /* FIXME: This restriction can be relaxed. It is possible to handle
3492 PARALLEL insns too. */
3493 set = single_set (insn);
3494 if (!set)
3495 return NOT_RELEVANT;
3496 rhs = SET_SRC (set);
3497 lhs = SET_DEST (set);
3499 /* Don't handle extensions to something other then register or
3500 subregister. */
3501 if (!REG_P (lhs) && !SUBREG_REG (lhs))
3502 return NOT_RELEVANT;
3504 switch (GET_CODE (rhs))
3506 case (SIGN_EXTEND):
3507 *source_mode = GET_MODE (XEXP (rhs, 0));
3508 *source_mode_unsigned = MAX_MACHINE_MODE;
3509 return EXTENDED_DEF;
3510 case (ZERO_EXTEND):
3511 *source_mode = MAX_MACHINE_MODE;
3512 *source_mode_unsigned = GET_MODE (XEXP (rhs, 0));
3513 return EXTENDED_DEF;
3514 case (CONST_INT):
3516 val = INTVAL (rhs);
3518 /* Find the narrowest mode, val could fit into. */
3519 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT), i = 0;
3520 GET_MODE_BITSIZE (mode) < BITS_PER_WORD;
3521 mode = GET_MODE_WIDER_MODE (mode), i++)
3523 val2 = trunc_int_for_mode (val, mode);
3524 if ((val2 == val) && (*source_mode == MAX_MACHINE_MODE))
3525 *source_mode = mode;
3526 if ((val == (val & (HOST_WIDE_INT)GET_MODE_MASK (mode)))
3527 && (*source_mode_unsigned == MAX_MACHINE_MODE))
3528 *source_mode_unsigned = mode;
3529 if ((*source_mode != MAX_MACHINE_MODE)
3530 && (*source_mode_unsigned !=MAX_MACHINE_MODE))
3531 return EXTENDED_DEF;
3533 if ((*source_mode != MAX_MACHINE_MODE)
3534 || (*source_mode_unsigned !=MAX_MACHINE_MODE))
3535 return EXTENDED_DEF;
3536 return NOT_RELEVANT;
3537 default:
3538 return NOT_RELEVANT;
3540 default:
3541 gcc_unreachable ();
3546 /* Updates the relevancy and source_mode of all the definitions.
3547 The information of the i'th definition is stored in def_entry[i]. */
3549 static void
3550 see_update_defs_relevancy (void)
3552 struct see_entry_extra_info *curr_entry_extra_info;
3553 unsigned int i;
3554 rtx insn = NULL;
3555 rtx reg = NULL;
3556 enum entry_type et;
3557 enum machine_mode source_mode;
3558 enum machine_mode source_mode_unsigned;
3560 if (!df || !def_entry)
3561 return;
3563 for (i = 0; i < defs_num; i++)
3565 insn = DF_REF_INSN (DF_DEFS_GET (df, i));
3566 reg = DF_REF_REAL_REG (DF_DEFS_GET (df, i));
3568 et = see_analyze_one_def (insn, &source_mode, &source_mode_unsigned);
3570 curr_entry_extra_info = xmalloc (sizeof (struct see_entry_extra_info));
3571 curr_entry_extra_info->relevancy = et;
3572 curr_entry_extra_info->local_relevancy = et;
3573 if (et != EXTENDED_DEF)
3575 curr_entry_extra_info->source_mode = source_mode;
3576 curr_entry_extra_info->local_source_mode = source_mode;
3578 else
3580 curr_entry_extra_info->source_mode_signed = source_mode;
3581 curr_entry_extra_info->source_mode_unsigned = source_mode_unsigned;
3583 def_entry[i].extra_info = curr_entry_extra_info;
3584 def_entry[i].reg = NULL;
3585 def_entry[i].pred = NULL;
3587 if (dump_file)
3589 if (et == NOT_RELEVANT)
3591 fprintf (dump_file, "d%i insn %i reg %i ",
3592 i, (insn ? INSN_UID (insn) : -1), REGNO (reg));
3593 fprintf (dump_file, "NOT RELEVANT \n");
3595 else
3597 fprintf (dump_file, "d%i insn %i reg %i ",
3598 i ,INSN_UID (insn), REGNO (reg));
3599 fprintf (dump_file, "RELEVANT - ");
3600 switch (et)
3602 case SIGN_EXTENDED_DEF :
3603 fprintf (dump_file, "SIGN_EXTENDED_DEF, source_mode = %s\n",
3604 GET_MODE_NAME (source_mode));
3605 break;
3606 case ZERO_EXTENDED_DEF :
3607 fprintf (dump_file, "ZERO_EXTENDED_DEF, source_mode = %s\n",
3608 GET_MODE_NAME (source_mode));
3609 break;
3610 case EXTENDED_DEF :
3611 fprintf (dump_file, "EXTENDED_DEF, ");
3612 if ((source_mode != MAX_MACHINE_MODE)
3613 && (source_mode_unsigned != MAX_MACHINE_MODE))
3615 fprintf (dump_file, "positive const, ");
3616 fprintf (dump_file, "source_mode_signed = %s, ",
3617 GET_MODE_NAME (source_mode));
3618 fprintf (dump_file, "source_mode_unsigned = %s\n",
3619 GET_MODE_NAME (source_mode_unsigned));
3621 else if (source_mode != MAX_MACHINE_MODE)
3622 fprintf (dump_file, "source_mode_signed = %s\n",
3623 GET_MODE_NAME (source_mode));
3624 else
3625 fprintf (dump_file, "source_mode_unsigned = %s\n",
3626 GET_MODE_NAME (source_mode_unsigned));
3627 break;
3628 default :
3629 gcc_unreachable ();
3637 /* Phase 1 top level function.
3638 In this phase the relevancy of all the definitions and uses are checked,
3639 later the webs are produces and the extensions are generated.
3640 These extensions are not emitted yet into the insns stream.
3642 returns true if at list one relevant web was found and there were no
3643 problems, otherwise return false. */
3645 static bool
3646 see_propagate_extensions_to_uses (void)
3648 unsigned int i = 0;
3649 int num_relevant_uses;
3650 int num_relevant_defs;
3652 if (dump_file)
3653 fprintf (dump_file,
3654 "* Phase 1: Propagate extensions to uses. *\n");
3656 /* Update the relevancy of references using the DF object. */
3657 see_update_defs_relevancy ();
3658 see_update_uses_relevancy ();
3660 /* Produce the webs and update the extra_info of the root.
3661 In general, a web is relevant if all its definitions and uses are relevant
3662 and there is at least one definition that was marked as SIGN_EXTENDED_DEF
3663 or ZERO_EXTENDED_DEF. */
3664 for (i = 0; i < uses_num; i++)
3666 union_defs (df, DF_USES_GET (df, i), def_entry, use_entry,
3667 see_update_leader_extra_info);
3670 /* Generate use extensions for references and insert these
3671 references to see_bb_splay_ar data structure. */
3672 num_relevant_uses = see_handle_relevant_uses ();
3674 if (num_relevant_uses < 0)
3675 return false;
3677 /* Store the def extensions in their references structures and insert these
3678 references to see_bb_splay_ar data structure. */
3679 num_relevant_defs = see_handle_relevant_defs ();
3681 if (num_relevant_defs < 0)
3682 return false;
3684 return ((num_relevant_uses > 0) || (num_relevant_defs > 0));
3688 /* Main entry point for the sign extension elimination optimization. */
3690 void
3691 see_main (void)
3693 bool cont = false;
3694 int i = 0;
3696 /* Initialize global data structures. */
3697 see_initialize_data_structures ();
3699 /* Phase 1: Propagate extensions to uses. */
3700 cont = see_propagate_extensions_to_uses ();
3702 if (cont)
3704 init_recog ();
3706 /* Phase 2: Merge and eliminate locally redundant extensions. */
3707 see_merge_and_eliminate_extensions ();
3709 /* Phase 3: Eliminate globally redundant extensions. */
3710 see_execute_LCM ();
3712 /* Phase 4: Commit changes to the insn stream. */
3713 see_commit_changes ();
3715 if (dump_file)
3717 /* For debug purpose only. */
3718 fprintf (dump_file, "see_pre_extension_hash:\n");
3719 htab_traverse (see_pre_extension_hash, see_print_pre_extension_expr,
3720 NULL);
3722 for (i = 0; i < last_bb; i++)
3724 if (see_bb_hash_ar[i])
3725 /* Traverse over all the references in the basic block in
3726 forward order. */
3728 fprintf (dump_file,
3729 "Searching register properties in bb %d\n", i);
3730 htab_traverse (see_bb_hash_ar[i],
3731 see_print_register_properties, NULL);
3737 /* Free global data structures. */
3738 see_free_data_structures ();
3742 static bool
3743 gate_handle_see (void)
3745 return ((optimize > 1) && flag_see);
3748 static unsigned int
3749 rest_of_handle_see (void)
3751 int no_new_pseudos_bcp = no_new_pseudos;
3753 no_new_pseudos = 0;
3754 see_main ();
3755 no_new_pseudos = no_new_pseudos_bcp;
3757 delete_trivially_dead_insns (get_insns (), max_reg_num ());
3758 update_life_info_in_dirty_blocks (UPDATE_LIFE_GLOBAL_RM_NOTES,
3759 (PROP_DEATH_NOTES));
3760 cleanup_cfg (CLEANUP_EXPENSIVE);
3761 reg_scan (get_insns (), max_reg_num ());
3763 return 0;
3766 struct tree_opt_pass pass_see =
3768 "see", /* name */
3769 gate_handle_see, /* gate */
3770 rest_of_handle_see, /* execute */
3771 NULL, /* sub */
3772 NULL, /* next */
3773 0, /* static_pass_number */
3774 TV_SEE, /* tv_id */
3775 0, /* properties_required */
3776 0, /* properties_provided */
3777 0, /* properties_destroyed */
3778 0, /* todo_flags_start */
3779 TODO_dump_func, /* todo_flags_finish */
3780 'u' /* letter */