Add files that I missed when importing NaCl changes earlier
[gcc/nacl-gcc.git] / gcc / see.c
blob2bfd9fadc3c967b3b9f8e99d787f1898ee6198f0
1 /* Sign extension elimination optimization for GNU compiler.
2 Copyright (C) 2005, 2007 Free Software Foundation, Inc.
3 Contributed by Leehod Baruch <leehod@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 -Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>.
21 Problem description:
22 --------------------
23 In order to support 32bit computations on a 64bit machine, sign
24 extension instructions are generated to ensure the correctness of
25 the computation.
26 A possible policy (as currently implemented) is to generate a sign
27 extension right after each 32bit computation.
28 Depending on the instruction set of the architecture, some of these
29 sign extension instructions may be redundant.
30 There are two cases in which the extension may be redundant:
32 Case1:
33 The instruction that uses the 64bit operands that are sign
34 extended has a dual mode that works with 32bit operands.
35 For example:
37 int32 a, b;
39 a = .... --> a = ....
40 a = sign extend a -->
41 b = .... --> b = ....
42 b = sign extend a -->
43 -->
44 cmpd a, b --> cmpw a, b //half word compare
46 Case2:
47 The instruction that defines the 64bit operand (which is later sign
48 extended) has a dual mode that defines and sign-extends simultaneously
49 a 32bit operand. For example:
51 int32 a;
53 ld a --> lwa a // load half word and sign extend
54 a = sign extend a -->
55 -->
56 return a --> return a
59 General idea for solution:
60 --------------------------
61 First, try to merge the sign extension with the instruction that
62 defines the source of the extension and (separately) with the
63 instructions that uses the extended result. By doing this, both cases
64 of redundancies (as described above) will be eliminated.
66 Then, use partial redundancy elimination to place the non redundant
67 ones at optimal placements.
70 Implementation by example:
71 --------------------------
72 Note: The instruction stream is not changed till the last phase.
74 Phase 0: Initial code, as currently generated by gcc.
76 def1 def3
77 se1 def2 se3
78 | \ | / |
79 | \ | / |
80 | \ | / |
81 | \ | / |
82 | \ | / |
83 | \|/ |
84 use1 use2 use3
85 use4
86 def1 + se1:
87 set ((reg:SI 10) (..def1rhs..))
88 set ((reg:DI 100) (sign_extend:DI (reg:SI 10)))
90 def2:
91 set ((reg:DI 100) (const_int 7))
93 def3 + se3:
94 set ((reg:SI 20) (..def3rhs..))
95 set ((reg:DI 100) (sign_extend:DI (reg:SI 20)))
97 use1:
98 set ((reg:CC...) (compare:CC (reg:DI 100) (...)))
100 use2, use3, use4:
101 set ((...) (reg:DI 100))
103 Phase 1: Propagate extensions to uses.
105 def1 def3
106 se1 def2 se3
107 | \ | / |
108 | \ | / |
109 | \ | / |
110 | \ | / |
111 | \ | / |
112 | \|/ |
113 se se se
114 use1 use2 use3
116 use4
118 From here, all of the subregs are lowpart !
120 def1, def2, def3: No change.
122 use1:
123 set ((reg:DI 100) (sign_extend:DI ((subreg:SI (reg:DI 100)))))
124 set ((reg:CC...) (compare:CC (reg:DI 100) (...)))
126 use2, use3, use4:
127 set ((reg:DI 100) (sign_extend:DI ((subreg:SI (reg:DI 100)))))
128 set ((...) (reg:DI 100))
131 Phase 2: Merge and eliminate locally redundant extensions.
134 *def1 def2 *def3
135 [se removed] se se3
136 | \ | / |
137 | \ | / |
138 | \ | / |
139 | \ | / |
140 | \ | / |
141 | \|/ |
142 [se removed] se se
143 *use1 use2 use3
144 [se removed]
145 use4
147 The instructions that were changed at this phase are marked with
148 asterisk.
150 *def1: Merge failed.
151 Remove the sign extension instruction, modify def1 and
152 insert a move instruction to assure to correctness of the code.
153 set ((subreg:SI (reg:DI 100)) (..def1rhs..))
154 set ((reg:SI 10) (subreg:SI (reg:DI 100)))
156 def2 + se: There is no need for merge.
157 Def2 is not changed but a sign extension instruction is
158 created.
159 set ((reg:DI 100) (const_int 7))
160 set ((reg:DI 100) (sign_extend:DI ((subreg:SI (reg:DI 100)))))
162 *def3 + se3: Merge succeeded.
163 set ((reg:DI 100) (sign_extend:DI (..def3rhs..)))
164 set ((reg:SI 20) (reg:DI 100))
165 set ((reg:DI 100) (sign_extend:DI (reg:SI 20)))
166 (The extension instruction is the original one).
168 *use1: Merge succeeded. Remove the sign extension instruction.
169 set ((reg:CC...)
170 (compare:CC (subreg:SI (reg:DI 100)) (...)))
172 use2, use3: Merge failed. No change.
174 use4: The extension is locally redundant, therefore it is eliminated
175 at this point.
178 Phase 3: Eliminate globally redundant extensions.
180 Following the LCM output:
182 def1 def2 def3
183 se se3
184 | \ | / |
185 | \ | / |
186 | se | / |
187 | \ | / |
188 | \ | / |
189 | \|/ |
190 [ses removed]
191 use1 use2 use3
192 use4
195 set ((reg:DI 100) (sign_extend:DI ((subreg:SI (reg:DI 100)))))
197 se3:
198 set ((reg:DI 100) (sign_extend:DI (reg:SI 20)))
201 Phase 4: Commit changes to the insn stream.
204 def1 def3 *def1 def2 *def3
205 se1 def2 se3 [se removed] [se removed]
206 | \ | / | | \ | / |
207 | \ | / | ------> | \ | / |
208 | \ | / | ------> | se | / |
209 | \ | / | | \ | / |
210 | \ | / | | \ | / |
211 | \|/ | | \|/ |
212 use1 use2 use3 *use1 use2 use3
213 use4 use4
215 The instructions that were changed during the whole optimization are
216 marked with asterisk.
218 The result:
220 def1 + se1:
221 [ set ((reg:SI 10) (..def1rhs..)) ] - Deleted
222 [ set ((reg:DI 100) (sign_extend:DI (reg:SI 10))) ] - Deleted
223 set ((subreg:SI (reg:DI 100)) (..def3rhs..)) - Inserted
224 set ((reg:SI 10) (subreg:SI (reg:DI 100))) - Inserted
226 def2:
227 set ((reg:DI 100) (const_int 7)) - No change
229 def3 + se3:
230 [ set ((reg:SI 20) (..def3rhs..)) ] - Deleted
231 [ set ((reg:DI 100) (sign_extend:DI (reg:SI 20))) ] - Deleted
232 set ((reg:DI 100) (sign_extend:DI (..def3rhs..))) - Inserted
233 set ((reg:SI 20) (reg:DI 100)) - Inserted
235 use1:
236 [ set ((reg:CC...) (compare:CC (reg:DI 100) (...))) ] - Deleted
237 set ((reg:CC...) - Inserted
238 (compare:CC (subreg:SI (reg:DI 100)) (...)))
240 use2, use3, use4:
241 set ((...) (reg:DI 100)) - No change
243 se: - Inserted
244 set ((reg:DI 100) (sign_extend:DI ((subreg:SI (reg:DI 100)))))
246 Note: Most of the simple move instructions that were inserted will be
247 trivially dead and therefore eliminated.
249 The implementation outline:
250 ---------------------------
251 Some definitions:
252 A web is RELEVANT if at the end of phase 1, his leader's
253 relevancy is {ZERO, SIGN}_EXTENDED_DEF. The source_mode of
254 the web is the source_mode of his leader.
255 A definition is a candidate for the optimization if it is part
256 of a RELEVANT web and his local source_mode is not narrower
257 then the source_mode of its web.
258 A use is a candidate for the optimization if it is part of a
259 RELEVANT web.
260 A simple explicit extension is a single set instruction that
261 extends a register (or a subregister) to a register (or
262 subregister).
263 A complex explicit extension is an explicit extension instruction
264 that is not simple.
265 A def extension is a simple explicit extension that is
266 also a candidate for the optimization. This extension is part
267 of the instruction stream, it is not generated by this
268 optimization.
269 A use extension is a simple explicit extension that is generated
270 and stored for candidate use during this optimization. It is
271 not emitted to the instruction stream till the last phase of
272 the optimization.
273 A reference is an instruction that satisfy at least on of these
274 criteria:
275 - It contains a definition with EXTENDED_DEF relevancy in a RELEVANT web.
276 - It is followed by a def extension.
277 - It contains a candidate use.
279 Phase 1: Propagate extensions to uses.
280 In this phase, we find candidate extensions for the optimization
281 and we generate (but not emit) proper extensions "right before the
282 uses".
284 a. Build a DF object.
285 b. Traverse over all the instructions that contains a definition
286 and set their local relevancy and local source_mode like this:
287 - If the instruction is a simple explicit extension instruction,
288 mark it as {ZERO, SIGN}_EXTENDED_DEF according to the extension
289 type and mark its source_mode to be the mode of the quantity
290 that is been extended.
291 - Otherwise, If the instruction has an implicit extension,
292 which means that its high part is an extension of its low part,
293 or if it is a complicated explicit extension, mark it as
294 EXTENDED_DEF and set its source_mode to be the narrowest
295 mode that is been extended in the instruction.
296 c. Traverse over all the instructions that contains a use and set
297 their local relevancy to RELEVANT_USE (except for few corner
298 cases).
299 d. Produce the web. During union of two entries, update the
300 relevancy and source_mode of the leader. There are two major
301 guide lines for this update:
302 - If one of the entries is NOT_RELEVANT, mark the leader
303 NOT_RELEVANT.
304 - If one is ZERO_EXTENDED_DEF and the other is SIGN_EXTENDED_DEF
305 (or vice versa) mark the leader as NOT_RELEVANT. We don't
306 handle this kind of mixed webs.
307 (For more details about this update process,
308 see see_update_leader_extra_info ()).
309 e. Generate uses extensions according to the relevancy and
310 source_mode of the webs.
312 Phase 2: Merge and eliminate locally redundant extensions.
313 In this phase, we try to merge def extensions and use
314 extensions with their references, and eliminate redundant extensions
315 in the same basic block.
317 Traverse over all the references. Do this in basic block number and
318 luid number forward order.
319 For each reference do:
320 a. Peephole optimization - try to merge it with all its
321 def extensions and use extensions in the following
322 order:
323 - Try to merge only the def extensions, one by one.
324 - Try to merge only the use extensions, one by one.
325 - Try to merge any couple of use extensions simultaneously.
326 - Try to merge any def extension with one or two uses
327 extensions simultaneously.
328 b. Handle each EXTENDED_DEF in it as if it was already merged with
329 an extension.
331 During the merge process we save the following data for each
332 register in each basic block:
333 a. The first instruction that defines the register in the basic
334 block.
335 b. The last instruction that defines the register in the basic
336 block.
337 c. The first extension of this register before the first
338 instruction that defines it in the basic block.
339 c. The first extension of this register after the last
340 instruction that defines it in the basic block.
341 This data will help us eliminate (or more precisely, not generate)
342 locally redundant extensions, and will be useful in the next stage.
344 While merging extensions with their reference there are 4 possible
345 situations:
346 a. A use extension was merged with the reference:
347 Delete the extension instruction and save the merged reference
348 for phase 4. (For details, see see_use_extension_merged ())
349 b. A use extension failed to be merged with the reference:
350 If there is already such an extension in the same basic block
351 and it is not dead at this point, delete the unmerged extension
352 (it is locally redundant), otherwise properly update the above
353 basic block data.
354 (For details, see see_merge_one_use_extension ())
355 c. A def extension was merged with the reference:
356 Mark this extension as a merged_def extension and properly
357 update the above basic block data.
358 (For details, see see_merge_one_def_extension ())
359 d. A def extension failed to be merged with the reference:
360 Replace the definition of the NARROWmode register in the
361 reference with the proper subreg of WIDEmode register and save
362 the result as a merged reference. Also, properly update the
363 the above basic block data.
364 (For details, see see_def_extension_not_merged ())
366 Phase 3: Eliminate globally redundant extensions.
367 In this phase, we set the bit vectors input of the edge based LCM
368 using the recorded data on the registers in each basic block.
369 We also save pointers for all the anticipatable and available
370 occurrences of the relevant extensions. Then we run the LCM.
372 a. Initialize the comp, antloc, kill bit vectors to zero and the
373 transp bit vector to ones.
375 b. Traverse over all the references. Do this in basic block number
376 and luid number forward order. For each reference:
377 - Go over all its use extensions. For each such extension -
378 If it is not dead from the beginning of the basic block SET
379 the antloc bit of the current extension in the current
380 basic block bits.
381 If it is not dead till the end of the basic block SET the
382 comp bit of the current extension in the current basic
383 block bits.
384 - Go over all its def extensions that were merged with
385 it. For each such extension -
386 If it is not dead till the end of the basic block SET the
387 comp bit of the current extension in the current basic
388 block bits.
389 RESET the proper transp and kill bits.
390 - Go over all its def extensions that were not merged
391 with it. For each such extension -
392 RESET the transp bit and SET the kill bit of the current
393 extension in the current basic block bits.
395 c. Run the edge based LCM.
397 Phase 4: Commit changes to the insn stream.
398 This is the only phase that actually changes the instruction stream.
399 Up to this point the optimization could be aborted at any time.
400 Here we insert extensions at their best placements and delete the
401 redundant ones according to the output of the LCM. We also replace
402 some of the instructions according to the second phase merges results.
404 a. Use the pre_delete_map (from the output of the LCM) in order to
405 delete redundant extensions. This will prevent them from been
406 emitted in the first place.
408 b. Insert extensions on edges where needed according to
409 pre_insert_map and edge_list (from the output of the LCM).
411 c. For each reference do-
412 - Emit all the uses extensions that were not deleted until now,
413 right before the reference.
414 - Delete all the merged and unmerged def extensions from
415 the instruction stream.
416 - Replace the reference with the merged one, if exist.
418 The implementation consists of four data structures:
419 - Data structure I
420 Purpose: To handle the relevancy of the uses, definitions and webs.
421 Relevant structures: web_entry (from df.h), see_entry_extra_info.
422 Details: This is a disjoint-set data structure. Most of its functions are
423 implemented in web.c. Each definition and use in the code are
424 elements. A web_entry structure is allocated for each element to
425 hold the element's relevancy and source_mode. The union rules are
426 defined in see_update_leader_extra_info ().
427 - Data structure II
428 Purpose: To store references and their extensions (uses and defs)
429 and to enable traverse over these references according to basic
430 block order.
431 Relevant structure: see_ref_s.
432 Details: This data structure consists of an array of splay trees. One splay
433 tree for each basic block. The splay tree nodes are references and
434 the keys are the luids of the references.
435 A see_ref_s structure is allocated for each reference. It holds the
436 reference itself, its def and uses extensions and later the merged
437 version of the reference.
438 Using this data structure we can traverse over all the references of
439 a basic block and their extensions in forward order.
440 - Data structure III.
441 Purpose: To store local properties of registers for each basic block.
442 This data will later help us build the LCM sbitmap_vectors
443 input.
444 Relevant structure: see_register_properties.
445 Details: This data structure consists of an array of hash tables. One hash
446 for each basic block. The hash node are a register properties
447 and the keys are the numbers of the registers.
448 A see_register_properties structure is allocated for each register
449 that we might be interested in its properties.
450 Using this data structure we can easily find the properties of a
451 register in a specific basic block. This is necessary for locally
452 redundancy elimination and for setting up the LCM input.
453 - Data structure IV.
454 Purpose: To store the extensions that are candidate for PRE and their
455 anticipatable and available occurrences.
456 Relevant structure: see_occr, see_pre_extension_expr.
457 Details: This data structure is a hash tables. Its nodes are the extensions
458 that are candidate for PRE.
459 A see_pre_extension_expr structure is allocated for each candidate
460 extension. It holds a copy of the extension and a linked list of all
461 the anticipatable and available occurrences of it.
462 We use this data structure when we read the output of the LCM. */
464 #include "config.h"
465 #include "system.h"
466 #include "coretypes.h"
467 #include "tm.h"
469 #include "obstack.h"
470 #include "rtl.h"
471 #include "output.h"
472 #include "df.h"
473 #include "insn-config.h"
474 #include "recog.h"
475 #include "expr.h"
476 #include "splay-tree.h"
477 #include "hashtab.h"
478 #include "regs.h"
479 #include "timevar.h"
480 #include "tree-pass.h"
482 /* Used to classify defs and uses according to relevancy. */
483 enum entry_type {
484 NOT_RELEVANT,
485 SIGN_EXTENDED_DEF,
486 ZERO_EXTENDED_DEF,
487 EXTENDED_DEF,
488 RELEVANT_USE
491 /* Used to classify extensions in relevant webs. */
492 enum extension_type {
493 DEF_EXTENSION,
494 EXPLICIT_DEF_EXTENSION,
495 IMPLICIT_DEF_EXTENSION,
496 USE_EXTENSION
499 /* Global data structures and flags. */
501 /* This structure will be assigned for each web_entry structure (defined
502 in df.h). It is placed in the extra_info field of a web_entry and holds the
503 relevancy and source mode of the web_entry. */
505 struct see_entry_extra_info
507 /* The relevancy of the ref. */
508 enum entry_type relevancy;
509 /* The relevancy of the ref.
510 This field is updated only once - when this structure is created. */
511 enum entry_type local_relevancy;
512 /* The source register mode. */
513 enum machine_mode source_mode;
514 /* This field is used only if the relevancy is ZERO/SIGN_EXTENDED_DEF.
515 It is updated only once when this structure is created. */
516 enum machine_mode local_source_mode;
517 /* This field is used only if the relevancy is EXTENDED_DEF.
518 It holds the narrowest mode that is sign extended. */
519 enum machine_mode source_mode_signed;
520 /* This field is used only if the relevancy is EXTENDED_DEF.
521 It holds the narrowest mode that is zero extended. */
522 enum machine_mode source_mode_unsigned;
525 /* There is one such structure for every reference. It stores the reference
526 itself as well as its extensions (uses and definitions).
527 Used as the value in splay_tree see_bb_splay_ar[]. */
528 struct see_ref_s
530 /* The luid of the insn. */
531 unsigned int luid;
532 /* The insn of the ref. */
533 rtx insn;
534 /* The merged insn that was formed from the reference's insn and extensions.
535 If all merges failed, it remains NULL. */
536 rtx merged_insn;
537 /* The def extensions of the reference that were not merged with
538 it. */
539 htab_t unmerged_def_se_hash;
540 /* The def extensions of the reference that were merged with
541 it. Implicit extensions of the reference will be stored here too. */
542 htab_t merged_def_se_hash;
543 /* The uses extensions of reference. */
544 htab_t use_se_hash;
547 /* There is one such structure for every relevant extended register in a
548 specific basic block. This data will help us build the LCM sbitmap_vectors
549 input. */
550 struct see_register_properties
552 /* The register number. */
553 unsigned int regno;
554 /* The last luid of the reference that defines this register in this basic
555 block. */
556 int last_def;
557 /* The luid of the reference that has the first extension of this register
558 that appears before any definition in this basic block. */
559 int first_se_before_any_def;
560 /* The luid of the reference that has the first extension of this register
561 that appears after the last definition in this basic block. */
562 int first_se_after_last_def;
565 /* Occurrence of an expression.
566 There must be at most one available occurrence and at most one anticipatable
567 occurrence per basic block. */
568 struct see_occr
570 /* Next occurrence of this expression. */
571 struct see_occr *next;
572 /* The insn that computes the expression. */
573 rtx insn;
574 int block_num;
577 /* There is one such structure for every relevant extension expression.
578 It holds a copy of this extension instruction as well as a linked lists of
579 pointers to all the antic and avail occurrences of it. */
580 struct see_pre_extension_expr
582 /* A copy of the extension instruction. */
583 rtx se_insn;
584 /* Index in the available expression bitmaps. */
585 int bitmap_index;
586 /* List of anticipatable occurrences in basic blocks in the function.
587 An "anticipatable occurrence" is the first occurrence in the basic block,
588 the operands are not modified in the basic block prior to the occurrence
589 and the output is not used between the start of the block and the
590 occurrence. */
591 struct see_occr *antic_occr;
592 /* List of available occurrence in basic blocks in the function.
593 An "available occurrence" is the last occurrence in the basic block and
594 the operands are not modified by following statements in the basic block
595 [including this insn]. */
596 struct see_occr *avail_occr;
599 /* Helper structure for the note_uses and see_replace_src functions. */
600 struct see_replace_data
602 rtx from;
603 rtx to;
606 /* Helper structure for the note_uses and see_mentioned_reg functions. */
607 struct see_mentioned_reg_data
609 rtx reg;
610 bool mentioned;
613 /* A data flow object that will be created once and used throughout the
614 optimization. */
615 static struct df *df = NULL;
616 /* An array of web_entries. The i'th definition in the df object is associated
617 with def_entry[i] */
618 static struct web_entry *def_entry = NULL;
619 /* An array of web_entries. The i'th use in the df object is associated with
620 use_entry[i] */
621 static struct web_entry *use_entry = NULL;
622 /* Array of splay_trees.
623 see_bb_splay_ar[i] refers to the splay tree of the i'th basic block.
624 The splay tree will hold see_ref_s structures. The key is the luid
625 of the insn. This way we can traverse over the references of each basic
626 block in forward or backward order. */
627 static splay_tree *see_bb_splay_ar = NULL;
628 /* Array of hashes.
629 see_bb_hash_ar[i] refers to the hash of the i'th basic block.
630 The hash will hold see_register_properties structure. The key is regno. */
631 static htab_t *see_bb_hash_ar = NULL;
632 /* Hash table that holds a copy of all the extensions. The key is the right
633 hand side of the se_insn field. */
634 static htab_t see_pre_extension_hash = NULL;
636 /* Local LCM properties of expressions. */
637 /* Nonzero for expressions that are transparent in the block. */
638 static sbitmap *transp = NULL;
639 /* Nonzero for expressions that are computed (available) in the block. */
640 static sbitmap *comp = NULL;
641 /* Nonzero for expressions that are locally anticipatable in the block. */
642 static sbitmap *antloc = NULL;
643 /* Nonzero for expressions that are locally killed in the block. */
644 static sbitmap *ae_kill = NULL;
645 /* Nonzero for expressions which should be inserted on a specific edge. */
646 static sbitmap *pre_insert_map = NULL;
647 /* Nonzero for expressions which should be deleted in a specific block. */
648 static sbitmap *pre_delete_map = NULL;
649 /* Contains the edge_list returned by pre_edge_lcm. */
650 static struct edge_list *edge_list = NULL;
651 /* Records the last basic block at the beginning of the optimization. */
652 static int last_bb;
653 /* Records the number of uses at the beginning of the optimization. */
654 static unsigned int uses_num;
655 /* Records the number of definitions at the beginning of the optimization. */
656 static unsigned int defs_num;
658 #define ENTRY_EI(ENTRY) ((struct see_entry_extra_info *) (ENTRY)->extra_info)
660 /* Functions implementation. */
662 /* Verifies that EXTENSION's pattern is this:
664 set (reg/subreg reg1) (sign/zero_extend:WIDEmode (reg/subreg reg2))
666 If it doesn't have the expected pattern return NULL.
667 Otherwise, if RETURN_DEST_REG is set, return reg1 else return reg2. */
669 static rtx
670 see_get_extension_reg (rtx extension, bool return_dest_reg)
672 rtx set, rhs, lhs;
673 rtx reg1 = NULL;
674 rtx reg2 = NULL;
676 /* Parallel pattern for extension not supported for the moment. */
677 if (GET_CODE (PATTERN (extension)) == PARALLEL)
678 return NULL;
680 set = single_set (extension);
681 if (!set)
682 return NULL;
683 lhs = SET_DEST (set);
684 rhs = SET_SRC (set);
686 if (REG_P (lhs))
687 reg1 = lhs;
688 else if (REG_P (SUBREG_REG (lhs)))
689 reg1 = SUBREG_REG (lhs);
690 else
691 return NULL;
693 if (GET_CODE (rhs) != SIGN_EXTEND && GET_CODE (rhs) != ZERO_EXTEND)
694 return NULL;
696 rhs = XEXP (rhs, 0);
697 if (REG_P (rhs))
698 reg2 = rhs;
699 else if (REG_P (SUBREG_REG (rhs)))
700 reg2 = SUBREG_REG (rhs);
701 else
702 return NULL;
704 if (return_dest_reg)
705 return reg1;
706 return reg2;
709 /* Verifies that EXTENSION's pattern is this:
711 set (reg/subreg reg1) (sign/zero_extend: (...expr...)
713 If it doesn't have the expected pattern return UNKNOWN.
714 Otherwise, set SOURCE_MODE to be the mode of the extended expr and return
715 the rtx code of the extension. */
717 static enum rtx_code
718 see_get_extension_data (rtx extension, enum machine_mode *source_mode)
720 rtx rhs, lhs, set;
722 if (!extension || !INSN_P (extension))
723 return UNKNOWN;
725 /* Parallel pattern for extension not supported for the moment. */
726 if (GET_CODE (PATTERN (extension)) == PARALLEL)
727 return NOT_RELEVANT;
729 set = single_set (extension);
730 if (!set)
731 return NOT_RELEVANT;
732 rhs = SET_SRC (set);
733 lhs = SET_DEST (set);
735 /* Don't handle extensions to something other then register or
736 subregister. */
737 if (!REG_P (lhs) && !SUBREG_REG (lhs))
738 return UNKNOWN;
740 if (GET_CODE (rhs) != SIGN_EXTEND && GET_CODE (rhs) != ZERO_EXTEND)
741 return UNKNOWN;
743 if (!REG_P (XEXP (rhs, 0))
744 && !(GET_CODE (XEXP (rhs, 0)) == SUBREG
745 && REG_P (SUBREG_REG (XEXP (rhs, 0)))))
746 return UNKNOWN;
748 *source_mode = GET_MODE (XEXP (rhs, 0));
750 if (GET_CODE (rhs) == SIGN_EXTEND)
751 return SIGN_EXTEND;
752 return ZERO_EXTEND;
756 /* Generate instruction with the pattern:
757 set ((reg r) (sign/zero_extend (subreg:mode (reg r))))
758 (the register r on both sides of the set is the same register).
759 And recognize it.
760 If the recognition failed, this is very bad, return NULL (This will abort
761 the entire optimization).
762 Otherwise, return the generated instruction. */
764 static rtx
765 see_gen_normalized_extension (rtx reg, enum rtx_code extension_code,
766 enum machine_mode mode)
768 rtx subreg, insn;
769 rtx extension = NULL;
771 if (!reg
772 || !REG_P (reg)
773 || (extension_code != SIGN_EXTEND && extension_code != ZERO_EXTEND))
774 return NULL;
776 subreg = gen_lowpart_SUBREG (mode, reg);
777 if (extension_code == SIGN_EXTEND)
778 extension = gen_rtx_SIGN_EXTEND (GET_MODE (reg), subreg);
779 else
780 extension = gen_rtx_ZERO_EXTEND (GET_MODE (reg), subreg);
782 start_sequence ();
783 emit_insn (gen_rtx_SET (VOIDmode, reg, extension));
784 insn = get_insns ();
785 end_sequence ();
787 if (insn_invalid_p (insn))
788 /* Recognition failed, this is very bad for this optimization.
789 Abort the optimization. */
790 return NULL;
791 return insn;
794 /* Hashes and splay_trees related functions implementation. */
796 /* Helper functions for the pre_extension hash.
797 This kind of hash will hold see_pre_extension_expr structures.
799 The key is the right hand side of the se_insn field.
800 Note that the se_insn is an expression that looks like:
802 set ((reg:WIDEmode r1) (sign_extend:WIDEmode
803 (subreg:NARROWmode (reg:WIDEmode r2)))) */
805 /* Return TRUE if P1 has the same value in its rhs as P2.
806 Otherwise, return FALSE.
807 P1 and P2 are see_pre_extension_expr structures. */
809 static int
810 eq_descriptor_pre_extension (const void *p1, const void *p2)
812 const struct see_pre_extension_expr *extension1 = p1;
813 const struct see_pre_extension_expr *extension2 = p2;
814 rtx set1 = single_set (extension1->se_insn);
815 rtx set2 = single_set (extension2->se_insn);
816 rtx rhs1, rhs2;
818 gcc_assert (set1 && set2);
819 rhs1 = SET_SRC (set1);
820 rhs2 = SET_SRC (set2);
822 return rtx_equal_p (rhs1, rhs2);
826 /* P is a see_pre_extension_expr struct, use the RHS of the se_insn field.
827 Note that the RHS is an expression that looks like this:
828 (sign_extend:WIDEmode (subreg:NARROWmode (reg:WIDEmode r))) */
830 static hashval_t
831 hash_descriptor_pre_extension (const void *p)
833 const struct see_pre_extension_expr *extension = p;
834 rtx set = single_set (extension->se_insn);
835 rtx rhs;
837 gcc_assert (set);
838 rhs = SET_SRC (set);
840 return hash_rtx (rhs, GET_MODE (rhs), 0, NULL, 0);
844 /* Free the allocated memory of the current see_pre_extension_expr struct.
846 It frees the two linked list of the occurrences structures. */
848 static void
849 hash_del_pre_extension (void *p)
851 struct see_pre_extension_expr *extension = p;
852 struct see_occr *curr_occr = extension->antic_occr;
853 struct see_occr *next_occr = NULL;
855 /* Free the linked list of the anticipatable occurrences. */
856 while (curr_occr)
858 next_occr = curr_occr->next;
859 free (curr_occr);
860 curr_occr = next_occr;
863 /* Free the linked list of the available occurrences. */
864 curr_occr = extension->avail_occr;
865 while (curr_occr)
867 next_occr = curr_occr->next;
868 free (curr_occr);
869 curr_occr = next_occr;
872 /* Free the see_pre_extension_expr structure itself. */
873 free (extension);
877 /* Helper functions for the register_properties hash.
878 This kind of hash will hold see_register_properties structures.
880 The value of the key is the regno field of the structure. */
882 /* Return TRUE if P1 has the same value in the regno field as P2.
883 Otherwise, return FALSE.
884 Where P1 and P2 are see_register_properties structures. */
886 static int
887 eq_descriptor_properties (const void *p1, const void *p2)
889 const struct see_register_properties *curr_prop1 = p1;
890 const struct see_register_properties *curr_prop2 = p2;
892 return curr_prop1->regno == curr_prop2->regno;
896 /* P is a see_register_properties struct, use the register number in the
897 regno field. */
899 static hashval_t
900 hash_descriptor_properties (const void *p)
902 const struct see_register_properties *curr_prop = p;
903 return curr_prop->regno;
907 /* Free the allocated memory of the current see_register_properties struct. */
908 static void
909 hash_del_properties (void *p)
911 struct see_register_properties *curr_prop = p;
912 free (curr_prop);
916 /* Helper functions for an extension hash.
917 This kind of hash will hold insns that look like:
919 set ((reg:WIDEmode r1) (sign_extend:WIDEmode
920 (subreg:NARROWmode (reg:WIDEmode r2))))
922 set ((reg:WIDEmode r1) (sign_extend:WIDEmode (reg:NARROWmode r2)))
924 The value of the key is (REGNO (reg:WIDEmode r1))
925 It is possible to search this hash in two ways:
926 1. By a register rtx. The Value that is been compared to the keys is the
927 REGNO of it.
928 2. By an insn with the above pattern. The Value that is been compared to
929 the keys is the REGNO of the reg on the lhs. */
931 /* Return TRUE if P1 has the same value as P2. Otherwise, return FALSE.
932 Where P1 is an insn and P2 is an insn or a register. */
934 static int
935 eq_descriptor_extension (const void *p1, const void *p2)
937 const rtx insn = (rtx) p1;
938 const rtx element = (rtx) p2;
939 rtx set1 = single_set (insn);
940 rtx dest_reg1;
941 rtx set2 = NULL;
942 rtx dest_reg2 = NULL;
944 gcc_assert (set1 && element && (REG_P (element) || INSN_P (element)));
946 dest_reg1 = SET_DEST (set1);
948 if (INSN_P (element))
950 set2 = single_set (element);
951 dest_reg2 = SET_DEST (set2);
953 else
954 dest_reg2 = element;
956 return REGNO (dest_reg1) == REGNO (dest_reg2);
960 /* If P is an insn, use the register number of its lhs
961 otherwise, P is a register, use its number. */
963 static hashval_t
964 hash_descriptor_extension (const void *p)
966 const rtx r = (rtx) p;
967 rtx set, lhs;
969 if (r && REG_P (r))
970 return REGNO (r);
972 gcc_assert (r && INSN_P (r));
973 set = single_set (r);
974 gcc_assert (set);
975 lhs = SET_DEST (set);
976 return REGNO (lhs);
980 /* Helper function for a see_bb_splay_ar[i] splay tree.
981 It frees all the allocated memory of a struct see_ref_s pointer.
983 VALUE is the value of a splay tree node. */
985 static void
986 see_free_ref_s (splay_tree_value value)
988 struct see_ref_s *ref_s = (struct see_ref_s *)value;
990 if (ref_s->unmerged_def_se_hash)
991 htab_delete (ref_s->unmerged_def_se_hash);
992 if (ref_s->merged_def_se_hash)
993 htab_delete (ref_s->merged_def_se_hash);
994 if (ref_s->use_se_hash)
995 htab_delete (ref_s->use_se_hash);
996 free (ref_s);
1000 /* Rest of the implementation. */
1002 /* Search the extension hash for a suitable entry for EXTENSION.
1003 TYPE is the type of EXTENSION (USE_EXTENSION or DEF_EXTENSION).
1005 If TYPE is DEF_EXTENSION we need to normalize EXTENSION before searching the
1006 extension hash.
1008 If a suitable entry was found, return the slot. Otherwise, store EXTENSION
1009 in the hash and return NULL. */
1011 static struct see_pre_extension_expr *
1012 see_seek_pre_extension_expr (rtx extension, enum extension_type type)
1014 struct see_pre_extension_expr **slot_pre_exp, temp_pre_exp;
1015 rtx dest_extension_reg = see_get_extension_reg (extension, 1);
1016 enum rtx_code extension_code;
1017 enum machine_mode source_extension_mode;
1019 if (type == DEF_EXTENSION)
1021 extension_code = see_get_extension_data (extension,
1022 &source_extension_mode);
1023 gcc_assert (extension_code != UNKNOWN);
1024 extension =
1025 see_gen_normalized_extension (dest_extension_reg, extension_code,
1026 source_extension_mode);
1028 temp_pre_exp.se_insn = extension;
1029 slot_pre_exp =
1030 (struct see_pre_extension_expr **) htab_find_slot (see_pre_extension_hash,
1031 &temp_pre_exp, INSERT);
1032 if (*slot_pre_exp == NULL)
1033 /* This is the first time this extension instruction is encountered. Store
1034 it in the hash. */
1036 (*slot_pre_exp) = xmalloc (sizeof (struct see_pre_extension_expr));
1037 (*slot_pre_exp)->se_insn = extension;
1038 (*slot_pre_exp)->bitmap_index =
1039 (htab_elements (see_pre_extension_hash) - 1);
1040 (*slot_pre_exp)->antic_occr = NULL;
1041 (*slot_pre_exp)->avail_occr = NULL;
1042 return NULL;
1044 return *slot_pre_exp;
1048 /* This function defines how to update the extra_info of the web_entry.
1050 FIRST is the pointer of the extra_info of the first web_entry.
1051 SECOND is the pointer of the extra_info of the second web_entry.
1052 The first web_entry will be the predecessor (leader) of the second web_entry
1053 after the union.
1055 Return true if FIRST and SECOND points to the same web entry structure and
1056 nothing is done. Otherwise, return false. */
1058 static bool
1059 see_update_leader_extra_info (struct web_entry *first, struct web_entry *second)
1061 struct see_entry_extra_info *first_ei, *second_ei;
1063 first = unionfind_root (first);
1064 second = unionfind_root (second);
1066 if (unionfind_union (first, second))
1067 return true;
1069 first_ei = (struct see_entry_extra_info *) first->extra_info;
1070 second_ei = (struct see_entry_extra_info *) second->extra_info;
1072 gcc_assert (first_ei && second_ei);
1074 if (second_ei->relevancy == NOT_RELEVANT)
1076 first_ei->relevancy = NOT_RELEVANT;
1077 return false;
1079 switch (first_ei->relevancy)
1081 case NOT_RELEVANT:
1082 break;
1083 case RELEVANT_USE:
1084 switch (second_ei->relevancy)
1086 case RELEVANT_USE:
1087 break;
1088 case EXTENDED_DEF:
1089 first_ei->relevancy = second_ei->relevancy;
1090 first_ei->source_mode_signed = second_ei->source_mode_signed;
1091 first_ei->source_mode_unsigned = second_ei->source_mode_unsigned;
1092 break;
1093 case SIGN_EXTENDED_DEF:
1094 case ZERO_EXTENDED_DEF:
1095 first_ei->relevancy = second_ei->relevancy;
1096 first_ei->source_mode = second_ei->source_mode;
1097 break;
1098 default:
1099 gcc_unreachable ();
1101 break;
1102 case SIGN_EXTENDED_DEF:
1103 switch (second_ei->relevancy)
1105 case SIGN_EXTENDED_DEF:
1106 /* The mode of the root should be the wider one in this case. */
1107 first_ei->source_mode =
1108 (first_ei->source_mode > second_ei->source_mode) ?
1109 first_ei->source_mode : second_ei->source_mode;
1110 break;
1111 case RELEVANT_USE:
1112 break;
1113 case ZERO_EXTENDED_DEF:
1114 /* Don't mix webs with zero extend and sign extend. */
1115 first_ei->relevancy = NOT_RELEVANT;
1116 break;
1117 case EXTENDED_DEF:
1118 if (second_ei->source_mode_signed == MAX_MACHINE_MODE)
1119 first_ei->relevancy = NOT_RELEVANT;
1120 else
1121 /* The mode of the root should be the wider one in this case. */
1122 first_ei->source_mode =
1123 (first_ei->source_mode > second_ei->source_mode_signed) ?
1124 first_ei->source_mode : second_ei->source_mode_signed;
1125 break;
1126 default:
1127 gcc_unreachable ();
1129 break;
1130 /* This case is similar to the previous one, with little changes. */
1131 case ZERO_EXTENDED_DEF:
1132 switch (second_ei->relevancy)
1134 case SIGN_EXTENDED_DEF:
1135 /* Don't mix webs with zero extend and sign extend. */
1136 first_ei->relevancy = NOT_RELEVANT;
1137 break;
1138 case RELEVANT_USE:
1139 break;
1140 case ZERO_EXTENDED_DEF:
1141 /* The mode of the root should be the wider one in this case. */
1142 first_ei->source_mode =
1143 (first_ei->source_mode > second_ei->source_mode) ?
1144 first_ei->source_mode : second_ei->source_mode;
1145 break;
1146 case EXTENDED_DEF:
1147 if (second_ei->source_mode_unsigned == MAX_MACHINE_MODE)
1148 first_ei->relevancy = NOT_RELEVANT;
1149 else
1150 /* The mode of the root should be the wider one in this case. */
1151 first_ei->source_mode =
1152 (first_ei->source_mode > second_ei->source_mode_unsigned) ?
1153 first_ei->source_mode : second_ei->source_mode_unsigned;
1154 break;
1155 default:
1156 gcc_unreachable ();
1158 break;
1159 case EXTENDED_DEF:
1160 if (first_ei->source_mode_signed != MAX_MACHINE_MODE
1161 && first_ei->source_mode_unsigned != MAX_MACHINE_MODE)
1163 switch (second_ei->relevancy)
1165 case SIGN_EXTENDED_DEF:
1166 first_ei->relevancy = SIGN_EXTENDED_DEF;
1167 first_ei->source_mode =
1168 (first_ei->source_mode_signed > second_ei->source_mode) ?
1169 first_ei->source_mode_signed : second_ei->source_mode;
1170 break;
1171 case RELEVANT_USE:
1172 break;
1173 case ZERO_EXTENDED_DEF:
1174 first_ei->relevancy = ZERO_EXTENDED_DEF;
1175 first_ei->source_mode =
1176 (first_ei->source_mode_unsigned > second_ei->source_mode) ?
1177 first_ei->source_mode_unsigned : second_ei->source_mode;
1178 break;
1179 case EXTENDED_DEF:
1180 if (second_ei->source_mode_unsigned != MAX_MACHINE_MODE)
1181 first_ei->source_mode_unsigned =
1182 (first_ei->source_mode_unsigned >
1183 second_ei->source_mode_unsigned) ?
1184 first_ei->source_mode_unsigned :
1185 second_ei->source_mode_unsigned;
1186 if (second_ei->source_mode_signed != MAX_MACHINE_MODE)
1187 first_ei->source_mode_signed =
1188 (first_ei->source_mode_signed >
1189 second_ei->source_mode_signed) ?
1190 first_ei->source_mode_signed : second_ei->source_mode_signed;
1191 break;
1192 default:
1193 gcc_unreachable ();
1196 else if (first_ei->source_mode_signed == MAX_MACHINE_MODE)
1198 gcc_assert (first_ei->source_mode_unsigned != MAX_MACHINE_MODE);
1199 switch (second_ei->relevancy)
1201 case SIGN_EXTENDED_DEF:
1202 first_ei->relevancy = NOT_RELEVANT;
1203 break;
1204 case RELEVANT_USE:
1205 break;
1206 case ZERO_EXTENDED_DEF:
1207 first_ei->relevancy = ZERO_EXTENDED_DEF;
1208 first_ei->source_mode =
1209 (first_ei->source_mode_unsigned > second_ei->source_mode) ?
1210 first_ei->source_mode_unsigned : second_ei->source_mode;
1211 break;
1212 case EXTENDED_DEF:
1213 if (second_ei->source_mode_unsigned == MAX_MACHINE_MODE)
1214 first_ei->relevancy = NOT_RELEVANT;
1215 else
1216 first_ei->source_mode_unsigned =
1217 (first_ei->source_mode_unsigned >
1218 second_ei->source_mode_unsigned) ?
1219 first_ei->source_mode_unsigned :
1220 second_ei->source_mode_unsigned;
1221 break;
1222 default:
1223 gcc_unreachable ();
1226 else
1228 gcc_assert (first_ei->source_mode_unsigned == MAX_MACHINE_MODE);
1229 gcc_assert (first_ei->source_mode_signed != MAX_MACHINE_MODE);
1230 switch (second_ei->relevancy)
1232 case SIGN_EXTENDED_DEF:
1233 first_ei->relevancy = SIGN_EXTENDED_DEF;
1234 first_ei->source_mode =
1235 (first_ei->source_mode_signed > second_ei->source_mode) ?
1236 first_ei->source_mode_signed : second_ei->source_mode;
1237 break;
1238 case RELEVANT_USE:
1239 break;
1240 case ZERO_EXTENDED_DEF:
1241 first_ei->relevancy = NOT_RELEVANT;
1242 break;
1243 case EXTENDED_DEF:
1244 if (second_ei->source_mode_signed == MAX_MACHINE_MODE)
1245 first_ei->relevancy = NOT_RELEVANT;
1246 else
1247 first_ei->source_mode_signed =
1248 (first_ei->source_mode_signed >
1249 second_ei->source_mode_signed) ?
1250 first_ei->source_mode_signed : second_ei->source_mode_signed;
1251 break;
1252 default:
1253 gcc_unreachable ();
1256 break;
1257 default:
1258 /* Unknown patern type. */
1259 gcc_unreachable ();
1262 return false;
1266 /* Free global data structures. */
1268 static void
1269 see_free_data_structures (void)
1271 int i;
1272 unsigned int j;
1274 /* Free the bitmap vectors. */
1275 if (transp)
1277 sbitmap_vector_free (transp);
1278 transp = NULL;
1279 sbitmap_vector_free (comp);
1280 comp = NULL;
1281 sbitmap_vector_free (antloc);
1282 antloc = NULL;
1283 sbitmap_vector_free (ae_kill);
1284 ae_kill = NULL;
1286 if (pre_insert_map)
1288 sbitmap_vector_free (pre_insert_map);
1289 pre_insert_map = NULL;
1291 if (pre_delete_map)
1293 sbitmap_vector_free (pre_delete_map);
1294 pre_delete_map = NULL;
1296 if (edge_list)
1298 free_edge_list (edge_list);
1299 edge_list = NULL;
1302 /* Free the extension hash. */
1303 htab_delete (see_pre_extension_hash);
1305 /* Free the array of hashes. */
1306 for (i = 0; i < last_bb; i++)
1307 if (see_bb_hash_ar[i])
1308 htab_delete (see_bb_hash_ar[i]);
1309 free (see_bb_hash_ar);
1311 /* Free the array of splay trees. */
1312 for (i = 0; i < last_bb; i++)
1313 if (see_bb_splay_ar[i])
1314 splay_tree_delete (see_bb_splay_ar[i]);
1315 free (see_bb_splay_ar);
1317 /* Free the array of web entries and their extra info field. */
1318 for (j = 0; j < defs_num; j++)
1319 free (def_entry[j].extra_info);
1320 free (def_entry);
1321 for (j = 0; j < uses_num; j++)
1322 free (use_entry[j].extra_info);
1323 free (use_entry);
1327 /* Initialize global data structures and variables. */
1329 static void
1330 see_initialize_data_structures (void)
1332 /* Build the df object. */
1333 df = df_init (DF_HARD_REGS | DF_EQUIV_NOTES | DF_SUBREGS);
1334 df_rd_add_problem (df, 0);
1335 df_chain_add_problem (df, DF_DU_CHAIN | DF_UD_CHAIN);
1336 df_analyze (df);
1338 if (dump_file)
1339 df_dump (df, dump_file);
1341 /* Record the last basic block at the beginning of the optimization. */
1342 last_bb = last_basic_block;
1343 /* Record the number of uses at the beginning of the optimization. */
1344 uses_num = DF_USES_SIZE (df);
1345 /* Record the number of definitions at the beginning of the optimization. */
1346 defs_num = DF_DEFS_SIZE (df);
1348 /* Allocate web entries array for the union-find data structure. */
1349 def_entry = xcalloc (defs_num, sizeof (struct web_entry));
1350 use_entry = xcalloc (uses_num, sizeof (struct web_entry));
1352 /* Allocate an array of splay trees.
1353 One splay tree for each basic block. */
1354 see_bb_splay_ar = xcalloc (last_bb, sizeof (splay_tree));
1356 /* Allocate an array of hashes.
1357 One hash for each basic block. */
1358 see_bb_hash_ar = xcalloc (last_bb, sizeof (htab_t));
1360 /* Allocate the extension hash. It will hold the extensions that we want
1361 to PRE. */
1362 see_pre_extension_hash = htab_create (10,
1363 hash_descriptor_pre_extension,
1364 eq_descriptor_pre_extension,
1365 hash_del_pre_extension);
1369 /* Function called by note_uses to check if a register is used in a
1370 subexpressions.
1372 X is a pointer to the subexpression and DATA is a pointer to a
1373 see_mentioned_reg_data structure that contains the register to look for and
1374 a place for the result. */
1376 static void
1377 see_mentioned_reg (rtx *x, void *data)
1379 struct see_mentioned_reg_data *d
1380 = (struct see_mentioned_reg_data *) data;
1382 if (reg_mentioned_p (d->reg, *x))
1383 d->mentioned = true;
1387 /* We don't want to merge a use extension with a reference if the extended
1388 register is used only in a simple move instruction. We also don't want to
1389 merge a def extension with a reference if the source register of the
1390 extension is defined only in a simple move in the reference.
1392 REF is the reference instruction.
1393 EXTENSION is the use extension or def extension instruction.
1394 TYPE is the type of the extension (use or def).
1396 Return true if the reference is complicated enough, so we would like to merge
1397 it with the extension. Otherwise, return false. */
1399 static bool
1400 see_want_to_be_merged_with_extension (rtx ref, rtx extension,
1401 enum extension_type type)
1403 rtx pat;
1404 rtx dest_extension_reg = see_get_extension_reg (extension, 1);
1405 rtx source_extension_reg = see_get_extension_reg (extension, 0);
1406 enum rtx_code code;
1407 struct see_mentioned_reg_data d;
1408 int i;
1410 pat = PATTERN (ref);
1411 code = GET_CODE (pat);
1413 if (code == PARALLEL)
1415 for (i = 0; i < XVECLEN (pat, 0); i++)
1417 rtx sub = XVECEXP (pat, 0, i);
1419 if (GET_CODE (sub) == SET
1420 && (REG_P (SET_DEST (sub))
1421 || (GET_CODE (SET_DEST (sub)) == SUBREG
1422 && REG_P (SUBREG_REG (SET_DEST (sub)))))
1423 && (REG_P (SET_SRC (sub))
1424 || (GET_CODE (SET_SRC (sub)) == SUBREG
1425 && REG_P (SUBREG_REG (SET_SRC (sub))))))
1427 /* This is a simple move SET. */
1428 if (type == DEF_EXTENSION
1429 && reg_mentioned_p (source_extension_reg, SET_DEST (sub)))
1430 return false;
1432 else
1434 /* This is not a simple move SET.
1435 Check if it uses the source of the extension. */
1436 if (type == USE_EXTENSION)
1438 d.reg = dest_extension_reg;
1439 d.mentioned = false;
1440 note_uses (&sub, see_mentioned_reg, &d);
1441 if (d.mentioned)
1442 return true;
1446 if (type == USE_EXTENSION)
1447 return false;
1449 else
1451 if (code == SET
1452 && (REG_P (SET_DEST (pat))
1453 || (GET_CODE (SET_DEST (pat)) == SUBREG
1454 && REG_P (SUBREG_REG (SET_DEST (pat)))))
1455 && (REG_P (SET_SRC (pat))
1456 || (GET_CODE (SET_SRC (pat)) == SUBREG
1457 && REG_P (SUBREG_REG (SET_SRC (pat))))))
1458 /* This is a simple move SET. */
1459 return false;
1462 return true;
1466 /* Print the register number of the current see_register_properties
1467 structure.
1469 This is a subroutine of see_main called via htab_traverse.
1470 SLOT contains the current see_register_properties structure pointer. */
1472 static int
1473 see_print_register_properties (void **slot, void *b ATTRIBUTE_UNUSED)
1475 struct see_register_properties *prop = *slot;
1477 gcc_assert (prop);
1478 fprintf (dump_file, "Property found for register %d\n", prop->regno);
1479 return 1;
1483 /* Print the extension instruction of the current see_register_properties
1484 structure.
1486 This is a subroutine of see_main called via htab_traverse.
1487 SLOT contains the current see_pre_extension_expr structure pointer. */
1489 static int
1490 see_print_pre_extension_expr (void **slot, void *b ATTRIBUTE_UNUSED)
1492 struct see_pre_extension_expr *pre_extension = *slot;
1494 gcc_assert (pre_extension
1495 && pre_extension->se_insn
1496 && INSN_P (pre_extension->se_insn));
1498 fprintf (dump_file, "Index %d for:\n", pre_extension->bitmap_index);
1499 print_rtl_single (dump_file, pre_extension->se_insn);
1501 return 1;
1505 /* Phase 4 implementation: Commit changes to the insn stream. */
1507 /* Delete the merged def extension.
1509 This is a subroutine of see_commit_ref_changes called via htab_traverse.
1511 SLOT contains the current def extension instruction.
1512 B is the see_ref_s structure pointer. */
1514 static int
1515 see_delete_merged_def_extension (void **slot, void *b ATTRIBUTE_UNUSED)
1517 rtx def_se = *slot;
1519 if (dump_file)
1521 fprintf (dump_file, "Deleting merged def extension:\n");
1522 print_rtl_single (dump_file, def_se);
1525 if (INSN_DELETED_P (def_se))
1526 /* This def extension is an implicit one. No need to delete it since
1527 it is not in the insn stream. */
1528 return 1;
1530 delete_insn (def_se);
1531 return 1;
1535 /* Delete the unmerged def extension.
1537 This is a subroutine of see_commit_ref_changes called via htab_traverse.
1539 SLOT contains the current def extension instruction.
1540 B is the see_ref_s structure pointer. */
1542 static int
1543 see_delete_unmerged_def_extension (void **slot, void *b ATTRIBUTE_UNUSED)
1545 rtx def_se = *slot;
1547 if (dump_file)
1549 fprintf (dump_file, "Deleting unmerged def extension:\n");
1550 print_rtl_single (dump_file, def_se);
1553 delete_insn (def_se);
1554 return 1;
1558 /* Emit the non-redundant use extension to the instruction stream.
1560 This is a subroutine of see_commit_ref_changes called via htab_traverse.
1562 SLOT contains the current use extension instruction.
1563 B is the see_ref_s structure pointer. */
1565 static int
1566 see_emit_use_extension (void **slot, void *b)
1568 rtx use_se = *slot;
1569 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b;
1571 if (INSN_DELETED_P (use_se))
1572 /* This use extension was previously removed according to the lcm
1573 output. */
1574 return 1;
1576 if (dump_file)
1578 fprintf (dump_file, "Inserting use extension:\n");
1579 print_rtl_single (dump_file, use_se);
1582 add_insn_before (use_se, curr_ref_s->insn);
1584 return 1;
1588 /* For each relevant reference:
1589 a. Emit the non-redundant use extensions.
1590 b. Delete the def extensions.
1591 c. Replace the original reference with the merged one (if exists) and add the
1592 move instructions that were generated.
1594 This is a subroutine of see_commit_changes called via splay_tree_foreach.
1596 STN is the current node in the see_bb_splay_ar[i] splay tree. It holds a
1597 see_ref_s structure. */
1599 static int
1600 see_commit_ref_changes (splay_tree_node stn,
1601 void *data ATTRIBUTE_UNUSED)
1603 htab_t use_se_hash = ((struct see_ref_s *) (stn->value))->use_se_hash;
1604 htab_t unmerged_def_se_hash =
1605 ((struct see_ref_s *) (stn->value))->unmerged_def_se_hash;
1606 htab_t merged_def_se_hash =
1607 ((struct see_ref_s *) (stn->value))->merged_def_se_hash;
1608 rtx ref = ((struct see_ref_s *) (stn->value))->insn;
1609 rtx merged_ref = ((struct see_ref_s *) (stn->value))->merged_insn;
1611 /* Emit the non-redundant use extensions. */
1612 if (use_se_hash)
1613 htab_traverse_noresize (use_se_hash, see_emit_use_extension,
1614 (PTR) (stn->value));
1616 /* Delete the def extensions. */
1617 if (unmerged_def_se_hash)
1618 htab_traverse (unmerged_def_se_hash, see_delete_unmerged_def_extension,
1619 (PTR) (stn->value));
1621 if (merged_def_se_hash)
1622 htab_traverse (merged_def_se_hash, see_delete_merged_def_extension,
1623 (PTR) (stn->value));
1625 /* Replace the original reference with the merged one (if exists) and add the
1626 move instructions that were generated. */
1627 if (merged_ref && !INSN_DELETED_P (ref))
1629 if (dump_file)
1631 fprintf (dump_file, "Replacing orig reference:\n");
1632 print_rtl_single (dump_file, ref);
1633 fprintf (dump_file, "With merged reference:\n");
1634 print_rtl_single (dump_file, merged_ref);
1636 emit_insn_after (merged_ref, ref);
1637 delete_insn (ref);
1640 /* Continue to the next reference. */
1641 return 0;
1645 /* Insert partially redundant expressions on edges to make the expressions fully
1646 redundant.
1648 INDEX_MAP is a mapping of an index to an expression.
1649 Return true if an instruction was inserted on an edge.
1650 Otherwise, return false. */
1652 static bool
1653 see_pre_insert_extensions (struct see_pre_extension_expr **index_map)
1655 int num_edges = NUM_EDGES (edge_list);
1656 int set_size = pre_insert_map[0]->size;
1657 size_t pre_extension_num = htab_elements (see_pre_extension_hash);
1659 int did_insert = 0;
1660 int e;
1661 int i;
1662 int j;
1664 for (e = 0; e < num_edges; e++)
1666 int indx;
1667 basic_block bb = INDEX_EDGE_PRED_BB (edge_list, e);
1669 for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS)
1671 SBITMAP_ELT_TYPE insert = pre_insert_map[e]->elms[i];
1673 for (j = indx; insert && j < (int) pre_extension_num;
1674 j++, insert >>= 1)
1675 if (insert & 1)
1677 struct see_pre_extension_expr *expr = index_map[j];
1678 int idx = expr->bitmap_index;
1679 rtx se_insn = NULL;
1680 edge eg = INDEX_EDGE (edge_list, e);
1682 start_sequence ();
1683 emit_insn (PATTERN (expr->se_insn));
1684 se_insn = get_insns ();
1685 end_sequence ();
1687 if (eg->flags & EDGE_ABNORMAL)
1689 rtx new_insn = NULL;
1691 new_insn = insert_insn_end_bb_new (se_insn, bb);
1692 gcc_assert (new_insn && INSN_P (new_insn));
1694 if (dump_file)
1696 fprintf (dump_file,
1697 "PRE: end of bb %d, insn %d, ",
1698 bb->index, INSN_UID (new_insn));
1699 fprintf (dump_file,
1700 "inserting expression %d\n", idx);
1703 else
1705 insert_insn_on_edge (se_insn, eg);
1707 if (dump_file)
1709 fprintf (dump_file, "PRE: edge (%d,%d), ",
1710 bb->index,
1711 INDEX_EDGE_SUCC_BB (edge_list, e)->index);
1712 fprintf (dump_file, "inserting expression %d\n", idx);
1715 did_insert = true;
1719 return did_insert;
1723 /* Since all the redundant extensions must be anticipatable, they must be a use
1724 extensions. Mark them as deleted. This will prevent them from been emitted
1725 in the first place.
1727 This is a subroutine of see_commit_changes called via htab_traverse.
1729 SLOT contains the current see_pre_extension_expr structure pointer. */
1731 static int
1732 see_pre_delete_extension (void **slot, void *b ATTRIBUTE_UNUSED)
1734 struct see_pre_extension_expr *expr = *slot;
1735 struct see_occr *occr;
1736 int indx = expr->bitmap_index;
1738 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
1740 if (TEST_BIT (pre_delete_map[occr->block_num], indx))
1742 /* Mark as deleted. */
1743 INSN_DELETED_P (occr->insn) = 1;
1744 if (dump_file)
1746 fprintf (dump_file,"Redundant extension deleted:\n");
1747 print_rtl_single (dump_file, occr->insn);
1751 return 1;
1755 /* Create the index_map mapping of an index to an expression.
1757 This is a subroutine of see_commit_changes called via htab_traverse.
1759 SLOT contains the current see_pre_extension_expr structure pointer.
1760 B a pointer to see_pre_extension_expr structure pointer. */
1762 static int
1763 see_map_extension (void **slot, void *b)
1765 struct see_pre_extension_expr *expr = *slot;
1766 struct see_pre_extension_expr **index_map =
1767 (struct see_pre_extension_expr **) b;
1769 index_map[expr->bitmap_index] = expr;
1771 return 1;
1775 /* Phase 4 top level function.
1776 In this phase we finally change the instruction stream.
1777 Here we insert extensions at their best placements and delete the
1778 redundant ones according to the output of the LCM. We also replace
1779 some of the instructions according to phase 2 merges results. */
1781 static void
1782 see_commit_changes (void)
1784 struct see_pre_extension_expr **index_map;
1785 size_t pre_extension_num = htab_elements (see_pre_extension_hash);
1786 bool did_insert = false;
1787 int i;
1789 index_map = xcalloc (pre_extension_num,
1790 sizeof (struct see_pre_extension_expr *));
1792 if (dump_file)
1793 fprintf (dump_file,
1794 "* Phase 4: Commit changes to the insn stream. *\n");
1796 /* Produce a mapping of all the pre_extensions. */
1797 htab_traverse (see_pre_extension_hash, see_map_extension, (PTR) index_map);
1799 /* Delete redundant extension. This will prevent them from been emitted in
1800 the first place. */
1801 htab_traverse (see_pre_extension_hash, see_pre_delete_extension, NULL);
1803 /* At this point, we must free the DF object, since the number of basic blocks
1804 may change. */
1805 df_finish (df);
1806 df = NULL;
1808 /* Insert extensions on edges, according to the LCM result. */
1809 did_insert = see_pre_insert_extensions (index_map);
1811 if (did_insert)
1812 commit_edge_insertions ();
1814 /* Commit the rest of the changes. */
1815 for (i = 0; i < last_bb; i++)
1817 if (see_bb_splay_ar[i])
1819 /* Traverse over all the references in the basic block in forward
1820 order. */
1821 splay_tree_foreach (see_bb_splay_ar[i],
1822 see_commit_ref_changes, NULL);
1826 free (index_map);
1830 /* Phase 3 implementation: Eliminate globally redundant extensions. */
1832 /* Analyze the properties of a merged def extension for the LCM and record avail
1833 occurrences.
1835 This is a subroutine of see_analyze_ref_local_prop called
1836 via htab_traverse.
1838 SLOT contains the current def extension instruction.
1839 B is the see_ref_s structure pointer. */
1841 static int
1842 see_analyze_merged_def_local_prop (void **slot, void *b)
1844 rtx def_se = *slot;
1845 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b;
1846 rtx ref = curr_ref_s->insn;
1847 struct see_pre_extension_expr *extension_expr;
1848 int indx;
1849 int bb_num = BLOCK_NUM (ref);
1850 htab_t curr_bb_hash;
1851 struct see_register_properties *curr_prop, **slot_prop;
1852 struct see_register_properties temp_prop;
1853 rtx dest_extension_reg = see_get_extension_reg (def_se, 1);
1854 struct see_occr *curr_occr = NULL;
1855 struct see_occr *tmp_occr = NULL;
1857 extension_expr = see_seek_pre_extension_expr (def_se, DEF_EXTENSION);
1858 /* The extension_expr must be found. */
1859 gcc_assert (extension_expr);
1861 curr_bb_hash = see_bb_hash_ar[bb_num];
1862 gcc_assert (curr_bb_hash);
1863 temp_prop.regno = REGNO (dest_extension_reg);
1864 slot_prop =
1865 (struct see_register_properties **) htab_find_slot (curr_bb_hash,
1866 &temp_prop, INSERT);
1867 curr_prop = *slot_prop;
1868 gcc_assert (curr_prop);
1870 indx = extension_expr->bitmap_index;
1872 /* Reset the transparency bit. */
1873 RESET_BIT (transp[bb_num], indx);
1874 /* Reset the killed bit. */
1875 RESET_BIT (ae_kill[bb_num], indx);
1877 if (curr_prop->first_se_after_last_def == DF_INSN_LUID (df, ref))
1879 /* Set the available bit. */
1880 SET_BIT (comp[bb_num], indx);
1881 /* Record the available occurrence. */
1882 curr_occr = xmalloc (sizeof (struct see_occr));
1883 curr_occr->next = NULL;
1884 curr_occr->insn = def_se;
1885 curr_occr->block_num = bb_num;
1886 tmp_occr = extension_expr->avail_occr;
1887 if (!tmp_occr)
1888 extension_expr->avail_occr = curr_occr;
1889 else
1891 while (tmp_occr->next)
1892 tmp_occr = tmp_occr->next;
1893 tmp_occr->next = curr_occr;
1897 return 1;
1901 /* Analyze the properties of a unmerged def extension for the LCM.
1903 This is a subroutine of see_analyze_ref_local_prop called
1904 via htab_traverse.
1906 SLOT contains the current def extension instruction.
1907 B is the see_ref_s structure pointer. */
1909 static int
1910 see_analyze_unmerged_def_local_prop (void **slot, void *b)
1912 rtx def_se = *slot;
1913 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b;
1914 rtx ref = curr_ref_s->insn;
1915 struct see_pre_extension_expr *extension_expr;
1916 int indx;
1917 int bb_num = BLOCK_NUM (ref);
1918 htab_t curr_bb_hash;
1919 struct see_register_properties *curr_prop, **slot_prop;
1920 struct see_register_properties temp_prop;
1921 rtx dest_extension_reg = see_get_extension_reg (def_se, 1);
1923 extension_expr = see_seek_pre_extension_expr (def_se, DEF_EXTENSION);
1924 /* The extension_expr must be found. */
1925 gcc_assert (extension_expr);
1927 curr_bb_hash = see_bb_hash_ar[bb_num];
1928 gcc_assert (curr_bb_hash);
1929 temp_prop.regno = REGNO (dest_extension_reg);
1930 slot_prop =
1931 (struct see_register_properties **) htab_find_slot (curr_bb_hash,
1932 &temp_prop, INSERT);
1933 curr_prop = *slot_prop;
1934 gcc_assert (curr_prop);
1936 indx = extension_expr->bitmap_index;
1938 /* Reset the transparency bit. */
1939 RESET_BIT (transp[bb_num], indx);
1940 /* Set the killed bit. */
1941 SET_BIT (ae_kill[bb_num], indx);
1943 return 1;
1947 /* Analyze the properties of a use extension for the LCM and record anic and
1948 avail occurrences.
1950 This is a subroutine of see_analyze_ref_local_prop called
1951 via htab_traverse.
1953 SLOT contains the current use extension instruction.
1954 B is the see_ref_s structure pointer. */
1956 static int
1957 see_analyze_use_local_prop (void **slot, void *b)
1959 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b;
1960 rtx use_se = *slot;
1961 rtx ref = curr_ref_s->insn;
1962 rtx dest_extension_reg = see_get_extension_reg (use_se, 1);
1963 struct see_pre_extension_expr *extension_expr;
1964 struct see_register_properties *curr_prop, **slot_prop;
1965 struct see_register_properties temp_prop;
1966 struct see_occr *curr_occr = NULL;
1967 struct see_occr *tmp_occr = NULL;
1968 htab_t curr_bb_hash;
1969 int indx;
1970 int bb_num = BLOCK_NUM (ref);
1972 extension_expr = see_seek_pre_extension_expr (use_se, USE_EXTENSION);
1973 /* The extension_expr must be found. */
1974 gcc_assert (extension_expr);
1976 curr_bb_hash = see_bb_hash_ar[bb_num];
1977 gcc_assert (curr_bb_hash);
1978 temp_prop.regno = REGNO (dest_extension_reg);
1979 slot_prop =
1980 (struct see_register_properties **) htab_find_slot (curr_bb_hash,
1981 &temp_prop, INSERT);
1982 curr_prop = *slot_prop;
1983 gcc_assert (curr_prop);
1985 indx = extension_expr->bitmap_index;
1987 if (curr_prop->first_se_before_any_def == DF_INSN_LUID (df, ref))
1989 /* Set the anticipatable bit. */
1990 SET_BIT (antloc[bb_num], indx);
1991 /* Record the anticipatable occurrence. */
1992 curr_occr = xmalloc (sizeof (struct see_occr));
1993 curr_occr->next = NULL;
1994 curr_occr->insn = use_se;
1995 curr_occr->block_num = bb_num;
1996 tmp_occr = extension_expr->antic_occr;
1997 if (!tmp_occr)
1998 extension_expr->antic_occr = curr_occr;
1999 else
2001 while (tmp_occr->next)
2002 tmp_occr = tmp_occr->next;
2003 tmp_occr->next = curr_occr;
2005 if (curr_prop->last_def < 0)
2007 /* Set the available bit. */
2008 SET_BIT (comp[bb_num], indx);
2009 /* Record the available occurrence. */
2010 curr_occr = xmalloc (sizeof (struct see_occr));
2011 curr_occr->next = NULL;
2012 curr_occr->insn = use_se;
2013 curr_occr->block_num = bb_num;
2014 tmp_occr = extension_expr->avail_occr;
2015 if (!tmp_occr)
2016 extension_expr->avail_occr = curr_occr;
2017 else
2019 while (tmp_occr->next)
2020 tmp_occr = tmp_occr->next;
2021 tmp_occr->next = curr_occr;
2024 /* Note: there is no need to reset the killed bit since it must be zero at
2025 this point. */
2027 else if (curr_prop->first_se_after_last_def == DF_INSN_LUID (df, ref))
2029 /* Set the available bit. */
2030 SET_BIT (comp[bb_num], indx);
2031 /* Reset the killed bit. */
2032 RESET_BIT (ae_kill[bb_num], indx);
2033 /* Record the available occurrence. */
2034 curr_occr = xmalloc (sizeof (struct see_occr));
2035 curr_occr->next = NULL;
2036 curr_occr->insn = use_se;
2037 curr_occr->block_num = bb_num;
2038 tmp_occr = extension_expr->avail_occr;
2039 if (!tmp_occr)
2040 extension_expr->avail_occr = curr_occr;
2041 else
2043 while (tmp_occr->next)
2044 tmp_occr = tmp_occr->next;
2045 tmp_occr->next = curr_occr;
2048 return 1;
2052 /* Here we traverse over all the merged and unmerged extensions of the reference
2053 and analyze their properties for the LCM.
2055 This is a subroutine of see_execute_LCM called via splay_tree_foreach.
2057 STN is the current node in the see_bb_splay_ar[i] splay tree. It holds a
2058 see_ref_s structure. */
2060 static int
2061 see_analyze_ref_local_prop (splay_tree_node stn,
2062 void *data ATTRIBUTE_UNUSED)
2064 htab_t use_se_hash = ((struct see_ref_s *) (stn->value))->use_se_hash;
2065 htab_t unmerged_def_se_hash =
2066 ((struct see_ref_s *) (stn->value))->unmerged_def_se_hash;
2067 htab_t merged_def_se_hash =
2068 ((struct see_ref_s *) (stn->value))->merged_def_se_hash;
2070 /* Analyze use extensions that were not merged with the reference. */
2071 if (use_se_hash)
2072 htab_traverse_noresize (use_se_hash, see_analyze_use_local_prop,
2073 (PTR) (stn->value));
2075 /* Analyze def extensions that were not merged with the reference. */
2076 if (unmerged_def_se_hash)
2077 htab_traverse (unmerged_def_se_hash, see_analyze_unmerged_def_local_prop,
2078 (PTR) (stn->value));
2080 /* Analyze def extensions that were merged with the reference. */
2081 if (merged_def_se_hash)
2082 htab_traverse (merged_def_se_hash, see_analyze_merged_def_local_prop,
2083 (PTR) (stn->value));
2085 /* Continue to the next definition. */
2086 return 0;
2090 /* Phase 3 top level function.
2091 In this phase, we set the input bit vectors of the LCM according to data
2092 gathered in phase 2.
2093 Then we run the edge based LCM. */
2095 static void
2096 see_execute_LCM (void)
2098 size_t pre_extension_num = htab_elements (see_pre_extension_hash);
2099 int i = 0;
2101 if (dump_file)
2102 fprintf (dump_file,
2103 "* Phase 3: Eliminate globally redundant extensions. *\n");
2105 /* Initialize the global sbitmap vectors. */
2106 transp = sbitmap_vector_alloc (last_bb, pre_extension_num);
2107 comp = sbitmap_vector_alloc (last_bb, pre_extension_num);
2108 antloc = sbitmap_vector_alloc (last_bb, pre_extension_num);
2109 ae_kill = sbitmap_vector_alloc (last_bb, pre_extension_num);
2110 sbitmap_vector_ones (transp, last_bb);
2111 sbitmap_vector_zero (comp, last_bb);
2112 sbitmap_vector_zero (antloc, last_bb);
2113 sbitmap_vector_zero (ae_kill, last_bb);
2115 /* Traverse over all the splay trees of the basic blocks. */
2116 for (i = 0; i < last_bb; i++)
2118 if (see_bb_splay_ar[i])
2120 /* Traverse over all the references in the basic block in forward
2121 order. */
2122 splay_tree_foreach (see_bb_splay_ar[i],
2123 see_analyze_ref_local_prop, NULL);
2127 /* Add fake exit edges before running the lcm. */
2128 add_noreturn_fake_exit_edges ();
2130 /* Run the LCM. */
2131 edge_list = pre_edge_lcm (pre_extension_num, transp, comp, antloc,
2132 ae_kill, &pre_insert_map, &pre_delete_map);
2134 /* Remove the fake edges. */
2135 remove_fake_exit_edges ();
2139 /* Phase 2 implementation: Merge and eliminate locally redundant extensions. */
2141 /* In this function we set the register properties for the register that is
2142 defined and extended in the reference.
2143 The properties are defined in see_register_properties structure which is
2144 allocated per basic block and per register.
2145 Later the extension is inserted into the see_pre_extension_hash for the next
2146 phase of the optimization.
2148 This is a subroutine of see_handle_extensions_for_one_ref called
2149 via htab_traverse.
2151 SLOT contains the current def extension instruction.
2152 B is the see_ref_s structure pointer. */
2154 static int
2155 see_set_prop_merged_def (void **slot, void *b)
2157 rtx def_se = *slot;
2158 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b;
2159 rtx insn = curr_ref_s->insn;
2160 rtx dest_extension_reg = see_get_extension_reg (def_se, 1);
2161 htab_t curr_bb_hash;
2162 struct see_register_properties *curr_prop = NULL;
2163 struct see_register_properties **slot_prop;
2164 struct see_register_properties temp_prop;
2165 int ref_luid = DF_INSN_LUID (df, insn);
2167 curr_bb_hash = see_bb_hash_ar[BLOCK_NUM (curr_ref_s->insn)];
2168 if (!curr_bb_hash)
2170 /* The hash doesn't exist yet. Create it. */
2171 curr_bb_hash = htab_create (10,
2172 hash_descriptor_properties,
2173 eq_descriptor_properties,
2174 hash_del_properties);
2175 see_bb_hash_ar[BLOCK_NUM (curr_ref_s->insn)] = curr_bb_hash;
2178 /* Find the right register properties in the right basic block. */
2179 temp_prop.regno = REGNO (dest_extension_reg);
2180 slot_prop =
2181 (struct see_register_properties **) htab_find_slot (curr_bb_hash,
2182 &temp_prop, INSERT);
2184 if (slot_prop && *slot_prop != NULL)
2186 /* Property already exists. */
2187 curr_prop = *slot_prop;
2188 gcc_assert (curr_prop->regno == REGNO (dest_extension_reg));
2190 curr_prop->last_def = ref_luid;
2191 curr_prop->first_se_after_last_def = ref_luid;
2193 else
2195 /* Property doesn't exist yet. */
2196 curr_prop = xmalloc (sizeof (struct see_register_properties));
2197 curr_prop->regno = REGNO (dest_extension_reg);
2198 curr_prop->last_def = ref_luid;
2199 curr_prop->first_se_before_any_def = -1;
2200 curr_prop->first_se_after_last_def = ref_luid;
2201 *slot_prop = curr_prop;
2204 /* Insert the def_se into see_pre_extension_hash if it isn't already
2205 there. */
2206 see_seek_pre_extension_expr (def_se, DEF_EXTENSION);
2208 return 1;
2212 /* In this function we set the register properties for the register that is
2213 defined but not extended in the reference.
2214 The properties are defined in see_register_properties structure which is
2215 allocated per basic block and per register.
2216 Later the extension is inserted into the see_pre_extension_hash for the next
2217 phase of the optimization.
2219 This is a subroutine of see_handle_extensions_for_one_ref called
2220 via htab_traverse.
2222 SLOT contains the current def extension instruction.
2223 B is the see_ref_s structure pointer. */
2225 static int
2226 see_set_prop_unmerged_def (void **slot, void *b)
2228 rtx def_se = *slot;
2229 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b;
2230 rtx insn = curr_ref_s->insn;
2231 rtx dest_extension_reg = see_get_extension_reg (def_se, 1);
2232 htab_t curr_bb_hash;
2233 struct see_register_properties *curr_prop = NULL;
2234 struct see_register_properties **slot_prop;
2235 struct see_register_properties temp_prop;
2236 int ref_luid = DF_INSN_LUID (df, insn);
2238 curr_bb_hash = see_bb_hash_ar[BLOCK_NUM (curr_ref_s->insn)];
2239 if (!curr_bb_hash)
2241 /* The hash doesn't exist yet. Create it. */
2242 curr_bb_hash = htab_create (10,
2243 hash_descriptor_properties,
2244 eq_descriptor_properties,
2245 hash_del_properties);
2246 see_bb_hash_ar[BLOCK_NUM (curr_ref_s->insn)] = curr_bb_hash;
2249 /* Find the right register properties in the right basic block. */
2250 temp_prop.regno = REGNO (dest_extension_reg);
2251 slot_prop =
2252 (struct see_register_properties **) htab_find_slot (curr_bb_hash,
2253 &temp_prop, INSERT);
2255 if (slot_prop && *slot_prop != NULL)
2257 /* Property already exists. */
2258 curr_prop = *slot_prop;
2259 gcc_assert (curr_prop->regno == REGNO (dest_extension_reg));
2261 curr_prop->last_def = ref_luid;
2262 curr_prop->first_se_after_last_def = -1;
2264 else
2266 /* Property doesn't exist yet. */
2267 curr_prop = xmalloc (sizeof (struct see_register_properties));
2268 curr_prop->regno = REGNO (dest_extension_reg);
2269 curr_prop->last_def = ref_luid;
2270 curr_prop->first_se_before_any_def = -1;
2271 curr_prop->first_se_after_last_def = -1;
2272 *slot_prop = curr_prop;
2275 /* Insert the def_se into see_pre_extension_hash if it isn't already
2276 there. */
2277 see_seek_pre_extension_expr (def_se, DEF_EXTENSION);
2279 return 1;
2283 /* In this function we set the register properties for the register that is used
2284 in the reference.
2285 The properties are defined in see_register_properties structure which is
2286 allocated per basic block and per register.
2287 When a redundant use extension is found it is removed from the hash of the
2288 reference.
2289 If the extension is non redundant it is inserted into the
2290 see_pre_extension_hash for the next phase of the optimization.
2292 This is a subroutine of see_handle_extensions_for_one_ref called
2293 via htab_traverse.
2295 SLOT contains the current use extension instruction.
2296 B is the see_ref_s structure pointer. */
2298 static int
2299 see_set_prop_unmerged_use (void **slot, void *b)
2301 rtx use_se = *slot;
2302 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b;
2303 rtx insn = curr_ref_s->insn;
2304 rtx dest_extension_reg = see_get_extension_reg (use_se, 1);
2305 htab_t curr_bb_hash;
2306 struct see_register_properties *curr_prop = NULL;
2307 struct see_register_properties **slot_prop;
2308 struct see_register_properties temp_prop;
2309 bool locally_redundant = false;
2310 int ref_luid = DF_INSN_LUID (df, insn);
2312 curr_bb_hash = see_bb_hash_ar[BLOCK_NUM (curr_ref_s->insn)];
2313 if (!curr_bb_hash)
2315 /* The hash doesn't exist yet. Create it. */
2316 curr_bb_hash = htab_create (10,
2317 hash_descriptor_properties,
2318 eq_descriptor_properties,
2319 hash_del_properties);
2320 see_bb_hash_ar[BLOCK_NUM (curr_ref_s->insn)] = curr_bb_hash;
2323 /* Find the right register properties in the right basic block. */
2324 temp_prop.regno = REGNO (dest_extension_reg);
2325 slot_prop =
2326 (struct see_register_properties **) htab_find_slot (curr_bb_hash,
2327 &temp_prop, INSERT);
2329 if (slot_prop && *slot_prop != NULL)
2331 /* Property already exists. */
2332 curr_prop = *slot_prop;
2333 gcc_assert (curr_prop->regno == REGNO (dest_extension_reg));
2336 if (curr_prop->last_def < 0 && curr_prop->first_se_before_any_def < 0)
2337 curr_prop->first_se_before_any_def = ref_luid;
2338 else if (curr_prop->last_def < 0
2339 && curr_prop->first_se_before_any_def >= 0)
2341 /* In this case the extension is locally redundant. */
2342 htab_clear_slot (curr_ref_s->use_se_hash, (PTR *)slot);
2343 locally_redundant = true;
2345 else if (curr_prop->last_def >= 0
2346 && curr_prop->first_se_after_last_def < 0)
2347 curr_prop->first_se_after_last_def = ref_luid;
2348 else if (curr_prop->last_def >= 0
2349 && curr_prop->first_se_after_last_def >= 0)
2351 /* In this case the extension is locally redundant. */
2352 htab_clear_slot (curr_ref_s->use_se_hash, (PTR *)slot);
2353 locally_redundant = true;
2355 else
2356 gcc_unreachable ();
2358 else
2360 /* Property doesn't exist yet. Create a new one. */
2361 curr_prop = xmalloc (sizeof (struct see_register_properties));
2362 curr_prop->regno = REGNO (dest_extension_reg);
2363 curr_prop->last_def = -1;
2364 curr_prop->first_se_before_any_def = ref_luid;
2365 curr_prop->first_se_after_last_def = -1;
2366 *slot_prop = curr_prop;
2369 /* Insert the use_se into see_pre_extension_hash if it isn't already
2370 there. */
2371 if (!locally_redundant)
2372 see_seek_pre_extension_expr (use_se, USE_EXTENSION);
2373 if (locally_redundant && dump_file)
2375 fprintf (dump_file, "Locally redundant extension:\n");
2376 print_rtl_single (dump_file, use_se);
2378 return 1;
2382 /* Print an extension instruction.
2384 This is a subroutine of see_handle_extensions_for_one_ref called
2385 via htab_traverse.
2386 SLOT contains the extension instruction. */
2388 static int
2389 see_print_one_extension (void **slot, void *b ATTRIBUTE_UNUSED)
2391 rtx def_se = *slot;
2393 gcc_assert (def_se && INSN_P (def_se));
2394 print_rtl_single (dump_file, def_se);
2396 return 1;
2399 /* Function called by note_uses to replace used subexpressions.
2401 X is a pointer to the subexpression and DATA is a pointer to a
2402 see_replace_data structure that contains the data for the replacement. */
2404 static void
2405 see_replace_src (rtx *x, void *data)
2407 struct see_replace_data *d
2408 = (struct see_replace_data *) data;
2410 *x = replace_rtx (*x, d->from, d->to);
2414 /* At this point the pattern is expected to be:
2416 ref: set (dest_reg) (rhs)
2417 def_se: set (dest_extension_reg) (sign/zero_extend (source_extension_reg))
2419 The merge of these two instructions didn't succeed.
2421 We try to generate the pattern:
2422 set (subreg (dest_extension_reg)) (rhs)
2424 We do this in 4 steps:
2425 a. Replace every use of dest_reg with a new pseudo register.
2426 b. Replace every instance of dest_reg with the subreg.
2427 c. Replace every use of the new pseudo register back to dest_reg.
2428 d. Try to recognize and simplify.
2430 If the manipulation failed, leave the original ref but try to generate and
2431 recognize a simple move instruction:
2432 set (subreg (dest_extension_reg)) (dest_reg)
2433 This move instruction will be emitted right after the ref to the instruction
2434 stream and assure the correctness of the code after def_se will be removed.
2436 CURR_REF_S is the current reference.
2437 DEF_SE is the extension that couldn't be merged. */
2439 static void
2440 see_def_extension_not_merged (struct see_ref_s *curr_ref_s, rtx def_se)
2442 struct see_replace_data d;
2443 /* If the original insn was already merged with an extension before,
2444 take the merged one. */
2445 rtx ref = (curr_ref_s->merged_insn) ? curr_ref_s->merged_insn :
2446 curr_ref_s->insn;
2447 rtx merged_ref_next = (curr_ref_s->merged_insn) ?
2448 NEXT_INSN (curr_ref_s->merged_insn): NULL_RTX;
2449 rtx ref_copy = copy_rtx (ref);
2450 rtx source_extension_reg = see_get_extension_reg (def_se, 0);
2451 rtx dest_extension_reg = see_get_extension_reg (def_se, 1);
2452 rtx move_insn = NULL;
2453 rtx set, rhs;
2454 rtx dest_reg, dest_real_reg;
2455 rtx new_pseudo_reg, subreg;
2456 enum machine_mode source_extension_mode = GET_MODE (source_extension_reg);
2457 enum machine_mode dest_mode;
2459 set = single_set (def_se);
2460 gcc_assert (set);
2461 rhs = SET_SRC (set);
2462 gcc_assert (GET_CODE (rhs) == SIGN_EXTEND
2463 || GET_CODE (rhs) == ZERO_EXTEND);
2464 dest_reg = XEXP (rhs, 0);
2465 gcc_assert (REG_P (dest_reg)
2466 || (GET_CODE (dest_reg) == SUBREG
2467 && REG_P (SUBREG_REG (dest_reg))));
2468 dest_real_reg = REG_P (dest_reg) ? dest_reg : SUBREG_REG (dest_reg);
2469 dest_mode = GET_MODE (dest_reg);
2471 subreg = gen_lowpart_SUBREG (dest_mode, dest_extension_reg);
2472 new_pseudo_reg = gen_reg_rtx (source_extension_mode);
2474 /* Step a: Replace every use of dest_real_reg with a new pseudo register. */
2475 d.from = dest_real_reg;
2476 d.to = new_pseudo_reg;
2477 note_uses (&PATTERN (ref_copy), see_replace_src, &d);
2478 /* Step b: Replace every instance of dest_reg with the subreg. */
2479 ref_copy = replace_rtx (ref_copy, dest_reg, subreg);
2481 /* Step c: Replace every use of the new pseudo register back to
2482 dest_real_reg. */
2483 d.from = new_pseudo_reg;
2484 d.to = dest_real_reg;
2485 note_uses (&PATTERN (ref_copy), see_replace_src, &d);
2487 if (rtx_equal_p (PATTERN (ref), PATTERN (ref_copy))
2488 || insn_invalid_p (ref_copy))
2490 /* The manipulation failed. */
2492 /* Create a new copy. */
2493 ref_copy = copy_rtx (ref);
2495 /* Create a simple move instruction that will replace the def_se. */
2496 start_sequence ();
2497 emit_move_insn (subreg, dest_reg);
2498 move_insn = get_insns ();
2499 end_sequence ();
2501 /* Link the manipulated instruction to the newly created move instruction
2502 and to the former created move instructions. */
2503 PREV_INSN (ref_copy) = NULL_RTX;
2504 NEXT_INSN (ref_copy) = move_insn;
2505 PREV_INSN (move_insn) = ref_copy;
2506 NEXT_INSN (move_insn) = merged_ref_next;
2507 if (merged_ref_next != NULL_RTX)
2508 PREV_INSN (merged_ref_next) = move_insn;
2509 curr_ref_s->merged_insn = ref_copy;
2511 if (dump_file)
2513 fprintf (dump_file, "Following def merge failure a move ");
2514 fprintf (dump_file, "insn was added after the ref.\n");
2515 fprintf (dump_file, "Original ref:\n");
2516 print_rtl_single (dump_file, ref);
2517 fprintf (dump_file, "Move insn that was added:\n");
2518 print_rtl_single (dump_file, move_insn);
2520 return;
2523 /* The manipulation succeeded. Store the new manipulated reference. */
2525 /* Try to simplify the new manipulated insn. */
2526 validate_simplify_insn (ref_copy);
2528 /* Create a simple move instruction to assure the correctness of the code. */
2529 start_sequence ();
2530 emit_move_insn (dest_reg, subreg);
2531 move_insn = get_insns ();
2532 end_sequence ();
2534 /* Link the manipulated instruction to the newly created move instruction and
2535 to the former created move instructions. */
2536 PREV_INSN (ref_copy) = NULL_RTX;
2537 NEXT_INSN (ref_copy) = move_insn;
2538 PREV_INSN (move_insn) = ref_copy;
2539 NEXT_INSN (move_insn) = merged_ref_next;
2540 if (merged_ref_next != NULL_RTX)
2541 PREV_INSN (merged_ref_next) = move_insn;
2542 curr_ref_s->merged_insn = ref_copy;
2544 if (dump_file)
2546 fprintf (dump_file, "Following merge failure the ref was transformed!\n");
2547 fprintf (dump_file, "Original ref:\n");
2548 print_rtl_single (dump_file, ref);
2549 fprintf (dump_file, "Transformed ref:\n");
2550 print_rtl_single (dump_file, ref_copy);
2551 fprintf (dump_file, "Move insn that was added:\n");
2552 print_rtl_single (dump_file, move_insn);
2557 /* Merge the reference instruction (ref) with the current use extension.
2559 use_se extends a NARROWmode register to a WIDEmode register.
2560 ref uses the WIDEmode register.
2562 The pattern we try to merge is this:
2563 use_se: set (dest_extension_reg) (sign/zero_extend (source_extension_reg))
2564 ref: use (dest_extension_reg)
2566 where dest_extension_reg and source_extension_reg can be subregs.
2568 The merge is done by generating, simplifying and recognizing the pattern:
2569 use (sign/zero_extend (source_extension_reg))
2571 If ref is too simple (according to see_want_to_be_merged_with_extension ())
2572 we don't try to merge it with use_se and we continue as if the merge failed.
2574 This is a subroutine of see_handle_extensions_for_one_ref called
2575 via htab_traverse.
2576 SLOT contains the current use extension instruction.
2577 B is the see_ref_s structure pointer. */
2579 static int
2580 see_merge_one_use_extension (void **slot, void *b)
2582 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b;
2583 rtx use_se = *slot;
2584 rtx ref = (curr_ref_s->merged_insn) ? curr_ref_s->merged_insn :
2585 curr_ref_s->insn;
2586 rtx merged_ref_next = (curr_ref_s->merged_insn) ?
2587 NEXT_INSN (curr_ref_s->merged_insn): NULL_RTX;
2588 rtx ref_copy = copy_rtx (ref);
2589 rtx extension_set = single_set (use_se);
2590 rtx extension_rhs = NULL;
2591 rtx dest_extension_reg = see_get_extension_reg (use_se, 1);
2592 rtx note = NULL;
2593 rtx simplified_note = NULL;
2595 gcc_assert (use_se && curr_ref_s && extension_set);
2597 extension_rhs = SET_SRC (extension_set);
2599 /* In REG_EQUIV and REG_EQUAL notes that mention the register we need to
2600 replace the uses of the dest_extension_reg with the rhs of the extension
2601 instruction. This is necessary since there might not be an extension in
2602 the path between the definition and the note when this optimization is
2603 over. */
2604 note = find_reg_equal_equiv_note (ref_copy);
2605 if (note)
2607 simplified_note = simplify_replace_rtx (XEXP (note, 0),
2608 dest_extension_reg,
2609 extension_rhs);
2610 if (rtx_equal_p (XEXP (note, 0), simplified_note))
2611 /* Replacement failed. Remove the note. */
2612 remove_note (ref_copy, note);
2613 else
2614 XEXP (note, 0) = simplified_note;
2617 if (!see_want_to_be_merged_with_extension (ref, use_se, USE_EXTENSION))
2619 /* The use in the reference is too simple. Don't try to merge. */
2620 if (dump_file)
2622 fprintf (dump_file, "Use merge skipped!\n");
2623 fprintf (dump_file, "Original instructions:\n");
2624 print_rtl_single (dump_file, use_se);
2625 print_rtl_single (dump_file, ref);
2627 /* Don't remove the current use_se from the use_se_hash and continue to
2628 the next extension. */
2629 return 1;
2632 validate_replace_src_group (dest_extension_reg, extension_rhs, ref_copy);
2634 if (!num_changes_pending ())
2635 /* In this case this is not a real use (the only use is/was in the notes
2636 list). Remove the use extension from the hash. This will prevent it
2637 from been emitted in the first place. */
2639 if (dump_file)
2641 fprintf (dump_file, "Use extension not necessary before:\n");
2642 print_rtl_single (dump_file, ref);
2644 htab_clear_slot (curr_ref_s->use_se_hash, (PTR *)slot);
2645 PREV_INSN (ref_copy) = NULL_RTX;
2646 NEXT_INSN (ref_copy) = merged_ref_next;
2647 if (merged_ref_next != NULL_RTX)
2648 PREV_INSN (merged_ref_next) = ref_copy;
2649 curr_ref_s->merged_insn = ref_copy;
2650 return 1;
2653 if (!apply_change_group ())
2655 /* The merge failed. */
2656 if (dump_file)
2658 fprintf (dump_file, "Use merge failed!\n");
2659 fprintf (dump_file, "Original instructions:\n");
2660 print_rtl_single (dump_file, use_se);
2661 print_rtl_single (dump_file, ref);
2663 /* Don't remove the current use_se from the use_se_hash and continue to
2664 the next extension. */
2665 return 1;
2668 /* The merge succeeded! */
2670 /* Try to simplify the new merged insn. */
2671 validate_simplify_insn (ref_copy);
2673 PREV_INSN (ref_copy) = NULL_RTX;
2674 NEXT_INSN (ref_copy) = merged_ref_next;
2675 if (merged_ref_next != NULL_RTX)
2676 PREV_INSN (merged_ref_next) = ref_copy;
2677 curr_ref_s->merged_insn = ref_copy;
2679 if (dump_file)
2681 fprintf (dump_file, "Use merge succeeded!\n");
2682 fprintf (dump_file, "Original instructions:\n");
2683 print_rtl_single (dump_file, use_se);
2684 print_rtl_single (dump_file, ref);
2685 fprintf (dump_file, "Merged instruction:\n");
2686 print_rtl_single (dump_file, ref_copy);
2689 /* Remove the current use_se from the use_se_hash. This will prevent it from
2690 been emitted in the first place. */
2691 htab_clear_slot (curr_ref_s->use_se_hash, (PTR *)slot);
2692 return 1;
2696 /* Merge the reference instruction (ref) with the extension that follows it
2697 in the same basic block (def_se).
2698 ref sets a NARROWmode register and def_se extends it to WIDEmode register.
2700 The pattern we try to merge is this:
2701 ref: set (dest_reg) (rhs)
2702 def_se: set (dest_extension_reg) (sign/zero_extend (source_extension_reg))
2704 where dest_reg and source_extension_reg can both be subregs (together)
2705 and (REGNO (dest_reg) == REGNO (source_extension_reg))
2707 The merge is done by generating, simplifying and recognizing the pattern:
2708 set (dest_extension_reg) (sign/zero_extend (rhs))
2709 If ref is a parallel instruction we just replace the relevant set in it.
2711 If ref is too simple (according to see_want_to_be_merged_with_extension ())
2712 we don't try to merge it with def_se and we continue as if the merge failed.
2714 This is a subroutine of see_handle_extensions_for_one_ref called
2715 via htab_traverse.
2717 SLOT contains the current def extension instruction.
2718 B is the see_ref_s structure pointer. */
2720 static int
2721 see_merge_one_def_extension (void **slot, void *b)
2723 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b;
2724 rtx def_se = *slot;
2725 /* If the original insn was already merged with an extension before,
2726 take the merged one. */
2727 rtx ref = (curr_ref_s->merged_insn) ? curr_ref_s->merged_insn :
2728 curr_ref_s->insn;
2729 rtx merged_ref_next = (curr_ref_s->merged_insn) ?
2730 NEXT_INSN (curr_ref_s->merged_insn): NULL_RTX;
2731 rtx ref_copy = copy_rtx (ref);
2732 rtx new_set = NULL;
2733 rtx source_extension_reg = see_get_extension_reg (def_se, 0);
2734 rtx dest_extension_reg = see_get_extension_reg (def_se, 1);
2735 rtx move_insn, *rtx_slot, subreg;
2736 rtx temp_extension = NULL;
2737 rtx simplified_temp_extension = NULL;
2738 rtx *pat;
2739 enum rtx_code code;
2740 enum rtx_code extension_code;
2741 enum machine_mode source_extension_mode;
2742 enum machine_mode source_mode;
2743 enum machine_mode dest_extension_mode;
2744 bool merge_success = false;
2745 int i;
2747 gcc_assert (def_se
2748 && INSN_P (def_se)
2749 && curr_ref_s
2750 && ref
2751 && INSN_P (ref));
2753 if (!see_want_to_be_merged_with_extension (ref, def_se, DEF_EXTENSION))
2755 /* The definition in the reference is too simple. Don't try to merge. */
2756 if (dump_file)
2758 fprintf (dump_file, "Def merge skipped!\n");
2759 fprintf (dump_file, "Original instructions:\n");
2760 print_rtl_single (dump_file, ref);
2761 print_rtl_single (dump_file, def_se);
2764 see_def_extension_not_merged (curr_ref_s, def_se);
2765 /* Continue to the next extension. */
2766 return 1;
2769 extension_code = see_get_extension_data (def_se, &source_mode);
2771 /* Try to merge and simplify the extension. */
2772 source_extension_mode = GET_MODE (source_extension_reg);
2773 dest_extension_mode = GET_MODE (dest_extension_reg);
2775 pat = &PATTERN (ref_copy);
2776 code = GET_CODE (*pat);
2778 if (code == PARALLEL)
2780 bool need_to_apply_change = false;
2782 for (i = 0; i < XVECLEN (*pat, 0); i++)
2784 rtx *sub = &XVECEXP (*pat, 0, i);
2786 if (GET_CODE (*sub) == SET
2787 && GET_MODE (SET_SRC (*sub)) != VOIDmode
2788 && GET_MODE (SET_DEST (*sub)) == source_mode
2789 && ((REG_P (SET_DEST (*sub))
2790 && REGNO (SET_DEST (*sub)) == REGNO (source_extension_reg))
2791 || (GET_CODE (SET_DEST (*sub)) == SUBREG
2792 && REG_P (SUBREG_REG (SET_DEST (*sub)))
2793 && (REGNO (SUBREG_REG (SET_DEST (*sub))) ==
2794 REGNO (source_extension_reg)))))
2796 rtx orig_src = SET_SRC (*sub);
2798 if (extension_code == SIGN_EXTEND)
2799 temp_extension = gen_rtx_SIGN_EXTEND (dest_extension_mode,
2800 orig_src);
2801 else
2802 temp_extension = gen_rtx_ZERO_EXTEND (dest_extension_mode,
2803 orig_src);
2804 simplified_temp_extension = simplify_rtx (temp_extension);
2805 temp_extension =
2806 (simplified_temp_extension) ? simplified_temp_extension :
2807 temp_extension;
2808 new_set = gen_rtx_SET (VOIDmode, dest_extension_reg,
2809 temp_extension);
2810 validate_change (ref_copy, sub, new_set, 1);
2811 need_to_apply_change = true;
2814 if (need_to_apply_change)
2815 if (apply_change_group ())
2816 merge_success = true;
2818 else if (code == SET
2819 && GET_MODE (SET_SRC (*pat)) != VOIDmode
2820 && GET_MODE (SET_DEST (*pat)) == source_mode
2821 && ((REG_P (SET_DEST (*pat))
2822 && REGNO (SET_DEST (*pat)) == REGNO (source_extension_reg))
2823 || (GET_CODE (SET_DEST (*pat)) == SUBREG
2824 && REG_P (SUBREG_REG (SET_DEST (*pat)))
2825 && (REGNO (SUBREG_REG (SET_DEST (*pat))) ==
2826 REGNO (source_extension_reg)))))
2828 rtx orig_src = SET_SRC (*pat);
2830 if (extension_code == SIGN_EXTEND)
2831 temp_extension = gen_rtx_SIGN_EXTEND (dest_extension_mode, orig_src);
2832 else
2833 temp_extension = gen_rtx_ZERO_EXTEND (dest_extension_mode, orig_src);
2834 simplified_temp_extension = simplify_rtx (temp_extension);
2835 temp_extension = (simplified_temp_extension) ? simplified_temp_extension :
2836 temp_extension;
2837 new_set = gen_rtx_SET (VOIDmode, dest_extension_reg, temp_extension);
2838 if (validate_change (ref_copy, pat, new_set, 0))
2839 merge_success = true;
2841 if (!merge_success)
2843 /* The merge failed. */
2844 if (dump_file)
2846 fprintf (dump_file, "Def merge failed!\n");
2847 fprintf (dump_file, "Original instructions:\n");
2848 print_rtl_single (dump_file, ref);
2849 print_rtl_single (dump_file, def_se);
2852 see_def_extension_not_merged (curr_ref_s, def_se);
2853 /* Continue to the next extension. */
2854 return 1;
2857 /* The merge succeeded! */
2859 /* Create a simple move instruction to assure the correctness of the code. */
2860 subreg = gen_lowpart_SUBREG (source_extension_mode, dest_extension_reg);
2861 start_sequence ();
2862 emit_move_insn (source_extension_reg, subreg);
2863 move_insn = get_insns ();
2864 end_sequence ();
2866 /* Link the merged instruction to the newly created move instruction and
2867 to the former created move instructions. */
2868 PREV_INSN (ref_copy) = NULL_RTX;
2869 NEXT_INSN (ref_copy) = move_insn;
2870 PREV_INSN (move_insn) = ref_copy;
2871 NEXT_INSN (move_insn) = merged_ref_next;
2872 if (merged_ref_next != NULL_RTX)
2873 PREV_INSN (merged_ref_next) = move_insn;
2874 curr_ref_s->merged_insn = ref_copy;
2876 if (dump_file)
2878 fprintf (dump_file, "Def merge succeeded!\n");
2879 fprintf (dump_file, "Original instructions:\n");
2880 print_rtl_single (dump_file, ref);
2881 print_rtl_single (dump_file, def_se);
2882 fprintf (dump_file, "Merged instruction:\n");
2883 print_rtl_single (dump_file, ref_copy);
2884 fprintf (dump_file, "Move instruction that was added:\n");
2885 print_rtl_single (dump_file, move_insn);
2888 /* Remove the current def_se from the unmerged_def_se_hash and insert it to
2889 the merged_def_se_hash. */
2890 htab_clear_slot (curr_ref_s->unmerged_def_se_hash, (PTR *)slot);
2891 if (!curr_ref_s->merged_def_se_hash)
2892 curr_ref_s->merged_def_se_hash = htab_create (10,
2893 hash_descriptor_extension,
2894 eq_descriptor_extension,
2895 NULL);
2896 rtx_slot = (rtx *) htab_find_slot (curr_ref_s->merged_def_se_hash,
2897 dest_extension_reg, INSERT);
2898 gcc_assert (*rtx_slot == NULL);
2899 *rtx_slot = def_se;
2901 return 1;
2905 /* Try to eliminate extensions in this order:
2906 a. Try to merge only the def extensions, one by one.
2907 b. Try to merge only the use extensions, one by one.
2909 TODO:
2910 Try to merge any couple of use extensions simultaneously.
2911 Try to merge any def extension with one or two uses extensions
2912 simultaneously.
2914 After all the merges are done, update the register properties for the basic
2915 block and eliminate locally redundant use extensions.
2917 This is a subroutine of see_merge_and_eliminate_extensions called
2918 via splay_tree_foreach.
2919 STN is the current node in the see_bb_splay_ar[i] splay tree. It holds a
2920 see_ref_s structure. */
2922 static int
2923 see_handle_extensions_for_one_ref (splay_tree_node stn,
2924 void *data ATTRIBUTE_UNUSED)
2926 htab_t use_se_hash = ((struct see_ref_s *) (stn->value))->use_se_hash;
2927 htab_t unmerged_def_se_hash =
2928 ((struct see_ref_s *) (stn->value))->unmerged_def_se_hash;
2929 htab_t merged_def_se_hash;
2930 rtx ref = ((struct see_ref_s *) (stn->value))->insn;
2932 if (dump_file)
2934 fprintf (dump_file, "Handling ref:\n");
2935 print_rtl_single (dump_file, ref);
2938 /* a. Try to eliminate only def extensions, one by one. */
2939 if (unmerged_def_se_hash)
2940 htab_traverse_noresize (unmerged_def_se_hash, see_merge_one_def_extension,
2941 (PTR) (stn->value));
2943 if (use_se_hash)
2944 /* b. Try to eliminate only use extensions, one by one. */
2945 htab_traverse_noresize (use_se_hash, see_merge_one_use_extension,
2946 (PTR) (stn->value));
2948 merged_def_se_hash = ((struct see_ref_s *) (stn->value))->merged_def_se_hash;
2950 if (dump_file)
2952 fprintf (dump_file, "The hashes of the current reference:\n");
2953 if (unmerged_def_se_hash)
2955 fprintf (dump_file, "unmerged_def_se_hash:\n");
2956 htab_traverse (unmerged_def_se_hash, see_print_one_extension, NULL);
2958 if (merged_def_se_hash)
2960 fprintf (dump_file, "merged_def_se_hash:\n");
2961 htab_traverse (merged_def_se_hash, see_print_one_extension, NULL);
2963 if (use_se_hash)
2965 fprintf (dump_file, "use_se_hash:\n");
2966 htab_traverse (use_se_hash, see_print_one_extension, NULL);
2970 /* Now that all the merges are done, update the register properties of the
2971 basic block and eliminate locally redundant extensions.
2972 It is important that we first traverse the use extensions hash and
2973 afterwards the def extensions hashes. */
2975 if (use_se_hash)
2976 htab_traverse_noresize (use_se_hash, see_set_prop_unmerged_use,
2977 (PTR) (stn->value));
2979 if (unmerged_def_se_hash)
2980 htab_traverse (unmerged_def_se_hash, see_set_prop_unmerged_def,
2981 (PTR) (stn->value));
2983 if (merged_def_se_hash)
2984 htab_traverse (merged_def_se_hash, see_set_prop_merged_def,
2985 (PTR) (stn->value));
2987 /* Continue to the next definition. */
2988 return 0;
2992 /* Phase 2 top level function.
2993 In this phase, we try to merge def extensions and use extensions with their
2994 references, and eliminate redundant extensions in the same basic block.
2995 We also gather information for the next phases. */
2997 static void
2998 see_merge_and_eliminate_extensions (void)
3000 int i = 0;
3002 if (dump_file)
3003 fprintf (dump_file,
3004 "* Phase 2: Merge and eliminate locally redundant extensions. *\n");
3006 /* Traverse over all the splay trees of the basic blocks. */
3007 for (i = 0; i < last_bb; i++)
3009 if (see_bb_splay_ar[i])
3011 if (dump_file)
3012 fprintf (dump_file, "Handling references for bb %d\n", i);
3013 /* Traverse over all the references in the basic block in forward
3014 order. */
3015 splay_tree_foreach (see_bb_splay_ar[i],
3016 see_handle_extensions_for_one_ref, NULL);
3022 /* Phase 1 implementation: Propagate extensions to uses. */
3024 /* Insert REF_INSN into the splay tree of its basic block.
3025 SE_INSN is the extension to store in the proper hash according to TYPE.
3027 Return true if everything went well.
3028 Otherwise, return false (this will cause the optimization to be aborted). */
3030 static bool
3031 see_store_reference_and_extension (rtx ref_insn, rtx se_insn,
3032 enum extension_type type)
3034 rtx *rtx_slot;
3035 int curr_bb_num;
3036 splay_tree_node stn = NULL;
3037 htab_t se_hash = NULL;
3038 struct see_ref_s *ref_s = NULL;
3040 /* Check the arguments. */
3041 gcc_assert (ref_insn && se_insn);
3042 if (!see_bb_splay_ar)
3043 return false;
3045 curr_bb_num = BLOCK_NUM (ref_insn);
3046 gcc_assert (curr_bb_num < last_bb && curr_bb_num >= 0);
3048 /* Insert the reference to the splay tree of its basic block. */
3049 if (!see_bb_splay_ar[curr_bb_num])
3050 /* The splay tree for this block doesn't exist yet, create it. */
3051 see_bb_splay_ar[curr_bb_num] = splay_tree_new (splay_tree_compare_ints,
3052 NULL, see_free_ref_s);
3053 else
3054 /* Splay tree already exists, check if the current reference is already
3055 in it. */
3057 stn = splay_tree_lookup (see_bb_splay_ar[curr_bb_num],
3058 DF_INSN_LUID (df, ref_insn));
3059 if (stn)
3060 switch (type)
3062 case EXPLICIT_DEF_EXTENSION:
3063 se_hash =
3064 ((struct see_ref_s *) (stn->value))->unmerged_def_se_hash;
3065 if (!se_hash)
3067 se_hash = htab_create (10,
3068 hash_descriptor_extension,
3069 eq_descriptor_extension,
3070 NULL);
3071 ((struct see_ref_s *) (stn->value))->unmerged_def_se_hash =
3072 se_hash;
3074 break;
3075 case IMPLICIT_DEF_EXTENSION:
3076 se_hash = ((struct see_ref_s *) (stn->value))->merged_def_se_hash;
3077 if (!se_hash)
3079 se_hash = htab_create (10,
3080 hash_descriptor_extension,
3081 eq_descriptor_extension,
3082 NULL);
3083 ((struct see_ref_s *) (stn->value))->merged_def_se_hash =
3084 se_hash;
3086 break;
3087 case USE_EXTENSION:
3088 se_hash = ((struct see_ref_s *) (stn->value))->use_se_hash;
3089 if (!se_hash)
3091 se_hash = htab_create (10,
3092 hash_descriptor_extension,
3093 eq_descriptor_extension,
3094 NULL);
3095 ((struct see_ref_s *) (stn->value))->use_se_hash = se_hash;
3097 break;
3098 default:
3099 gcc_unreachable ();
3103 /* Initialize a new see_ref_s structure and insert it to the splay
3104 tree. */
3105 if (!stn)
3107 ref_s = xmalloc (sizeof (struct see_ref_s));
3108 ref_s->luid = DF_INSN_LUID (df, ref_insn);
3109 ref_s->insn = ref_insn;
3110 ref_s->merged_insn = NULL;
3112 /* Initialize the hashes. */
3113 switch (type)
3115 case EXPLICIT_DEF_EXTENSION:
3116 ref_s->unmerged_def_se_hash = htab_create (10,
3117 hash_descriptor_extension,
3118 eq_descriptor_extension,
3119 NULL);
3120 se_hash = ref_s->unmerged_def_se_hash;
3121 ref_s->merged_def_se_hash = NULL;
3122 ref_s->use_se_hash = NULL;
3123 break;
3124 case IMPLICIT_DEF_EXTENSION:
3125 ref_s->merged_def_se_hash = htab_create (10,
3126 hash_descriptor_extension,
3127 eq_descriptor_extension,
3128 NULL);
3129 se_hash = ref_s->merged_def_se_hash;
3130 ref_s->unmerged_def_se_hash = NULL;
3131 ref_s->use_se_hash = NULL;
3132 break;
3133 case USE_EXTENSION:
3134 ref_s->use_se_hash = htab_create (10,
3135 hash_descriptor_extension,
3136 eq_descriptor_extension,
3137 NULL);
3138 se_hash = ref_s->use_se_hash;
3139 ref_s->unmerged_def_se_hash = NULL;
3140 ref_s->merged_def_se_hash = NULL;
3141 break;
3142 default:
3143 gcc_unreachable ();
3147 /* Insert the new extension instruction into the correct se_hash of the
3148 current reference. */
3149 rtx_slot = (rtx *) htab_find_slot (se_hash, se_insn, INSERT);
3150 if (*rtx_slot != NULL)
3152 gcc_assert (type == USE_EXTENSION);
3153 gcc_assert (rtx_equal_p (PATTERN (*rtx_slot), PATTERN (se_insn)));
3155 else
3156 *rtx_slot = se_insn;
3158 /* If this is a new reference, insert it into the splay_tree. */
3159 if (!stn)
3160 splay_tree_insert (see_bb_splay_ar[curr_bb_num],
3161 DF_INSN_LUID (df, ref_insn), (splay_tree_value) ref_s);
3162 return true;
3166 /* Go over all the defs, for each relevant definition (defined below) store its
3167 instruction as a reference.
3169 A definition is relevant if its root has
3170 ((entry_type == SIGN_EXTENDED_DEF) || (entry_type == ZERO_EXTENDED_DEF)) and
3171 his source_mode is not narrower then the the roots source_mode.
3173 Return the number of relevant defs or negative number if something bad had
3174 happened and the optimization should be aborted. */
3176 static int
3177 see_handle_relevant_defs (void)
3179 rtx insn = NULL;
3180 rtx se_insn = NULL;
3181 rtx reg = NULL;
3182 rtx ref_insn = NULL;
3183 struct web_entry *root_entry = NULL;
3184 unsigned int i;
3185 int num_relevant_defs = 0;
3186 enum rtx_code extension_code;
3188 for (i = 0; i < defs_num; i++)
3190 insn = DF_REF_INSN (DF_DEFS_GET (df, i));
3191 reg = DF_REF_REAL_REG (DF_DEFS_GET (df, i));
3193 if (!insn)
3194 continue;
3196 if (!INSN_P (insn))
3197 continue;
3199 root_entry = unionfind_root (&def_entry[i]);
3201 if (ENTRY_EI (root_entry)->relevancy != SIGN_EXTENDED_DEF
3202 && ENTRY_EI (root_entry)->relevancy != ZERO_EXTENDED_DEF)
3203 /* The current web is not relevant. Continue to the next def. */
3204 continue;
3206 if (root_entry->reg)
3207 /* It isn't possible to have two different register for the same
3208 web. */
3209 gcc_assert (rtx_equal_p (root_entry->reg, reg));
3210 else
3211 root_entry->reg = reg;
3213 /* The current definition is an EXTENDED_DEF or a definition that its
3214 source_mode is narrower then its web's source_mode.
3215 This means that we need to generate the implicit extension explicitly
3216 and store it in the current reference's merged_def_se_hash. */
3217 if (ENTRY_EI (&def_entry[i])->local_relevancy == EXTENDED_DEF
3218 || (ENTRY_EI (&def_entry[i])->local_source_mode <
3219 ENTRY_EI (root_entry)->source_mode))
3221 num_relevant_defs++;
3223 if (ENTRY_EI (root_entry)->relevancy == SIGN_EXTENDED_DEF)
3224 extension_code = SIGN_EXTEND;
3225 else
3226 extension_code = ZERO_EXTEND;
3228 se_insn =
3229 see_gen_normalized_extension (reg, extension_code,
3230 ENTRY_EI (root_entry)->source_mode);
3232 /* This is a dummy extension, mark it as deleted. */
3233 INSN_DELETED_P (se_insn) = 1;
3235 if (!see_store_reference_and_extension (insn, se_insn,
3236 IMPLICIT_DEF_EXTENSION))
3237 /* Something bad happened. Abort the optimization. */
3238 return -1;
3239 continue;
3242 ref_insn = PREV_INSN (insn);
3243 gcc_assert (BLOCK_NUM (ref_insn) == BLOCK_NUM (insn));
3245 num_relevant_defs++;
3247 if (!see_store_reference_and_extension (ref_insn, insn,
3248 EXPLICIT_DEF_EXTENSION))
3249 /* Something bad happened. Abort the optimization. */
3250 return -1;
3252 return num_relevant_defs;
3256 /* Go over all the uses, for each use in relevant web store its instruction as
3257 a reference and generate an extension before it.
3259 Return the number of relevant uses or negative number if something bad had
3260 happened and the optimization should be aborted. */
3262 static int
3263 see_handle_relevant_uses (void)
3265 rtx insn = NULL;
3266 rtx reg = NULL;
3267 struct web_entry *root_entry = NULL;
3268 rtx se_insn = NULL;
3269 unsigned int i;
3270 int num_relevant_uses = 0;
3271 enum rtx_code extension_code;
3273 for (i = 0; i < uses_num; i++)
3275 insn = DF_REF_INSN (DF_USES_GET (df, i));
3276 reg = DF_REF_REAL_REG (DF_USES_GET (df, i));
3278 if (!insn)
3279 continue;
3281 if (!INSN_P (insn))
3282 continue;
3284 root_entry = unionfind_root (&use_entry[i]);
3286 if (ENTRY_EI (root_entry)->relevancy != SIGN_EXTENDED_DEF
3287 && ENTRY_EI (root_entry)->relevancy != ZERO_EXTENDED_DEF)
3288 /* The current web is not relevant. Continue to the next use. */
3289 continue;
3291 if (root_entry->reg)
3292 /* It isn't possible to have two different register for the same
3293 web. */
3294 gcc_assert (rtx_equal_p (root_entry->reg, reg));
3295 else
3296 root_entry->reg = reg;
3298 /* Generate the use extension. */
3299 if (ENTRY_EI (root_entry)->relevancy == SIGN_EXTENDED_DEF)
3300 extension_code = SIGN_EXTEND;
3301 else
3302 extension_code = ZERO_EXTEND;
3304 se_insn =
3305 see_gen_normalized_extension (reg, extension_code,
3306 ENTRY_EI (root_entry)->source_mode);
3307 if (!se_insn)
3308 /* This is very bad, abort the transformation. */
3309 return -1;
3311 num_relevant_uses++;
3313 if (!see_store_reference_and_extension (insn, se_insn,
3314 USE_EXTENSION))
3315 /* Something bad happened. Abort the optimization. */
3316 return -1;
3319 return num_relevant_uses;
3323 /* Updates the relevancy of all the uses.
3324 The information of the i'th use is stored in use_entry[i].
3325 Currently all the uses are relevant for the optimization except for uses that
3326 are in LIBCALL or RETVAL instructions. */
3328 static void
3329 see_update_uses_relevancy (void)
3331 rtx insn = NULL;
3332 rtx reg = NULL;
3333 struct see_entry_extra_info *curr_entry_extra_info;
3334 enum entry_type et;
3335 unsigned int i;
3337 if (!df || !use_entry)
3338 return;
3340 for (i = 0; i < uses_num; i++)
3343 insn = DF_REF_INSN (DF_USES_GET (df, i));
3344 reg = DF_REF_REAL_REG (DF_USES_GET (df, i));
3346 et = RELEVANT_USE;
3348 if (insn)
3350 if (!INSN_P (insn))
3351 et = NOT_RELEVANT;
3352 if (insn && find_reg_note (insn, REG_LIBCALL, NULL_RTX))
3353 et = NOT_RELEVANT;
3354 if (find_reg_note (insn, REG_RETVAL, NULL_RTX))
3355 et = NOT_RELEVANT;
3357 else
3358 et = NOT_RELEVANT;
3360 if (dump_file)
3362 fprintf (dump_file, "u%i insn %i reg %i ",
3363 i, (insn ? INSN_UID (insn) : -1), REGNO (reg));
3364 if (et == NOT_RELEVANT)
3365 fprintf (dump_file, "NOT RELEVANT \n");
3366 else
3367 fprintf (dump_file, "RELEVANT USE \n");
3370 curr_entry_extra_info = xmalloc (sizeof (struct see_entry_extra_info));
3371 curr_entry_extra_info->relevancy = et;
3372 curr_entry_extra_info->local_relevancy = et;
3373 use_entry[i].extra_info = curr_entry_extra_info;
3374 use_entry[i].reg = NULL;
3375 use_entry[i].pred = NULL;
3380 /* A definition in a candidate for this optimization only if its pattern is
3381 recognized as relevant in this function.
3382 INSN is the instruction to be recognized.
3384 - If this is the pattern of a common sign extension after definition:
3385 PREV_INSN (INSN): def (reg:NARROWmode r)
3386 INSN: set ((reg:WIDEmode r')
3387 (sign_extend:WIDEmode (reg:NARROWmode r)))
3388 return SIGN_EXTENDED_DEF and set SOURCE_MODE to NARROWmode.
3390 - If this is the pattern of a common zero extension after definition:
3391 PREV_INSN (INSN): def (reg:NARROWmode r)
3392 INSN: set ((reg:WIDEmode r')
3393 (zero_extend:WIDEmode (reg:NARROWmode r)))
3394 return ZERO_EXTENDED_DEF and set SOURCE_MODE to NARROWmode.
3396 - Otherwise,
3398 For the pattern:
3399 INSN: set ((reg:WIDEmode r) (sign_extend:WIDEmode (...expr...)))
3400 return EXTENDED_DEF and set SOURCE_MODE to the mode of expr.
3402 For the pattern:
3403 INSN: set ((reg:WIDEmode r) (zero_extend:WIDEmode (...expr...)))
3404 return EXTENDED_DEF and set SOURCE_MODE_UNSIGNED to the mode of expr.
3406 For the pattern:
3407 INSN: set ((reg:WIDEmode r) (CONST_INT (...)))
3408 return EXTENDED_DEF and set SOURCE_MODE(_UNSIGNED) to the narrowest mode that
3409 is implicitly sign(zero) extended to WIDEmode in the INSN.
3411 - FIXME: Extensions that are not adjacent to their definition and EXTENDED_DEF
3412 that is part of a PARALLEL instruction are not handled.
3413 These restriction can be relaxed. */
3415 static enum entry_type
3416 see_analyze_one_def (rtx insn, enum machine_mode *source_mode,
3417 enum machine_mode *source_mode_unsigned)
3419 enum rtx_code extension_code;
3420 rtx rhs = NULL;
3421 rtx lhs = NULL;
3422 rtx set = NULL;
3423 rtx source_register = NULL;
3424 rtx prev_insn = NULL;
3425 rtx next_insn = NULL;
3426 enum machine_mode mode;
3427 enum machine_mode next_source_mode;
3428 HOST_WIDE_INT val = 0;
3429 HOST_WIDE_INT val2 = 0;
3430 int i = 0;
3432 *source_mode = MAX_MACHINE_MODE;
3433 *source_mode_unsigned = MAX_MACHINE_MODE;
3435 if (!insn)
3436 return NOT_RELEVANT;
3438 if (!INSN_P (insn))
3439 return NOT_RELEVANT;
3441 extension_code = see_get_extension_data (insn, source_mode);
3442 switch (extension_code)
3444 case SIGN_EXTEND:
3445 case ZERO_EXTEND:
3446 source_register = see_get_extension_reg (insn, 0);
3447 /* FIXME: This restriction can be relaxed. The only thing that is
3448 important is that the reference would be inside the same basic block
3449 as the extension. */
3450 prev_insn = PREV_INSN (insn);
3451 if (!prev_insn || !INSN_P (prev_insn))
3452 return NOT_RELEVANT;
3454 if (!reg_set_between_p (source_register, PREV_INSN (prev_insn), insn))
3455 return NOT_RELEVANT;
3457 if (find_reg_note (prev_insn, REG_LIBCALL, NULL_RTX))
3458 return NOT_RELEVANT;
3460 if (find_reg_note (prev_insn, REG_RETVAL, NULL_RTX))
3461 return NOT_RELEVANT;
3463 /* If we can't use copy_rtx on the reference it can't be a reference. */
3464 if (GET_CODE (PATTERN (prev_insn)) == PARALLEL
3465 && asm_noperands (PATTERN (prev_insn)) >= 0)
3466 return NOT_RELEVANT;
3468 /* Now, check if this extension is a reference itself. If so, it is not
3469 relevant. Handling this extension as relevant would make things much
3470 more complicated. */
3471 next_insn = NEXT_INSN (insn);
3472 if (next_insn
3473 && INSN_P (next_insn)
3474 && (see_get_extension_data (next_insn, &next_source_mode) !=
3475 NOT_RELEVANT))
3477 rtx curr_dest_register = see_get_extension_reg (insn, 1);
3478 rtx next_source_register = see_get_extension_reg (next_insn, 0);
3480 if (REGNO (curr_dest_register) == REGNO (next_source_register))
3481 return NOT_RELEVANT;
3484 if (extension_code == SIGN_EXTEND)
3485 return SIGN_EXTENDED_DEF;
3486 else
3487 return ZERO_EXTENDED_DEF;
3489 case UNKNOWN:
3490 /* This may still be an EXTENDED_DEF. */
3492 /* FIXME: This restriction can be relaxed. It is possible to handle
3493 PARALLEL insns too. */
3494 set = single_set (insn);
3495 if (!set)
3496 return NOT_RELEVANT;
3497 rhs = SET_SRC (set);
3498 lhs = SET_DEST (set);
3500 /* Don't handle extensions to something other then register or
3501 subregister. */
3502 if (!REG_P (lhs) && !SUBREG_REG (lhs))
3503 return NOT_RELEVANT;
3505 switch (GET_CODE (rhs))
3507 case SIGN_EXTEND:
3508 *source_mode = GET_MODE (XEXP (rhs, 0));
3509 *source_mode_unsigned = MAX_MACHINE_MODE;
3510 return EXTENDED_DEF;
3511 case ZERO_EXTEND:
3512 *source_mode = MAX_MACHINE_MODE;
3513 *source_mode_unsigned = GET_MODE (XEXP (rhs, 0));
3514 return EXTENDED_DEF;
3515 case CONST_INT:
3517 val = INTVAL (rhs);
3519 /* Find the narrowest mode, val could fit into. */
3520 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT), i = 0;
3521 GET_MODE_BITSIZE (mode) < BITS_PER_WORD;
3522 mode = GET_MODE_WIDER_MODE (mode), i++)
3524 val2 = trunc_int_for_mode (val, mode);
3525 if (val2 == val && *source_mode == MAX_MACHINE_MODE)
3526 *source_mode = mode;
3527 if (val == (val & (HOST_WIDE_INT)GET_MODE_MASK (mode))
3528 && *source_mode_unsigned == MAX_MACHINE_MODE)
3529 *source_mode_unsigned = mode;
3530 if (*source_mode != MAX_MACHINE_MODE
3531 && *source_mode_unsigned !=MAX_MACHINE_MODE)
3532 return EXTENDED_DEF;
3534 if (*source_mode != MAX_MACHINE_MODE
3535 || *source_mode_unsigned !=MAX_MACHINE_MODE)
3536 return EXTENDED_DEF;
3537 return NOT_RELEVANT;
3538 default:
3539 return NOT_RELEVANT;
3541 default:
3542 gcc_unreachable ();
3547 /* Updates the relevancy and source_mode of all the definitions.
3548 The information of the i'th definition is stored in def_entry[i]. */
3550 static void
3551 see_update_defs_relevancy (void)
3553 struct see_entry_extra_info *curr_entry_extra_info;
3554 unsigned int i;
3555 rtx insn = NULL;
3556 rtx reg = NULL;
3557 enum entry_type et;
3558 enum machine_mode source_mode;
3559 enum machine_mode source_mode_unsigned;
3561 if (!df || !def_entry)
3562 return;
3564 for (i = 0; i < defs_num; i++)
3566 insn = DF_REF_INSN (DF_DEFS_GET (df, i));
3567 reg = DF_REF_REAL_REG (DF_DEFS_GET (df, i));
3569 et = see_analyze_one_def (insn, &source_mode, &source_mode_unsigned);
3571 curr_entry_extra_info = xmalloc (sizeof (struct see_entry_extra_info));
3572 curr_entry_extra_info->relevancy = et;
3573 curr_entry_extra_info->local_relevancy = et;
3574 if (et != EXTENDED_DEF)
3576 curr_entry_extra_info->source_mode = source_mode;
3577 curr_entry_extra_info->local_source_mode = source_mode;
3579 else
3581 curr_entry_extra_info->source_mode_signed = source_mode;
3582 curr_entry_extra_info->source_mode_unsigned = source_mode_unsigned;
3584 def_entry[i].extra_info = curr_entry_extra_info;
3585 def_entry[i].reg = NULL;
3586 def_entry[i].pred = NULL;
3588 if (dump_file)
3590 if (et == NOT_RELEVANT)
3592 fprintf (dump_file, "d%i insn %i reg %i ",
3593 i, (insn ? INSN_UID (insn) : -1), REGNO (reg));
3594 fprintf (dump_file, "NOT RELEVANT \n");
3596 else
3598 fprintf (dump_file, "d%i insn %i reg %i ",
3599 i ,INSN_UID (insn), REGNO (reg));
3600 fprintf (dump_file, "RELEVANT - ");
3601 switch (et)
3603 case SIGN_EXTENDED_DEF :
3604 fprintf (dump_file, "SIGN_EXTENDED_DEF, source_mode = %s\n",
3605 GET_MODE_NAME (source_mode));
3606 break;
3607 case ZERO_EXTENDED_DEF :
3608 fprintf (dump_file, "ZERO_EXTENDED_DEF, source_mode = %s\n",
3609 GET_MODE_NAME (source_mode));
3610 break;
3611 case EXTENDED_DEF :
3612 fprintf (dump_file, "EXTENDED_DEF, ");
3613 if (source_mode != MAX_MACHINE_MODE
3614 && source_mode_unsigned != MAX_MACHINE_MODE)
3616 fprintf (dump_file, "positive const, ");
3617 fprintf (dump_file, "source_mode_signed = %s, ",
3618 GET_MODE_NAME (source_mode));
3619 fprintf (dump_file, "source_mode_unsigned = %s\n",
3620 GET_MODE_NAME (source_mode_unsigned));
3622 else if (source_mode != MAX_MACHINE_MODE)
3623 fprintf (dump_file, "source_mode_signed = %s\n",
3624 GET_MODE_NAME (source_mode));
3625 else
3626 fprintf (dump_file, "source_mode_unsigned = %s\n",
3627 GET_MODE_NAME (source_mode_unsigned));
3628 break;
3629 default :
3630 gcc_unreachable ();
3638 /* Phase 1 top level function.
3639 In this phase the relevancy of all the definitions and uses are checked,
3640 later the webs are produces and the extensions are generated.
3641 These extensions are not emitted yet into the insns stream.
3643 returns true if at list one relevant web was found and there were no
3644 problems, otherwise return false. */
3646 static bool
3647 see_propagate_extensions_to_uses (void)
3649 unsigned int i = 0;
3650 int num_relevant_uses;
3651 int num_relevant_defs;
3653 if (dump_file)
3654 fprintf (dump_file,
3655 "* Phase 1: Propagate extensions to uses. *\n");
3657 /* Update the relevancy of references using the DF object. */
3658 see_update_defs_relevancy ();
3659 see_update_uses_relevancy ();
3661 /* Produce the webs and update the extra_info of the root.
3662 In general, a web is relevant if all its definitions and uses are relevant
3663 and there is at least one definition that was marked as SIGN_EXTENDED_DEF
3664 or ZERO_EXTENDED_DEF. */
3665 for (i = 0; i < uses_num; i++)
3666 union_defs (df, DF_USES_GET (df, i), def_entry, use_entry,
3667 see_update_leader_extra_info);
3669 /* Generate use extensions for references and insert these
3670 references to see_bb_splay_ar data structure. */
3671 num_relevant_uses = see_handle_relevant_uses ();
3673 if (num_relevant_uses < 0)
3674 return false;
3676 /* Store the def extensions in their references structures and insert these
3677 references to see_bb_splay_ar data structure. */
3678 num_relevant_defs = see_handle_relevant_defs ();
3680 if (num_relevant_defs < 0)
3681 return false;
3683 return num_relevant_uses > 0 || num_relevant_defs > 0;
3687 /* Main entry point for the sign extension elimination optimization. */
3689 static void
3690 see_main (void)
3692 bool cont = false;
3693 int i = 0;
3695 /* Initialize global data structures. */
3696 see_initialize_data_structures ();
3698 /* Phase 1: Propagate extensions to uses. */
3699 cont = see_propagate_extensions_to_uses ();
3701 if (cont)
3703 init_recog ();
3705 /* Phase 2: Merge and eliminate locally redundant extensions. */
3706 see_merge_and_eliminate_extensions ();
3708 /* Phase 3: Eliminate globally redundant extensions. */
3709 see_execute_LCM ();
3711 /* Phase 4: Commit changes to the insn stream. */
3712 see_commit_changes ();
3714 if (dump_file)
3716 /* For debug purpose only. */
3717 fprintf (dump_file, "see_pre_extension_hash:\n");
3718 htab_traverse (see_pre_extension_hash, see_print_pre_extension_expr,
3719 NULL);
3721 for (i = 0; i < last_bb; i++)
3723 if (see_bb_hash_ar[i])
3724 /* Traverse over all the references in the basic block in
3725 forward order. */
3727 fprintf (dump_file,
3728 "Searching register properties in bb %d\n", i);
3729 htab_traverse (see_bb_hash_ar[i],
3730 see_print_register_properties, NULL);
3736 /* Free global data structures. */
3737 see_free_data_structures ();
3741 static bool
3742 gate_handle_see (void)
3744 return optimize > 1 && flag_see;
3747 static unsigned int
3748 rest_of_handle_see (void)
3750 int no_new_pseudos_bcp = no_new_pseudos;
3752 no_new_pseudos = 0;
3753 see_main ();
3754 no_new_pseudos = no_new_pseudos_bcp;
3756 delete_trivially_dead_insns (get_insns (), max_reg_num ());
3757 update_life_info_in_dirty_blocks (UPDATE_LIFE_GLOBAL_RM_NOTES,
3758 (PROP_DEATH_NOTES));
3759 cleanup_cfg (CLEANUP_EXPENSIVE);
3760 reg_scan (get_insns (), max_reg_num ());
3762 return 0;
3765 struct tree_opt_pass pass_see =
3767 "see", /* name */
3768 gate_handle_see, /* gate */
3769 rest_of_handle_see, /* execute */
3770 NULL, /* sub */
3771 NULL, /* next */
3772 0, /* static_pass_number */
3773 TV_SEE, /* tv_id */
3774 0, /* properties_required */
3775 0, /* properties_provided */
3776 0, /* properties_destroyed */
3777 0, /* todo_flags_start */
3778 TODO_dump_func, /* todo_flags_finish */
3779 'u' /* letter */