re PR target/35659 (Miscompiled code with -O2 (but not with -O2 -funroll-loops) on...
[official-gcc.git] / gcc / see.c
blob27e70216f052fa1f6b2eab743bf9bad5a25368e7
1 /* Sign extension elimination optimization for GNU compiler.
2 Copyright (C) 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
3 Contributed by Leehod Baruch <leehod@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 -Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>.
21 Problem description:
22 --------------------
23 In order to support 32bit computations on a 64bit machine, sign
24 extension instructions are generated to ensure the correctness of
25 the computation.
26 A possible policy (as currently implemented) is to generate a sign
27 extension right after each 32bit computation.
28 Depending on the instruction set of the architecture, some of these
29 sign extension instructions may be redundant.
30 There are two cases in which the extension may be redundant:
32 Case1:
33 The instruction that uses the 64bit operands that are sign
34 extended has a dual mode that works with 32bit operands.
35 For example:
37 int32 a, b;
39 a = .... --> a = ....
40 a = sign extend a -->
41 b = .... --> b = ....
42 b = sign extend a -->
43 -->
44 cmpd a, b --> cmpw a, b //half word compare
46 Case2:
47 The instruction that defines the 64bit operand (which is later sign
48 extended) has a dual mode that defines and sign-extends simultaneously
49 a 32bit operand. For example:
51 int32 a;
53 ld a --> lwa a // load half word and sign extend
54 a = sign extend a -->
55 -->
56 return a --> return a
59 General idea for solution:
60 --------------------------
61 First, try to merge the sign extension with the instruction that
62 defines the source of the extension and (separately) with the
63 instructions that uses the extended result. By doing this, both cases
64 of redundancies (as described above) will be eliminated.
66 Then, use partial redundancy elimination to place the non redundant
67 ones at optimal placements.
70 Implementation by example:
71 --------------------------
72 Note: The instruction stream is not changed till the last phase.
74 Phase 0: Initial code, as currently generated by gcc.
76 def1 def3
77 se1 def2 se3
78 | \ | / |
79 | \ | / |
80 | \ | / |
81 | \ | / |
82 | \ | / |
83 | \|/ |
84 use1 use2 use3
85 use4
86 def1 + se1:
87 set ((reg:SI 10) (..def1rhs..))
88 set ((reg:DI 100) (sign_extend:DI (reg:SI 10)))
90 def2:
91 set ((reg:DI 100) (const_int 7))
93 def3 + se3:
94 set ((reg:SI 20) (..def3rhs..))
95 set ((reg:DI 100) (sign_extend:DI (reg:SI 20)))
97 use1:
98 set ((reg:CC...) (compare:CC (reg:DI 100) (...)))
100 use2, use3, use4:
101 set ((...) (reg:DI 100))
103 Phase 1: Propagate extensions to uses.
105 def1 def3
106 se1 def2 se3
107 | \ | / |
108 | \ | / |
109 | \ | / |
110 | \ | / |
111 | \ | / |
112 | \|/ |
113 se se se
114 use1 use2 use3
116 use4
118 From here, all of the subregs are lowpart !
120 def1, def2, def3: No change.
122 use1:
123 set ((reg:DI 100) (sign_extend:DI ((subreg:SI (reg:DI 100)))))
124 set ((reg:CC...) (compare:CC (reg:DI 100) (...)))
126 use2, use3, use4:
127 set ((reg:DI 100) (sign_extend:DI ((subreg:SI (reg:DI 100)))))
128 set ((...) (reg:DI 100))
131 Phase 2: Merge and eliminate locally redundant extensions.
134 *def1 def2 *def3
135 [se removed] se se3
136 | \ | / |
137 | \ | / |
138 | \ | / |
139 | \ | / |
140 | \ | / |
141 | \|/ |
142 [se removed] se se
143 *use1 use2 use3
144 [se removed]
145 use4
147 The instructions that were changed at this phase are marked with
148 asterisk.
150 *def1: Merge failed.
151 Remove the sign extension instruction, modify def1 and
152 insert a move instruction to assure to correctness of the code.
153 set ((subreg:SI (reg:DI 100)) (..def1rhs..))
154 set ((reg:SI 10) (subreg:SI (reg:DI 100)))
156 def2 + se: There is no need for merge.
157 Def2 is not changed but a sign extension instruction is
158 created.
159 set ((reg:DI 100) (const_int 7))
160 set ((reg:DI 100) (sign_extend:DI ((subreg:SI (reg:DI 100)))))
162 *def3 + se3: Merge succeeded.
163 set ((reg:DI 100) (sign_extend:DI (..def3rhs..)))
164 set ((reg:SI 20) (reg:DI 100))
165 set ((reg:DI 100) (sign_extend:DI (reg:SI 20)))
166 (The extension instruction is the original one).
168 *use1: Merge succeeded. Remove the sign extension instruction.
169 set ((reg:CC...)
170 (compare:CC (subreg:SI (reg:DI 100)) (...)))
172 use2, use3: Merge failed. No change.
174 use4: The extension is locally redundant, therefore it is eliminated
175 at this point.
178 Phase 3: Eliminate globally redundant extensions.
180 Following the LCM output:
182 def1 def2 def3
183 se se3
184 | \ | / |
185 | \ | / |
186 | se | / |
187 | \ | / |
188 | \ | / |
189 | \|/ |
190 [ses removed]
191 use1 use2 use3
192 use4
195 set ((reg:DI 100) (sign_extend:DI ((subreg:SI (reg:DI 100)))))
197 se3:
198 set ((reg:DI 100) (sign_extend:DI (reg:SI 20)))
201 Phase 4: Commit changes to the insn stream.
204 def1 def3 *def1 def2 *def3
205 se1 def2 se3 [se removed] [se removed]
206 | \ | / | | \ | / |
207 | \ | / | ------> | \ | / |
208 | \ | / | ------> | se | / |
209 | \ | / | | \ | / |
210 | \ | / | | \ | / |
211 | \|/ | | \|/ |
212 use1 use2 use3 *use1 use2 use3
213 use4 use4
215 The instructions that were changed during the whole optimization are
216 marked with asterisk.
218 The result:
220 def1 + se1:
221 [ set ((reg:SI 10) (..def1rhs..)) ] - Deleted
222 [ set ((reg:DI 100) (sign_extend:DI (reg:SI 10))) ] - Deleted
223 set ((subreg:SI (reg:DI 100)) (..def3rhs..)) - Inserted
224 set ((reg:SI 10) (subreg:SI (reg:DI 100))) - Inserted
226 def2:
227 set ((reg:DI 100) (const_int 7)) - No change
229 def3 + se3:
230 [ set ((reg:SI 20) (..def3rhs..)) ] - Deleted
231 [ set ((reg:DI 100) (sign_extend:DI (reg:SI 20))) ] - Deleted
232 set ((reg:DI 100) (sign_extend:DI (..def3rhs..))) - Inserted
233 set ((reg:SI 20) (reg:DI 100)) - Inserted
235 use1:
236 [ set ((reg:CC...) (compare:CC (reg:DI 100) (...))) ] - Deleted
237 set ((reg:CC...) - Inserted
238 (compare:CC (subreg:SI (reg:DI 100)) (...)))
240 use2, use3, use4:
241 set ((...) (reg:DI 100)) - No change
243 se: - Inserted
244 set ((reg:DI 100) (sign_extend:DI ((subreg:SI (reg:DI 100)))))
246 Note: Most of the simple move instructions that were inserted will be
247 trivially dead and therefore eliminated.
249 The implementation outline:
250 ---------------------------
251 Some definitions:
252 A web is RELEVANT if at the end of phase 1, his leader's
253 relevancy is {ZERO, SIGN}_EXTENDED_DEF. The source_mode of
254 the web is the source_mode of his leader.
255 A definition is a candidate for the optimization if it is part
256 of a RELEVANT web and his local source_mode is not narrower
257 then the source_mode of its web.
258 A use is a candidate for the optimization if it is part of a
259 RELEVANT web.
260 A simple explicit extension is a single set instruction that
261 extends a register (or a subregister) to a register (or
262 subregister).
263 A complex explicit extension is an explicit extension instruction
264 that is not simple.
265 A def extension is a simple explicit extension that is
266 also a candidate for the optimization. This extension is part
267 of the instruction stream, it is not generated by this
268 optimization.
269 A use extension is a simple explicit extension that is generated
270 and stored for candidate use during this optimization. It is
271 not emitted to the instruction stream till the last phase of
272 the optimization.
273 A reference is an instruction that satisfy at least on of these
274 criteria:
275 - It contains a definition with EXTENDED_DEF relevancy in a RELEVANT web.
276 - It is followed by a def extension.
277 - It contains a candidate use.
279 Phase 1: Propagate extensions to uses.
280 In this phase, we find candidate extensions for the optimization
281 and we generate (but not emit) proper extensions "right before the
282 uses".
284 a. Build a DF object.
285 b. Traverse over all the instructions that contains a definition
286 and set their local relevancy and local source_mode like this:
287 - If the instruction is a simple explicit extension instruction,
288 mark it as {ZERO, SIGN}_EXTENDED_DEF according to the extension
289 type and mark its source_mode to be the mode of the quantity
290 that is been extended.
291 - Otherwise, If the instruction has an implicit extension,
292 which means that its high part is an extension of its low part,
293 or if it is a complicated explicit extension, mark it as
294 EXTENDED_DEF and set its source_mode to be the narrowest
295 mode that is been extended in the instruction.
296 c. Traverse over all the instructions that contains a use and set
297 their local relevancy to RELEVANT_USE (except for few corner
298 cases).
299 d. Produce the web. During union of two entries, update the
300 relevancy and source_mode of the leader. There are two major
301 guide lines for this update:
302 - If one of the entries is NOT_RELEVANT, mark the leader
303 NOT_RELEVANT.
304 - If one is ZERO_EXTENDED_DEF and the other is SIGN_EXTENDED_DEF
305 (or vice versa) mark the leader as NOT_RELEVANT. We don't
306 handle this kind of mixed webs.
307 (For more details about this update process,
308 see see_update_leader_extra_info ()).
309 e. Generate uses extensions according to the relevancy and
310 source_mode of the webs.
312 Phase 2: Merge and eliminate locally redundant extensions.
313 In this phase, we try to merge def extensions and use
314 extensions with their references, and eliminate redundant extensions
315 in the same basic block.
317 Traverse over all the references. Do this in basic block number and
318 luid number forward order.
319 For each reference do:
320 a. Peephole optimization - try to merge it with all its
321 def extensions and use extensions in the following
322 order:
323 - Try to merge only the def extensions, one by one.
324 - Try to merge only the use extensions, one by one.
325 - Try to merge any couple of use extensions simultaneously.
326 - Try to merge any def extension with one or two uses
327 extensions simultaneously.
328 b. Handle each EXTENDED_DEF in it as if it was already merged with
329 an extension.
331 During the merge process we save the following data for each
332 register in each basic block:
333 a. The first instruction that defines the register in the basic
334 block.
335 b. The last instruction that defines the register in the basic
336 block.
337 c. The first extension of this register before the first
338 instruction that defines it in the basic block.
339 c. The first extension of this register after the last
340 instruction that defines it in the basic block.
341 This data will help us eliminate (or more precisely, not generate)
342 locally redundant extensions, and will be useful in the next stage.
344 While merging extensions with their reference there are 4 possible
345 situations:
346 a. A use extension was merged with the reference:
347 Delete the extension instruction and save the merged reference
348 for phase 4. (For details, see see_use_extension_merged ())
349 b. A use extension failed to be merged with the reference:
350 If there is already such an extension in the same basic block
351 and it is not dead at this point, delete the unmerged extension
352 (it is locally redundant), otherwise properly update the above
353 basic block data.
354 (For details, see see_merge_one_use_extension ())
355 c. A def extension was merged with the reference:
356 Mark this extension as a merged_def extension and properly
357 update the above basic block data.
358 (For details, see see_merge_one_def_extension ())
359 d. A def extension failed to be merged with the reference:
360 Replace the definition of the NARROWmode register in the
361 reference with the proper subreg of WIDEmode register and save
362 the result as a merged reference. Also, properly update the
363 the above basic block data.
364 (For details, see see_def_extension_not_merged ())
366 Phase 3: Eliminate globally redundant extensions.
367 In this phase, we set the bit vectors input of the edge based LCM
368 using the recorded data on the registers in each basic block.
369 We also save pointers for all the anticipatable and available
370 occurrences of the relevant extensions. Then we run the LCM.
372 a. Initialize the comp, antloc, kill bit vectors to zero and the
373 transp bit vector to ones.
375 b. Traverse over all the references. Do this in basic block number
376 and luid number forward order. For each reference:
377 - Go over all its use extensions. For each such extension -
378 If it is not dead from the beginning of the basic block SET
379 the antloc bit of the current extension in the current
380 basic block bits.
381 If it is not dead till the end of the basic block SET the
382 comp bit of the current extension in the current basic
383 block bits.
384 - Go over all its def extensions that were merged with
385 it. For each such extension -
386 If it is not dead till the end of the basic block SET the
387 comp bit of the current extension in the current basic
388 block bits.
389 RESET the proper transp and kill bits.
390 - Go over all its def extensions that were not merged
391 with it. For each such extension -
392 RESET the transp bit and SET the kill bit of the current
393 extension in the current basic block bits.
395 c. Run the edge based LCM.
397 Phase 4: Commit changes to the insn stream.
398 This is the only phase that actually changes the instruction stream.
399 Up to this point the optimization could be aborted at any time.
400 Here we insert extensions at their best placements and delete the
401 redundant ones according to the output of the LCM. We also replace
402 some of the instructions according to the second phase merges results.
404 a. Use the pre_delete_map (from the output of the LCM) in order to
405 delete redundant extensions. This will prevent them from been
406 emitted in the first place.
408 b. Insert extensions on edges where needed according to
409 pre_insert_map and edge_list (from the output of the LCM).
411 c. For each reference do-
412 - Emit all the uses extensions that were not deleted until now,
413 right before the reference.
414 - Delete all the merged and unmerged def extensions from
415 the instruction stream.
416 - Replace the reference with the merged one, if exist.
418 The implementation consists of four data structures:
419 - Data structure I
420 Purpose: To handle the relevancy of the uses, definitions and webs.
421 Relevant structures: web_entry (from df.h), see_entry_extra_info.
422 Details: This is a disjoint-set data structure. Most of its functions are
423 implemented in web.c. Each definition and use in the code are
424 elements. A web_entry structure is allocated for each element to
425 hold the element's relevancy and source_mode. The union rules are
426 defined in see_update_leader_extra_info ().
427 - Data structure II
428 Purpose: To store references and their extensions (uses and defs)
429 and to enable traverse over these references according to basic
430 block order.
431 Relevant structure: see_ref_s.
432 Details: This data structure consists of an array of splay trees. One splay
433 tree for each basic block. The splay tree nodes are references and
434 the keys are the luids of the references.
435 A see_ref_s structure is allocated for each reference. It holds the
436 reference itself, its def and uses extensions and later the merged
437 version of the reference.
438 Using this data structure we can traverse over all the references of
439 a basic block and their extensions in forward order.
440 - Data structure III.
441 Purpose: To store local properties of registers for each basic block.
442 This data will later help us build the LCM sbitmap_vectors
443 input.
444 Relevant structure: see_register_properties.
445 Details: This data structure consists of an array of hash tables. One hash
446 for each basic block. The hash node are a register properties
447 and the keys are the numbers of the registers.
448 A see_register_properties structure is allocated for each register
449 that we might be interested in its properties.
450 Using this data structure we can easily find the properties of a
451 register in a specific basic block. This is necessary for locally
452 redundancy elimination and for setting up the LCM input.
453 - Data structure IV.
454 Purpose: To store the extensions that are candidate for PRE and their
455 anticipatable and available occurrences.
456 Relevant structure: see_occr, see_pre_extension_expr.
457 Details: This data structure is a hash tables. Its nodes are the extensions
458 that are candidate for PRE.
459 A see_pre_extension_expr structure is allocated for each candidate
460 extension. It holds a copy of the extension and a linked list of all
461 the anticipatable and available occurrences of it.
462 We use this data structure when we read the output of the LCM. */
464 #include "config.h"
465 #include "system.h"
466 #include "coretypes.h"
467 #include "tm.h"
469 #include "obstack.h"
470 #include "rtl.h"
471 #include "output.h"
472 #include "df.h"
473 #include "insn-config.h"
474 #include "recog.h"
475 #include "expr.h"
476 #include "splay-tree.h"
477 #include "hashtab.h"
478 #include "regs.h"
479 #include "timevar.h"
480 #include "tree-pass.h"
481 #include "dce.h"
483 /* Used to classify defs and uses according to relevancy. */
484 enum entry_type {
485 NOT_RELEVANT,
486 SIGN_EXTENDED_DEF,
487 ZERO_EXTENDED_DEF,
488 EXTENDED_DEF,
489 RELEVANT_USE
492 /* Used to classify extensions in relevant webs. */
493 enum extension_type {
494 DEF_EXTENSION,
495 EXPLICIT_DEF_EXTENSION,
496 IMPLICIT_DEF_EXTENSION,
497 USE_EXTENSION
500 /* Global data structures and flags. */
502 /* This structure will be assigned for each web_entry structure (defined
503 in df.h). It is placed in the extra_info field of a web_entry and holds the
504 relevancy and source mode of the web_entry. */
506 struct see_entry_extra_info
508 /* The relevancy of the ref. */
509 enum entry_type relevancy;
510 /* The relevancy of the ref.
511 This field is updated only once - when this structure is created. */
512 enum entry_type local_relevancy;
513 /* The source register mode. */
514 enum machine_mode source_mode;
515 /* This field is used only if the relevancy is ZERO/SIGN_EXTENDED_DEF.
516 It is updated only once when this structure is created. */
517 enum machine_mode local_source_mode;
518 /* This field is used only if the relevancy is EXTENDED_DEF.
519 It holds the narrowest mode that is sign extended. */
520 enum machine_mode source_mode_signed;
521 /* This field is used only if the relevancy is EXTENDED_DEF.
522 It holds the narrowest mode that is zero extended. */
523 enum machine_mode source_mode_unsigned;
526 /* There is one such structure for every reference. It stores the reference
527 itself as well as its extensions (uses and definitions).
528 Used as the value in splay_tree see_bb_splay_ar[]. */
529 struct see_ref_s
531 /* The luid of the insn. */
532 unsigned int luid;
533 /* The insn of the ref. */
534 rtx insn;
535 /* The merged insn that was formed from the reference's insn and extensions.
536 If all merges failed, it remains NULL. */
537 rtx merged_insn;
538 /* The def extensions of the reference that were not merged with
539 it. */
540 htab_t unmerged_def_se_hash;
541 /* The def extensions of the reference that were merged with
542 it. Implicit extensions of the reference will be stored here too. */
543 htab_t merged_def_se_hash;
544 /* The uses extensions of reference. */
545 htab_t use_se_hash;
548 /* There is one such structure for every relevant extended register in a
549 specific basic block. This data will help us build the LCM sbitmap_vectors
550 input. */
551 struct see_register_properties
553 /* The register number. */
554 unsigned int regno;
555 /* The last luid of the reference that defines this register in this basic
556 block. */
557 int last_def;
558 /* The luid of the reference that has the first extension of this register
559 that appears before any definition in this basic block. */
560 int first_se_before_any_def;
561 /* The luid of the reference that has the first extension of this register
562 that appears after the last definition in this basic block. */
563 int first_se_after_last_def;
566 /* Occurrence of an expression.
567 There must be at most one available occurrence and at most one anticipatable
568 occurrence per basic block. */
569 struct see_occr
571 /* Next occurrence of this expression. */
572 struct see_occr *next;
573 /* The insn that computes the expression. */
574 rtx insn;
575 int block_num;
578 /* There is one such structure for every relevant extension expression.
579 It holds a copy of this extension instruction as well as a linked lists of
580 pointers to all the antic and avail occurrences of it. */
581 struct see_pre_extension_expr
583 /* A copy of the extension instruction. */
584 rtx se_insn;
585 /* Index in the available expression bitmaps. */
586 int bitmap_index;
587 /* List of anticipatable occurrences in basic blocks in the function.
588 An "anticipatable occurrence" is the first occurrence in the basic block,
589 the operands are not modified in the basic block prior to the occurrence
590 and the output is not used between the start of the block and the
591 occurrence. */
592 struct see_occr *antic_occr;
593 /* List of available occurrence in basic blocks in the function.
594 An "available occurrence" is the last occurrence in the basic block and
595 the operands are not modified by following statements in the basic block
596 [including this insn]. */
597 struct see_occr *avail_occr;
600 /* Helper structure for the note_uses and see_replace_src functions. */
601 struct see_replace_data
603 rtx from;
604 rtx to;
607 /* Helper structure for the note_uses and see_mentioned_reg functions. */
608 struct see_mentioned_reg_data
610 rtx reg;
611 bool mentioned;
614 /* An array of web_entries. The i'th definition in the df object is associated
615 with def_entry[i] */
616 static struct web_entry *def_entry = NULL;
617 /* An array of web_entries. The i'th use in the df object is associated with
618 use_entry[i] */
619 static struct web_entry *use_entry = NULL;
620 /* Array of splay_trees.
621 see_bb_splay_ar[i] refers to the splay tree of the i'th basic block.
622 The splay tree will hold see_ref_s structures. The key is the luid
623 of the insn. This way we can traverse over the references of each basic
624 block in forward or backward order. */
625 static splay_tree *see_bb_splay_ar = NULL;
626 /* Array of hashes.
627 see_bb_hash_ar[i] refers to the hash of the i'th basic block.
628 The hash will hold see_register_properties structure. The key is regno. */
629 static htab_t *see_bb_hash_ar = NULL;
630 /* Hash table that holds a copy of all the extensions. The key is the right
631 hand side of the se_insn field. */
632 static htab_t see_pre_extension_hash = NULL;
634 /* Local LCM properties of expressions. */
635 /* Nonzero for expressions that are transparent in the block. */
636 static sbitmap *transp = NULL;
637 /* Nonzero for expressions that are computed (available) in the block. */
638 static sbitmap *comp = NULL;
639 /* Nonzero for expressions that are locally anticipatable in the block. */
640 static sbitmap *antloc = NULL;
641 /* Nonzero for expressions that are locally killed in the block. */
642 static sbitmap *ae_kill = NULL;
643 /* Nonzero for expressions which should be inserted on a specific edge. */
644 static sbitmap *pre_insert_map = NULL;
645 /* Nonzero for expressions which should be deleted in a specific block. */
646 static sbitmap *pre_delete_map = NULL;
647 /* Contains the edge_list returned by pre_edge_lcm. */
648 static struct edge_list *edge_list = NULL;
649 /* Records the last basic block at the beginning of the optimization. */
650 static int last_bb;
651 /* Records the number of uses at the beginning of the optimization. */
652 static unsigned int uses_num;
653 /* Records the number of definitions at the beginning of the optimization. */
654 static unsigned int defs_num;
656 #define ENTRY_EI(ENTRY) ((struct see_entry_extra_info *) (ENTRY)->extra_info)
658 /* Functions implementation. */
660 /* Verifies that EXTENSION's pattern is this:
662 set (reg/subreg reg1) (sign/zero_extend:WIDEmode (reg/subreg reg2))
664 If it doesn't have the expected pattern return NULL.
665 Otherwise, if RETURN_DEST_REG is set, return reg1 else return reg2. */
667 static rtx
668 see_get_extension_reg (rtx extension, bool return_dest_reg)
670 rtx set, rhs, lhs;
671 rtx reg1 = NULL;
672 rtx reg2 = NULL;
674 /* Parallel pattern for extension not supported for the moment. */
675 if (GET_CODE (PATTERN (extension)) == PARALLEL)
676 return NULL;
678 set = single_set (extension);
679 if (!set)
680 return NULL;
681 lhs = SET_DEST (set);
682 rhs = SET_SRC (set);
684 if (REG_P (lhs))
685 reg1 = lhs;
686 else if (REG_P (SUBREG_REG (lhs)))
687 reg1 = SUBREG_REG (lhs);
688 else
689 return NULL;
691 if (GET_CODE (rhs) != SIGN_EXTEND && GET_CODE (rhs) != ZERO_EXTEND)
692 return NULL;
694 rhs = XEXP (rhs, 0);
695 if (REG_P (rhs))
696 reg2 = rhs;
697 else if (REG_P (SUBREG_REG (rhs)))
698 reg2 = SUBREG_REG (rhs);
699 else
700 return NULL;
702 if (return_dest_reg)
703 return reg1;
704 return reg2;
707 /* Verifies that EXTENSION's pattern is this:
709 set (reg/subreg reg1) (sign/zero_extend: (...expr...)
711 If it doesn't have the expected pattern return UNKNOWN.
712 Otherwise, set SOURCE_MODE to be the mode of the extended expr and return
713 the rtx code of the extension. */
715 static enum entry_type
716 see_get_extension_data (rtx extension, enum machine_mode *source_mode)
718 rtx rhs, lhs, set;
720 if (!extension || !INSN_P (extension))
721 return NOT_RELEVANT;
723 /* Parallel pattern for extension not supported for the moment. */
724 if (GET_CODE (PATTERN (extension)) == PARALLEL)
725 return NOT_RELEVANT;
727 set = single_set (extension);
728 if (!set)
729 return NOT_RELEVANT;
730 rhs = SET_SRC (set);
731 lhs = SET_DEST (set);
733 /* Don't handle extensions to something other then register or
734 subregister. */
735 if (!REG_P (lhs) && GET_CODE (lhs) != SUBREG)
736 return NOT_RELEVANT;
738 if (GET_CODE (rhs) != SIGN_EXTEND && GET_CODE (rhs) != ZERO_EXTEND)
739 return NOT_RELEVANT;
741 if (!REG_P (XEXP (rhs, 0))
742 && !(GET_CODE (XEXP (rhs, 0)) == SUBREG
743 && REG_P (SUBREG_REG (XEXP (rhs, 0)))))
744 return NOT_RELEVANT;
746 *source_mode = GET_MODE (XEXP (rhs, 0));
748 if (GET_CODE (rhs) == SIGN_EXTEND)
749 return SIGN_EXTENDED_DEF;
750 return ZERO_EXTENDED_DEF;
754 /* Generate instruction with the pattern:
755 set ((reg r) (sign/zero_extend (subreg:mode (reg r))))
756 (the register r on both sides of the set is the same register).
757 And recognize it.
758 If the recognition failed, this is very bad, return NULL (This will abort
759 the entire optimization).
760 Otherwise, return the generated instruction. */
762 static rtx
763 see_gen_normalized_extension (rtx reg, enum entry_type extension_code,
764 enum machine_mode mode)
766 rtx subreg, insn;
767 rtx extension = NULL;
769 if (!reg
770 || !REG_P (reg)
771 || (extension_code != SIGN_EXTENDED_DEF
772 && extension_code != ZERO_EXTENDED_DEF))
773 return NULL;
775 subreg = gen_lowpart_SUBREG (mode, reg);
776 if (extension_code == SIGN_EXTENDED_DEF)
777 extension = gen_rtx_SIGN_EXTEND (GET_MODE (reg), subreg);
778 else
779 extension = gen_rtx_ZERO_EXTEND (GET_MODE (reg), subreg);
781 start_sequence ();
782 emit_insn (gen_rtx_SET (VOIDmode, reg, extension));
783 insn = get_insns ();
784 end_sequence ();
786 if (insn_invalid_p (insn))
787 /* Recognition failed, this is very bad for this optimization.
788 Abort the optimization. */
789 return NULL;
790 return insn;
793 /* Hashes and splay_trees related functions implementation. */
795 /* Helper functions for the pre_extension hash.
796 This kind of hash will hold see_pre_extension_expr structures.
798 The key is the right hand side of the se_insn field.
799 Note that the se_insn is an expression that looks like:
801 set ((reg:WIDEmode r1) (sign_extend:WIDEmode
802 (subreg:NARROWmode (reg:WIDEmode r2)))) */
804 /* Return TRUE if P1 has the same value in its rhs as P2.
805 Otherwise, return FALSE.
806 P1 and P2 are see_pre_extension_expr structures. */
808 static int
809 eq_descriptor_pre_extension (const void *p1, const void *p2)
811 const struct see_pre_extension_expr *const extension1 =
812 (const struct see_pre_extension_expr *) p1;
813 const struct see_pre_extension_expr *const extension2 =
814 (const struct see_pre_extension_expr *) p2;
815 rtx set1 = single_set (extension1->se_insn);
816 rtx set2 = single_set (extension2->se_insn);
817 rtx rhs1, rhs2;
819 gcc_assert (set1 && set2);
820 rhs1 = SET_SRC (set1);
821 rhs2 = SET_SRC (set2);
823 return rtx_equal_p (rhs1, rhs2);
827 /* P is a see_pre_extension_expr struct, use the RHS of the se_insn field.
828 Note that the RHS is an expression that looks like this:
829 (sign_extend:WIDEmode (subreg:NARROWmode (reg:WIDEmode r))) */
831 static hashval_t
832 hash_descriptor_pre_extension (const void *p)
834 const struct see_pre_extension_expr *const extension =
835 (const struct see_pre_extension_expr *) p;
836 rtx set = single_set (extension->se_insn);
837 rtx rhs;
839 gcc_assert (set);
840 rhs = SET_SRC (set);
842 return hash_rtx (rhs, GET_MODE (rhs), 0, NULL, 0);
846 /* Free the allocated memory of the current see_pre_extension_expr struct.
848 It frees the two linked list of the occurrences structures. */
850 static void
851 hash_del_pre_extension (void *p)
853 struct see_pre_extension_expr *const extension =
854 (struct see_pre_extension_expr *) p;
855 struct see_occr *curr_occr = extension->antic_occr;
856 struct see_occr *next_occr = NULL;
858 /* Free the linked list of the anticipatable occurrences. */
859 while (curr_occr)
861 next_occr = curr_occr->next;
862 free (curr_occr);
863 curr_occr = next_occr;
866 /* Free the linked list of the available occurrences. */
867 curr_occr = extension->avail_occr;
868 while (curr_occr)
870 next_occr = curr_occr->next;
871 free (curr_occr);
872 curr_occr = next_occr;
875 /* Free the see_pre_extension_expr structure itself. */
876 free (extension);
880 /* Helper functions for the register_properties hash.
881 This kind of hash will hold see_register_properties structures.
883 The value of the key is the regno field of the structure. */
885 /* Return TRUE if P1 has the same value in the regno field as P2.
886 Otherwise, return FALSE.
887 Where P1 and P2 are see_register_properties structures. */
889 static int
890 eq_descriptor_properties (const void *p1, const void *p2)
892 const struct see_register_properties *const curr_prop1 =
893 (const struct see_register_properties *) p1;
894 const struct see_register_properties *const curr_prop2 =
895 (const struct see_register_properties *) p2;
897 return curr_prop1->regno == curr_prop2->regno;
901 /* P is a see_register_properties struct, use the register number in the
902 regno field. */
904 static hashval_t
905 hash_descriptor_properties (const void *p)
907 const struct see_register_properties *const curr_prop =
908 (const struct see_register_properties *) p;
909 return curr_prop->regno;
913 /* Free the allocated memory of the current see_register_properties struct. */
914 static void
915 hash_del_properties (void *p)
917 struct see_register_properties *const curr_prop =
918 (struct see_register_properties *) p;
919 free (curr_prop);
923 /* Helper functions for an extension hash.
924 This kind of hash will hold insns that look like:
926 set ((reg:WIDEmode r1) (sign_extend:WIDEmode
927 (subreg:NARROWmode (reg:WIDEmode r2))))
929 set ((reg:WIDEmode r1) (sign_extend:WIDEmode (reg:NARROWmode r2)))
931 The value of the key is (REGNO (reg:WIDEmode r1))
932 It is possible to search this hash in two ways:
933 1. By a register rtx. The Value that is been compared to the keys is the
934 REGNO of it.
935 2. By an insn with the above pattern. The Value that is been compared to
936 the keys is the REGNO of the reg on the lhs. */
938 /* Return TRUE if P1 has the same value as P2. Otherwise, return FALSE.
939 Where P1 is an insn and P2 is an insn or a register. */
941 static int
942 eq_descriptor_extension (const void *p1, const void *p2)
944 const_rtx const insn = (const_rtx) p1;
945 const_rtx const element = (const_rtx) p2;
946 rtx set1 = single_set (insn);
947 rtx dest_reg1;
948 rtx set2 = NULL;
949 const_rtx dest_reg2 = NULL;
951 gcc_assert (set1 && element && (REG_P (element) || INSN_P (element)));
953 dest_reg1 = SET_DEST (set1);
955 if (INSN_P (element))
957 set2 = single_set (element);
958 dest_reg2 = SET_DEST (set2);
960 else
961 dest_reg2 = element;
963 return REGNO (dest_reg1) == REGNO (dest_reg2);
967 /* If P is an insn, use the register number of its lhs
968 otherwise, P is a register, use its number. */
970 static hashval_t
971 hash_descriptor_extension (const void *p)
973 const_rtx const r = (const_rtx) p;
974 rtx set, lhs;
976 if (r && REG_P (r))
977 return REGNO (r);
979 gcc_assert (r && INSN_P (r));
980 set = single_set (r);
981 gcc_assert (set);
982 lhs = SET_DEST (set);
983 return REGNO (lhs);
987 /* Helper function for a see_bb_splay_ar[i] splay tree.
988 It frees all the allocated memory of a struct see_ref_s pointer.
990 VALUE is the value of a splay tree node. */
992 static void
993 see_free_ref_s (splay_tree_value value)
995 struct see_ref_s *ref_s = (struct see_ref_s *)value;
997 if (ref_s->unmerged_def_se_hash)
998 htab_delete (ref_s->unmerged_def_se_hash);
999 if (ref_s->merged_def_se_hash)
1000 htab_delete (ref_s->merged_def_se_hash);
1001 if (ref_s->use_se_hash)
1002 htab_delete (ref_s->use_se_hash);
1003 free (ref_s);
1007 /* Rest of the implementation. */
1009 /* Search the extension hash for a suitable entry for EXTENSION.
1010 TYPE is the type of EXTENSION (USE_EXTENSION or DEF_EXTENSION).
1012 If TYPE is DEF_EXTENSION we need to normalize EXTENSION before searching the
1013 extension hash.
1015 If a suitable entry was found, return the slot. Otherwise, store EXTENSION
1016 in the hash and return NULL. */
1018 static struct see_pre_extension_expr *
1019 see_seek_pre_extension_expr (rtx extension, enum extension_type type)
1021 struct see_pre_extension_expr **slot_pre_exp, temp_pre_exp;
1022 rtx dest_extension_reg = see_get_extension_reg (extension, 1);
1023 enum entry_type extension_code;
1024 enum machine_mode source_extension_mode;
1026 if (type == DEF_EXTENSION)
1028 extension_code = see_get_extension_data (extension,
1029 &source_extension_mode);
1030 gcc_assert (extension_code != NOT_RELEVANT);
1031 extension =
1032 see_gen_normalized_extension (dest_extension_reg, extension_code,
1033 source_extension_mode);
1035 temp_pre_exp.se_insn = extension;
1036 slot_pre_exp =
1037 (struct see_pre_extension_expr **) htab_find_slot (see_pre_extension_hash,
1038 &temp_pre_exp, INSERT);
1039 if (*slot_pre_exp == NULL)
1040 /* This is the first time this extension instruction is encountered. Store
1041 it in the hash. */
1043 (*slot_pre_exp) = XNEW (struct see_pre_extension_expr);
1044 (*slot_pre_exp)->se_insn = extension;
1045 (*slot_pre_exp)->bitmap_index =
1046 (htab_elements (see_pre_extension_hash) - 1);
1047 (*slot_pre_exp)->antic_occr = NULL;
1048 (*slot_pre_exp)->avail_occr = NULL;
1049 return NULL;
1051 return *slot_pre_exp;
1055 /* This function defines how to update the extra_info of the web_entry.
1057 FIRST is the pointer of the extra_info of the first web_entry.
1058 SECOND is the pointer of the extra_info of the second web_entry.
1059 The first web_entry will be the predecessor (leader) of the second web_entry
1060 after the union.
1062 Return true if FIRST and SECOND points to the same web entry structure and
1063 nothing is done. Otherwise, return false. */
1065 static bool
1066 see_update_leader_extra_info (struct web_entry *first, struct web_entry *second)
1068 struct see_entry_extra_info *first_ei, *second_ei;
1070 first = unionfind_root (first);
1071 second = unionfind_root (second);
1073 if (unionfind_union (first, second))
1074 return true;
1076 first_ei = (struct see_entry_extra_info *) first->extra_info;
1077 second_ei = (struct see_entry_extra_info *) second->extra_info;
1079 gcc_assert (first_ei && second_ei);
1081 if (second_ei->relevancy == NOT_RELEVANT)
1083 first_ei->relevancy = NOT_RELEVANT;
1084 return false;
1086 switch (first_ei->relevancy)
1088 case NOT_RELEVANT:
1089 break;
1090 case RELEVANT_USE:
1091 switch (second_ei->relevancy)
1093 case RELEVANT_USE:
1094 break;
1095 case EXTENDED_DEF:
1096 first_ei->relevancy = second_ei->relevancy;
1097 first_ei->source_mode_signed = second_ei->source_mode_signed;
1098 first_ei->source_mode_unsigned = second_ei->source_mode_unsigned;
1099 break;
1100 case SIGN_EXTENDED_DEF:
1101 case ZERO_EXTENDED_DEF:
1102 first_ei->relevancy = second_ei->relevancy;
1103 first_ei->source_mode = second_ei->source_mode;
1104 break;
1105 default:
1106 gcc_unreachable ();
1108 break;
1109 case SIGN_EXTENDED_DEF:
1110 switch (second_ei->relevancy)
1112 case SIGN_EXTENDED_DEF:
1113 /* The mode of the root should be the wider one in this case. */
1114 first_ei->source_mode =
1115 (first_ei->source_mode > second_ei->source_mode) ?
1116 first_ei->source_mode : second_ei->source_mode;
1117 break;
1118 case RELEVANT_USE:
1119 break;
1120 case ZERO_EXTENDED_DEF:
1121 /* Don't mix webs with zero extend and sign extend. */
1122 first_ei->relevancy = NOT_RELEVANT;
1123 break;
1124 case EXTENDED_DEF:
1125 if (second_ei->source_mode_signed == MAX_MACHINE_MODE)
1126 first_ei->relevancy = NOT_RELEVANT;
1127 else
1128 /* The mode of the root should be the wider one in this case. */
1129 first_ei->source_mode =
1130 (first_ei->source_mode > second_ei->source_mode_signed) ?
1131 first_ei->source_mode : second_ei->source_mode_signed;
1132 break;
1133 default:
1134 gcc_unreachable ();
1136 break;
1137 /* This case is similar to the previous one, with little changes. */
1138 case ZERO_EXTENDED_DEF:
1139 switch (second_ei->relevancy)
1141 case SIGN_EXTENDED_DEF:
1142 /* Don't mix webs with zero extend and sign extend. */
1143 first_ei->relevancy = NOT_RELEVANT;
1144 break;
1145 case RELEVANT_USE:
1146 break;
1147 case ZERO_EXTENDED_DEF:
1148 /* The mode of the root should be the wider one in this case. */
1149 first_ei->source_mode =
1150 (first_ei->source_mode > second_ei->source_mode) ?
1151 first_ei->source_mode : second_ei->source_mode;
1152 break;
1153 case EXTENDED_DEF:
1154 if (second_ei->source_mode_unsigned == MAX_MACHINE_MODE)
1155 first_ei->relevancy = NOT_RELEVANT;
1156 else
1157 /* The mode of the root should be the wider one in this case. */
1158 first_ei->source_mode =
1159 (first_ei->source_mode > second_ei->source_mode_unsigned) ?
1160 first_ei->source_mode : second_ei->source_mode_unsigned;
1161 break;
1162 default:
1163 gcc_unreachable ();
1165 break;
1166 case EXTENDED_DEF:
1167 if (first_ei->source_mode_signed != MAX_MACHINE_MODE
1168 && first_ei->source_mode_unsigned != MAX_MACHINE_MODE)
1170 switch (second_ei->relevancy)
1172 case SIGN_EXTENDED_DEF:
1173 first_ei->relevancy = SIGN_EXTENDED_DEF;
1174 first_ei->source_mode =
1175 (first_ei->source_mode_signed > second_ei->source_mode) ?
1176 first_ei->source_mode_signed : second_ei->source_mode;
1177 break;
1178 case RELEVANT_USE:
1179 break;
1180 case ZERO_EXTENDED_DEF:
1181 first_ei->relevancy = ZERO_EXTENDED_DEF;
1182 first_ei->source_mode =
1183 (first_ei->source_mode_unsigned > second_ei->source_mode) ?
1184 first_ei->source_mode_unsigned : second_ei->source_mode;
1185 break;
1186 case EXTENDED_DEF:
1187 if (second_ei->source_mode_unsigned != MAX_MACHINE_MODE)
1188 first_ei->source_mode_unsigned =
1189 (first_ei->source_mode_unsigned >
1190 second_ei->source_mode_unsigned) ?
1191 first_ei->source_mode_unsigned :
1192 second_ei->source_mode_unsigned;
1193 if (second_ei->source_mode_signed != MAX_MACHINE_MODE)
1194 first_ei->source_mode_signed =
1195 (first_ei->source_mode_signed >
1196 second_ei->source_mode_signed) ?
1197 first_ei->source_mode_signed : second_ei->source_mode_signed;
1198 break;
1199 default:
1200 gcc_unreachable ();
1203 else if (first_ei->source_mode_signed == MAX_MACHINE_MODE)
1205 gcc_assert (first_ei->source_mode_unsigned != MAX_MACHINE_MODE);
1206 switch (second_ei->relevancy)
1208 case SIGN_EXTENDED_DEF:
1209 first_ei->relevancy = NOT_RELEVANT;
1210 break;
1211 case RELEVANT_USE:
1212 break;
1213 case ZERO_EXTENDED_DEF:
1214 first_ei->relevancy = ZERO_EXTENDED_DEF;
1215 first_ei->source_mode =
1216 (first_ei->source_mode_unsigned > second_ei->source_mode) ?
1217 first_ei->source_mode_unsigned : second_ei->source_mode;
1218 break;
1219 case EXTENDED_DEF:
1220 if (second_ei->source_mode_unsigned == MAX_MACHINE_MODE)
1221 first_ei->relevancy = NOT_RELEVANT;
1222 else
1223 first_ei->source_mode_unsigned =
1224 (first_ei->source_mode_unsigned >
1225 second_ei->source_mode_unsigned) ?
1226 first_ei->source_mode_unsigned :
1227 second_ei->source_mode_unsigned;
1228 break;
1229 default:
1230 gcc_unreachable ();
1233 else
1235 gcc_assert (first_ei->source_mode_unsigned == MAX_MACHINE_MODE);
1236 gcc_assert (first_ei->source_mode_signed != MAX_MACHINE_MODE);
1237 switch (second_ei->relevancy)
1239 case SIGN_EXTENDED_DEF:
1240 first_ei->relevancy = SIGN_EXTENDED_DEF;
1241 first_ei->source_mode =
1242 (first_ei->source_mode_signed > second_ei->source_mode) ?
1243 first_ei->source_mode_signed : second_ei->source_mode;
1244 break;
1245 case RELEVANT_USE:
1246 break;
1247 case ZERO_EXTENDED_DEF:
1248 first_ei->relevancy = NOT_RELEVANT;
1249 break;
1250 case EXTENDED_DEF:
1251 if (second_ei->source_mode_signed == MAX_MACHINE_MODE)
1252 first_ei->relevancy = NOT_RELEVANT;
1253 else
1254 first_ei->source_mode_signed =
1255 (first_ei->source_mode_signed >
1256 second_ei->source_mode_signed) ?
1257 first_ei->source_mode_signed : second_ei->source_mode_signed;
1258 break;
1259 default:
1260 gcc_unreachable ();
1263 break;
1264 default:
1265 /* Unknown pattern type. */
1266 gcc_unreachable ();
1269 return false;
1273 /* Free global data structures. */
1275 static void
1276 see_free_data_structures (void)
1278 int i;
1279 unsigned int j;
1281 /* Free the bitmap vectors. */
1282 if (transp)
1284 sbitmap_vector_free (transp);
1285 transp = NULL;
1286 sbitmap_vector_free (comp);
1287 comp = NULL;
1288 sbitmap_vector_free (antloc);
1289 antloc = NULL;
1290 sbitmap_vector_free (ae_kill);
1291 ae_kill = NULL;
1293 if (pre_insert_map)
1295 sbitmap_vector_free (pre_insert_map);
1296 pre_insert_map = NULL;
1298 if (pre_delete_map)
1300 sbitmap_vector_free (pre_delete_map);
1301 pre_delete_map = NULL;
1303 if (edge_list)
1305 free_edge_list (edge_list);
1306 edge_list = NULL;
1309 /* Free the extension hash. */
1310 htab_delete (see_pre_extension_hash);
1312 /* Free the array of hashes. */
1313 for (i = 0; i < last_bb; i++)
1314 if (see_bb_hash_ar[i])
1315 htab_delete (see_bb_hash_ar[i]);
1316 free (see_bb_hash_ar);
1318 /* Free the array of splay trees. */
1319 for (i = 0; i < last_bb; i++)
1320 if (see_bb_splay_ar[i])
1321 splay_tree_delete (see_bb_splay_ar[i]);
1322 free (see_bb_splay_ar);
1324 /* Free the array of web entries and their extra info field. */
1325 for (j = 0; j < defs_num; j++)
1326 free (def_entry[j].extra_info);
1327 free (def_entry);
1328 for (j = 0; j < uses_num; j++)
1329 free (use_entry[j].extra_info);
1330 free (use_entry);
1334 /* Initialize global data structures and variables. */
1336 static void
1337 see_initialize_data_structures (void)
1339 unsigned int max_reg = max_reg_num ();
1340 unsigned int i;
1342 /* Build the df object. */
1343 df_set_flags (DF_EQ_NOTES);
1344 df_chain_add_problem (DF_DU_CHAIN + DF_UD_CHAIN);
1345 df_analyze ();
1346 df_set_flags (DF_DEFER_INSN_RESCAN);
1348 if (dump_file)
1349 df_dump (dump_file);
1351 /* Record the last basic block at the beginning of the optimization. */
1352 last_bb = last_basic_block;
1354 /* Record the number of uses and defs at the beginning of the optimization. */
1355 uses_num = 0;
1356 defs_num = 0;
1357 for (i = 0; i < max_reg; i++)
1359 uses_num += DF_REG_USE_COUNT (i) + DF_REG_EQ_USE_COUNT (i);
1360 defs_num += DF_REG_DEF_COUNT (i);
1363 /* Allocate web entries array for the union-find data structure. */
1364 def_entry = XCNEWVEC (struct web_entry, defs_num);
1365 use_entry = XCNEWVEC (struct web_entry, uses_num);
1367 /* Allocate an array of splay trees.
1368 One splay tree for each basic block. */
1369 see_bb_splay_ar = XCNEWVEC (splay_tree, last_bb);
1371 /* Allocate an array of hashes.
1372 One hash for each basic block. */
1373 see_bb_hash_ar = XCNEWVEC (htab_t, last_bb);
1375 /* Allocate the extension hash. It will hold the extensions that we want
1376 to PRE. */
1377 see_pre_extension_hash = htab_create (10,
1378 hash_descriptor_pre_extension,
1379 eq_descriptor_pre_extension,
1380 hash_del_pre_extension);
1384 /* Function called by note_uses to check if a register is used in a
1385 subexpressions.
1387 X is a pointer to the subexpression and DATA is a pointer to a
1388 see_mentioned_reg_data structure that contains the register to look for and
1389 a place for the result. */
1391 static void
1392 see_mentioned_reg (rtx *x, void *data)
1394 struct see_mentioned_reg_data *d
1395 = (struct see_mentioned_reg_data *) data;
1397 if (reg_mentioned_p (d->reg, *x))
1398 d->mentioned = true;
1402 /* We don't want to merge a use extension with a reference if the extended
1403 register is used only in a simple move instruction. We also don't want to
1404 merge a def extension with a reference if the source register of the
1405 extension is defined only in a simple move in the reference.
1407 REF is the reference instruction.
1408 EXTENSION is the use extension or def extension instruction.
1409 TYPE is the type of the extension (use or def).
1411 Return true if the reference is complicated enough, so we would like to merge
1412 it with the extension. Otherwise, return false. */
1414 static bool
1415 see_want_to_be_merged_with_extension (rtx ref, rtx extension,
1416 enum extension_type type)
1418 rtx pat;
1419 rtx dest_extension_reg = see_get_extension_reg (extension, 1);
1420 rtx source_extension_reg = see_get_extension_reg (extension, 0);
1421 enum rtx_code code;
1422 struct see_mentioned_reg_data d;
1423 int i;
1425 pat = PATTERN (ref);
1426 code = GET_CODE (pat);
1428 if (code == PARALLEL)
1430 for (i = 0; i < XVECLEN (pat, 0); i++)
1432 rtx sub = XVECEXP (pat, 0, i);
1434 if (GET_CODE (sub) == SET
1435 && (REG_P (SET_DEST (sub))
1436 || (GET_CODE (SET_DEST (sub)) == SUBREG
1437 && REG_P (SUBREG_REG (SET_DEST (sub)))))
1438 && (REG_P (SET_SRC (sub))
1439 || (GET_CODE (SET_SRC (sub)) == SUBREG
1440 && REG_P (SUBREG_REG (SET_SRC (sub))))))
1442 /* This is a simple move SET. */
1443 if (type == DEF_EXTENSION
1444 && reg_mentioned_p (source_extension_reg, SET_DEST (sub)))
1445 return false;
1447 else
1449 /* This is not a simple move SET.
1450 Check if it uses the source of the extension. */
1451 if (type == USE_EXTENSION)
1453 d.reg = dest_extension_reg;
1454 d.mentioned = false;
1455 note_uses (&sub, see_mentioned_reg, &d);
1456 if (d.mentioned)
1457 return true;
1461 if (type == USE_EXTENSION)
1462 return false;
1464 else
1466 if (code == SET
1467 && (REG_P (SET_DEST (pat))
1468 || (GET_CODE (SET_DEST (pat)) == SUBREG
1469 && REG_P (SUBREG_REG (SET_DEST (pat)))))
1470 && (REG_P (SET_SRC (pat))
1471 || (GET_CODE (SET_SRC (pat)) == SUBREG
1472 && REG_P (SUBREG_REG (SET_SRC (pat))))))
1473 /* This is a simple move SET. */
1474 return false;
1477 return true;
1481 /* Print the register number of the current see_register_properties
1482 structure.
1484 This is a subroutine of see_main called via htab_traverse.
1485 SLOT contains the current see_register_properties structure pointer. */
1487 static int
1488 see_print_register_properties (void **slot, void *b ATTRIBUTE_UNUSED)
1490 const struct see_register_properties *const prop =
1491 (const struct see_register_properties *) *slot;
1493 gcc_assert (prop);
1494 fprintf (dump_file, "Property found for register %d\n", prop->regno);
1495 return 1;
1499 /* Print the extension instruction of the current see_register_properties
1500 structure.
1502 This is a subroutine of see_main called via htab_traverse.
1503 SLOT contains the current see_pre_extension_expr structure pointer. */
1505 static int
1506 see_print_pre_extension_expr (void **slot, void *b ATTRIBUTE_UNUSED)
1508 const struct see_pre_extension_expr *const pre_extension =
1509 (const struct see_pre_extension_expr *) *slot;
1511 gcc_assert (pre_extension
1512 && pre_extension->se_insn
1513 && INSN_P (pre_extension->se_insn));
1515 fprintf (dump_file, "Index %d for:\n", pre_extension->bitmap_index);
1516 print_rtl_single (dump_file, pre_extension->se_insn);
1518 return 1;
1522 /* Phase 4 implementation: Commit changes to the insn stream. */
1524 /* Delete the merged def extension.
1526 This is a subroutine of see_commit_ref_changes called via htab_traverse.
1528 SLOT contains the current def extension instruction.
1529 B is the see_ref_s structure pointer. */
1531 static int
1532 see_delete_merged_def_extension (void **slot, void *b ATTRIBUTE_UNUSED)
1534 rtx def_se = (rtx) *slot;
1536 if (dump_file)
1538 fprintf (dump_file, "Deleting merged def extension:\n");
1539 print_rtl_single (dump_file, def_se);
1542 if (INSN_DELETED_P (def_se))
1543 /* This def extension is an implicit one. No need to delete it since
1544 it is not in the insn stream. */
1545 return 1;
1547 delete_insn (def_se);
1548 return 1;
1552 /* Delete the unmerged def extension.
1554 This is a subroutine of see_commit_ref_changes called via htab_traverse.
1556 SLOT contains the current def extension instruction.
1557 B is the see_ref_s structure pointer. */
1559 static int
1560 see_delete_unmerged_def_extension (void **slot, void *b ATTRIBUTE_UNUSED)
1562 rtx def_se = (rtx) *slot;
1564 if (dump_file)
1566 fprintf (dump_file, "Deleting unmerged def extension:\n");
1567 print_rtl_single (dump_file, def_se);
1570 delete_insn (def_se);
1571 return 1;
1575 /* Emit the non-redundant use extension to the instruction stream.
1577 This is a subroutine of see_commit_ref_changes called via htab_traverse.
1579 SLOT contains the current use extension instruction.
1580 B is the see_ref_s structure pointer. */
1582 static int
1583 see_emit_use_extension (void **slot, void *b)
1585 rtx use_se = (rtx) *slot;
1586 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b;
1588 if (INSN_DELETED_P (use_se))
1589 /* This use extension was previously removed according to the lcm
1590 output. */
1591 return 1;
1593 if (dump_file)
1595 fprintf (dump_file, "Inserting use extension:\n");
1596 print_rtl_single (dump_file, use_se);
1599 add_insn_before (use_se, curr_ref_s->insn, NULL);
1601 return 1;
1605 /* For each relevant reference:
1606 a. Emit the non-redundant use extensions.
1607 b. Delete the def extensions.
1608 c. Replace the original reference with the merged one (if exists) and add the
1609 move instructions that were generated.
1611 This is a subroutine of see_commit_changes called via splay_tree_foreach.
1613 STN is the current node in the see_bb_splay_ar[i] splay tree. It holds a
1614 see_ref_s structure. */
1616 static int
1617 see_commit_ref_changes (splay_tree_node stn,
1618 void *data ATTRIBUTE_UNUSED)
1620 htab_t use_se_hash = ((struct see_ref_s *) (stn->value))->use_se_hash;
1621 htab_t unmerged_def_se_hash =
1622 ((struct see_ref_s *) (stn->value))->unmerged_def_se_hash;
1623 htab_t merged_def_se_hash =
1624 ((struct see_ref_s *) (stn->value))->merged_def_se_hash;
1625 rtx ref = ((struct see_ref_s *) (stn->value))->insn;
1626 rtx merged_ref = ((struct see_ref_s *) (stn->value))->merged_insn;
1628 /* Emit the non-redundant use extensions. */
1629 if (use_se_hash)
1630 htab_traverse_noresize (use_se_hash, see_emit_use_extension,
1631 (PTR) (stn->value));
1633 /* Delete the def extensions. */
1634 if (unmerged_def_se_hash)
1635 htab_traverse (unmerged_def_se_hash, see_delete_unmerged_def_extension,
1636 (PTR) (stn->value));
1638 if (merged_def_se_hash)
1639 htab_traverse (merged_def_se_hash, see_delete_merged_def_extension,
1640 (PTR) (stn->value));
1642 /* Replace the original reference with the merged one (if exists) and add the
1643 move instructions that were generated. */
1644 if (merged_ref && !INSN_DELETED_P (ref))
1646 if (dump_file)
1648 fprintf (dump_file, "Replacing orig reference:\n");
1649 print_rtl_single (dump_file, ref);
1650 fprintf (dump_file, "With merged reference:\n");
1651 print_rtl_single (dump_file, merged_ref);
1653 emit_insn_after (merged_ref, ref);
1654 delete_insn (ref);
1657 /* Continue to the next reference. */
1658 return 0;
1662 /* Insert partially redundant expressions on edges to make the expressions fully
1663 redundant.
1665 INDEX_MAP is a mapping of an index to an expression.
1666 Return true if an instruction was inserted on an edge.
1667 Otherwise, return false. */
1669 static bool
1670 see_pre_insert_extensions (struct see_pre_extension_expr **index_map)
1672 int num_edges = NUM_EDGES (edge_list);
1673 int set_size = pre_insert_map[0]->size;
1674 size_t pre_extension_num = htab_elements (see_pre_extension_hash);
1676 int did_insert = 0;
1677 int e;
1678 int i;
1679 int j;
1681 for (e = 0; e < num_edges; e++)
1683 int indx;
1684 basic_block bb = INDEX_EDGE_PRED_BB (edge_list, e);
1686 for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS)
1688 SBITMAP_ELT_TYPE insert = pre_insert_map[e]->elms[i];
1690 for (j = indx; insert && j < (int) pre_extension_num;
1691 j++, insert >>= 1)
1692 if (insert & 1)
1694 struct see_pre_extension_expr *expr = index_map[j];
1695 int idx = expr->bitmap_index;
1696 rtx se_insn = NULL;
1697 edge eg = INDEX_EDGE (edge_list, e);
1699 start_sequence ();
1700 emit_insn (copy_insn (PATTERN (expr->se_insn)));
1701 se_insn = get_insns ();
1702 end_sequence ();
1704 if (eg->flags & EDGE_ABNORMAL)
1706 rtx new_insn = NULL;
1708 new_insn = insert_insn_end_bb_new (se_insn, bb);
1709 gcc_assert (new_insn && INSN_P (new_insn));
1711 if (dump_file)
1713 fprintf (dump_file,
1714 "PRE: end of bb %d, insn %d, ",
1715 bb->index, INSN_UID (new_insn));
1716 fprintf (dump_file,
1717 "inserting expression %d\n", idx);
1720 else
1722 insert_insn_on_edge (se_insn, eg);
1724 if (dump_file)
1726 fprintf (dump_file, "PRE: edge (%d,%d), ",
1727 bb->index,
1728 INDEX_EDGE_SUCC_BB (edge_list, e)->index);
1729 fprintf (dump_file, "inserting expression %d\n", idx);
1732 did_insert = true;
1736 return did_insert;
1740 /* Since all the redundant extensions must be anticipatable, they must be a use
1741 extensions. Mark them as deleted. This will prevent them from been emitted
1742 in the first place.
1744 This is a subroutine of see_commit_changes called via htab_traverse.
1746 SLOT contains the current see_pre_extension_expr structure pointer. */
1748 static int
1749 see_pre_delete_extension (void **slot, void *b ATTRIBUTE_UNUSED)
1751 struct see_pre_extension_expr *const expr =
1752 (struct see_pre_extension_expr *) *slot;
1753 struct see_occr *occr;
1754 int indx = expr->bitmap_index;
1756 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
1758 if (TEST_BIT (pre_delete_map[occr->block_num], indx))
1760 /* Mark as deleted. */
1761 INSN_DELETED_P (occr->insn) = 1;
1762 if (dump_file)
1764 fprintf (dump_file,"Redundant extension deleted:\n");
1765 print_rtl_single (dump_file, occr->insn);
1769 return 1;
1773 /* Create the index_map mapping of an index to an expression.
1775 This is a subroutine of see_commit_changes called via htab_traverse.
1777 SLOT contains the current see_pre_extension_expr structure pointer.
1778 B a pointer to see_pre_extension_expr structure pointer. */
1780 static int
1781 see_map_extension (void **slot, void *b)
1783 struct see_pre_extension_expr *const expr =
1784 (struct see_pre_extension_expr *) *slot;
1785 struct see_pre_extension_expr **const index_map =
1786 (struct see_pre_extension_expr **) b;
1788 index_map[expr->bitmap_index] = expr;
1790 return 1;
1794 /* Phase 4 top level function.
1795 In this phase we finally change the instruction stream.
1796 Here we insert extensions at their best placements and delete the
1797 redundant ones according to the output of the LCM. We also replace
1798 some of the instructions according to phase 2 merges results. */
1800 static void
1801 see_commit_changes (void)
1803 struct see_pre_extension_expr **index_map;
1804 size_t pre_extension_num = htab_elements (see_pre_extension_hash);
1805 bool did_insert = false;
1806 int i;
1808 index_map = XCNEWVEC (struct see_pre_extension_expr *, pre_extension_num);
1810 if (dump_file)
1811 fprintf (dump_file,
1812 "* Phase 4: Commit changes to the insn stream. *\n");
1814 /* Produce a mapping of all the pre_extensions. */
1815 htab_traverse (see_pre_extension_hash, see_map_extension, (PTR) index_map);
1817 /* Delete redundant extension. This will prevent them from been emitted in
1818 the first place. */
1819 htab_traverse (see_pre_extension_hash, see_pre_delete_extension, NULL);
1821 /* Insert extensions on edges, according to the LCM result. */
1822 did_insert = see_pre_insert_extensions (index_map);
1824 if (did_insert)
1825 commit_edge_insertions ();
1827 /* Commit the rest of the changes. */
1828 for (i = 0; i < last_bb; i++)
1830 if (see_bb_splay_ar[i])
1832 /* Traverse over all the references in the basic block in forward
1833 order. */
1834 splay_tree_foreach (see_bb_splay_ar[i],
1835 see_commit_ref_changes, NULL);
1839 free (index_map);
1843 /* Phase 3 implementation: Eliminate globally redundant extensions. */
1845 /* Analyze the properties of a merged def extension for the LCM and record avail
1846 occurrences.
1848 This is a subroutine of see_analyze_ref_local_prop called
1849 via htab_traverse.
1851 SLOT contains the current def extension instruction.
1852 B is the see_ref_s structure pointer. */
1854 static int
1855 see_analyze_merged_def_local_prop (void **slot, void *b)
1857 rtx def_se = (rtx) *slot;
1858 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b;
1859 rtx ref = curr_ref_s->insn;
1860 struct see_pre_extension_expr *extension_expr;
1861 int indx;
1862 int bb_num = BLOCK_NUM (ref);
1863 htab_t curr_bb_hash;
1864 struct see_register_properties *curr_prop, **slot_prop;
1865 struct see_register_properties temp_prop;
1866 rtx dest_extension_reg = see_get_extension_reg (def_se, 1);
1867 struct see_occr *curr_occr = NULL;
1868 struct see_occr *tmp_occr = NULL;
1870 extension_expr = see_seek_pre_extension_expr (def_se, DEF_EXTENSION);
1871 /* The extension_expr must be found. */
1872 gcc_assert (extension_expr);
1874 curr_bb_hash = see_bb_hash_ar[bb_num];
1875 gcc_assert (curr_bb_hash);
1876 temp_prop.regno = REGNO (dest_extension_reg);
1877 slot_prop =
1878 (struct see_register_properties **) htab_find_slot (curr_bb_hash,
1879 &temp_prop, INSERT);
1880 curr_prop = *slot_prop;
1881 gcc_assert (curr_prop);
1883 indx = extension_expr->bitmap_index;
1885 /* Reset the transparency bit. */
1886 RESET_BIT (transp[bb_num], indx);
1887 /* Reset the killed bit. */
1888 RESET_BIT (ae_kill[bb_num], indx);
1890 if (curr_prop->first_se_after_last_def == DF_INSN_LUID (ref))
1892 /* Set the available bit. */
1893 SET_BIT (comp[bb_num], indx);
1894 /* Record the available occurrence. */
1895 curr_occr = XNEW (struct see_occr);
1896 curr_occr->next = NULL;
1897 curr_occr->insn = def_se;
1898 curr_occr->block_num = bb_num;
1899 tmp_occr = extension_expr->avail_occr;
1900 if (!tmp_occr)
1901 extension_expr->avail_occr = curr_occr;
1902 else
1904 while (tmp_occr->next)
1905 tmp_occr = tmp_occr->next;
1906 tmp_occr->next = curr_occr;
1910 return 1;
1914 /* Analyze the properties of a unmerged def extension for the LCM.
1916 This is a subroutine of see_analyze_ref_local_prop called
1917 via htab_traverse.
1919 SLOT contains the current def extension instruction.
1920 B is the see_ref_s structure pointer. */
1922 static int
1923 see_analyze_unmerged_def_local_prop (void **slot, void *b)
1925 rtx def_se = (rtx) *slot;
1926 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b;
1927 rtx ref = curr_ref_s->insn;
1928 struct see_pre_extension_expr *extension_expr;
1929 int indx;
1930 int bb_num = BLOCK_NUM (ref);
1931 htab_t curr_bb_hash;
1932 struct see_register_properties *curr_prop, **slot_prop;
1933 struct see_register_properties temp_prop;
1934 rtx dest_extension_reg = see_get_extension_reg (def_se, 1);
1936 extension_expr = see_seek_pre_extension_expr (def_se, DEF_EXTENSION);
1937 /* The extension_expr must be found. */
1938 gcc_assert (extension_expr);
1940 curr_bb_hash = see_bb_hash_ar[bb_num];
1941 gcc_assert (curr_bb_hash);
1942 temp_prop.regno = REGNO (dest_extension_reg);
1943 slot_prop =
1944 (struct see_register_properties **) htab_find_slot (curr_bb_hash,
1945 &temp_prop, INSERT);
1946 curr_prop = *slot_prop;
1947 gcc_assert (curr_prop);
1949 indx = extension_expr->bitmap_index;
1951 /* Reset the transparency bit. */
1952 RESET_BIT (transp[bb_num], indx);
1953 /* Set the killed bit. */
1954 SET_BIT (ae_kill[bb_num], indx);
1956 return 1;
1960 /* Analyze the properties of a use extension for the LCM and record any and
1961 avail occurrences.
1963 This is a subroutine of see_analyze_ref_local_prop called
1964 via htab_traverse.
1966 SLOT contains the current use extension instruction.
1967 B is the see_ref_s structure pointer. */
1969 static int
1970 see_analyze_use_local_prop (void **slot, void *b)
1972 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b;
1973 rtx use_se = (rtx) *slot;
1974 rtx ref = curr_ref_s->insn;
1975 rtx dest_extension_reg = see_get_extension_reg (use_se, 1);
1976 struct see_pre_extension_expr *extension_expr;
1977 struct see_register_properties *curr_prop, **slot_prop;
1978 struct see_register_properties temp_prop;
1979 struct see_occr *curr_occr = NULL;
1980 struct see_occr *tmp_occr = NULL;
1981 htab_t curr_bb_hash;
1982 int indx;
1983 int bb_num = BLOCK_NUM (ref);
1985 extension_expr = see_seek_pre_extension_expr (use_se, USE_EXTENSION);
1986 /* The extension_expr must be found. */
1987 gcc_assert (extension_expr);
1989 curr_bb_hash = see_bb_hash_ar[bb_num];
1990 gcc_assert (curr_bb_hash);
1991 temp_prop.regno = REGNO (dest_extension_reg);
1992 slot_prop =
1993 (struct see_register_properties **) htab_find_slot (curr_bb_hash,
1994 &temp_prop, INSERT);
1995 curr_prop = *slot_prop;
1996 gcc_assert (curr_prop);
1998 indx = extension_expr->bitmap_index;
2000 if (curr_prop->first_se_before_any_def == DF_INSN_LUID (ref))
2002 /* Set the anticipatable bit. */
2003 SET_BIT (antloc[bb_num], indx);
2004 /* Record the anticipatable occurrence. */
2005 curr_occr = XNEW (struct see_occr);
2006 curr_occr->next = NULL;
2007 curr_occr->insn = use_se;
2008 curr_occr->block_num = bb_num;
2009 tmp_occr = extension_expr->antic_occr;
2010 if (!tmp_occr)
2011 extension_expr->antic_occr = curr_occr;
2012 else
2014 while (tmp_occr->next)
2015 tmp_occr = tmp_occr->next;
2016 tmp_occr->next = curr_occr;
2018 if (curr_prop->last_def < 0)
2020 /* Set the available bit. */
2021 SET_BIT (comp[bb_num], indx);
2022 /* Record the available occurrence. */
2023 curr_occr = XNEW (struct see_occr);
2024 curr_occr->next = NULL;
2025 curr_occr->insn = use_se;
2026 curr_occr->block_num = bb_num;
2027 tmp_occr = extension_expr->avail_occr;
2028 if (!tmp_occr)
2029 extension_expr->avail_occr = curr_occr;
2030 else
2032 while (tmp_occr->next)
2033 tmp_occr = tmp_occr->next;
2034 tmp_occr->next = curr_occr;
2037 /* Note: there is no need to reset the killed bit since it must be zero at
2038 this point. */
2040 else if (curr_prop->first_se_after_last_def == DF_INSN_LUID (ref))
2042 /* Set the available bit. */
2043 SET_BIT (comp[bb_num], indx);
2044 /* Reset the killed bit. */
2045 RESET_BIT (ae_kill[bb_num], indx);
2046 /* Record the available occurrence. */
2047 curr_occr = XNEW (struct see_occr);
2048 curr_occr->next = NULL;
2049 curr_occr->insn = use_se;
2050 curr_occr->block_num = bb_num;
2051 tmp_occr = extension_expr->avail_occr;
2052 if (!tmp_occr)
2053 extension_expr->avail_occr = curr_occr;
2054 else
2056 while (tmp_occr->next)
2057 tmp_occr = tmp_occr->next;
2058 tmp_occr->next = curr_occr;
2061 return 1;
2065 /* Here we traverse over all the merged and unmerged extensions of the reference
2066 and analyze their properties for the LCM.
2068 This is a subroutine of see_execute_LCM called via splay_tree_foreach.
2070 STN is the current node in the see_bb_splay_ar[i] splay tree. It holds a
2071 see_ref_s structure. */
2073 static int
2074 see_analyze_ref_local_prop (splay_tree_node stn,
2075 void *data ATTRIBUTE_UNUSED)
2077 htab_t use_se_hash = ((struct see_ref_s *) (stn->value))->use_se_hash;
2078 htab_t unmerged_def_se_hash =
2079 ((struct see_ref_s *) (stn->value))->unmerged_def_se_hash;
2080 htab_t merged_def_se_hash =
2081 ((struct see_ref_s *) (stn->value))->merged_def_se_hash;
2083 /* Analyze use extensions that were not merged with the reference. */
2084 if (use_se_hash)
2085 htab_traverse_noresize (use_se_hash, see_analyze_use_local_prop,
2086 (PTR) (stn->value));
2088 /* Analyze def extensions that were not merged with the reference. */
2089 if (unmerged_def_se_hash)
2090 htab_traverse (unmerged_def_se_hash, see_analyze_unmerged_def_local_prop,
2091 (PTR) (stn->value));
2093 /* Analyze def extensions that were merged with the reference. */
2094 if (merged_def_se_hash)
2095 htab_traverse (merged_def_se_hash, see_analyze_merged_def_local_prop,
2096 (PTR) (stn->value));
2098 /* Continue to the next definition. */
2099 return 0;
2103 /* Phase 3 top level function.
2104 In this phase, we set the input bit vectors of the LCM according to data
2105 gathered in phase 2.
2106 Then we run the edge based LCM. */
2108 static void
2109 see_execute_LCM (void)
2111 size_t pre_extension_num = htab_elements (see_pre_extension_hash);
2112 int i = 0;
2114 if (dump_file)
2115 fprintf (dump_file,
2116 "* Phase 3: Eliminate globally redundant extensions. *\n");
2118 /* Initialize the global sbitmap vectors. */
2119 transp = sbitmap_vector_alloc (last_bb, pre_extension_num);
2120 comp = sbitmap_vector_alloc (last_bb, pre_extension_num);
2121 antloc = sbitmap_vector_alloc (last_bb, pre_extension_num);
2122 ae_kill = sbitmap_vector_alloc (last_bb, pre_extension_num);
2123 sbitmap_vector_ones (transp, last_bb);
2124 sbitmap_vector_zero (comp, last_bb);
2125 sbitmap_vector_zero (antloc, last_bb);
2126 sbitmap_vector_zero (ae_kill, last_bb);
2128 /* Traverse over all the splay trees of the basic blocks. */
2129 for (i = 0; i < last_bb; i++)
2131 if (see_bb_splay_ar[i])
2133 /* Traverse over all the references in the basic block in forward
2134 order. */
2135 splay_tree_foreach (see_bb_splay_ar[i],
2136 see_analyze_ref_local_prop, NULL);
2140 /* Add fake exit edges before running the lcm. */
2141 add_noreturn_fake_exit_edges ();
2143 /* Run the LCM. */
2144 edge_list = pre_edge_lcm (pre_extension_num, transp, comp, antloc,
2145 ae_kill, &pre_insert_map, &pre_delete_map);
2147 /* Remove the fake edges. */
2148 remove_fake_exit_edges ();
2152 /* Phase 2 implementation: Merge and eliminate locally redundant extensions. */
2154 /* In this function we set the register properties for the register that is
2155 defined and extended in the reference.
2156 The properties are defined in see_register_properties structure which is
2157 allocated per basic block and per register.
2158 Later the extension is inserted into the see_pre_extension_hash for the next
2159 phase of the optimization.
2161 This is a subroutine of see_handle_extensions_for_one_ref called
2162 via htab_traverse.
2164 SLOT contains the current def extension instruction.
2165 B is the see_ref_s structure pointer. */
2167 static int
2168 see_set_prop_merged_def (void **slot, void *b)
2170 rtx def_se = (rtx) *slot;
2171 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b;
2172 rtx insn = curr_ref_s->insn;
2173 rtx dest_extension_reg = see_get_extension_reg (def_se, 1);
2174 htab_t curr_bb_hash;
2175 struct see_register_properties *curr_prop = NULL;
2176 struct see_register_properties **slot_prop;
2177 struct see_register_properties temp_prop;
2178 int ref_luid = DF_INSN_LUID (insn);
2180 curr_bb_hash = see_bb_hash_ar[BLOCK_NUM (curr_ref_s->insn)];
2181 if (!curr_bb_hash)
2183 /* The hash doesn't exist yet. Create it. */
2184 curr_bb_hash = htab_create (10,
2185 hash_descriptor_properties,
2186 eq_descriptor_properties,
2187 hash_del_properties);
2188 see_bb_hash_ar[BLOCK_NUM (curr_ref_s->insn)] = curr_bb_hash;
2191 /* Find the right register properties in the right basic block. */
2192 temp_prop.regno = REGNO (dest_extension_reg);
2193 slot_prop =
2194 (struct see_register_properties **) htab_find_slot (curr_bb_hash,
2195 &temp_prop, INSERT);
2197 if (slot_prop && *slot_prop != NULL)
2199 /* Property already exists. */
2200 curr_prop = *slot_prop;
2201 gcc_assert (curr_prop->regno == REGNO (dest_extension_reg));
2203 curr_prop->last_def = ref_luid;
2204 curr_prop->first_se_after_last_def = ref_luid;
2206 else
2208 /* Property doesn't exist yet. */
2209 curr_prop = XNEW (struct see_register_properties);
2210 curr_prop->regno = REGNO (dest_extension_reg);
2211 curr_prop->last_def = ref_luid;
2212 curr_prop->first_se_before_any_def = -1;
2213 curr_prop->first_se_after_last_def = ref_luid;
2214 *slot_prop = curr_prop;
2217 /* Insert the def_se into see_pre_extension_hash if it isn't already
2218 there. */
2219 see_seek_pre_extension_expr (def_se, DEF_EXTENSION);
2221 return 1;
2225 /* In this function we set the register properties for the register that is
2226 defined but not extended in the reference.
2227 The properties are defined in see_register_properties structure which is
2228 allocated per basic block and per register.
2229 Later the extension is inserted into the see_pre_extension_hash for the next
2230 phase of the optimization.
2232 This is a subroutine of see_handle_extensions_for_one_ref called
2233 via htab_traverse.
2235 SLOT contains the current def extension instruction.
2236 B is the see_ref_s structure pointer. */
2238 static int
2239 see_set_prop_unmerged_def (void **slot, void *b)
2241 rtx def_se = (rtx) *slot;
2242 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b;
2243 rtx insn = curr_ref_s->insn;
2244 rtx dest_extension_reg = see_get_extension_reg (def_se, 1);
2245 htab_t curr_bb_hash;
2246 struct see_register_properties *curr_prop = NULL;
2247 struct see_register_properties **slot_prop;
2248 struct see_register_properties temp_prop;
2249 int ref_luid = DF_INSN_LUID (insn);
2251 curr_bb_hash = see_bb_hash_ar[BLOCK_NUM (curr_ref_s->insn)];
2252 if (!curr_bb_hash)
2254 /* The hash doesn't exist yet. Create it. */
2255 curr_bb_hash = htab_create (10,
2256 hash_descriptor_properties,
2257 eq_descriptor_properties,
2258 hash_del_properties);
2259 see_bb_hash_ar[BLOCK_NUM (curr_ref_s->insn)] = curr_bb_hash;
2262 /* Find the right register properties in the right basic block. */
2263 temp_prop.regno = REGNO (dest_extension_reg);
2264 slot_prop =
2265 (struct see_register_properties **) htab_find_slot (curr_bb_hash,
2266 &temp_prop, INSERT);
2268 if (slot_prop && *slot_prop != NULL)
2270 /* Property already exists. */
2271 curr_prop = *slot_prop;
2272 gcc_assert (curr_prop->regno == REGNO (dest_extension_reg));
2274 curr_prop->last_def = ref_luid;
2275 curr_prop->first_se_after_last_def = -1;
2277 else
2279 /* Property doesn't exist yet. */
2280 curr_prop = XNEW (struct see_register_properties);
2281 curr_prop->regno = REGNO (dest_extension_reg);
2282 curr_prop->last_def = ref_luid;
2283 curr_prop->first_se_before_any_def = -1;
2284 curr_prop->first_se_after_last_def = -1;
2285 *slot_prop = curr_prop;
2288 /* Insert the def_se into see_pre_extension_hash if it isn't already
2289 there. */
2290 see_seek_pre_extension_expr (def_se, DEF_EXTENSION);
2292 return 1;
2296 /* In this function we set the register properties for the register that is used
2297 in the reference.
2298 The properties are defined in see_register_properties structure which is
2299 allocated per basic block and per register.
2300 When a redundant use extension is found it is removed from the hash of the
2301 reference.
2302 If the extension is non redundant it is inserted into the
2303 see_pre_extension_hash for the next phase of the optimization.
2305 This is a subroutine of see_handle_extensions_for_one_ref called
2306 via htab_traverse.
2308 SLOT contains the current use extension instruction.
2309 B is the see_ref_s structure pointer. */
2311 static int
2312 see_set_prop_unmerged_use (void **slot, void *b)
2314 rtx use_se = (rtx) *slot;
2315 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b;
2316 rtx insn = curr_ref_s->insn;
2317 rtx dest_extension_reg = see_get_extension_reg (use_se, 1);
2318 htab_t curr_bb_hash;
2319 struct see_register_properties *curr_prop = NULL;
2320 struct see_register_properties **slot_prop;
2321 struct see_register_properties temp_prop;
2322 bool locally_redundant = false;
2323 int ref_luid = DF_INSN_LUID (insn);
2325 curr_bb_hash = see_bb_hash_ar[BLOCK_NUM (curr_ref_s->insn)];
2326 if (!curr_bb_hash)
2328 /* The hash doesn't exist yet. Create it. */
2329 curr_bb_hash = htab_create (10,
2330 hash_descriptor_properties,
2331 eq_descriptor_properties,
2332 hash_del_properties);
2333 see_bb_hash_ar[BLOCK_NUM (curr_ref_s->insn)] = curr_bb_hash;
2336 /* Find the right register properties in the right basic block. */
2337 temp_prop.regno = REGNO (dest_extension_reg);
2338 slot_prop =
2339 (struct see_register_properties **) htab_find_slot (curr_bb_hash,
2340 &temp_prop, INSERT);
2342 if (slot_prop && *slot_prop != NULL)
2344 /* Property already exists. */
2345 curr_prop = *slot_prop;
2346 gcc_assert (curr_prop->regno == REGNO (dest_extension_reg));
2349 if (curr_prop->last_def < 0 && curr_prop->first_se_before_any_def < 0)
2350 curr_prop->first_se_before_any_def = ref_luid;
2351 else if (curr_prop->last_def < 0
2352 && curr_prop->first_se_before_any_def >= 0)
2354 /* In this case the extension is locally redundant. */
2355 htab_clear_slot (curr_ref_s->use_se_hash, (PTR *)slot);
2356 locally_redundant = true;
2358 else if (curr_prop->last_def >= 0
2359 && curr_prop->first_se_after_last_def < 0)
2360 curr_prop->first_se_after_last_def = ref_luid;
2361 else if (curr_prop->last_def >= 0
2362 && curr_prop->first_se_after_last_def >= 0)
2364 /* In this case the extension is locally redundant. */
2365 htab_clear_slot (curr_ref_s->use_se_hash, (PTR *)slot);
2366 locally_redundant = true;
2368 else
2369 gcc_unreachable ();
2371 else
2373 /* Property doesn't exist yet. Create a new one. */
2374 curr_prop = XNEW (struct see_register_properties);
2375 curr_prop->regno = REGNO (dest_extension_reg);
2376 curr_prop->last_def = -1;
2377 curr_prop->first_se_before_any_def = ref_luid;
2378 curr_prop->first_se_after_last_def = -1;
2379 *slot_prop = curr_prop;
2382 /* Insert the use_se into see_pre_extension_hash if it isn't already
2383 there. */
2384 if (!locally_redundant)
2385 see_seek_pre_extension_expr (use_se, USE_EXTENSION);
2386 if (locally_redundant && dump_file)
2388 fprintf (dump_file, "Locally redundant extension:\n");
2389 print_rtl_single (dump_file, use_se);
2391 return 1;
2395 /* Print an extension instruction.
2397 This is a subroutine of see_handle_extensions_for_one_ref called
2398 via htab_traverse.
2399 SLOT contains the extension instruction. */
2401 static int
2402 see_print_one_extension (void **slot, void *b ATTRIBUTE_UNUSED)
2404 rtx def_se = (rtx) *slot;
2406 gcc_assert (def_se && INSN_P (def_se));
2407 print_rtl_single (dump_file, def_se);
2409 return 1;
2412 /* Function called by note_uses to replace used subexpressions.
2414 X is a pointer to the subexpression and DATA is a pointer to a
2415 see_replace_data structure that contains the data for the replacement. */
2417 static void
2418 see_replace_src (rtx *x, void *data)
2420 struct see_replace_data *d
2421 = (struct see_replace_data *) data;
2423 *x = replace_rtx (*x, d->from, d->to);
2427 static rtx
2428 see_copy_insn (rtx insn)
2430 rtx pat = copy_insn (PATTERN (insn)), ret;
2432 if (NONJUMP_INSN_P (insn))
2433 ret = make_insn_raw (pat);
2434 else if (JUMP_P (insn))
2435 ret = make_jump_insn_raw (pat);
2436 else if (CALL_P (insn))
2438 start_sequence ();
2439 ret = emit_call_insn (pat);
2440 end_sequence ();
2441 if (CALL_INSN_FUNCTION_USAGE (insn))
2442 CALL_INSN_FUNCTION_USAGE (ret)
2443 = copy_rtx (CALL_INSN_FUNCTION_USAGE (insn));
2444 SIBLING_CALL_P (ret) = SIBLING_CALL_P (insn);
2445 RTL_CONST_CALL_P (ret) = RTL_CONST_CALL_P (insn);
2446 RTL_PURE_CALL_P (ret) = RTL_PURE_CALL_P (insn);
2447 RTL_LOOPING_CONST_OR_PURE_CALL_P (ret)
2448 = RTL_LOOPING_CONST_OR_PURE_CALL_P (insn);
2450 else
2451 gcc_unreachable ();
2452 if (REG_NOTES (insn))
2453 REG_NOTES (ret) = copy_rtx (REG_NOTES (insn));
2454 INSN_LOCATOR (ret) = INSN_LOCATOR (insn);
2455 RTX_FRAME_RELATED_P (ret) = RTX_FRAME_RELATED_P (insn);
2456 PREV_INSN (ret) = NULL_RTX;
2457 NEXT_INSN (ret) = NULL_RTX;
2458 return ret;
2462 /* At this point the pattern is expected to be:
2464 ref: set (dest_reg) (rhs)
2465 def_se: set (dest_extension_reg) (sign/zero_extend (source_extension_reg))
2467 The merge of these two instructions didn't succeed.
2469 We try to generate the pattern:
2470 set (subreg (dest_extension_reg)) (rhs)
2472 We do this in 4 steps:
2473 a. Replace every use of dest_reg with a new pseudo register.
2474 b. Replace every instance of dest_reg with the subreg.
2475 c. Replace every use of the new pseudo register back to dest_reg.
2476 d. Try to recognize and simplify.
2478 If the manipulation failed, leave the original ref but try to generate and
2479 recognize a simple move instruction:
2480 set (subreg (dest_extension_reg)) (dest_reg)
2481 This move instruction will be emitted right after the ref to the instruction
2482 stream and assure the correctness of the code after def_se will be removed.
2484 CURR_REF_S is the current reference.
2485 DEF_SE is the extension that couldn't be merged. */
2487 static void
2488 see_def_extension_not_merged (struct see_ref_s *curr_ref_s, rtx def_se)
2490 struct see_replace_data d;
2491 /* If the original insn was already merged with an extension before,
2492 take the merged one. */
2493 rtx ref = curr_ref_s->merged_insn
2494 ? curr_ref_s->merged_insn : curr_ref_s->insn;
2495 rtx merged_ref_next = curr_ref_s->merged_insn
2496 ? NEXT_INSN (curr_ref_s->merged_insn) : NULL_RTX;
2497 rtx ref_copy = see_copy_insn (ref);
2498 rtx source_extension_reg = see_get_extension_reg (def_se, 0);
2499 rtx dest_extension_reg = see_get_extension_reg (def_se, 1);
2500 rtx set, rhs;
2501 rtx dest_reg, dest_real_reg;
2502 rtx new_pseudo_reg, subreg;
2503 enum machine_mode source_extension_mode = GET_MODE (source_extension_reg);
2504 enum machine_mode dest_mode;
2506 set = single_set (def_se);
2507 gcc_assert (set);
2508 rhs = SET_SRC (set);
2509 gcc_assert (GET_CODE (rhs) == SIGN_EXTEND
2510 || GET_CODE (rhs) == ZERO_EXTEND);
2511 dest_reg = XEXP (rhs, 0);
2512 gcc_assert (REG_P (dest_reg)
2513 || (GET_CODE (dest_reg) == SUBREG
2514 && REG_P (SUBREG_REG (dest_reg))));
2515 dest_real_reg = REG_P (dest_reg) ? dest_reg : SUBREG_REG (dest_reg);
2516 dest_mode = GET_MODE (dest_reg);
2518 subreg = gen_lowpart_SUBREG (dest_mode, dest_extension_reg);
2519 new_pseudo_reg = gen_reg_rtx (source_extension_mode);
2521 /* Step a: Replace every use of dest_real_reg with a new pseudo register. */
2522 d.from = dest_real_reg;
2523 d.to = new_pseudo_reg;
2524 note_uses (&PATTERN (ref_copy), see_replace_src, &d);
2525 /* Step b: Replace every instance of dest_reg with the subreg. */
2526 ref_copy = replace_rtx (ref_copy, dest_reg, copy_rtx (subreg));
2528 /* Step c: Replace every use of the new pseudo register back to
2529 dest_real_reg. */
2530 d.from = new_pseudo_reg;
2531 d.to = dest_real_reg;
2532 note_uses (&PATTERN (ref_copy), see_replace_src, &d);
2534 if (rtx_equal_p (PATTERN (ref), PATTERN (ref_copy))
2535 || insn_invalid_p (ref_copy))
2537 /* The manipulation failed. */
2538 df_insn_delete (NULL, INSN_UID (ref_copy));
2540 /* Create a new copy. */
2541 ref_copy = see_copy_insn (ref);
2543 if (curr_ref_s->merged_insn)
2544 df_insn_delete (NULL, INSN_UID (curr_ref_s->merged_insn));
2546 /* Create a simple move instruction that will replace the def_se. */
2547 start_sequence ();
2548 emit_insn (ref_copy);
2549 emit_move_insn (subreg, dest_reg);
2550 if (merged_ref_next != NULL_RTX)
2551 emit_insn (merged_ref_next);
2552 curr_ref_s->merged_insn = get_insns ();
2553 end_sequence ();
2555 if (dump_file)
2557 fprintf (dump_file, "Following def merge failure a move ");
2558 fprintf (dump_file, "insn was added after the ref.\n");
2559 fprintf (dump_file, "Original ref:\n");
2560 print_rtl_single (dump_file, ref);
2561 fprintf (dump_file, "Move insn that was added:\n");
2562 print_rtl_single (dump_file, NEXT_INSN (curr_ref_s->merged_insn));
2564 return;
2567 /* The manipulation succeeded. Store the new manipulated reference. */
2569 /* It is possible for dest_reg to appear multiple times in ref_copy. In this
2570 case, ref_copy now has invalid sharing. Copying solves the problem.
2571 We don't use copy_rtx as an optimization for the common case (no sharing).
2572 We can't just use copy_rtx_if_shared since it does nothing on INSNs.
2573 Another possible solution would be to make validate_replace_rtx_1
2574 public and use it instead of replace_rtx. */
2575 reset_used_flags (PATTERN (ref_copy));
2576 reset_used_flags (REG_NOTES (ref_copy));
2577 PATTERN (ref_copy) = copy_rtx_if_shared (PATTERN (ref_copy));
2578 REG_NOTES (ref_copy) = copy_rtx_if_shared (REG_NOTES (ref_copy));
2580 /* Try to simplify the new manipulated insn. */
2581 validate_simplify_insn (ref_copy);
2583 if (curr_ref_s->merged_insn)
2584 df_insn_delete (NULL, INSN_UID (curr_ref_s->merged_insn));
2586 /* Create a simple move instruction to assure the correctness of the code. */
2587 start_sequence ();
2588 emit_insn (ref_copy);
2589 emit_move_insn (dest_reg, subreg);
2590 if (merged_ref_next != NULL_RTX)
2591 emit_insn (merged_ref_next);
2592 curr_ref_s->merged_insn = get_insns ();
2593 end_sequence ();
2595 if (dump_file)
2597 fprintf (dump_file, "Following merge failure the ref was transformed!\n");
2598 fprintf (dump_file, "Original ref:\n");
2599 print_rtl_single (dump_file, ref);
2600 fprintf (dump_file, "Transformed ref:\n");
2601 print_rtl_single (dump_file, curr_ref_s->merged_insn);
2602 fprintf (dump_file, "Move insn that was added:\n");
2603 print_rtl_single (dump_file, NEXT_INSN (curr_ref_s->merged_insn));
2608 /* Merge the reference instruction (ref) with the current use extension.
2610 use_se extends a NARROWmode register to a WIDEmode register.
2611 ref uses the WIDEmode register.
2613 The pattern we try to merge is this:
2614 use_se: set (dest_extension_reg) (sign/zero_extend (source_extension_reg))
2615 ref: use (dest_extension_reg)
2617 where dest_extension_reg and source_extension_reg can be subregs.
2619 The merge is done by generating, simplifying and recognizing the pattern:
2620 use (sign/zero_extend (source_extension_reg))
2622 If ref is too simple (according to see_want_to_be_merged_with_extension ())
2623 we don't try to merge it with use_se and we continue as if the merge failed.
2625 This is a subroutine of see_handle_extensions_for_one_ref called
2626 via htab_traverse.
2627 SLOT contains the current use extension instruction.
2628 B is the see_ref_s structure pointer. */
2630 static int
2631 see_merge_one_use_extension (void **slot, void *b)
2633 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b;
2634 rtx use_se = (rtx) *slot;
2635 rtx ref = curr_ref_s->merged_insn
2636 ? curr_ref_s->merged_insn : curr_ref_s->insn;
2637 rtx merged_ref_next = curr_ref_s->merged_insn
2638 ? NEXT_INSN (curr_ref_s->merged_insn) : NULL_RTX;
2639 rtx ref_copy = see_copy_insn (ref);
2640 rtx extension_set = single_set (use_se);
2641 rtx extension_rhs = NULL;
2642 rtx dest_extension_reg = see_get_extension_reg (use_se, 1);
2643 rtx note = NULL;
2644 rtx simplified_note = NULL;
2646 gcc_assert (use_se && curr_ref_s && extension_set);
2648 extension_rhs = SET_SRC (extension_set);
2650 /* In REG_EQUIV and REG_EQUAL notes that mention the register we need to
2651 replace the uses of the dest_extension_reg with the rhs of the extension
2652 instruction. This is necessary since there might not be an extension in
2653 the path between the definition and the note when this optimization is
2654 over. */
2655 note = find_reg_equal_equiv_note (ref_copy);
2656 if (note)
2658 simplified_note = simplify_replace_rtx (XEXP (note, 0),
2659 dest_extension_reg,
2660 extension_rhs);
2661 if (rtx_equal_p (XEXP (note, 0), simplified_note))
2662 /* Replacement failed. Remove the note. */
2663 remove_note (ref_copy, note);
2664 else
2665 set_unique_reg_note (ref_copy, REG_NOTE_KIND (note),
2666 simplified_note);
2669 if (!see_want_to_be_merged_with_extension (ref, use_se, USE_EXTENSION))
2671 /* The use in the reference is too simple. Don't try to merge. */
2672 if (dump_file)
2674 fprintf (dump_file, "Use merge skipped!\n");
2675 fprintf (dump_file, "Original instructions:\n");
2676 print_rtl_single (dump_file, use_se);
2677 print_rtl_single (dump_file, ref);
2679 df_insn_delete (NULL, INSN_UID (ref_copy));
2680 /* Don't remove the current use_se from the use_se_hash and continue to
2681 the next extension. */
2682 return 1;
2685 validate_replace_src_group (dest_extension_reg, extension_rhs, ref_copy);
2687 if (!num_changes_pending ())
2688 /* In this case this is not a real use (the only use is/was in the notes
2689 list). Remove the use extension from the hash. This will prevent it
2690 from been emitted in the first place. */
2692 if (dump_file)
2694 fprintf (dump_file, "Use extension not necessary before:\n");
2695 print_rtl_single (dump_file, ref);
2697 htab_clear_slot (curr_ref_s->use_se_hash, (PTR *)slot);
2699 if (curr_ref_s->merged_insn)
2700 df_insn_delete (NULL, INSN_UID (curr_ref_s->merged_insn));
2702 if (merged_ref_next != NULL_RTX)
2704 start_sequence ();
2705 emit_insn (ref_copy);
2706 emit_insn (merged_ref_next);
2707 curr_ref_s->merged_insn = get_insns ();
2708 end_sequence ();
2710 else
2711 curr_ref_s->merged_insn = ref_copy;
2712 return 1;
2715 if (!apply_change_group ())
2717 /* The merge failed. */
2718 if (dump_file)
2720 fprintf (dump_file, "Use merge failed!\n");
2721 fprintf (dump_file, "Original instructions:\n");
2722 print_rtl_single (dump_file, use_se);
2723 print_rtl_single (dump_file, ref);
2725 df_insn_delete (NULL, INSN_UID (ref_copy));
2726 /* Don't remove the current use_se from the use_se_hash and continue to
2727 the next extension. */
2728 return 1;
2731 /* The merge succeeded! */
2733 /* Try to simplify the new merged insn. */
2734 validate_simplify_insn (ref_copy);
2736 if (curr_ref_s->merged_insn)
2737 df_insn_delete (NULL, INSN_UID (curr_ref_s->merged_insn));
2739 if (merged_ref_next != NULL_RTX)
2741 start_sequence ();
2742 emit_insn (ref_copy);
2743 emit_insn (merged_ref_next);
2744 curr_ref_s->merged_insn = get_insns ();
2745 end_sequence ();
2747 else
2748 curr_ref_s->merged_insn = ref_copy;
2750 if (dump_file)
2752 fprintf (dump_file, "Use merge succeeded!\n");
2753 fprintf (dump_file, "Original instructions:\n");
2754 print_rtl_single (dump_file, use_se);
2755 print_rtl_single (dump_file, ref);
2756 fprintf (dump_file, "Merged instruction:\n");
2757 print_rtl_single (dump_file, curr_ref_s->merged_insn);
2760 /* Remove the current use_se from the use_se_hash. This will prevent it from
2761 been emitted in the first place. */
2762 htab_clear_slot (curr_ref_s->use_se_hash, (PTR *)slot);
2763 return 1;
2767 /* Merge the reference instruction (ref) with the extension that follows it
2768 in the same basic block (def_se).
2769 ref sets a NARROWmode register and def_se extends it to WIDEmode register.
2771 The pattern we try to merge is this:
2772 ref: set (dest_reg) (rhs)
2773 def_se: set (dest_extension_reg) (sign/zero_extend (source_extension_reg))
2775 where dest_reg and source_extension_reg can both be subregs (together)
2776 and (REGNO (dest_reg) == REGNO (source_extension_reg))
2778 The merge is done by generating, simplifying and recognizing the pattern:
2779 set (dest_extension_reg) (sign/zero_extend (rhs))
2780 If ref is a parallel instruction we just replace the relevant set in it.
2782 If ref is too simple (according to see_want_to_be_merged_with_extension ())
2783 we don't try to merge it with def_se and we continue as if the merge failed.
2785 This is a subroutine of see_handle_extensions_for_one_ref called
2786 via htab_traverse.
2788 SLOT contains the current def extension instruction.
2789 B is the see_ref_s structure pointer. */
2791 static int
2792 see_merge_one_def_extension (void **slot, void *b)
2794 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b;
2795 rtx def_se = (rtx) *slot;
2796 /* If the original insn was already merged with an extension before,
2797 take the merged one. */
2798 rtx ref = curr_ref_s->merged_insn
2799 ? curr_ref_s->merged_insn : curr_ref_s->insn;
2800 rtx merged_ref_next = curr_ref_s->merged_insn
2801 ? NEXT_INSN (curr_ref_s->merged_insn) : NULL_RTX;
2802 rtx ref_copy = see_copy_insn (ref);
2803 rtx new_set = NULL;
2804 rtx source_extension_reg = see_get_extension_reg (def_se, 0);
2805 rtx dest_extension_reg = see_get_extension_reg (def_se, 1);
2806 rtx *rtx_slot, subreg;
2807 rtx temp_extension = NULL;
2808 rtx simplified_temp_extension = NULL;
2809 rtx *pat;
2810 enum rtx_code code;
2811 enum entry_type extension_code;
2812 enum machine_mode source_extension_mode;
2813 enum machine_mode source_mode = VOIDmode;
2814 enum machine_mode dest_extension_mode;
2815 bool merge_success = false;
2816 int i;
2818 gcc_assert (def_se
2819 && INSN_P (def_se)
2820 && curr_ref_s
2821 && ref
2822 && INSN_P (ref));
2824 if (!see_want_to_be_merged_with_extension (ref, def_se, DEF_EXTENSION))
2826 /* The definition in the reference is too simple. Don't try to merge. */
2827 if (dump_file)
2829 fprintf (dump_file, "Def merge skipped!\n");
2830 fprintf (dump_file, "Original instructions:\n");
2831 print_rtl_single (dump_file, ref);
2832 print_rtl_single (dump_file, def_se);
2835 df_insn_delete (NULL, INSN_UID (ref_copy));
2836 see_def_extension_not_merged (curr_ref_s, def_se);
2837 /* Continue to the next extension. */
2838 return 1;
2841 extension_code = see_get_extension_data (def_se, &source_mode);
2843 /* Try to merge and simplify the extension. */
2844 source_extension_mode = GET_MODE (source_extension_reg);
2845 dest_extension_mode = GET_MODE (dest_extension_reg);
2847 pat = &PATTERN (ref_copy);
2848 code = GET_CODE (*pat);
2850 if (code == PARALLEL)
2852 bool need_to_apply_change = false;
2854 for (i = 0; i < XVECLEN (*pat, 0); i++)
2856 rtx *sub = &XVECEXP (*pat, 0, i);
2858 if (GET_CODE (*sub) == SET
2859 && GET_MODE (SET_SRC (*sub)) != VOIDmode
2860 && GET_MODE (SET_DEST (*sub)) == source_mode
2861 && ((REG_P (SET_DEST (*sub))
2862 && REGNO (SET_DEST (*sub)) == REGNO (source_extension_reg))
2863 || (GET_CODE (SET_DEST (*sub)) == SUBREG
2864 && REG_P (SUBREG_REG (SET_DEST (*sub)))
2865 && (REGNO (SUBREG_REG (SET_DEST (*sub))) ==
2866 REGNO (source_extension_reg)))))
2868 rtx orig_src = SET_SRC (*sub);
2870 if (extension_code == SIGN_EXTENDED_DEF)
2871 temp_extension = gen_rtx_SIGN_EXTEND (dest_extension_mode,
2872 orig_src);
2873 else
2874 temp_extension = gen_rtx_ZERO_EXTEND (dest_extension_mode,
2875 orig_src);
2876 simplified_temp_extension = simplify_rtx (temp_extension);
2877 temp_extension =
2878 (simplified_temp_extension) ? simplified_temp_extension :
2879 temp_extension;
2880 new_set = gen_rtx_SET (VOIDmode, dest_extension_reg,
2881 temp_extension);
2882 validate_change (ref_copy, sub, new_set, 1);
2883 need_to_apply_change = true;
2886 if (need_to_apply_change)
2887 if (apply_change_group ())
2888 merge_success = true;
2890 else if (code == SET
2891 && GET_MODE (SET_SRC (*pat)) != VOIDmode
2892 && GET_MODE (SET_DEST (*pat)) == source_mode
2893 && ((REG_P (SET_DEST (*pat))
2894 && REGNO (SET_DEST (*pat)) == REGNO (source_extension_reg))
2895 || (GET_CODE (SET_DEST (*pat)) == SUBREG
2896 && REG_P (SUBREG_REG (SET_DEST (*pat)))
2897 && (REGNO (SUBREG_REG (SET_DEST (*pat))) ==
2898 REGNO (source_extension_reg)))))
2900 rtx orig_src = SET_SRC (*pat);
2902 if (extension_code == SIGN_EXTENDED_DEF)
2903 temp_extension = gen_rtx_SIGN_EXTEND (dest_extension_mode, orig_src);
2904 else
2905 temp_extension = gen_rtx_ZERO_EXTEND (dest_extension_mode, orig_src);
2906 simplified_temp_extension = simplify_rtx (temp_extension);
2907 temp_extension = (simplified_temp_extension) ? simplified_temp_extension :
2908 temp_extension;
2909 new_set = gen_rtx_SET (VOIDmode, dest_extension_reg, temp_extension);
2910 if (validate_change (ref_copy, pat, new_set, 0))
2911 merge_success = true;
2913 if (!merge_success)
2915 /* The merge failed. */
2916 if (dump_file)
2918 fprintf (dump_file, "Def merge failed!\n");
2919 fprintf (dump_file, "Original instructions:\n");
2920 print_rtl_single (dump_file, ref);
2921 print_rtl_single (dump_file, def_se);
2924 df_insn_delete (NULL, INSN_UID (ref_copy));
2925 see_def_extension_not_merged (curr_ref_s, def_se);
2926 /* Continue to the next extension. */
2927 return 1;
2930 /* The merge succeeded! */
2931 if (curr_ref_s->merged_insn)
2932 df_insn_delete (NULL, INSN_UID (curr_ref_s->merged_insn));
2934 /* Create a simple move instruction to assure the correctness of the code. */
2935 subreg = gen_lowpart_SUBREG (source_extension_mode, dest_extension_reg);
2936 start_sequence ();
2937 emit_insn (ref_copy);
2938 emit_move_insn (source_extension_reg, subreg);
2939 if (merged_ref_next != NULL_RTX)
2940 emit_insn (merged_ref_next);
2941 curr_ref_s->merged_insn = get_insns ();
2942 end_sequence ();
2944 if (dump_file)
2946 fprintf (dump_file, "Def merge succeeded!\n");
2947 fprintf (dump_file, "Original instructions:\n");
2948 print_rtl_single (dump_file, ref);
2949 print_rtl_single (dump_file, def_se);
2950 fprintf (dump_file, "Merged instruction:\n");
2951 print_rtl_single (dump_file, curr_ref_s->merged_insn);
2952 fprintf (dump_file, "Move instruction that was added:\n");
2953 print_rtl_single (dump_file, NEXT_INSN (curr_ref_s->merged_insn));
2956 /* Remove the current def_se from the unmerged_def_se_hash and insert it to
2957 the merged_def_se_hash. */
2958 htab_clear_slot (curr_ref_s->unmerged_def_se_hash, (PTR *)slot);
2959 if (!curr_ref_s->merged_def_se_hash)
2960 curr_ref_s->merged_def_se_hash = htab_create (10,
2961 hash_descriptor_extension,
2962 eq_descriptor_extension,
2963 NULL);
2964 rtx_slot = (rtx *) htab_find_slot (curr_ref_s->merged_def_se_hash,
2965 dest_extension_reg, INSERT);
2966 gcc_assert (*rtx_slot == NULL);
2967 *rtx_slot = def_se;
2969 return 1;
2973 /* Try to eliminate extensions in this order:
2974 a. Try to merge only the def extensions, one by one.
2975 b. Try to merge only the use extensions, one by one.
2977 TODO:
2978 Try to merge any couple of use extensions simultaneously.
2979 Try to merge any def extension with one or two uses extensions
2980 simultaneously.
2982 After all the merges are done, update the register properties for the basic
2983 block and eliminate locally redundant use extensions.
2985 This is a subroutine of see_merge_and_eliminate_extensions called
2986 via splay_tree_foreach.
2987 STN is the current node in the see_bb_splay_ar[i] splay tree. It holds a
2988 see_ref_s structure. */
2990 static int
2991 see_handle_extensions_for_one_ref (splay_tree_node stn,
2992 void *data ATTRIBUTE_UNUSED)
2994 htab_t use_se_hash = ((struct see_ref_s *) (stn->value))->use_se_hash;
2995 htab_t unmerged_def_se_hash =
2996 ((struct see_ref_s *) (stn->value))->unmerged_def_se_hash;
2997 htab_t merged_def_se_hash;
2998 rtx ref = ((struct see_ref_s *) (stn->value))->insn;
3000 if (dump_file)
3002 fprintf (dump_file, "Handling ref:\n");
3003 print_rtl_single (dump_file, ref);
3006 /* a. Try to eliminate only def extensions, one by one. */
3007 if (unmerged_def_se_hash)
3008 htab_traverse_noresize (unmerged_def_se_hash, see_merge_one_def_extension,
3009 (PTR) (stn->value));
3011 if (use_se_hash)
3012 /* b. Try to eliminate only use extensions, one by one. */
3013 htab_traverse_noresize (use_se_hash, see_merge_one_use_extension,
3014 (PTR) (stn->value));
3016 merged_def_se_hash = ((struct see_ref_s *) (stn->value))->merged_def_se_hash;
3018 if (dump_file)
3020 fprintf (dump_file, "The hashes of the current reference:\n");
3021 if (unmerged_def_se_hash)
3023 fprintf (dump_file, "unmerged_def_se_hash:\n");
3024 htab_traverse (unmerged_def_se_hash, see_print_one_extension, NULL);
3026 if (merged_def_se_hash)
3028 fprintf (dump_file, "merged_def_se_hash:\n");
3029 htab_traverse (merged_def_se_hash, see_print_one_extension, NULL);
3031 if (use_se_hash)
3033 fprintf (dump_file, "use_se_hash:\n");
3034 htab_traverse (use_se_hash, see_print_one_extension, NULL);
3038 /* Now that all the merges are done, update the register properties of the
3039 basic block and eliminate locally redundant extensions.
3040 It is important that we first traverse the use extensions hash and
3041 afterwards the def extensions hashes. */
3043 if (use_se_hash)
3044 htab_traverse_noresize (use_se_hash, see_set_prop_unmerged_use,
3045 (PTR) (stn->value));
3047 if (unmerged_def_se_hash)
3048 htab_traverse (unmerged_def_se_hash, see_set_prop_unmerged_def,
3049 (PTR) (stn->value));
3051 if (merged_def_se_hash)
3052 htab_traverse (merged_def_se_hash, see_set_prop_merged_def,
3053 (PTR) (stn->value));
3055 /* Continue to the next definition. */
3056 return 0;
3060 /* Phase 2 top level function.
3061 In this phase, we try to merge def extensions and use extensions with their
3062 references, and eliminate redundant extensions in the same basic block.
3063 We also gather information for the next phases. */
3065 static void
3066 see_merge_and_eliminate_extensions (void)
3068 int i = 0;
3070 if (dump_file)
3071 fprintf (dump_file,
3072 "* Phase 2: Merge and eliminate locally redundant extensions. *\n");
3074 /* Traverse over all the splay trees of the basic blocks. */
3075 for (i = 0; i < last_bb; i++)
3077 if (see_bb_splay_ar[i])
3079 if (dump_file)
3080 fprintf (dump_file, "Handling references for bb %d\n", i);
3081 /* Traverse over all the references in the basic block in forward
3082 order. */
3083 splay_tree_foreach (see_bb_splay_ar[i],
3084 see_handle_extensions_for_one_ref, NULL);
3090 /* Phase 1 implementation: Propagate extensions to uses. */
3092 /* Insert REF_INSN into the splay tree of its basic block.
3093 SE_INSN is the extension to store in the proper hash according to TYPE.
3095 Return true if everything went well.
3096 Otherwise, return false (this will cause the optimization to be aborted). */
3098 static bool
3099 see_store_reference_and_extension (rtx ref_insn, rtx se_insn,
3100 enum extension_type type)
3102 rtx *rtx_slot;
3103 int curr_bb_num;
3104 splay_tree_node stn = NULL;
3105 htab_t se_hash = NULL;
3106 struct see_ref_s *ref_s = NULL;
3108 /* Check the arguments. */
3109 gcc_assert (ref_insn && se_insn);
3110 if (!see_bb_splay_ar)
3111 return false;
3113 curr_bb_num = BLOCK_NUM (ref_insn);
3114 gcc_assert (curr_bb_num < last_bb && curr_bb_num >= 0);
3116 /* Insert the reference to the splay tree of its basic block. */
3117 if (!see_bb_splay_ar[curr_bb_num])
3118 /* The splay tree for this block doesn't exist yet, create it. */
3119 see_bb_splay_ar[curr_bb_num] = splay_tree_new (splay_tree_compare_ints,
3120 NULL, see_free_ref_s);
3121 else
3122 /* Splay tree already exists, check if the current reference is already
3123 in it. */
3125 stn = splay_tree_lookup (see_bb_splay_ar[curr_bb_num],
3126 DF_INSN_LUID (ref_insn));
3127 if (stn)
3128 switch (type)
3130 case EXPLICIT_DEF_EXTENSION:
3131 se_hash =
3132 ((struct see_ref_s *) (stn->value))->unmerged_def_se_hash;
3133 if (!se_hash)
3135 se_hash = htab_create (10,
3136 hash_descriptor_extension,
3137 eq_descriptor_extension,
3138 NULL);
3139 ((struct see_ref_s *) (stn->value))->unmerged_def_se_hash =
3140 se_hash;
3142 break;
3143 case IMPLICIT_DEF_EXTENSION:
3144 se_hash = ((struct see_ref_s *) (stn->value))->merged_def_se_hash;
3145 if (!se_hash)
3147 se_hash = htab_create (10,
3148 hash_descriptor_extension,
3149 eq_descriptor_extension,
3150 NULL);
3151 ((struct see_ref_s *) (stn->value))->merged_def_se_hash =
3152 se_hash;
3154 break;
3155 case USE_EXTENSION:
3156 se_hash = ((struct see_ref_s *) (stn->value))->use_se_hash;
3157 if (!se_hash)
3159 se_hash = htab_create (10,
3160 hash_descriptor_extension,
3161 eq_descriptor_extension,
3162 NULL);
3163 ((struct see_ref_s *) (stn->value))->use_se_hash = se_hash;
3165 break;
3166 default:
3167 gcc_unreachable ();
3171 /* Initialize a new see_ref_s structure and insert it to the splay
3172 tree. */
3173 if (!stn)
3175 ref_s = XNEW (struct see_ref_s);
3176 ref_s->luid = DF_INSN_LUID (ref_insn);
3177 ref_s->insn = ref_insn;
3178 ref_s->merged_insn = NULL;
3180 /* Initialize the hashes. */
3181 switch (type)
3183 case EXPLICIT_DEF_EXTENSION:
3184 ref_s->unmerged_def_se_hash = htab_create (10,
3185 hash_descriptor_extension,
3186 eq_descriptor_extension,
3187 NULL);
3188 se_hash = ref_s->unmerged_def_se_hash;
3189 ref_s->merged_def_se_hash = NULL;
3190 ref_s->use_se_hash = NULL;
3191 break;
3192 case IMPLICIT_DEF_EXTENSION:
3193 ref_s->merged_def_se_hash = htab_create (10,
3194 hash_descriptor_extension,
3195 eq_descriptor_extension,
3196 NULL);
3197 se_hash = ref_s->merged_def_se_hash;
3198 ref_s->unmerged_def_se_hash = NULL;
3199 ref_s->use_se_hash = NULL;
3200 break;
3201 case USE_EXTENSION:
3202 ref_s->use_se_hash = htab_create (10,
3203 hash_descriptor_extension,
3204 eq_descriptor_extension,
3205 NULL);
3206 se_hash = ref_s->use_se_hash;
3207 ref_s->unmerged_def_se_hash = NULL;
3208 ref_s->merged_def_se_hash = NULL;
3209 break;
3210 default:
3211 gcc_unreachable ();
3215 /* Insert the new extension instruction into the correct se_hash of the
3216 current reference. */
3217 rtx_slot = (rtx *) htab_find_slot (se_hash, se_insn, INSERT);
3218 if (*rtx_slot != NULL)
3220 gcc_assert (type == USE_EXTENSION);
3221 gcc_assert (rtx_equal_p (PATTERN (*rtx_slot), PATTERN (se_insn)));
3223 else
3224 *rtx_slot = se_insn;
3226 /* If this is a new reference, insert it into the splay_tree. */
3227 if (!stn)
3228 splay_tree_insert (see_bb_splay_ar[curr_bb_num],
3229 DF_INSN_LUID (ref_insn), (splay_tree_value) ref_s);
3230 return true;
3234 /* Go over all the defs, for each relevant definition (defined below) store its
3235 instruction as a reference.
3237 A definition is relevant if its root has
3238 ((entry_type == SIGN_EXTENDED_DEF) || (entry_type == ZERO_EXTENDED_DEF)) and
3239 his source_mode is not narrower then the roots source_mode.
3241 Return the number of relevant defs or negative number if something bad had
3242 happened and the optimization should be aborted. */
3244 static int
3245 see_handle_relevant_defs (df_ref ref, rtx insn)
3247 struct web_entry *root_entry = NULL;
3248 rtx se_insn = NULL;
3249 enum entry_type extension_code;
3250 rtx reg = DF_REF_REAL_REG (ref);
3251 rtx ref_insn = NULL;
3252 unsigned int i = DF_REF_ID (ref);
3254 root_entry = unionfind_root (&def_entry[DF_REF_ID (ref)]);
3256 if (ENTRY_EI (root_entry)->relevancy != SIGN_EXTENDED_DEF
3257 && ENTRY_EI (root_entry)->relevancy != ZERO_EXTENDED_DEF)
3258 /* The current web is not relevant. Continue to the next def. */
3259 return 0;
3261 if (root_entry->reg)
3262 /* It isn't possible to have two different register for the same
3263 web. */
3264 gcc_assert (rtx_equal_p (root_entry->reg, reg));
3265 else
3266 root_entry->reg = reg;
3268 /* The current definition is an EXTENDED_DEF or a definition that its
3269 source_mode is narrower then its web's source_mode.
3270 This means that we need to generate the implicit extension explicitly
3271 and store it in the current reference's merged_def_se_hash. */
3272 if (ENTRY_EI (&def_entry[i])->local_relevancy == EXTENDED_DEF
3273 || (ENTRY_EI (&def_entry[i])->local_source_mode <
3274 ENTRY_EI (root_entry)->source_mode))
3277 if (ENTRY_EI (root_entry)->relevancy == SIGN_EXTENDED_DEF)
3278 extension_code = SIGN_EXTENDED_DEF;
3279 else
3280 extension_code = ZERO_EXTENDED_DEF;
3282 se_insn =
3283 see_gen_normalized_extension (reg, extension_code,
3284 ENTRY_EI (root_entry)->source_mode);
3286 /* This is a dummy extension, mark it as deleted. */
3287 INSN_DELETED_P (se_insn) = 1;
3289 if (!see_store_reference_and_extension (insn, se_insn,
3290 IMPLICIT_DEF_EXTENSION))
3291 /* Something bad happened. Abort the optimization. */
3292 return -1;
3293 return 1;
3296 ref_insn = PREV_INSN (insn);
3297 gcc_assert (BLOCK_NUM (ref_insn) == BLOCK_NUM (insn));
3299 if (!see_store_reference_and_extension (ref_insn, insn,
3300 EXPLICIT_DEF_EXTENSION))
3301 /* Something bad happened. Abort the optimization. */
3302 return -1;
3304 return 0;
3307 /* Go over all the uses, for each use in relevant web store its instruction as
3308 a reference and generate an extension before it.
3310 Return the number of relevant uses or negative number if something bad had
3311 happened and the optimization should be aborted. */
3313 static int
3314 see_handle_relevant_uses (df_ref ref, rtx insn)
3316 struct web_entry *root_entry = NULL;
3317 rtx se_insn = NULL;
3318 enum entry_type extension_code;
3319 rtx reg = DF_REF_REAL_REG (ref);
3321 root_entry = unionfind_root (&use_entry[DF_REF_ID (ref)]);
3323 if (ENTRY_EI (root_entry)->relevancy != SIGN_EXTENDED_DEF
3324 && ENTRY_EI (root_entry)->relevancy != ZERO_EXTENDED_DEF)
3325 /* The current web is not relevant. Continue to the next use. */
3326 return 0;
3328 if (root_entry->reg)
3329 /* It isn't possible to have two different register for the same
3330 web. */
3331 gcc_assert (rtx_equal_p (root_entry->reg, reg));
3332 else
3333 root_entry->reg = reg;
3335 /* Generate the use extension. */
3336 if (ENTRY_EI (root_entry)->relevancy == SIGN_EXTENDED_DEF)
3337 extension_code = SIGN_EXTENDED_DEF;
3338 else
3339 extension_code = ZERO_EXTENDED_DEF;
3341 se_insn =
3342 see_gen_normalized_extension (reg, extension_code,
3343 ENTRY_EI (root_entry)->source_mode);
3344 if (!se_insn)
3345 /* This is very bad, abort the transformation. */
3346 return -1;
3348 if (!see_store_reference_and_extension (insn, se_insn,
3349 USE_EXTENSION))
3350 /* Something bad happened. Abort the optimization. */
3351 return -1;
3352 return 1;
3355 static int
3356 see_handle_relevant_refs (void)
3358 int num_relevant_refs = 0;
3359 basic_block bb;
3361 FOR_ALL_BB (bb)
3363 rtx insn;
3364 FOR_BB_INSNS (bb, insn)
3366 unsigned int uid = INSN_UID (insn);
3368 if (INSN_P (insn))
3370 df_ref *use_rec;
3371 df_ref *def_rec;
3373 for (use_rec = DF_INSN_UID_USES (uid); *use_rec; use_rec++)
3375 df_ref use = *use_rec;
3376 int result = see_handle_relevant_uses (use, insn);
3377 if (result == -1)
3378 return -1;
3379 num_relevant_refs += result;
3381 for (use_rec = DF_INSN_UID_EQ_USES (uid); *use_rec; use_rec++)
3383 df_ref use = *use_rec;
3384 int result = see_handle_relevant_uses (use, insn);
3385 if (result == -1)
3386 return -1;
3387 num_relevant_refs += result;
3389 for (def_rec = DF_INSN_UID_DEFS (uid); *def_rec; def_rec++)
3391 df_ref def = *def_rec;
3392 int result = see_handle_relevant_defs (def, insn);
3393 if (result == -1)
3394 return -1;
3395 num_relevant_refs += result;
3400 return num_relevant_refs;
3404 /* Initialized the use_entry field for REF in INSN at INDEX with ET. */
3406 static void
3407 see_update_uses_relevancy (rtx insn, df_ref ref,
3408 enum entry_type et, unsigned int index)
3410 struct see_entry_extra_info *curr_entry_extra_info;
3412 if (dump_file)
3414 rtx reg = DF_REF_REAL_REG (ref);
3415 fprintf (dump_file, "u%i insn %i reg %i ",
3416 index, (insn ? INSN_UID (insn) : -1), REGNO (reg));
3417 if (et == NOT_RELEVANT)
3418 fprintf (dump_file, "NOT RELEVANT \n");
3419 else
3420 fprintf (dump_file, "RELEVANT USE \n");
3423 DF_REF_ID (ref) = index;
3424 curr_entry_extra_info = XNEW (struct see_entry_extra_info);
3425 curr_entry_extra_info->relevancy = et;
3426 curr_entry_extra_info->local_relevancy = et;
3427 use_entry[index].extra_info = curr_entry_extra_info;
3428 use_entry[index].reg = NULL;
3429 use_entry[index].pred = NULL;
3433 /* A definition in a candidate for this optimization only if its pattern is
3434 recognized as relevant in this function.
3435 INSN is the instruction to be recognized.
3437 - If this is the pattern of a common sign extension after definition:
3438 PREV_INSN (INSN): def (reg:NARROWmode r)
3439 INSN: set ((reg:WIDEmode r')
3440 (sign_extend:WIDEmode (reg:NARROWmode r)))
3441 return SIGN_EXTENDED_DEF and set SOURCE_MODE to NARROWmode.
3443 - If this is the pattern of a common zero extension after definition:
3444 PREV_INSN (INSN): def (reg:NARROWmode r)
3445 INSN: set ((reg:WIDEmode r')
3446 (zero_extend:WIDEmode (reg:NARROWmode r)))
3447 return ZERO_EXTENDED_DEF and set SOURCE_MODE to NARROWmode.
3449 - Otherwise,
3451 For the pattern:
3452 INSN: set ((reg:WIDEmode r) (sign_extend:WIDEmode (...expr...)))
3453 return EXTENDED_DEF and set SOURCE_MODE to the mode of expr.
3455 For the pattern:
3456 INSN: set ((reg:WIDEmode r) (zero_extend:WIDEmode (...expr...)))
3457 return EXTENDED_DEF and set SOURCE_MODE_UNSIGNED to the mode of expr.
3459 For the pattern:
3460 INSN: set ((reg:WIDEmode r) (CONST_INT (...)))
3461 return EXTENDED_DEF and set SOURCE_MODE(_UNSIGNED) to the narrowest mode that
3462 is implicitly sign(zero) extended to WIDEmode in the INSN.
3464 - FIXME: Extensions that are not adjacent to their definition and EXTENDED_DEF
3465 that is part of a PARALLEL instruction are not handled.
3466 These restriction can be relaxed. */
3468 static enum entry_type
3469 see_analyze_one_def (rtx insn, enum machine_mode *source_mode,
3470 enum machine_mode *source_mode_unsigned)
3472 enum entry_type extension_code;
3473 rtx rhs = NULL;
3474 rtx lhs = NULL;
3475 rtx set = NULL;
3476 rtx source_register = NULL;
3477 rtx prev_insn = NULL;
3478 rtx next_insn = NULL;
3479 enum machine_mode mode;
3480 enum machine_mode next_source_mode;
3481 HOST_WIDE_INT val = 0;
3482 HOST_WIDE_INT val2 = 0;
3483 int i = 0;
3485 *source_mode = MAX_MACHINE_MODE;
3486 *source_mode_unsigned = MAX_MACHINE_MODE;
3488 extension_code = see_get_extension_data (insn, source_mode);
3489 switch (extension_code)
3491 case SIGN_EXTENDED_DEF:
3492 case ZERO_EXTENDED_DEF:
3493 source_register = see_get_extension_reg (insn, 0);
3494 /* FIXME: This restriction can be relaxed. The only thing that is
3495 important is that the reference would be inside the same basic block
3496 as the extension. */
3497 prev_insn = PREV_INSN (insn);
3498 if (!prev_insn || !INSN_P (prev_insn))
3499 return NOT_RELEVANT;
3501 if (!reg_set_between_p (source_register, PREV_INSN (prev_insn), insn))
3502 return NOT_RELEVANT;
3504 /* If we can't use copy_rtx on the reference it can't be a reference. */
3505 if (GET_CODE (PATTERN (prev_insn)) == PARALLEL
3506 && asm_noperands (PATTERN (prev_insn)) >= 0)
3507 return NOT_RELEVANT;
3509 /* Now, check if this extension is a reference itself. If so, it is not
3510 relevant. Handling this extension as relevant would make things much
3511 more complicated. */
3512 next_insn = NEXT_INSN (insn);
3513 if (next_insn
3514 && INSN_P (next_insn)
3515 && (see_get_extension_data (next_insn, &next_source_mode) !=
3516 NOT_RELEVANT))
3518 rtx curr_dest_register = see_get_extension_reg (insn, 1);
3519 rtx next_source_register = see_get_extension_reg (next_insn, 0);
3521 if (REGNO (curr_dest_register) == REGNO (next_source_register))
3522 return NOT_RELEVANT;
3525 return extension_code;
3527 case NOT_RELEVANT:
3528 /* This may still be an EXTENDED_DEF. */
3530 /* FIXME: This restriction can be relaxed. It is possible to handle
3531 PARALLEL insns too. */
3532 set = single_set (insn);
3533 if (!set)
3534 return NOT_RELEVANT;
3535 rhs = SET_SRC (set);
3536 lhs = SET_DEST (set);
3538 /* Don't handle extensions to something other then register or
3539 subregister. */
3540 if (!REG_P (lhs) && GET_CODE (lhs) != SUBREG)
3541 return NOT_RELEVANT;
3543 switch (GET_CODE (rhs))
3545 case SIGN_EXTEND:
3546 *source_mode = GET_MODE (XEXP (rhs, 0));
3547 *source_mode_unsigned = MAX_MACHINE_MODE;
3548 return EXTENDED_DEF;
3549 case ZERO_EXTEND:
3550 *source_mode = MAX_MACHINE_MODE;
3551 *source_mode_unsigned = GET_MODE (XEXP (rhs, 0));
3552 return EXTENDED_DEF;
3553 case CONST_INT:
3555 val = INTVAL (rhs);
3557 /* Find the narrowest mode, val could fit into. */
3558 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT), i = 0;
3559 GET_MODE_BITSIZE (mode) < BITS_PER_WORD;
3560 mode = GET_MODE_WIDER_MODE (mode), i++)
3562 val2 = trunc_int_for_mode (val, mode);
3563 if (val2 == val && *source_mode == MAX_MACHINE_MODE)
3564 *source_mode = mode;
3565 if (val == (val & (HOST_WIDE_INT)GET_MODE_MASK (mode))
3566 && *source_mode_unsigned == MAX_MACHINE_MODE)
3567 *source_mode_unsigned = mode;
3568 if (*source_mode != MAX_MACHINE_MODE
3569 && *source_mode_unsigned !=MAX_MACHINE_MODE)
3570 return EXTENDED_DEF;
3572 if (*source_mode != MAX_MACHINE_MODE
3573 || *source_mode_unsigned !=MAX_MACHINE_MODE)
3574 return EXTENDED_DEF;
3575 return NOT_RELEVANT;
3576 default:
3577 return NOT_RELEVANT;
3579 default:
3580 gcc_unreachable ();
3585 /* Initialized the def_entry field for REF in INSN at INDEX with ET. */
3587 static void
3588 see_update_defs_relevancy (rtx insn, df_ref ref,
3589 enum entry_type et,
3590 enum machine_mode source_mode,
3591 enum machine_mode source_mode_unsigned,
3592 unsigned int index)
3594 struct see_entry_extra_info *curr_entry_extra_info
3595 = XNEW (struct see_entry_extra_info);
3596 curr_entry_extra_info->relevancy = et;
3597 curr_entry_extra_info->local_relevancy = et;
3599 DF_REF_ID (ref) = index;
3601 if (et != EXTENDED_DEF)
3603 curr_entry_extra_info->source_mode = source_mode;
3604 curr_entry_extra_info->local_source_mode = source_mode;
3606 else
3608 curr_entry_extra_info->source_mode_signed = source_mode;
3609 curr_entry_extra_info->source_mode_unsigned = source_mode_unsigned;
3611 def_entry[index].extra_info = curr_entry_extra_info;
3612 def_entry[index].reg = NULL;
3613 def_entry[index].pred = NULL;
3615 if (dump_file)
3617 rtx reg = DF_REF_REAL_REG (ref);
3618 if (et == NOT_RELEVANT)
3620 fprintf (dump_file, "d%i insn %i reg %i ",
3621 index, (insn ? INSN_UID (insn) : -1), REGNO (reg));
3622 fprintf (dump_file, "NOT RELEVANT \n");
3624 else
3626 fprintf (dump_file, "d%i insn %i reg %i ",
3627 index, INSN_UID (insn), REGNO (reg));
3628 fprintf (dump_file, "RELEVANT - ");
3629 switch (et)
3631 case SIGN_EXTENDED_DEF :
3632 fprintf (dump_file, "SIGN_EXTENDED_DEF, source_mode = %s\n",
3633 GET_MODE_NAME (source_mode));
3634 break;
3635 case ZERO_EXTENDED_DEF :
3636 fprintf (dump_file, "ZERO_EXTENDED_DEF, source_mode = %s\n",
3637 GET_MODE_NAME (source_mode));
3638 break;
3639 case EXTENDED_DEF :
3640 fprintf (dump_file, "EXTENDED_DEF, ");
3641 if (source_mode != MAX_MACHINE_MODE
3642 && source_mode_unsigned != MAX_MACHINE_MODE)
3644 fprintf (dump_file, "positive const, ");
3645 fprintf (dump_file, "source_mode_signed = %s, ",
3646 GET_MODE_NAME (source_mode));
3647 fprintf (dump_file, "source_mode_unsigned = %s\n",
3648 GET_MODE_NAME (source_mode_unsigned));
3650 else if (source_mode != MAX_MACHINE_MODE)
3651 fprintf (dump_file, "source_mode_signed = %s\n",
3652 GET_MODE_NAME (source_mode));
3653 else
3654 fprintf (dump_file, "source_mode_unsigned = %s\n",
3655 GET_MODE_NAME (source_mode_unsigned));
3656 break;
3657 default :
3658 gcc_unreachable ();
3665 /* Updates the relevancy of all the uses and all defs.
3667 The information of the u'th use is stored in use_entry[u] and the
3668 information of the d'th definition is stored in def_entry[d].
3670 Currently all the uses are relevant for the optimization except for
3671 uses that are in LIBCALL or RETVAL instructions. */
3673 static void
3674 see_update_relevancy (void)
3676 unsigned int d = 0;
3677 unsigned int u = 0;
3678 enum entry_type et;
3679 enum machine_mode source_mode;
3680 enum machine_mode source_mode_unsigned;
3681 basic_block bb;
3683 if (!def_entry)
3684 return;
3686 FOR_ALL_BB (bb)
3688 df_ref *use_rec;
3689 df_ref *def_rec;
3690 rtx insn;
3691 FOR_BB_INSNS (bb, insn)
3693 unsigned int uid = INSN_UID (insn);
3694 if (INSN_P (insn))
3696 et = RELEVANT_USE;
3698 for (use_rec = DF_INSN_UID_USES (uid); *use_rec; use_rec++)
3700 df_ref use = *use_rec;
3701 see_update_uses_relevancy (insn, use, et, u);
3702 u++;
3705 for (use_rec = DF_INSN_UID_EQ_USES (uid); *use_rec; use_rec++)
3707 df_ref use = *use_rec;
3708 see_update_uses_relevancy (insn, use, et, u);
3709 u++;
3712 et = see_analyze_one_def (insn, &source_mode, &source_mode_unsigned);
3713 for (def_rec = DF_INSN_UID_DEFS (uid); *def_rec; def_rec++)
3715 df_ref def = *def_rec;
3716 see_update_defs_relevancy (insn, def, et, source_mode,
3717 source_mode_unsigned, d);
3718 d++;
3723 for (use_rec = df_get_artificial_uses (bb->index); *use_rec; use_rec++)
3725 df_ref use = *use_rec;
3726 see_update_uses_relevancy (NULL, use, NOT_RELEVANT, u);
3727 u++;
3730 for (def_rec = df_get_artificial_defs (bb->index); *def_rec; def_rec++)
3732 df_ref def = *def_rec;
3733 see_update_defs_relevancy (NULL, def, NOT_RELEVANT,
3734 MAX_MACHINE_MODE, MAX_MACHINE_MODE, d);
3735 d++;
3741 /* Phase 1 top level function.
3742 In this phase the relevancy of all the definitions and uses are checked,
3743 later the webs are produces and the extensions are generated.
3744 These extensions are not emitted yet into the insns stream.
3746 returns true if at list one relevant web was found and there were no
3747 problems, otherwise return false. */
3749 static bool
3750 see_propagate_extensions_to_uses (void)
3752 int num_relevant_refs;
3753 basic_block bb;
3755 if (dump_file)
3756 fprintf (dump_file,
3757 "* Phase 1: Propagate extensions to uses. *\n");
3759 /* Update the relevancy of references using the DF object. */
3760 see_update_relevancy ();
3762 /* Produce the webs and update the extra_info of the root.
3763 In general, a web is relevant if all its definitions and uses are relevant
3764 and there is at least one definition that was marked as SIGN_EXTENDED_DEF
3765 or ZERO_EXTENDED_DEF. */
3766 FOR_ALL_BB (bb)
3768 rtx insn;
3769 df_ref *use_rec;
3771 FOR_BB_INSNS (bb, insn)
3773 unsigned int uid = INSN_UID (insn);
3774 if (INSN_P (insn))
3776 for (use_rec = DF_INSN_UID_USES (uid); *use_rec; use_rec++)
3778 df_ref use = *use_rec;
3779 union_defs (use, def_entry, use_entry, see_update_leader_extra_info);
3782 for (use_rec = DF_INSN_UID_EQ_USES (uid); *use_rec; use_rec++)
3784 df_ref use = *use_rec;
3785 union_defs (use, def_entry, use_entry, see_update_leader_extra_info);
3790 for (use_rec = df_get_artificial_uses (bb->index); *use_rec; use_rec++)
3792 df_ref use = *use_rec;
3793 union_defs (use, def_entry, use_entry, see_update_leader_extra_info);
3797 /* Generate use extensions for references and insert these
3798 references to see_bb_splay_ar data structure. */
3799 num_relevant_refs = see_handle_relevant_refs ();
3801 return num_relevant_refs > 0;
3805 /* Main entry point for the sign extension elimination optimization. */
3807 static void
3808 see_main (void)
3810 bool cont = false;
3811 int i = 0;
3813 /* Initialize global data structures. */
3814 see_initialize_data_structures ();
3816 /* Phase 1: Propagate extensions to uses. */
3817 cont = see_propagate_extensions_to_uses ();
3819 if (cont)
3821 init_recog ();
3823 /* Phase 2: Merge and eliminate locally redundant extensions. */
3824 see_merge_and_eliminate_extensions ();
3826 /* Phase 3: Eliminate globally redundant extensions. */
3827 see_execute_LCM ();
3829 /* Phase 4: Commit changes to the insn stream. */
3830 see_commit_changes ();
3832 if (dump_file)
3834 /* For debug purpose only. */
3835 fprintf (dump_file, "see_pre_extension_hash:\n");
3836 htab_traverse (see_pre_extension_hash, see_print_pre_extension_expr,
3837 NULL);
3839 for (i = 0; i < last_bb; i++)
3841 if (see_bb_hash_ar[i])
3842 /* Traverse over all the references in the basic block in
3843 forward order. */
3845 fprintf (dump_file,
3846 "Searching register properties in bb %d\n", i);
3847 htab_traverse (see_bb_hash_ar[i],
3848 see_print_register_properties, NULL);
3854 /* Free global data structures. */
3855 see_free_data_structures ();
3859 static bool
3860 gate_handle_see (void)
3862 return optimize > 1 && flag_see;
3865 static unsigned int
3866 rest_of_handle_see (void)
3868 see_main ();
3869 df_clear_flags (DF_DEFER_INSN_RESCAN);
3870 df_process_deferred_rescans ();
3871 run_fast_dce ();
3872 return 0;
3875 struct rtl_opt_pass pass_see =
3878 RTL_PASS,
3879 "see", /* name */
3880 gate_handle_see, /* gate */
3881 rest_of_handle_see, /* execute */
3882 NULL, /* sub */
3883 NULL, /* next */
3884 0, /* static_pass_number */
3885 TV_SEE, /* tv_id */
3886 0, /* properties_required */
3887 0, /* properties_provided */
3888 0, /* properties_destroyed */
3889 0, /* todo_flags_start */
3890 TODO_df_verify |
3891 TODO_df_finish | TODO_verify_rtl_sharing |
3892 TODO_dump_func /* todo_flags_finish */