1 /* Multiply table generator for tile.
2 Copyright (C) 2011-2018 Free Software Foundation, Inc.
3 Contributed by Walter Lee (walt@tilera.com)
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published
9 by the Free Software Foundation; either version 3, or (at your
10 option) any later version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This program creates a table used to compile multiply by constant
24 This program should be compiled by a c++ compiler. If it's
25 compiled with -DTILEPRO, it generates the multiply table for
26 TILEPro; otherwise it generates the multiply table for TILE-Gx.
27 Running the program produces the table in stdout.
29 The program works by generating every possible combination of up to
30 MAX_INSTRUCTIONS linear operators (such as add, sub, s2a, left
31 shift) and computing the multiplier computed by those instructions.
39 There are usually multiple instruction sequences to multiply by a
40 given constant. This program keeps only the cheapest one.
41 "Cheapest" is defined first by the minimum theoretical schedule
42 length, and if those are equal then by the number of instructions,
43 and if those are equal then by which instructions we "prefer"
44 (e.g. because we think one might take infinitesimally less power
45 than another, or simply because we arbitrarily pick one to be more
48 Once this program has determined the best instruction sequence for
49 each multiplier, it emits them in a table sorted by the multiplier
50 so the user can binary-search it to look for a match. The table is
51 pruned based on various criteria to keep its sizes reasonable. */
57 #define __STDC_LIMIT_MACROS
64 /* The string representing the architecture. */
65 #define ARCH "tilepro"
67 /* The type for the multiplication. */
72 /* The string representing the architecture. */
75 /* The type for the multiplication. */
76 typedef long long MUL_TYPE
;
80 /* Longest instruction sequence this will produce. With the current
81 stupid algorithm runtime grows very quickly with this number. */
82 #define MAX_INSTRUCTIONS 4
84 /* Maximum number of subexpressions in the expression DAG being
85 generated. This is the same as the number of instructions, except
86 that zero and the original register we'd like to multiply by a
87 constant are also thrown into the mix. */
88 #define MAX_SUBEXPRS (2 + MAX_INSTRUCTIONS)
90 #define MIN(x, y) ((x) <= (y) ? (x) : (y))
91 #define MAX(x, y) ((x) >= (y) ? (x) : (y))
93 /* For this program a unary op is one which has only one nonconstant
94 operand. So shift left by 5 is considered unary. */
95 typedef MUL_TYPE (*unary_op_func
) (MUL_TYPE
);
96 typedef MUL_TYPE (*binary_op_func
) (MUL_TYPE
, MUL_TYPE
);
98 /* This describes an operation like 'add two registers' or 'left-shift
101 We call something a 'unary' operator if it only takes in one
102 register as input, even though it might have an implicit second
103 constant operand. Currently this is used for left-shift by
108 /* Construct for a binary operator. */
109 Operator (const char *pattern
, const char *name
, binary_op_func func
,
111 : m_pattern (pattern
), m_name (name
), m_top_index (-1),
112 m_unary_func (0), m_binary_func (func
), m_cost (cost
),
117 /* Construct for a unary operator. */
118 Operator (const char *pattern
, const char *name
, unary_op_func func
,
119 int rhs_if_unary
, int cost
)
120 : m_pattern (pattern
), m_name (name
), m_top_index (-1),
121 m_unary_func (func
), m_binary_func (0), m_cost (cost
),
122 m_rhs_if_unary (rhs_if_unary
)
126 bool is_unary () const
128 return m_binary_func
== NULL
;
131 /* Name of the pattern for this operation, e.g. CODE_FOR_addsi3. */
132 const char *m_pattern
;
134 /* Name of the opcode for this operation, e.g. add. */
137 /* We don't have enough bits in our output representation to store
138 the original insn_code value, so we store a compressed form
139 instead. These values are decoded back into insn_code via the
140 machine-generated multiply_insn_seq_decode_opcode lookup
144 /* Unary operator to apply, or NULL if this is a binary operator. */
145 unary_op_func m_unary_func
;
147 /* Binary operator to apply, or NULL if this is a unary operator. */
148 binary_op_func m_binary_func
;
150 /* Function of how expensive we consider this operator. Higher is
154 /* the RHS value to write into the C file if unary; used for shift
160 /* An entry in an expression DAG. */
164 Expr () : m_op (NULL
), m_lhs (0), m_rhs (0), m_produced_val (0),
165 m_critical_path_length (0)
169 /* The operator being applied to the operands. */
170 const Operator
*m_op
;
172 /* The index of the left-hand operand in the array of subexpressions
176 /* For binary ops ,this is the index of the left-hand operand in the
177 array of subexpressions already computed. For unary ops, it is
178 specific to the op (e.g. it might hold a constant shift
182 /* The multiplier produced by this expression tree. For example, for
183 the tree ((x << 5) + x), the value would be 33. */
184 MUL_TYPE m_produced_val
;
186 /* How far is this expression from the root, i.e. how many cycles
187 minimum will it take to compute this? */
188 int m_critical_path_length
;
192 /* Make function pointers for the various linear operators we can
193 apply to compute a multiplicative value. */
196 add (MUL_TYPE n1
, MUL_TYPE n2
)
202 sub (MUL_TYPE n1
, MUL_TYPE n2
)
208 s1a (MUL_TYPE n1
, MUL_TYPE n2
)
214 s2a (MUL_TYPE n1
, MUL_TYPE n2
)
220 s3a (MUL_TYPE n1
, MUL_TYPE n2
)
225 #define SHIFT(count) \
227 shift##count(MUL_TYPE n) \
229 return n << (count); \
299 static Operator ops
[] = {
300 Operator ("CODE_FOR_addsi3", "add", add
, 1040),
301 Operator ("CODE_FOR_subsi3", "sub", sub
, 1041),
302 Operator ("CODE_FOR_insn_s1a", "s1a", s1a
, 1042),
303 Operator ("CODE_FOR_insn_s2a", "s2a", s2a
, 1043),
304 Operator ("CODE_FOR_insn_s3a", "s3a", s3a
, 1044),
305 /* Note: shl by 1 is not necessary, since adding a value to itself
306 produces the same result. But the architecture team thinks
307 left-shift may use slightly less power, so we might as well
309 Operator ("CODE_FOR_ashlsi3", "shli", shift1
, 1, 1001),
310 Operator ("CODE_FOR_ashlsi3", "shli", shift2
, 2, 1002),
311 Operator ("CODE_FOR_ashlsi3", "shli", shift3
, 3, 1003),
312 Operator ("CODE_FOR_ashlsi3", "shli", shift4
, 4, 1004),
313 Operator ("CODE_FOR_ashlsi3", "shli", shift5
, 5, 1005),
314 Operator ("CODE_FOR_ashlsi3", "shli", shift6
, 6, 1006),
315 Operator ("CODE_FOR_ashlsi3", "shli", shift7
, 7, 1007),
316 Operator ("CODE_FOR_ashlsi3", "shli", shift8
, 8, 1008),
317 Operator ("CODE_FOR_ashlsi3", "shli", shift9
, 9, 1009),
318 Operator ("CODE_FOR_ashlsi3", "shli", shift10
, 10, 1010),
319 Operator ("CODE_FOR_ashlsi3", "shli", shift11
, 11, 1011),
320 Operator ("CODE_FOR_ashlsi3", "shli", shift12
, 12, 1012),
321 Operator ("CODE_FOR_ashlsi3", "shli", shift13
, 13, 1013),
322 Operator ("CODE_FOR_ashlsi3", "shli", shift14
, 14, 1014),
323 Operator ("CODE_FOR_ashlsi3", "shli", shift15
, 15, 1015),
324 Operator ("CODE_FOR_ashlsi3", "shli", shift16
, 16, 1016),
325 Operator ("CODE_FOR_ashlsi3", "shli", shift17
, 17, 1017),
326 Operator ("CODE_FOR_ashlsi3", "shli", shift18
, 18, 1018),
327 Operator ("CODE_FOR_ashlsi3", "shli", shift19
, 19, 1019),
328 Operator ("CODE_FOR_ashlsi3", "shli", shift20
, 20, 1020),
329 Operator ("CODE_FOR_ashlsi3", "shli", shift21
, 21, 1021),
330 Operator ("CODE_FOR_ashlsi3", "shli", shift22
, 22, 1022),
331 Operator ("CODE_FOR_ashlsi3", "shli", shift23
, 23, 1023),
332 Operator ("CODE_FOR_ashlsi3", "shli", shift24
, 24, 1024),
333 Operator ("CODE_FOR_ashlsi3", "shli", shift25
, 25, 1025),
334 Operator ("CODE_FOR_ashlsi3", "shli", shift26
, 26, 1026),
335 Operator ("CODE_FOR_ashlsi3", "shli", shift27
, 27, 1027),
336 Operator ("CODE_FOR_ashlsi3", "shli", shift28
, 28, 1028),
337 Operator ("CODE_FOR_ashlsi3", "shli", shift29
, 29, 1029),
338 Operator ("CODE_FOR_ashlsi3", "shli", shift30
, 30, 1030),
339 Operator ("CODE_FOR_ashlsi3", "shli", shift31
, 31, 1031)
342 static Operator ops
[] = {
343 Operator ("CODE_FOR_adddi3", "add", add
, 1070),
344 Operator ("CODE_FOR_subdi3", "sub", sub
, 1071),
345 Operator ("CODE_FOR_insn_shl1add", "shl1add", s1a
, 1072),
346 Operator ("CODE_FOR_insn_shl2add", "shl2add", s2a
, 1073),
347 Operator ("CODE_FOR_insn_shl3add", "shl3add", s3a
, 1074),
348 // Note: shl by 1 is not necessary, since adding a value to itself
349 // produces the same result. But the architecture team thinks left-shift
350 // may use slightly less power, so we might as well prefer it.
351 Operator ("CODE_FOR_ashldi3", "shli", shift1
, 1, 1001),
352 Operator ("CODE_FOR_ashldi3", "shli", shift2
, 2, 1002),
353 Operator ("CODE_FOR_ashldi3", "shli", shift3
, 3, 1003),
354 Operator ("CODE_FOR_ashldi3", "shli", shift4
, 4, 1004),
355 Operator ("CODE_FOR_ashldi3", "shli", shift5
, 5, 1005),
356 Operator ("CODE_FOR_ashldi3", "shli", shift6
, 6, 1006),
357 Operator ("CODE_FOR_ashldi3", "shli", shift7
, 7, 1007),
358 Operator ("CODE_FOR_ashldi3", "shli", shift8
, 8, 1008),
359 Operator ("CODE_FOR_ashldi3", "shli", shift9
, 9, 1009),
360 Operator ("CODE_FOR_ashldi3", "shli", shift10
, 10, 1010),
361 Operator ("CODE_FOR_ashldi3", "shli", shift11
, 11, 1011),
362 Operator ("CODE_FOR_ashldi3", "shli", shift12
, 12, 1012),
363 Operator ("CODE_FOR_ashldi3", "shli", shift13
, 13, 1013),
364 Operator ("CODE_FOR_ashldi3", "shli", shift14
, 14, 1014),
365 Operator ("CODE_FOR_ashldi3", "shli", shift15
, 15, 1015),
366 Operator ("CODE_FOR_ashldi3", "shli", shift16
, 16, 1016),
367 Operator ("CODE_FOR_ashldi3", "shli", shift17
, 17, 1017),
368 Operator ("CODE_FOR_ashldi3", "shli", shift18
, 18, 1018),
369 Operator ("CODE_FOR_ashldi3", "shli", shift19
, 19, 1019),
370 Operator ("CODE_FOR_ashldi3", "shli", shift20
, 20, 1020),
371 Operator ("CODE_FOR_ashldi3", "shli", shift21
, 21, 1021),
372 Operator ("CODE_FOR_ashldi3", "shli", shift22
, 22, 1022),
373 Operator ("CODE_FOR_ashldi3", "shli", shift23
, 23, 1023),
374 Operator ("CODE_FOR_ashldi3", "shli", shift24
, 24, 1024),
375 Operator ("CODE_FOR_ashldi3", "shli", shift25
, 25, 1025),
376 Operator ("CODE_FOR_ashldi3", "shli", shift26
, 26, 1026),
377 Operator ("CODE_FOR_ashldi3", "shli", shift27
, 27, 1027),
378 Operator ("CODE_FOR_ashldi3", "shli", shift28
, 28, 1028),
379 Operator ("CODE_FOR_ashldi3", "shli", shift29
, 29, 1029),
380 Operator ("CODE_FOR_ashldi3", "shli", shift30
, 30, 1030),
381 Operator ("CODE_FOR_ashldi3", "shli", shift31
, 31, 1031),
382 Operator ("CODE_FOR_ashldi3", "shli", shift32
, 32, 1032),
383 Operator ("CODE_FOR_ashldi3", "shli", shift33
, 33, 1033),
384 Operator ("CODE_FOR_ashldi3", "shli", shift34
, 34, 1034),
385 Operator ("CODE_FOR_ashldi3", "shli", shift35
, 35, 1035),
386 Operator ("CODE_FOR_ashldi3", "shli", shift36
, 36, 1036),
387 Operator ("CODE_FOR_ashldi3", "shli", shift37
, 37, 1037),
388 Operator ("CODE_FOR_ashldi3", "shli", shift38
, 38, 1038),
389 Operator ("CODE_FOR_ashldi3", "shli", shift39
, 39, 1039),
390 Operator ("CODE_FOR_ashldi3", "shli", shift40
, 40, 1040),
391 Operator ("CODE_FOR_ashldi3", "shli", shift41
, 41, 1041),
392 Operator ("CODE_FOR_ashldi3", "shli", shift42
, 42, 1042),
393 Operator ("CODE_FOR_ashldi3", "shli", shift43
, 43, 1043),
394 Operator ("CODE_FOR_ashldi3", "shli", shift44
, 44, 1044),
395 Operator ("CODE_FOR_ashldi3", "shli", shift45
, 45, 1045),
396 Operator ("CODE_FOR_ashldi3", "shli", shift46
, 46, 1046),
397 Operator ("CODE_FOR_ashldi3", "shli", shift47
, 47, 1047),
398 Operator ("CODE_FOR_ashldi3", "shli", shift48
, 48, 1048),
399 Operator ("CODE_FOR_ashldi3", "shli", shift49
, 49, 1049),
400 Operator ("CODE_FOR_ashldi3", "shli", shift50
, 50, 1050),
401 Operator ("CODE_FOR_ashldi3", "shli", shift51
, 51, 1051),
402 Operator ("CODE_FOR_ashldi3", "shli", shift52
, 52, 1052),
403 Operator ("CODE_FOR_ashldi3", "shli", shift53
, 53, 1053),
404 Operator ("CODE_FOR_ashldi3", "shli", shift54
, 54, 1054),
405 Operator ("CODE_FOR_ashldi3", "shli", shift55
, 55, 1055),
406 Operator ("CODE_FOR_ashldi3", "shli", shift56
, 56, 1056),
407 Operator ("CODE_FOR_ashldi3", "shli", shift57
, 57, 1057),
408 Operator ("CODE_FOR_ashldi3", "shli", shift58
, 58, 1058),
409 Operator ("CODE_FOR_ashldi3", "shli", shift59
, 59, 1059),
410 Operator ("CODE_FOR_ashldi3", "shli", shift60
, 60, 1060),
411 Operator ("CODE_FOR_ashldi3", "shli", shift61
, 61, 1061),
412 Operator ("CODE_FOR_ashldi3", "shli", shift62
, 62, 1062),
413 Operator ("CODE_FOR_ashldi3", "shli", shift63
, 63, 1063)
417 /* An ExpressionTree is an expression DAG. */
421 ExpressionTree () : m_num_vals (2)
423 m_exprs
[0].m_produced_val
= 0;
424 m_exprs
[1].m_produced_val
= 1;
427 void copy_technique_from (ExpressionTree
* other
)
429 /* TODO: write this; other can compute the same products with less
430 cost. We update this ExpressionTree object because some int is
431 already mapped to it. */
435 Expr m_exprs
[MAX_SUBEXPRS
];
440 for (int j
= 2; j
< m_num_vals
; j
++)
441 cost
+= m_exprs
[j
].m_op
->m_cost
;
442 return cost
+ m_exprs
[m_num_vals
- 1].m_critical_path_length
* 1000000;
447 typedef std::map
<MUL_TYPE
, ExpressionTree
*> ExpressionTreeMap
;
451 find_sequences (ExpressionTree
&s
, ExpressionTreeMap
&best_solution
)
453 /* Don't look more if we can't add any new values to the expression
455 const int num_vals
= s
.m_num_vals
;
456 if (num_vals
== MAX_SUBEXPRS
)
459 /* Grow array to make room for new values added as we loop. */
460 s
.m_num_vals
= num_vals
+ 1;
462 const Operator
*const prev_op
= s
.m_exprs
[num_vals
- 1].m_op
;
463 const int prev_top_index
= (prev_op
!= NULL
) ? prev_op
->m_top_index
: -1;
465 for (size_t f
= 0; f
< sizeof ops
/ sizeof ops
[0]; f
++)
467 const Operator
*const op
= &ops
[f
];
469 for (int i
= 0; i
< num_vals
; i
++)
471 /* Only allow zero as the first operand to sub, otherwise
473 if (i
== 0 && op
->m_binary_func
!= sub
)
476 /* Unary ops don't actually use the second operand, so as a
477 big hack we trick it into only looping once in the inner
479 const int j_end
= op
->is_unary () ? 2 : num_vals
;
481 /* We never allow zero as the second operand, as it is
483 for (int j
= 1; j
< j_end
; j
++)
485 /* Does this expression use the immediately previous
487 const bool uses_prev_value
=
489 || (!op
->is_unary () && j
== num_vals
- 1));
491 if (!uses_prev_value
)
493 /* For search efficiency, prune redundant
494 instruction orderings.
496 This op does not take the immediately preceding
497 value as input, which means we could have done it
498 in the previous slot. If its opcode is less than
499 the previous instruction's, we'll declare this
500 instruction order non-canonical and not pursue
501 this branch of the search. */
502 if (op
->m_top_index
< prev_top_index
)
509 n
= op
->m_unary_func (s
.m_exprs
[i
].m_produced_val
);
513 n
= op
->m_binary_func (s
.m_exprs
[i
].m_produced_val
,
514 s
.m_exprs
[j
].m_produced_val
);
517 bool duplicate
= false;
518 for (int k
= 0; k
< num_vals
; k
++)
519 if (n
== s
.m_exprs
[j
].m_produced_val
)
528 /* See if we found the best solution for n. */
529 Expr
*e
= &s
.m_exprs
[num_vals
];
532 e
->m_rhs
= op
->is_unary () ? op
->m_rhs_if_unary
: j
;
533 e
->m_produced_val
= n
;
534 e
->m_critical_path_length
=
535 1 + MAX (s
.m_exprs
[i
].m_critical_path_length
,
536 s
.m_exprs
[j
].m_critical_path_length
);
538 ExpressionTreeMap::iterator
best (best_solution
.find (n
));
539 if (best
== best_solution
.end ()
540 || (*best
).second
->cost () > s
.cost ())
541 best_solution
[n
] = new ExpressionTree (s
);
543 /* Recurse and find more. */
544 find_sequences (s
, best_solution
);
549 /* Restore old size. */
550 s
.m_num_vals
= num_vals
;
554 /* For each insn_code used by an operator, choose a compact number so
555 it takes less space in the output data structure. This prints out a
556 lookup table used to map the compactified number back to an
559 create_insn_code_compression_table ()
563 /* Entry 0 must hold CODE_FOR_nothing to mean "end of array". */
564 printf ("const enum insn_code %s_multiply_insn_seq_decode_opcode[] = {\n"
565 " CODE_FOR_nothing /* must be first */ ", ARCH
);
567 for (size_t i
= 0; i
< sizeof ops
/ sizeof ops
[0]; i
++)
569 Operator
*op
= &ops
[i
];
572 /* See if some previous Operator was using the same insn_code.
573 If so, reuse its table entry. */
574 for (size_t j
= 0; j
< i
; j
++)
576 Operator
*old
= &ops
[j
];
577 if (strcmp (old
->m_pattern
, op
->m_pattern
) == 0)
579 index
= old
->m_top_index
;
586 /* We need to make a new entry in the table. */
587 printf (",\n %s", op
->m_pattern
);
588 index
= next_index
++;
591 op
->m_top_index
= index
;
598 /* These are constants we've seen in code, that we want to keep table
600 static int multiply_constants_used
[] = {
1062 const int num_mult_constants_used
=
1063 (int)(sizeof multiply_constants_used
1064 / sizeof multiply_constants_used
[0]);
1067 #define XSIZE (sizeof multiply_constants_used / \
1068 sizeof multiply_constants_used[0] + 32) / 32
1069 unsigned multiply_constants_avail
[XSIZE
];
1073 /* bsearch helper function. */
1075 compare_constants (const void *key
, const void *t
)
1077 return (*(int*)key
) - *((int*)t
);
1082 find_mult_constants_used (int multiplier
)
1084 return (int *) bsearch (&multiplier
, multiply_constants_used
,
1085 num_mult_constants_used
,
1086 sizeof multiply_constants_used
[0],
1091 int num_ops (ExpressionTree
*s
)
1094 for (int i
= 0; i
< s
->m_num_vals
; i
++)
1096 Expr
*e
= &s
->m_exprs
[i
];
1097 if (e
->m_op
!= NULL
)
1106 tilepro_emit (int multiplier
, int num_ops
)
1108 int abs_multiplier
= (multiplier
>= 0) ? multiplier
: -multiplier
;
1110 /* Keep constants in range [-1024, 1024]. */
1111 if (abs_multiplier
<= 1024)
1114 /* Keep constants near powers of two. */
1115 int prev_pow2
= 1 << (31 - __builtin_clz (abs_multiplier
));
1116 int next_pow2
= prev_pow2
<< 1;
1118 if ((abs_multiplier
- prev_pow2
<= 10)
1119 || (next_pow2
- abs_multiplier
<= 10))
1122 /* Keep constants near powers of ten. */
1125 long long prev_pow10
;
1126 long long next_pow10
;
1128 while (((j
* 10) < abs_multiplier
)
1133 next_pow10
= j
* 10;
1135 if ((abs_multiplier
- prev_pow10
<= 10)
1136 || (next_pow10
- abs_multiplier
<= 10))
1140 /* Keep short sequences that have two or fewer ops. */
1144 /* Keep constants that are mostly 0's or mostly 1's. */
1145 if (__builtin_popcount (multiplier
) <= 2 ||
1146 __builtin_popcount (multiplier
) >= 30)
1149 /* Keep constants seen in actual code. */
1150 if ((find_mult_constants_used (multiplier
)))
1157 tilegx_emit (long long multiplier
, int num_ops
)
1159 long long abs_multiplier
= (multiplier
>= 0) ? multiplier
: - multiplier
;
1161 /* Keep constants in range [-1024, 1024]. */
1162 if (abs_multiplier
<= 1024)
1165 /* Otherwise exclude sequences with four ops. */
1169 /* Keep constants near powers of two. */
1171 unsigned long long prev_pow2
=
1172 1LL << (63 - __builtin_clzll (abs_multiplier
));
1173 unsigned long long next_pow2
= prev_pow2
<< 1;
1175 /* handle overflow case. */
1178 if (prev_pow2
- abs_multiplier
<= 10)
1181 else if ((abs_multiplier
- prev_pow2
<= 10)
1182 || (next_pow2
- abs_multiplier
<= 10))
1186 /* Keep constants near powers of ten. */
1189 long long prev_pow10
;
1190 long long next_pow10
;
1192 while (((j
* 10) < abs_multiplier
)
1193 && (j
< (INTMAX_MAX
/ 10)))
1197 next_pow10
= (j
> (INTMAX_MAX
/ 10)) ? 0 : j
* 10;
1199 if ((abs_multiplier
- prev_pow10
<= 100)
1201 && (next_pow10
- abs_multiplier
<= 100)))
1208 /* Keep constants that are mostly 0's or mostly 1's. */
1209 if (__builtin_popcountll (multiplier
) <= 2 ||
1210 __builtin_popcountll (multiplier
) >= 62)
1213 /* Keep constants seen in actual code. */
1214 if (find_mult_constants_used (multiplier
))
1225 ExpressionTreeMap best_solution
;
1229 printf ("/* Constant multiply table for TILEPro.\n");
1231 printf ("/* Constant multiply table for TILE-Gx.\n");
1233 printf (" Copyright (C) 2011-2018 Free Software Foundation, Inc.\n");
1234 printf (" Contributed by Walter Lee (walt@tilera.com)\n");
1236 printf (" This file is part of GCC.\n");
1238 printf (" GCC is free software; you can redistribute it and/or modify it\n");
1239 printf (" under the terms of the GNU General Public License as published\n");
1240 printf (" by the Free Software Foundation; either version 3, or (at your\n");
1241 printf (" option) any later version.\n");
1243 printf (" GCC is distributed in the hope that it will be useful, but WITHOUT\n");
1244 printf (" ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n");
1245 printf (" or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public\n");
1246 printf (" License for more details.\n");
1248 printf (" You should have received a copy of the GNU General Public License\n");
1249 printf (" along with GCC; see the file COPYING3. If not see\n");
1250 printf (" <http://www.gnu.org/licenses/>. */\n");
1252 printf ("/* Note this file is auto-generated from gen-mul-tables.cc.\n");
1253 printf (" Make any required changes there. */\n");
1255 printf ("#include \"config.h\"\n");
1256 printf ("#include \"system.h\"\n");
1257 printf ("#include \"coretypes.h\"\n");
1258 printf ("#include \"backend.h\"\n");
1259 printf ("#include \"rtl.h\"\n");
1260 printf ("#include \"expmed.h\"\n");
1261 printf ("#include \"%s-multiply.h\"\n\n", ARCH
);
1262 create_insn_code_compression_table ();
1264 /* Try all combinations of operators and see what constants we
1265 produce. For each possible constant, record the most efficient
1266 way to generate it. */
1267 find_sequences (s
, best_solution
);
1269 printf ("const struct %s_multiply_insn_seq "
1270 "%s_multiply_insn_seq_table[] = {\n",
1273 const char *comma_separator
= "";
1275 ExpressionTreeMap::iterator
i (best_solution
.begin ());
1276 for (; i
!= best_solution
.end (); ++i
)
1278 ExpressionTree
*s
= (*i
).second
;
1279 const MUL_TYPE n
= (*i
).first
;
1281 if (n
== 0 || n
== 1)
1283 /* Both of these require zero operations, so these entries
1284 are bogus and should never be used. */
1288 /* Prune the list of entries to keep the table to a reasonable
1291 if (!tilepro_emit (n
, num_ops (s
)))
1294 if (!tilegx_emit (n
, num_ops (s
)))
1298 printf ("%s", comma_separator
);
1301 const MUL_TYPE int_min
= INT32_MIN
;
1303 const MUL_TYPE int_min
= INT64_MIN
;
1307 /* Handle C's woeful lack of unary negation. Without this,
1308 printing out INT_MIN in decimal will yield an unsigned
1309 int which could generate a compiler warning. */
1311 printf (" {%d - 1 /* 0x%x */ ,\n {", n
+ 1,
1314 printf (" {%lldll - 1 /* 0x%llx */ ,\n {", n
+ 1,
1315 (unsigned MUL_TYPE
) n
);
1321 printf (" {%d /* 0x%x */ ,\n {", n
, (unsigned) n
);
1323 printf (" {%lldll /* 0x%llx */ ,\n {", n
, (unsigned MUL_TYPE
) n
);
1328 for (int j
= 0; j
< s
->m_num_vals
; j
++)
1330 Expr
*e
= &s
->m_exprs
[j
];
1332 const Operator
*op
= e
->m_op
;
1337 snprintf (buf
, sizeof buf
, "%s{%d, %d, %d}%s",
1340 e
->m_lhs
, e
->m_rhs
, (j
+ 1) == s
->m_num_vals
? "}" : ",");
1344 snprintf (opnd0
, sizeof opnd0
, "r%d", e
->m_lhs
);
1346 snprintf (opnd0
, sizeof opnd0
, "zero");
1348 printf ("%s\t\t\t/* %s r%d, %s, %s%d */\n",
1349 buf
, op
->m_name
, j
, opnd0
,
1350 op
->is_unary () ? "" : "r", e
->m_rhs
);
1355 comma_separator
= ",\n";
1358 printf ("\n};\n\n");
1359 printf ("const int %s_multiply_insn_seq_table_size =\n"
1360 " (int) (sizeof %s_multiply_insn_seq_table\n"
1361 " / sizeof %s_multiply_insn_seq_table[0]);\n",
1364 return EXIT_SUCCESS
;