2014-01-17 Richard Biener <rguenther@suse.de>
[official-gcc.git] / gcc / config / tilepro / gen-mul-tables.cc
blob645fa32ea5ef1940c22967119ad90f7f407e1b58
1 /* Multiply table generator for tile.
2 Copyright (C) 2011-2014 Free Software Foundation, Inc.
3 Contributed by Walter Lee (walt@tilera.com)
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published
9 by the Free Software Foundation; either version 3, or (at your
10 option) any later version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This program creates a table used to compile multiply by constant
22 efficiently.
24 This program should be compiled by a c++ compiler. If it's
25 compiled with with -DTILEPRO, it generates the multiply table for
26 TILEPro; otherwise it generates the multiply table for TILE-Gx.
27 Running the program produces the table in stdout.
29 The program works by generating every possible combination of up to
30 MAX_INSTRUCTIONS linear operators (such as add, sub, s2a, left
31 shift) and computing the multiplier computed by those instructions.
32 For example,
34 s2a r2,r1,r1
35 s2a r3,r2,r2
37 multiplies r1 by 25.
39 There are usually multiple instruction sequences to multiply by a
40 given constant. This program keeps only the cheapest one.
41 "Cheapest" is defined first by the minimum theoretical schedule
42 length, and if those are equal then by the number of instructions,
43 and if those are equal then by which instructions we "prefer"
44 (e.g. because we think one might take infinitesimally less power
45 than another, or simply because we arbitrarily pick one to be more
46 canonical).
48 Once this program has determined the best instruction sequence for
49 each multiplier, it emits them in a table sorted by the multiplier
50 so the user can binary-search it to look for a match. The table is
51 pruned based on various criteria to keep its sizes reasonable. */
53 #include <stdio.h>
54 #include <stdlib.h>
55 #include <assert.h>
56 #include <string.h>
57 #define __STDC_LIMIT_MACROS
58 #include <stdint.h>
60 #include <map>
62 #ifdef TILEPRO
64 /* The string representing the architecture. */
65 #define ARCH "tilepro"
67 /* The type for the multiplication. */
68 typedef int MUL_TYPE;
70 #else
72 /* The string representing the architecture. */
73 #define ARCH "tilegx"
75 /* The type for the multiplication. */
76 typedef long long MUL_TYPE;
78 #endif
80 /* Longest instruction sequence this will produce. With the current
81 stupid algorithm runtime grows very quickly with this number. */
82 #define MAX_INSTRUCTIONS 4
84 /* Maximum number of subexpressions in the expression DAG being
85 generated. This is the same as the number of instructions, except
86 that zero and the original register we'd like to multiply by a
87 constant are also thrown into the mix. */
88 #define MAX_SUBEXPRS (2 + MAX_INSTRUCTIONS)
90 #define MIN(x, y) ((x) <= (y) ? (x) : (y))
91 #define MAX(x, y) ((x) >= (y) ? (x) : (y))
93 /* For this program a unary op is one which has only one nonconstant
94 operand. So shift left by 5 is considered unary. */
95 typedef MUL_TYPE (*unary_op_func) (MUL_TYPE);
96 typedef MUL_TYPE (*binary_op_func) (MUL_TYPE, MUL_TYPE);
98 /* This describes an operation like 'add two registers' or 'left-shift
99 by 7'.
101 We call something a 'unary' operator if it only takes in one
102 register as input, even though it might have an implicit second
103 constant operand. Currently this is used for left-shift by
104 constant. */
105 class Operator
107 public:
108 /* Construct for a binary operator. */
109 Operator (const char *pattern, const char *name, binary_op_func func,
110 int cost)
111 : m_pattern (pattern), m_name (name), m_top_index (-1),
112 m_unary_func (0), m_binary_func (func), m_cost (cost),
113 m_rhs_if_unary (0)
117 /* Construct for a unary operator. */
118 Operator (const char *pattern, const char *name, unary_op_func func,
119 int rhs_if_unary, int cost)
120 : m_pattern (pattern), m_name (name), m_top_index (-1),
121 m_unary_func (func), m_binary_func (0), m_cost (cost),
122 m_rhs_if_unary (rhs_if_unary)
126 bool is_unary () const
128 return m_binary_func == NULL;
131 /* Name of the pattern for this operation, e.g. CODE_FOR_addsi3. */
132 const char *m_pattern;
134 /* Name of the opcode for this operation, e.g. add. */
135 const char *m_name;
137 /* We don't have enough bits in our output representation to store
138 the original insn_code value, so we store a compressed form
139 instead. These values are decoded back into insn_code via the
140 machine-generated multiply_insn_seq_decode_opcode lookup
141 table. */
142 int m_top_index;
144 /* Unary operator to apply, or NULL if this is a binary operator. */
145 unary_op_func m_unary_func;
147 /* Binary operator to apply, or NULL if this is a unary operator. */
148 binary_op_func m_binary_func;
150 /* Function of how expensive we consider this operator. Higher is
151 worse. */
152 int m_cost;
154 /* the RHS value to write into the C file if unary; used for shift
155 count. */
156 int m_rhs_if_unary;
160 /* An entry in an expression DAG. */
161 class Expr
163 public:
164 Expr () : m_op (NULL), m_lhs (0), m_rhs (0), m_produced_val (0),
165 m_critical_path_length (0)
169 /* The operator being applied to the operands. */
170 const Operator *m_op;
172 /* The index of the left-hand operand in the array of subexpressions
173 already computed. */
174 int m_lhs;
176 /* For binary ops ,this is the index of the left-hand operand in the
177 array of subexpressions already computed. For unary ops, it is
178 specific to the op (e.g. it might hold a constant shift
179 count). */
180 int m_rhs;
182 /* The multiplier produced by this expression tree. For example, for
183 the tree ((x << 5) + x), the value would be 33. */
184 MUL_TYPE m_produced_val;
186 /* How far is this expression from the root, i.e. how many cycles
187 minimum will it take to compute this? */
188 int m_critical_path_length;
192 /* Make function pointers for the various linear operators we can
193 apply to compute a multiplicative value. */
195 static MUL_TYPE
196 add (MUL_TYPE n1, MUL_TYPE n2)
198 return n1 + n2;
201 static MUL_TYPE
202 sub (MUL_TYPE n1, MUL_TYPE n2)
204 return n1 - n2;
207 static MUL_TYPE
208 s1a (MUL_TYPE n1, MUL_TYPE n2)
210 return n1 * 2 + n2;
213 static MUL_TYPE
214 s2a (MUL_TYPE n1, MUL_TYPE n2)
216 return n1 * 4 + n2;
219 static MUL_TYPE
220 s3a (MUL_TYPE n1, MUL_TYPE n2)
222 return n1 * 8 + n2;
225 #define SHIFT(count) \
226 static MUL_TYPE \
227 shift##count(MUL_TYPE n) \
229 return n << (count); \
232 SHIFT (1);
233 SHIFT (2);
234 SHIFT (3);
235 SHIFT (4);
236 SHIFT (5);
237 SHIFT (6);
238 SHIFT (7);
239 SHIFT (8);
240 SHIFT (9);
241 SHIFT (10);
242 SHIFT (11);
243 SHIFT (12);
244 SHIFT (13);
245 SHIFT (14);
246 SHIFT (15);
247 SHIFT (16);
248 SHIFT (17);
249 SHIFT (18);
250 SHIFT (19);
251 SHIFT (20);
252 SHIFT (21);
253 SHIFT (22);
254 SHIFT (23);
255 SHIFT (24);
256 SHIFT (25);
257 SHIFT (26);
258 SHIFT (27);
259 SHIFT (28);
260 SHIFT (29);
261 SHIFT (30);
262 SHIFT (31);
263 #ifndef TILEPRO
264 SHIFT (32);
265 SHIFT (33);
266 SHIFT (34);
267 SHIFT (35);
268 SHIFT (36);
269 SHIFT (37);
270 SHIFT (38);
271 SHIFT (39);
272 SHIFT (40);
273 SHIFT (41);
274 SHIFT (42);
275 SHIFT (43);
276 SHIFT (44);
277 SHIFT (45);
278 SHIFT (46);
279 SHIFT (47);
280 SHIFT (48);
281 SHIFT (49);
282 SHIFT (50);
283 SHIFT (51);
284 SHIFT (52);
285 SHIFT (53);
286 SHIFT (54);
287 SHIFT (55);
288 SHIFT (56);
289 SHIFT (57);
290 SHIFT (58);
291 SHIFT (59);
292 SHIFT (60);
293 SHIFT (61);
294 SHIFT (62);
295 SHIFT (63);
296 #endif
298 #ifdef TILEPRO
299 static Operator ops[] = {
300 Operator ("CODE_FOR_addsi3", "add", add, 1040),
301 Operator ("CODE_FOR_subsi3", "sub", sub, 1041),
302 Operator ("CODE_FOR_insn_s1a", "s1a", s1a, 1042),
303 Operator ("CODE_FOR_insn_s2a", "s2a", s2a, 1043),
304 Operator ("CODE_FOR_insn_s3a", "s3a", s3a, 1044),
305 /* Note: shl by 1 is not necessary, since adding a value to itself
306 produces the same result. But the architecture team thinks
307 left-shift may use slightly less power, so we might as well
308 prefer it. */
309 Operator ("CODE_FOR_ashlsi3", "shli", shift1, 1, 1001),
310 Operator ("CODE_FOR_ashlsi3", "shli", shift2, 2, 1002),
311 Operator ("CODE_FOR_ashlsi3", "shli", shift3, 3, 1003),
312 Operator ("CODE_FOR_ashlsi3", "shli", shift4, 4, 1004),
313 Operator ("CODE_FOR_ashlsi3", "shli", shift5, 5, 1005),
314 Operator ("CODE_FOR_ashlsi3", "shli", shift6, 6, 1006),
315 Operator ("CODE_FOR_ashlsi3", "shli", shift7, 7, 1007),
316 Operator ("CODE_FOR_ashlsi3", "shli", shift8, 8, 1008),
317 Operator ("CODE_FOR_ashlsi3", "shli", shift9, 9, 1009),
318 Operator ("CODE_FOR_ashlsi3", "shli", shift10, 10, 1010),
319 Operator ("CODE_FOR_ashlsi3", "shli", shift11, 11, 1011),
320 Operator ("CODE_FOR_ashlsi3", "shli", shift12, 12, 1012),
321 Operator ("CODE_FOR_ashlsi3", "shli", shift13, 13, 1013),
322 Operator ("CODE_FOR_ashlsi3", "shli", shift14, 14, 1014),
323 Operator ("CODE_FOR_ashlsi3", "shli", shift15, 15, 1015),
324 Operator ("CODE_FOR_ashlsi3", "shli", shift16, 16, 1016),
325 Operator ("CODE_FOR_ashlsi3", "shli", shift17, 17, 1017),
326 Operator ("CODE_FOR_ashlsi3", "shli", shift18, 18, 1018),
327 Operator ("CODE_FOR_ashlsi3", "shli", shift19, 19, 1019),
328 Operator ("CODE_FOR_ashlsi3", "shli", shift20, 20, 1020),
329 Operator ("CODE_FOR_ashlsi3", "shli", shift21, 21, 1021),
330 Operator ("CODE_FOR_ashlsi3", "shli", shift22, 22, 1022),
331 Operator ("CODE_FOR_ashlsi3", "shli", shift23, 23, 1023),
332 Operator ("CODE_FOR_ashlsi3", "shli", shift24, 24, 1024),
333 Operator ("CODE_FOR_ashlsi3", "shli", shift25, 25, 1025),
334 Operator ("CODE_FOR_ashlsi3", "shli", shift26, 26, 1026),
335 Operator ("CODE_FOR_ashlsi3", "shli", shift27, 27, 1027),
336 Operator ("CODE_FOR_ashlsi3", "shli", shift28, 28, 1028),
337 Operator ("CODE_FOR_ashlsi3", "shli", shift29, 29, 1029),
338 Operator ("CODE_FOR_ashlsi3", "shli", shift30, 30, 1030),
339 Operator ("CODE_FOR_ashlsi3", "shli", shift31, 31, 1031)
341 #else
342 static Operator ops[] = {
343 Operator ("CODE_FOR_adddi3", "add", add, 1070),
344 Operator ("CODE_FOR_subdi3", "sub", sub, 1071),
345 Operator ("CODE_FOR_insn_shl1add", "shl1add", s1a, 1072),
346 Operator ("CODE_FOR_insn_shl2add", "shl2add", s2a, 1073),
347 Operator ("CODE_FOR_insn_shl3add", "shl3add", s3a, 1074),
348 // Note: shl by 1 is not necessary, since adding a value to itself
349 // produces the same result. But the architecture team thinks left-shift
350 // may use slightly less power, so we might as well prefer it.
351 Operator ("CODE_FOR_ashldi3", "shli", shift1, 1, 1001),
352 Operator ("CODE_FOR_ashldi3", "shli", shift2, 2, 1002),
353 Operator ("CODE_FOR_ashldi3", "shli", shift3, 3, 1003),
354 Operator ("CODE_FOR_ashldi3", "shli", shift4, 4, 1004),
355 Operator ("CODE_FOR_ashldi3", "shli", shift5, 5, 1005),
356 Operator ("CODE_FOR_ashldi3", "shli", shift6, 6, 1006),
357 Operator ("CODE_FOR_ashldi3", "shli", shift7, 7, 1007),
358 Operator ("CODE_FOR_ashldi3", "shli", shift8, 8, 1008),
359 Operator ("CODE_FOR_ashldi3", "shli", shift9, 9, 1009),
360 Operator ("CODE_FOR_ashldi3", "shli", shift10, 10, 1010),
361 Operator ("CODE_FOR_ashldi3", "shli", shift11, 11, 1011),
362 Operator ("CODE_FOR_ashldi3", "shli", shift12, 12, 1012),
363 Operator ("CODE_FOR_ashldi3", "shli", shift13, 13, 1013),
364 Operator ("CODE_FOR_ashldi3", "shli", shift14, 14, 1014),
365 Operator ("CODE_FOR_ashldi3", "shli", shift15, 15, 1015),
366 Operator ("CODE_FOR_ashldi3", "shli", shift16, 16, 1016),
367 Operator ("CODE_FOR_ashldi3", "shli", shift17, 17, 1017),
368 Operator ("CODE_FOR_ashldi3", "shli", shift18, 18, 1018),
369 Operator ("CODE_FOR_ashldi3", "shli", shift19, 19, 1019),
370 Operator ("CODE_FOR_ashldi3", "shli", shift20, 20, 1020),
371 Operator ("CODE_FOR_ashldi3", "shli", shift21, 21, 1021),
372 Operator ("CODE_FOR_ashldi3", "shli", shift22, 22, 1022),
373 Operator ("CODE_FOR_ashldi3", "shli", shift23, 23, 1023),
374 Operator ("CODE_FOR_ashldi3", "shli", shift24, 24, 1024),
375 Operator ("CODE_FOR_ashldi3", "shli", shift25, 25, 1025),
376 Operator ("CODE_FOR_ashldi3", "shli", shift26, 26, 1026),
377 Operator ("CODE_FOR_ashldi3", "shli", shift27, 27, 1027),
378 Operator ("CODE_FOR_ashldi3", "shli", shift28, 28, 1028),
379 Operator ("CODE_FOR_ashldi3", "shli", shift29, 29, 1029),
380 Operator ("CODE_FOR_ashldi3", "shli", shift30, 30, 1030),
381 Operator ("CODE_FOR_ashldi3", "shli", shift31, 31, 1031),
382 Operator ("CODE_FOR_ashldi3", "shli", shift32, 32, 1032),
383 Operator ("CODE_FOR_ashldi3", "shli", shift33, 33, 1033),
384 Operator ("CODE_FOR_ashldi3", "shli", shift34, 34, 1034),
385 Operator ("CODE_FOR_ashldi3", "shli", shift35, 35, 1035),
386 Operator ("CODE_FOR_ashldi3", "shli", shift36, 36, 1036),
387 Operator ("CODE_FOR_ashldi3", "shli", shift37, 37, 1037),
388 Operator ("CODE_FOR_ashldi3", "shli", shift38, 38, 1038),
389 Operator ("CODE_FOR_ashldi3", "shli", shift39, 39, 1039),
390 Operator ("CODE_FOR_ashldi3", "shli", shift40, 40, 1040),
391 Operator ("CODE_FOR_ashldi3", "shli", shift41, 41, 1041),
392 Operator ("CODE_FOR_ashldi3", "shli", shift42, 42, 1042),
393 Operator ("CODE_FOR_ashldi3", "shli", shift43, 43, 1043),
394 Operator ("CODE_FOR_ashldi3", "shli", shift44, 44, 1044),
395 Operator ("CODE_FOR_ashldi3", "shli", shift45, 45, 1045),
396 Operator ("CODE_FOR_ashldi3", "shli", shift46, 46, 1046),
397 Operator ("CODE_FOR_ashldi3", "shli", shift47, 47, 1047),
398 Operator ("CODE_FOR_ashldi3", "shli", shift48, 48, 1048),
399 Operator ("CODE_FOR_ashldi3", "shli", shift49, 49, 1049),
400 Operator ("CODE_FOR_ashldi3", "shli", shift50, 50, 1050),
401 Operator ("CODE_FOR_ashldi3", "shli", shift51, 51, 1051),
402 Operator ("CODE_FOR_ashldi3", "shli", shift52, 52, 1052),
403 Operator ("CODE_FOR_ashldi3", "shli", shift53, 53, 1053),
404 Operator ("CODE_FOR_ashldi3", "shli", shift54, 54, 1054),
405 Operator ("CODE_FOR_ashldi3", "shli", shift55, 55, 1055),
406 Operator ("CODE_FOR_ashldi3", "shli", shift56, 56, 1056),
407 Operator ("CODE_FOR_ashldi3", "shli", shift57, 57, 1057),
408 Operator ("CODE_FOR_ashldi3", "shli", shift58, 58, 1058),
409 Operator ("CODE_FOR_ashldi3", "shli", shift59, 59, 1059),
410 Operator ("CODE_FOR_ashldi3", "shli", shift60, 60, 1060),
411 Operator ("CODE_FOR_ashldi3", "shli", shift61, 61, 1061),
412 Operator ("CODE_FOR_ashldi3", "shli", shift62, 62, 1062),
413 Operator ("CODE_FOR_ashldi3", "shli", shift63, 63, 1063)
415 #endif
417 /* An ExpressionTree is an expression DAG. */
418 class ExpressionTree
420 public:
421 ExpressionTree () : m_num_vals (2)
423 m_exprs[0].m_produced_val = 0;
424 m_exprs[1].m_produced_val = 1;
427 void copy_technique_from (ExpressionTree * other)
429 /* TODO: write this; other can compute the same products with less
430 cost. We update this ExpressionTree object because some int is
431 already mapped to it. */
434 int m_num_vals;
435 Expr m_exprs[MAX_SUBEXPRS];
437 int cost () const
439 int cost = 0;
440 for (int j = 2; j < m_num_vals; j++)
441 cost += m_exprs[j].m_op->m_cost;
442 return cost + m_exprs[m_num_vals - 1].m_critical_path_length * 1000000;
447 typedef std::map<MUL_TYPE, ExpressionTree *> ExpressionTreeMap;
450 static void
451 find_sequences (ExpressionTree &s, ExpressionTreeMap &best_solution)
453 /* Don't look more if we can't add any new values to the expression
454 tree. */
455 const int num_vals = s.m_num_vals;
456 if (num_vals == MAX_SUBEXPRS)
457 return;
459 /* Grow array to make room for new values added as we loop. */
460 s.m_num_vals = num_vals + 1;
462 const Operator *const prev_op = s.m_exprs[num_vals - 1].m_op;
463 const int prev_top_index = (prev_op != NULL) ? prev_op->m_top_index : -1;
465 for (size_t f = 0; f < sizeof ops / sizeof ops[0]; f++)
467 const Operator *const op = &ops[f];
469 for (int i = 0; i < num_vals; i++)
471 /* Only allow zero as the first operand to sub, otherwise
472 it is useless. */
473 if (i == 0 && op->m_binary_func != sub)
474 continue;
476 /* Unary ops don't actually use the second operand, so as a
477 big hack we trick it into only looping once in the inner
478 loop. */
479 const int j_end = op->is_unary () ? 2 : num_vals;
481 /* We never allow zero as the second operand, as it is
482 always useless. */
483 for (int j = 1; j < j_end; j++)
485 /* Does this expression use the immediately previous
486 expression? */
487 const bool uses_prev_value =
488 (i == num_vals - 1
489 || (!op->is_unary () && j == num_vals - 1));
491 if (!uses_prev_value)
493 /* For search efficiency, prune redundant
494 instruction orderings.
496 This op does not take the immediately preceding
497 value as input, which means we could have done it
498 in the previous slot. If its opcode is less than
499 the previous instruction's, we'll declare this
500 instruction order non-canonical and not pursue
501 this branch of the search. */
502 if (op->m_top_index < prev_top_index)
503 continue;
506 MUL_TYPE n;
507 if (op->is_unary ())
509 n = op->m_unary_func (s.m_exprs[i].m_produced_val);
511 else
513 n = op->m_binary_func (s.m_exprs[i].m_produced_val,
514 s.m_exprs[j].m_produced_val);
517 bool duplicate = false;
518 for (int k = 0; k < num_vals; k++)
519 if (n == s.m_exprs[j].m_produced_val)
521 duplicate = true;
522 break;
525 if (duplicate)
526 continue;
528 /* See if we found the best solution for n. */
529 Expr *e = &s.m_exprs[num_vals];
530 e->m_op = op;
531 e->m_lhs = i;
532 e->m_rhs = op->is_unary () ? op->m_rhs_if_unary : j;
533 e->m_produced_val = n;
534 e->m_critical_path_length =
535 1 + MAX (s.m_exprs[i].m_critical_path_length,
536 s.m_exprs[j].m_critical_path_length);
538 ExpressionTreeMap::iterator best (best_solution.find (n));
539 if (best == best_solution.end ()
540 || (*best).second->cost () > s.cost ())
541 best_solution[n] = new ExpressionTree (s);
543 /* Recurse and find more. */
544 find_sequences (s, best_solution);
549 /* Restore old size. */
550 s.m_num_vals = num_vals;
554 /* For each insn_code used by an operator, choose a compact number so
555 it takes less space in the output data structure. This prints out a
556 lookup table used to map the compactified number back to an
557 insn_code. */
558 static void
559 create_insn_code_compression_table ()
561 int next_index = 1;
563 /* Entry 0 must hold CODE_FOR_nothing to mean "end of array". */
564 printf ("const enum insn_code %s_multiply_insn_seq_decode_opcode[] = {\n"
565 " CODE_FOR_nothing /* must be first */ ", ARCH);
567 for (size_t i = 0; i < sizeof ops / sizeof ops[0]; i++)
569 Operator *op = &ops[i];
570 int index = -1;
572 /* See if some previous Operator was using the same insn_code.
573 If so, reuse its table entry. */
574 for (size_t j = 0; j < i; j++)
576 Operator *old = &ops[j];
577 if (strcmp (old->m_pattern, op->m_pattern) == 0)
579 index = old->m_top_index;
580 break;
584 if (index == -1)
586 /* We need to make a new entry in the table. */
587 printf (",\n %s", op->m_pattern);
588 index = next_index++;
591 op->m_top_index = index;
594 printf ("\n};\n\n");
598 /* These are constants we've seen in code, that we want to keep table
599 entries for. */
600 static int multiply_constants_used[] = {
601 -11796480,
602 -27439,
603 -25148,
604 -22820,
605 -21709,
606 -20995,
607 -20284,
608 -20239,
609 -19595,
610 -19447,
611 -19183,
612 -19165,
613 -18730,
614 -17828,
615 -17799,
616 -17237,
617 -17036,
618 -16549,
619 -16423,
620 -16294,
621 -16244,
622 -16069,
623 -15137,
624 -15083,
625 -15038,
626 -14924,
627 -14905,
628 -14752,
629 -14731,
630 -14529,
631 -14273,
632 -14090,
633 -14084,
634 -14043,
635 -13850,
636 -13802,
637 -13631,
638 -13455,
639 -13275,
640 -12879,
641 -12700,
642 -12534,
643 -12399,
644 -12131,
645 -12112,
646 -12054,
647 -12019,
648 -11759,
649 -11585,
650 -11467,
651 -11395,
652 -11295,
653 -11248,
654 -11148,
655 -11116,
656 -11086,
657 -11059,
658 -11018,
659 -10811,
660 -10538,
661 -10258,
662 -10217,
663 -10033,
664 -9766,
665 -9754,
666 -9534,
667 -9527,
668 -9467,
669 -9262,
670 -9232,
671 -9222,
672 -9198,
673 -9191,
674 -9113,
675 -8825,
676 -8756,
677 -8697,
678 -8693,
679 -8565,
680 -8342,
681 -8208,
682 -8200,
683 -8170,
684 -8102,
685 -7770,
686 -7678,
687 -7562,
688 -7376,
689 -7373,
690 -7221,
691 -7121,
692 -6835,
693 -6810,
694 -6626,
695 -6581,
696 -6461,
697 -6278,
698 -6263,
699 -6163,
700 -6029,
701 -5816,
702 -5540,
703 -5461,
704 -5384,
705 -5329,
706 -4985,
707 -4926,
708 -4815,
709 -4788,
710 -4758,
711 -4433,
712 -4229,
713 -4209,
714 -4176,
715 -4104,
716 -4095,
717 -4078,
718 -3941,
719 -3818,
720 -3600,
721 -3474,
722 -3314,
723 -3264,
724 -3196,
725 -3072,
726 -2912,
727 -2836,
728 -2773,
729 -2269,
730 -2184,
731 -2100,
732 -1730,
733 -1512,
734 -1500,
735 -1396,
736 -1344,
737 -1312,
738 -1297,
739 -1059,
740 -1058,
741 1027,
742 1049,
743 1059,
744 1100,
745 1104,
746 1108,
747 1136,
748 1200,
749 1204,
750 1242,
751 1292,
752 1304,
753 1312,
754 1320,
755 1336,
756 1344,
757 1348,
758 1360,
759 1364,
760 1395,
761 1448,
762 1460,
763 1461,
764 1472,
765 1488,
766 1500,
767 1512,
768 1568,
769 1576,
770 1649,
771 1664,
772 1684,
773 1696,
774 1744,
775 1812,
776 1822,
777 1884,
778 1963,
779 1978,
780 2000,
781 2012,
782 2014,
783 2037,
784 2039,
785 2100,
786 2139,
787 2144,
788 2184,
789 2237,
790 2260,
791 2320,
792 2408,
793 2446,
794 2447,
795 2499,
796 2531,
797 2578,
798 2592,
799 2611,
800 2633,
801 2704,
802 2730,
803 2773,
804 2880,
805 2896,
806 2998,
807 3000,
808 3001,
809 3021,
810 3079,
811 3112,
812 3150,
813 3179,
814 3192,
815 3240,
816 3264,
817 3271,
818 3283,
819 3328,
820 3363,
821 3367,
822 3453,
823 3529,
824 3570,
825 3580,
826 3600,
827 3624,
828 3707,
829 3783,
830 3826,
831 3897,
832 3941,
833 3962,
834 3989,
835 4000,
836 4025,
837 4073,
838 4093,
839 4099,
840 4108,
841 4184,
842 4209,
843 4369,
844 4376,
845 4416,
846 4433,
847 4434,
848 4482,
849 4582,
850 4712,
851 4717,
852 4813,
853 4815,
854 4864,
855 5000,
856 5027,
857 5040,
858 5091,
859 5195,
860 5243,
861 5260,
862 5285,
863 5329,
864 5331,
865 5350,
866 5361,
867 5387,
868 5461,
869 5492,
870 5529,
871 5573,
872 5793,
873 5819,
874 5915,
875 5946,
876 5992,
877 6000,
878 6164,
879 6205,
880 6262,
881 6263,
882 6269,
883 6270,
884 6387,
885 6400,
886 6406,
887 6476,
888 6541,
889 6565,
890 6568,
891 6626,
892 6656,
893 6732,
894 6810,
895 6817,
896 6859,
897 7040,
898 7053,
899 7141,
900 7169,
901 7221,
902 7223,
903 7274,
904 7282,
905 7350,
906 7369,
907 7373,
908 7442,
909 7447,
910 7471,
911 7518,
912 7542,
913 7566,
914 7587,
915 7663,
916 7678,
917 7682,
918 7748,
919 7752,
920 7791,
921 8000,
922 8026,
923 8048,
924 8170,
925 8203,
926 8204,
927 8290,
928 8368,
929 8520,
930 8640,
931 8666,
932 8672,
933 8697,
934 8716,
935 8728,
936 8756,
937 8820,
938 8875,
939 8918,
940 8956,
941 9058,
942 9154,
943 9175,
944 9191,
945 9217,
946 9262,
947 9321,
948 9373,
949 9434,
950 9465,
951 9514,
952 9534,
953 9633,
954 9746,
955 9810,
956 9850,
957 9947,
958 9973,
959 10000,
960 10009,
961 10033,
962 10055,
963 10217,
964 10248,
965 10298,
966 10310,
967 10323,
968 10368,
969 10438,
970 10456,
971 10486,
972 10538,
973 10664,
974 10695,
975 10700,
976 10703,
977 10832,
978 10887,
979 10935,
980 10958,
981 11018,
982 11059,
983 11061,
984 11086,
985 11116,
986 11148,
987 11190,
988 11249,
989 11314,
990 11332,
991 11363,
992 11409,
993 11415,
994 11443,
995 11467,
996 11512,
997 11522,
998 11529,
999 11585,
1000 11759,
1001 11768,
1002 11795,
1003 11893,
1004 11997,
1005 12131,
1006 12299,
1007 12536,
1008 12543,
1009 12893,
1010 12945,
1011 12998,
1012 13109,
1013 13213,
1014 13685,
1015 13930,
1016 14023,
1017 14024,
1018 14271,
1019 14564,
1020 14647,
1021 15326,
1022 15850,
1023 15855,
1024 15929,
1025 16000,
1026 16154,
1027 16496,
1028 16807,
1029 16819,
1030 16984,
1031 17203,
1032 17223,
1033 17333,
1034 17760,
1035 17799,
1036 17837,
1037 18029,
1038 18068,
1039 18336,
1040 18515,
1041 19595,
1042 20017,
1043 20131,
1044 20862,
1045 20995,
1046 21709,
1047 22554,
1048 25000,
1049 25172,
1050 25600,
1051 25733,
1052 27439,
1053 38470,
1054 46802,
1055 50000,
1056 11796480,
1057 16843009,
1058 23592960,
1062 const int num_mult_constants_used =
1063 (int)(sizeof multiply_constants_used
1064 / sizeof multiply_constants_used[0]);
1067 #define XSIZE (sizeof multiply_constants_used / \
1068 sizeof multiply_constants_used[0] + 32) / 32
1069 unsigned multiply_constants_avail[XSIZE];
1070 #undef XSIZE
1073 /* bsearch helper function. */
1074 static int
1075 compare_constants (const void *key, const void *t)
1077 return (*(int*)key) - *((int*)t);
1081 static int *
1082 find_mult_constants_used (int multiplier)
1084 return (int *) bsearch (&multiplier, multiply_constants_used,
1085 num_mult_constants_used,
1086 sizeof multiply_constants_used[0],
1087 compare_constants);
1091 int num_ops (ExpressionTree *s)
1093 int n = 0;
1094 for (int i = 0; i < s->m_num_vals; i++)
1096 Expr *e = &s->m_exprs[i];
1097 if (e->m_op != NULL)
1098 n++;
1100 return n;
1104 #ifdef TILEPRO
1105 bool
1106 tilepro_emit (int multiplier, int num_ops)
1108 int abs_multiplier = (multiplier >= 0) ? multiplier : -multiplier;
1110 /* Keep constants in range [-1024, 1024]. */
1111 if (abs_multiplier <= 1024)
1112 return true;
1114 /* Keep constants near powers of two. */
1115 int prev_pow2 = 1 << (31 - __builtin_clz (abs_multiplier));
1116 int next_pow2 = prev_pow2 << 1;
1118 if ((abs_multiplier - prev_pow2 <= 10)
1119 || (next_pow2 - abs_multiplier <= 10))
1120 return true;
1122 /* Keep constants near powers of ten. */
1124 long long j = 1;
1125 long long prev_pow10;
1126 long long next_pow10;
1128 while (((j * 10) < abs_multiplier)
1129 && (j < (j * 10)))
1130 j = j * 10;
1132 prev_pow10 = j;
1133 next_pow10 = j * 10;
1135 if ((abs_multiplier - prev_pow10 <= 10)
1136 || (next_pow10 - abs_multiplier <= 10))
1137 return true;
1140 /* Keep short sequences that have two or fewer ops. */
1141 if (num_ops <= 2)
1142 return true;
1144 /* Keep constants that are mostly 0's or mostly 1's. */
1145 if (__builtin_popcount (multiplier) <= 2 ||
1146 __builtin_popcount (multiplier) >= 30)
1147 return true;
1149 /* Keep constants seen in actual code. */
1150 if ((find_mult_constants_used (multiplier)))
1151 return true;
1153 return false;
1155 #else
1156 bool
1157 tilegx_emit (long long multiplier, int num_ops)
1159 long long abs_multiplier = (multiplier >= 0) ? multiplier : - multiplier;
1161 /* Keep constants in range [-1024, 1024]. */
1162 if (abs_multiplier <= 1024)
1163 return true;
1165 /* Otherwise exclude sequences with four ops. */
1166 if (num_ops > 3)
1167 return false;
1169 /* Keep constants near powers of two. */
1171 unsigned long long prev_pow2 =
1172 1LL << (63 - __builtin_clzll (abs_multiplier));
1173 unsigned long long next_pow2 = prev_pow2 << 1;
1175 /* handle overflow case. */
1176 if (next_pow2 == 0)
1178 if (prev_pow2 - abs_multiplier <= 10)
1179 return true;
1181 else if ((abs_multiplier - prev_pow2 <= 10)
1182 || (next_pow2 - abs_multiplier <= 10))
1183 return true;
1186 /* Keep constants near powers of ten. */
1188 long long j = 1;
1189 long long prev_pow10;
1190 long long next_pow10;
1192 while (((j * 10) < abs_multiplier)
1193 && (j < (INTMAX_MAX / 10)))
1194 j = j * 10;
1196 prev_pow10 = j;
1197 next_pow10 = (j > (INTMAX_MAX / 10)) ? 0 : j * 10;
1199 if ((abs_multiplier - prev_pow10 <= 100)
1200 || (next_pow10
1201 && (next_pow10 - abs_multiplier <= 100)))
1202 return true;
1205 if (num_ops <= 2)
1206 return true;
1208 /* Keep constants that are mostly 0's or mostly 1's. */
1209 if (__builtin_popcountll (multiplier) <= 2 ||
1210 __builtin_popcountll (multiplier) >= 62)
1211 return true;
1213 /* Keep constants seen in actual code. */
1214 if (find_mult_constants_used (multiplier))
1215 return true;
1217 return false;
1219 #endif
1223 main ()
1225 ExpressionTreeMap best_solution;
1226 ExpressionTree s;
1228 #ifdef TILEPRO
1229 printf ("/* Constant multiply table for TILEPro.\n");
1230 #else
1231 printf ("/* Constant multiply table for TILE-Gx.\n");
1232 #endif
1233 printf (" Copyright (C) 2011-2014 Free Software Foundation, Inc.\n");
1234 printf (" Contributed by Walter Lee (walt@tilera.com)\n");
1235 printf ("\n");
1236 printf (" This file is part of GCC.\n");
1237 printf ("\n");
1238 printf (" GCC is free software; you can redistribute it and/or modify it\n");
1239 printf (" under the terms of the GNU General Public License as published\n");
1240 printf (" by the Free Software Foundation; either version 3, or (at your\n");
1241 printf (" option) any later version.\n");
1242 printf ("\n");
1243 printf (" GCC is distributed in the hope that it will be useful, but WITHOUT\n");
1244 printf (" ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n");
1245 printf (" or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public\n");
1246 printf (" License for more details.\n");
1247 printf ("\n");
1248 printf (" You should have received a copy of the GNU General Public License\n");
1249 printf (" along with GCC; see the file COPYING3. If not see\n");
1250 printf (" <http://www.gnu.org/licenses/>. */\n");
1251 printf ("\n");
1252 printf ("#include \"config.h\"\n");
1253 printf ("#include \"system.h\"\n");
1254 printf ("#include \"coretypes.h\"\n");
1255 printf ("#include \"expr.h\"\n");
1256 printf ("#include \"optabs.h\"\n");
1257 printf ("#include \"%s-multiply.h\"\n\n", ARCH);
1258 create_insn_code_compression_table ();
1260 /* Try all combinations of operators and see what constants we
1261 produce. For each possible constant, record the most efficient
1262 way to generate it. */
1263 find_sequences (s, best_solution);
1265 printf ("const struct %s_multiply_insn_seq "
1266 "%s_multiply_insn_seq_table[] = {\n",
1267 ARCH, ARCH);
1269 const char *comma_separator = "";
1271 ExpressionTreeMap::iterator i (best_solution.begin ());
1272 for (; i != best_solution.end (); ++i)
1274 ExpressionTree *s = (*i).second;
1275 const MUL_TYPE n = (*i).first;
1277 if (n == 0 || n == 1)
1279 /* Both of these require zero operations, so these entries
1280 are bogus and should never be used. */
1281 continue;
1284 /* Prune the list of entries to keep the table to a reasonable
1285 size. */
1286 #ifdef TILEPRO
1287 if (!tilepro_emit (n, num_ops (s)))
1288 continue;
1289 #else
1290 if (!tilegx_emit (n, num_ops (s)))
1291 continue;
1292 #endif
1294 printf ("%s", comma_separator);
1296 #ifdef TILEPRO
1297 const MUL_TYPE int_min = INT32_MIN;
1298 #else
1299 const MUL_TYPE int_min = INT64_MIN;
1300 #endif
1301 if (n == int_min)
1303 /* Handle C's woeful lack of unary negation. Without this,
1304 printing out INT_MIN in decimal will yield an unsigned
1305 int which could generate a compiler warning. */
1306 #ifdef TILEPRO
1307 printf (" {%d - 1 /* 0x%x */ ,\n {", n + 1,
1308 (unsigned) n);
1309 #else
1310 printf (" {%lldll - 1 /* 0x%llx */ ,\n {", n + 1,
1311 (unsigned MUL_TYPE) n);
1312 #endif
1314 else
1316 #ifdef TILEPRO
1317 printf (" {%d /* 0x%x */ ,\n {", n, (unsigned) n);
1318 #else
1319 printf (" {%lldll /* 0x%llx */ ,\n {", n, (unsigned MUL_TYPE) n);
1320 #endif
1323 bool first = true;
1324 for (int j = 0; j < s->m_num_vals; j++)
1326 Expr *e = &s->m_exprs[j];
1328 const Operator *op = e->m_op;
1329 if (op == NULL)
1330 continue;
1332 char buf[1024];
1333 snprintf (buf, sizeof buf, "%s{%d, %d, %d}%s",
1334 first ? "" : " ",
1335 op->m_top_index,
1336 e->m_lhs, e->m_rhs, (j + 1) == s->m_num_vals ? "}" : ",");
1338 char opnd0[10];
1339 if (e->m_lhs)
1340 snprintf (opnd0, sizeof opnd0, "r%d", e->m_lhs);
1341 else
1342 snprintf (opnd0, sizeof opnd0, "zero");
1344 printf ("%s\t\t\t/* %s r%d, %s, %s%d */\n",
1345 buf, op->m_name, j, opnd0,
1346 op->is_unary () ? "" : "r", e->m_rhs);
1348 first = false;
1350 printf (" }");
1351 comma_separator = ",\n";
1354 printf ("\n};\n\n");
1355 printf ("const int %s_multiply_insn_seq_table_size =\n"
1356 " (int) (sizeof %s_multiply_insn_seq_table\n"
1357 " / sizeof %s_multiply_insn_seq_table[0]);\n",
1358 ARCH, ARCH, ARCH);
1360 return EXIT_SUCCESS;