gcc/tree-vect-patterns.c

   1 /* Analysis Utilities for Loop Vectorization.
   2    Copyright (C) 2006, 2007 Free Software Foundation, Inc.
   3    Contributed by Dorit Nuzman <dorit@il.ibm.com>
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 #include "config.h"
  22 #include "system.h"
  23 #include "coretypes.h"
  24 #include "tm.h"
  25 #include "ggc.h"
  26 #include "tree.h"
  27
  28 #include "target.h"
  29 #include "basic-block.h"
  30 #include "diagnostic.h"
  31 #include "tree-flow.h"
  32 #include "tree-dump.h"
  33 #include "timevar.h"
  34 #include "cfgloop.h"
  35 #include "expr.h"
  36 #include "optabs.h"
  37 #include "params.h"
  38 #include "tree-data-ref.h"
  39 #include "tree-vectorizer.h"
  40 #include "recog.h"
  41 #include "toplev.h"
  42
  43 /* Function prototypes */
  44 static void vect_pattern_recog_1
  45   (tree (* ) (tree, tree *, tree *), block_stmt_iterator);
  46 static bool widened_name_p (tree, tree, tree *, tree *);
  47
  48 /* Pattern recognition functions  */
  49 static tree vect_recog_widen_sum_pattern (tree, tree *, tree *);
  50 static tree vect_recog_widen_mult_pattern (tree, tree *, tree *);
  51 static tree vect_recog_dot_prod_pattern (tree, tree *, tree *);
  52 static tree vect_recog_pow_pattern (tree, tree *, tree *);
  53 static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
  54         vect_recog_widen_mult_pattern,
  55         vect_recog_widen_sum_pattern,
  56         vect_recog_dot_prod_pattern,
  57         vect_recog_pow_pattern};
  58
  59
  60 /* Function widened_name_p
  61
  62    Check whether NAME, an ssa-name used in USE_STMT,
  63    is a result of a type-promotion, such that:
  64      DEF_STMT: NAME = NOP (name0)
  65    where the type of name0 (HALF_TYPE) is smaller than the type of NAME.
  66 */
  67
  68 static bool
  69 widened_name_p (tree name, tree use_stmt, tree *half_type, tree *def_stmt)
  70 {
  71   tree dummy;
  72   loop_vec_info loop_vinfo;
  73   stmt_vec_info stmt_vinfo;
  74   tree expr;
  75   tree type = TREE_TYPE (name);
  76   tree oprnd0;
  77   enum vect_def_type dt;
  78   tree def;
  79
  80   stmt_vinfo = vinfo_for_stmt (use_stmt);
  81   loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  82
  83   if (!vect_is_simple_use (name, loop_vinfo, def_stmt, &def, &dt))
  84     return false;
  85
  86   if (dt != vect_loop_def
  87       && dt != vect_invariant_def && dt != vect_constant_def)
  88     return false;
  89
  90   if (! *def_stmt)
  91     return false;
  92
  93   if (TREE_CODE (*def_stmt) != GIMPLE_MODIFY_STMT)
  94     return false;
  95
  96   expr = GIMPLE_STMT_OPERAND (*def_stmt, 1);
  97   if (TREE_CODE (expr) != NOP_EXPR)
  98     return false;
  99
 100   oprnd0 = TREE_OPERAND (expr, 0);
 101
 102   *half_type = TREE_TYPE (oprnd0);
 103   if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*half_type)
 104       || (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*half_type))
 105       || (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 2)))
 106     return false;
 107
 108   if (!vect_is_simple_use (oprnd0, loop_vinfo, &dummy, &dummy, &dt))
 109     return false;
 110
 111   return true;
 112 }
 113
 114
 115 /* Function vect_recog_dot_prod_pattern
 116
 117    Try to find the following pattern:
 118
 119      type x_t, y_t;
 120      TYPE1 prod;
 121      TYPE2 sum = init;
 122    loop:
 123      sum_0 = phi <init, sum_1>
 124      S1  x_t = ...
 125      S2  y_t = ...
 126      S3  x_T = (TYPE1) x_t;
 127      S4  y_T = (TYPE1) y_t;
 128      S5  prod = x_T * y_T;
 129      [S6  prod = (TYPE2) prod;  #optional]
 130      S7  sum_1 = prod + sum_0;
 131
 132    where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
 133    same size of 'TYPE1' or bigger. This is a special case of a reduction
 134    computation.
 135
 136    Input:
 137
 138    * LAST_STMT: A stmt from which the pattern search begins. In the example,
 139    when this function is called with S7, the pattern {S3,S4,S5,S6,S7} will be
 140    detected.
 141
 142    Output:
 143
 144    * TYPE_IN: The type of the input arguments to the pattern.
 145
 146    * TYPE_OUT: The type of the output  of this pattern.
 147
 148    * Return value: A new stmt that will be used to replace the sequence of
 149    stmts that constitute the pattern. In this case it will be:
 150         WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
 151
 152    Note: The dot-prod idiom is a widening reduction pattern that is
 153          vectorized without preserving all the intermediate results. It
 154          produces only N/2 (widened) results (by summing up pairs of
 155          intermediate results) rather than all N results.  Therefore, we
 156          cannot allow this pattern when we want to get all the results and in
 157          the correct order (as is the case when this computation is in an
 158          inner-loop nested in an outer-loop that us being vectorized).  */
 159
 160 static tree
 161 vect_recog_dot_prod_pattern (tree last_stmt, tree *type_in, tree *type_out)
 162 {
 163   tree stmt, expr;
 164   tree oprnd0, oprnd1;
 165   tree oprnd00, oprnd01;
 166   stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
 167   tree type, half_type;
 168   tree pattern_expr;
 169   tree prod_type;
 170   loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
 171   struct loop *loop = LOOP_VINFO_LOOP (loop_info);
 172
 173   if (TREE_CODE (last_stmt) != GIMPLE_MODIFY_STMT)
 174     return NULL;
 175
 176   expr = GIMPLE_STMT_OPERAND (last_stmt, 1);
 177   type = TREE_TYPE (expr);
 178
 179   /* Look for the following pattern
 180           DX = (TYPE1) X;
 181           DY = (TYPE1) Y;
 182           DPROD = DX * DY;
 183           DDPROD = (TYPE2) DPROD;
 184           sum_1 = DDPROD + sum_0;
 185      In which
 186      - DX is double the size of X
 187      - DY is double the size of Y
 188      - DX, DY, DPROD all have the same type
 189      - sum is the same size of DPROD or bigger
 190      - sum has been recognized as a reduction variable.
 191
 192      This is equivalent to:
 193        DPROD = X w* Y;          #widen mult
 194        sum_1 = DPROD w+ sum_0;  #widen summation
 195      or
 196        DPROD = X w* Y;          #widen mult
 197        sum_1 = DPROD + sum_0;   #summation
 198    */
 199
 200   /* Starting from LAST_STMT, follow the defs of its uses in search
 201      of the above pattern.  */
 202
 203   if (TREE_CODE (expr) != PLUS_EXPR)
 204     return NULL;
 205
 206   if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
 207     {
 208       /* Has been detected as widening-summation?  */
 209
 210       stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
 211       expr = GIMPLE_STMT_OPERAND (stmt, 1);
 212       type = TREE_TYPE (expr);
 213       if (TREE_CODE (expr) != WIDEN_SUM_EXPR)
 214         return NULL;
 215       oprnd0 = TREE_OPERAND (expr, 0);
 216       oprnd1 = TREE_OPERAND (expr, 1);
 217       half_type = TREE_TYPE (oprnd0);
 218     }
 219   else
 220     {
 221       tree def_stmt;
 222
 223       if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
 224         return NULL;
 225       oprnd0 = TREE_OPERAND (expr, 0);
 226       oprnd1 = TREE_OPERAND (expr, 1);
 227       if (TYPE_MAIN_VARIANT (TREE_TYPE (oprnd0)) != TYPE_MAIN_VARIANT (type)
 228           || TYPE_MAIN_VARIANT (TREE_TYPE (oprnd1)) != TYPE_MAIN_VARIANT (type))
 229         return NULL;
 230       stmt = last_stmt;
 231
 232       if (widened_name_p (oprnd0, stmt, &half_type, &def_stmt))
 233         {
 234           stmt = def_stmt;
 235           expr = GIMPLE_STMT_OPERAND (stmt, 1);
 236           oprnd0 = TREE_OPERAND (expr, 0);
 237         }
 238       else
 239         half_type = type;
 240     }
 241
 242   /* So far so good. Since last_stmt was detected as a (summation) reduction,
 243      we know that oprnd1 is the reduction variable (defined by a loop-header
 244      phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
 245      Left to check that oprnd0 is defined by a (widen_)mult_expr  */
 246
 247   prod_type = half_type;
 248   stmt = SSA_NAME_DEF_STMT (oprnd0);
 249   /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
 250      inside the loop (in case we are analyzing an outer-loop).  */
 251   if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
 252     return NULL;
 253   stmt_vinfo = vinfo_for_stmt (stmt);
 254   gcc_assert (stmt_vinfo);
 255   if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_loop_def)
 256     return NULL;
 257   expr = GIMPLE_STMT_OPERAND (stmt, 1);
 258   if (TREE_CODE (expr) != MULT_EXPR)
 259     return NULL;
 260   if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
 261     {
 262       /* Has been detected as a widening multiplication?  */
 263
 264       stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
 265       expr = GIMPLE_STMT_OPERAND (stmt, 1);
 266       if (TREE_CODE (expr) != WIDEN_MULT_EXPR)
 267         return NULL;
 268       stmt_vinfo = vinfo_for_stmt (stmt);
 269       gcc_assert (stmt_vinfo);
 270       gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_loop_def);
 271       oprnd00 = TREE_OPERAND (expr, 0);
 272       oprnd01 = TREE_OPERAND (expr, 1);
 273     }
 274   else
 275     {
 276       tree half_type0, half_type1;
 277       tree def_stmt;
 278       tree oprnd0, oprnd1;
 279
 280       oprnd0 = TREE_OPERAND (expr, 0);
 281       oprnd1 = TREE_OPERAND (expr, 1);
 282       if (TYPE_MAIN_VARIANT (TREE_TYPE (oprnd0))
 283                                 != TYPE_MAIN_VARIANT (prod_type)
 284           || TYPE_MAIN_VARIANT (TREE_TYPE (oprnd1))
 285                                 != TYPE_MAIN_VARIANT (prod_type))
 286         return NULL;
 287       if (!widened_name_p (oprnd0, stmt, &half_type0, &def_stmt))
 288         return NULL;
 289       oprnd00 = TREE_OPERAND (GIMPLE_STMT_OPERAND (def_stmt, 1), 0);
 290       if (!widened_name_p (oprnd1, stmt, &half_type1, &def_stmt))
 291         return NULL;
 292       oprnd01 = TREE_OPERAND (GIMPLE_STMT_OPERAND (def_stmt, 1), 0);
 293       if (TYPE_MAIN_VARIANT (half_type0) != TYPE_MAIN_VARIANT (half_type1))
 294         return NULL;
 295       if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
 296         return NULL;
 297     }
 298
 299   half_type = TREE_TYPE (oprnd00);
 300   *type_in = half_type;
 301   *type_out = type;
 302
 303   /* Pattern detected. Create a stmt to be used to replace the pattern: */
 304   pattern_expr = build3 (DOT_PROD_EXPR, type, oprnd00, oprnd01, oprnd1);
 305   if (vect_print_dump_info (REPORT_DETAILS))
 306     {
 307       fprintf (vect_dump, "vect_recog_dot_prod_pattern: detected: ");
 308       print_generic_expr (vect_dump, pattern_expr, TDF_SLIM);
 309     }
 310
 311   /* We don't allow changing the order of the computation in the inner-loop
 312      when doing outer-loop vectorization.  */
 313   if (nested_in_vect_loop_p (loop, last_stmt))
 314     {
 315       if (vect_print_dump_info (REPORT_DETAILS))
 316         fprintf (vect_dump, "vect_recog_dot_prod_pattern: not allowed.");
 317       return NULL;
 318     }
 319
 320   return pattern_expr;
 321 }
 322
 323
 324 /* Function vect_recog_widen_mult_pattern
 325
 326    Try to find the following pattern:
 327
 328      type a_t, b_t;
 329      TYPE a_T, b_T, prod_T;
 330
 331      S1  a_t = ;
 332      S2  b_t = ;
 333      S3  a_T = (TYPE) a_t;
 334      S4  b_T = (TYPE) b_t;
 335      S5  prod_T = a_T * b_T;
 336
 337    where type 'TYPE' is at least double the size of type 'type'.
 338
 339    Input:
 340
 341    * LAST_STMT: A stmt from which the pattern search begins. In the example,
 342    when this function is called with S5, the pattern {S3,S4,S5} is be detected.
 343
 344    Output:
 345
 346    * TYPE_IN: The type of the input arguments to the pattern.
 347
 348    * TYPE_OUT: The type of the output  of this pattern.
 349
 350    * Return value: A new stmt that will be used to replace the sequence of
 351    stmts that constitute the pattern. In this case it will be:
 352         WIDEN_MULT <a_t, b_t>
 353 */
 354
 355 static tree
 356 vect_recog_widen_mult_pattern (tree last_stmt,
 357                                tree *type_in,
 358                                tree *type_out)
 359 {
 360   tree expr;
 361   tree def_stmt0, def_stmt1;
 362   tree oprnd0, oprnd1;
 363   tree type, half_type0, half_type1;
 364   tree pattern_expr;
 365   tree vectype;
 366   tree dummy;
 367   enum tree_code dummy_code;
 368
 369   if (TREE_CODE (last_stmt) != GIMPLE_MODIFY_STMT)
 370     return NULL;
 371
 372   expr = GIMPLE_STMT_OPERAND (last_stmt, 1);
 373   type = TREE_TYPE (expr);
 374
 375   /* Starting from LAST_STMT, follow the defs of its uses in search
 376      of the above pattern.  */
 377
 378   if (TREE_CODE (expr) != MULT_EXPR)
 379     return NULL;
 380
 381   oprnd0 = TREE_OPERAND (expr, 0);
 382   oprnd1 = TREE_OPERAND (expr, 1);
 383   if (TYPE_MAIN_VARIANT (TREE_TYPE (oprnd0)) != TYPE_MAIN_VARIANT (type)
 384       || TYPE_MAIN_VARIANT (TREE_TYPE (oprnd1)) != TYPE_MAIN_VARIANT (type))
 385     return NULL;
 386
 387   /* Check argument 0 */
 388   if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0))
 389     return NULL;
 390   oprnd0 = TREE_OPERAND (GIMPLE_STMT_OPERAND (def_stmt0, 1), 0);
 391
 392   /* Check argument 1 */
 393   if (!widened_name_p (oprnd1, last_stmt, &half_type1, &def_stmt1))
 394     return NULL;
 395   oprnd1 = TREE_OPERAND (GIMPLE_STMT_OPERAND (def_stmt1, 1), 0);
 396
 397   if (TYPE_MAIN_VARIANT (half_type0) != TYPE_MAIN_VARIANT (half_type1))
 398     return NULL;
 399
 400   /* Pattern detected.  */
 401   if (vect_print_dump_info (REPORT_DETAILS))
 402     fprintf (vect_dump, "vect_recog_widen_mult_pattern: detected: ");
 403
 404   /* Check target support  */
 405   vectype = get_vectype_for_scalar_type (half_type0);
 406   if (!vectype
 407       || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt, vectype,
 408                                        &dummy, &dummy, &dummy_code,
 409                                        &dummy_code))
 410     return NULL;
 411
 412   *type_in = vectype;
 413   *type_out = NULL_TREE;
 414
 415   /* Pattern supported. Create a stmt to be used to replace the pattern: */
 416   pattern_expr = build2 (WIDEN_MULT_EXPR, type, oprnd0, oprnd1);
 417   if (vect_print_dump_info (REPORT_DETAILS))
 418     print_generic_expr (vect_dump, pattern_expr, TDF_SLIM);
 419   return pattern_expr;
 420 }
 421
 422
 423 /* Function vect_recog_pow_pattern
 424
 425    Try to find the following pattern:
 426
 427      x = POW (y, N);
 428
 429    with POW being one of pow, powf, powi, powif and N being
 430    either 2 or 0.5.
 431
 432    Input:
 433
 434    * LAST_STMT: A stmt from which the pattern search begins.
 435
 436    Output:
 437
 438    * TYPE_IN: The type of the input arguments to the pattern.
 439
 440    * TYPE_OUT: The type of the output of this pattern.
 441
 442    * Return value: A new stmt that will be used to replace the sequence of
 443    stmts that constitute the pattern. In this case it will be:
 444         x * x
 445    or
 446         sqrt (x)
 447 */
 448
 449 static tree
 450 vect_recog_pow_pattern (tree last_stmt, tree *type_in, tree *type_out)
 451 {
 452   tree expr;
 453   tree type;
 454   tree fn, base, exp;
 455
 456   if (TREE_CODE (last_stmt) != GIMPLE_MODIFY_STMT)
 457     return NULL;
 458
 459   expr = GIMPLE_STMT_OPERAND (last_stmt, 1);
 460   type = TREE_TYPE (expr);
 461
 462   if (TREE_CODE (expr) != CALL_EXPR)
 463     return NULL_TREE;
 464
 465   fn = get_callee_fndecl (expr);
 466   switch (DECL_FUNCTION_CODE (fn))
 467     {
 468     case BUILT_IN_POWIF:
 469     case BUILT_IN_POWI:
 470     case BUILT_IN_POWF:
 471     case BUILT_IN_POW:
 472       base = CALL_EXPR_ARG (expr, 0);
 473       exp = CALL_EXPR_ARG (expr, 1);
 474       if (TREE_CODE (exp) != REAL_CST
 475           && TREE_CODE (exp) != INTEGER_CST)
 476         return NULL_TREE;
 477       break;
 478
 479     default:;
 480       return NULL_TREE;
 481     }
 482
 483   /* We now have a pow or powi builtin function call with a constant
 484      exponent.  */
 485
 486   *type_out = NULL_TREE;
 487
 488   /* Catch squaring.  */
 489   if ((host_integerp (exp, 0)
 490        && tree_low_cst (exp, 0) == 2)
 491       || (TREE_CODE (exp) == REAL_CST
 492           && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
 493     {
 494       *type_in = TREE_TYPE (base);
 495       return build2 (MULT_EXPR, TREE_TYPE (base), base, base);
 496     }
 497
 498   /* Catch square root.  */
 499   if (TREE_CODE (exp) == REAL_CST
 500       && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
 501     {
 502       tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
 503       *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
 504       if (*type_in)
 505         {
 506           newfn = build_call_expr (newfn, 1, base);
 507           if (vectorizable_function (newfn, *type_in, *type_in) != NULL_TREE)
 508             return newfn;
 509         }
 510     }
 511
 512   return NULL_TREE;
 513 }
 514
 515
 516 /* Function vect_recog_widen_sum_pattern
 517
 518    Try to find the following pattern:
 519
 520      type x_t;
 521      TYPE x_T, sum = init;
 522    loop:
 523      sum_0 = phi <init, sum_1>
 524      S1  x_t = *p;
 525      S2  x_T = (TYPE) x_t;
 526      S3  sum_1 = x_T + sum_0;
 527
 528    where type 'TYPE' is at least double the size of type 'type', i.e - we're
 529    summing elements of type 'type' into an accumulator of type 'TYPE'. This is
 530    a special case of a reduction computation.
 531
 532    Input:
 533
 534    * LAST_STMT: A stmt from which the pattern search begins. In the example,
 535    when this function is called with S3, the pattern {S2,S3} will be detected.
 536
 537    Output:
 538
 539    * TYPE_IN: The type of the input arguments to the pattern.
 540
 541    * TYPE_OUT: The type of the output of this pattern.
 542
 543    * Return value: A new stmt that will be used to replace the sequence of
 544    stmts that constitute the pattern. In this case it will be:
 545         WIDEN_SUM <x_t, sum_0>
 546
 547    Note: The widening-sum idiom is a widening reduction pattern that is
 548          vectorized without preserving all the intermediate results. It
 549          produces only N/2 (widened) results (by summing up pairs of
 550          intermediate results) rather than all N results.  Therefore, we
 551          cannot allow this pattern when we want to get all the results and in
 552          the correct order (as is the case when this computation is in an
 553          inner-loop nested in an outer-loop that us being vectorized).  */
 554
 555 static tree
 556 vect_recog_widen_sum_pattern (tree last_stmt, tree *type_in, tree *type_out)
 557 {
 558   tree stmt, expr;
 559   tree oprnd0, oprnd1;
 560   stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
 561   tree type, half_type;
 562   tree pattern_expr;
 563   loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
 564   struct loop *loop = LOOP_VINFO_LOOP (loop_info);
 565
 566   if (TREE_CODE (last_stmt) != GIMPLE_MODIFY_STMT)
 567     return NULL;
 568
 569   expr = GIMPLE_STMT_OPERAND (last_stmt, 1);
 570   type = TREE_TYPE (expr);
 571
 572   /* Look for the following pattern
 573           DX = (TYPE) X;
 574           sum_1 = DX + sum_0;
 575      In which DX is at least double the size of X, and sum_1 has been
 576      recognized as a reduction variable.
 577    */
 578
 579   /* Starting from LAST_STMT, follow the defs of its uses in search
 580      of the above pattern.  */
 581
 582   if (TREE_CODE (expr) != PLUS_EXPR)
 583     return NULL;
 584
 585   if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
 586     return NULL;
 587
 588   oprnd0 = TREE_OPERAND (expr, 0);
 589   oprnd1 = TREE_OPERAND (expr, 1);
 590   if (TYPE_MAIN_VARIANT (TREE_TYPE (oprnd0)) != TYPE_MAIN_VARIANT (type)
 591       || TYPE_MAIN_VARIANT (TREE_TYPE (oprnd1)) != TYPE_MAIN_VARIANT (type))
 592     return NULL;
 593
 594   /* So far so good. Since last_stmt was detected as a (summation) reduction,
 595      we know that oprnd1 is the reduction variable (defined by a loop-header
 596      phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
 597      Left to check that oprnd0 is defined by a cast from type 'type' to type
 598      'TYPE'.  */
 599
 600   if (!widened_name_p (oprnd0, last_stmt, &half_type, &stmt))
 601     return NULL;
 602
 603   oprnd0 = TREE_OPERAND (GIMPLE_STMT_OPERAND (stmt, 1), 0);
 604   *type_in = half_type;
 605   *type_out = type;
 606
 607   /* Pattern detected. Create a stmt to be used to replace the pattern: */
 608   pattern_expr = build2 (WIDEN_SUM_EXPR, type, oprnd0, oprnd1);
 609   if (vect_print_dump_info (REPORT_DETAILS))
 610     {
 611       fprintf (vect_dump, "vect_recog_widen_sum_pattern: detected: ");
 612       print_generic_expr (vect_dump, pattern_expr, TDF_SLIM);
 613     }
 614
 615   /* We don't allow changing the order of the computation in the inner-loop
 616      when doing outer-loop vectorization.  */
 617   if (nested_in_vect_loop_p (loop, last_stmt))
 618     {
 619       if (vect_print_dump_info (REPORT_DETAILS))
 620         fprintf (vect_dump, "vect_recog_widen_sum_pattern: not allowed.");
 621       return NULL;
 622     }
 623
 624   return pattern_expr;
 625 }
 626
 627
 628 /* Function vect_pattern_recog_1
 629
 630    Input:
 631    PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
 632         computation pattern.
 633    STMT: A stmt from which the pattern search should start.
 634
 635    If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
 636    expression that computes the same functionality and can be used to
 637    replace the sequence of stmts that are involved in the pattern.
 638
 639    Output:
 640    This function checks if the expression returned by PATTERN_RECOG_FUNC is
 641    supported in vector form by the target.  We use 'TYPE_IN' to obtain the
 642    relevant vector type. If 'TYPE_IN' is already a vector type, then this
 643    indicates that target support had already been checked by PATTERN_RECOG_FUNC.
 644    If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
 645    to the available target pattern.
 646
 647    This function also does some bookkeeping, as explained in the documentation
 648    for vect_recog_pattern.  */
 649
 650 static void
 651 vect_pattern_recog_1 (
 652         tree (* vect_recog_func) (tree, tree *, tree *),
 653         block_stmt_iterator si)
 654 {
 655   tree stmt = bsi_stmt (si);
 656   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
 657   stmt_vec_info pattern_stmt_info;
 658   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
 659   tree pattern_expr;
 660   tree pattern_vectype;
 661   tree type_in, type_out;
 662   tree pattern_type;
 663   enum tree_code code;
 664   tree var, var_name;
 665   stmt_ann_t ann;
 666
 667   pattern_expr = (* vect_recog_func) (stmt, &type_in, &type_out);
 668   if (!pattern_expr)
 669     return;
 670
 671   if (VECTOR_MODE_P (TYPE_MODE (type_in)))
 672     {
 673       /* No need to check target support (already checked by the pattern
 674          recognition function).  */
 675       pattern_vectype = type_in;
 676     }
 677   else
 678     {
 679       enum tree_code vec_mode;
 680       enum insn_code icode;
 681       optab optab;
 682
 683       /* Check target support  */
 684       pattern_vectype = get_vectype_for_scalar_type (type_in);
 685       if (!pattern_vectype)
 686         return;
 687
 688       optab = optab_for_tree_code (TREE_CODE (pattern_expr), pattern_vectype,
 689                                    optab_default);
 690       vec_mode = TYPE_MODE (pattern_vectype);
 691       if (!optab
 692           || (icode = optab_handler (optab, vec_mode)->insn_code) ==
 693               CODE_FOR_nothing
 694           || (type_out
 695               && (!get_vectype_for_scalar_type (type_out)
 696                   || (insn_data[icode].operand[0].mode !=
 697                       TYPE_MODE (get_vectype_for_scalar_type (type_out))))))
 698         return;
 699     }
 700
 701   /* Found a vectorizable pattern.  */
 702   if (vect_print_dump_info (REPORT_DETAILS))
 703     {
 704       fprintf (vect_dump, "pattern recognized: ");
 705       print_generic_expr (vect_dump, pattern_expr, TDF_SLIM);
 706     }
 707
 708   /* Mark the stmts that are involved in the pattern,
 709      create a new stmt to express the pattern and insert it.  */
 710   code = TREE_CODE (pattern_expr);
 711   pattern_type = TREE_TYPE (pattern_expr);
 712   var = create_tmp_var (pattern_type, "patt");
 713   add_referenced_var (var);
 714   var_name = make_ssa_name (var, NULL_TREE);
 715   pattern_expr = build_gimple_modify_stmt (var_name, pattern_expr);
 716   SSA_NAME_DEF_STMT (var_name) = pattern_expr;
 717   bsi_insert_before (&si, pattern_expr, BSI_SAME_STMT);
 718   ann = stmt_ann (pattern_expr);
 719   set_stmt_info (ann, new_stmt_vec_info (pattern_expr, loop_vinfo));
 720   pattern_stmt_info = vinfo_for_stmt (pattern_expr);
 721
 722   STMT_VINFO_RELATED_STMT (pattern_stmt_info) = stmt;
 723   STMT_VINFO_DEF_TYPE (pattern_stmt_info) = STMT_VINFO_DEF_TYPE (stmt_info);
 724   STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
 725   STMT_VINFO_IN_PATTERN_P (stmt_info) = true;
 726   STMT_VINFO_RELATED_STMT (stmt_info) = pattern_expr;
 727
 728   return;
 729 }
 730
 731
 732 /* Function vect_pattern_recog
 733
 734    Input:
 735    LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
 736         computation idioms.
 737
 738    Output - for each computation idiom that is detected we insert a new stmt
 739         that provides the same functionality and that can be vectorized. We
 740         also record some information in the struct_stmt_info of the relevant
 741         stmts, as explained below:
 742
 743    At the entry to this function we have the following stmts, with the
 744    following initial value in the STMT_VINFO fields:
 745
 746          stmt                     in_pattern_p  related_stmt    vec_stmt
 747          S1: a_i = ....                 -       -               -
 748          S2: a_2 = ..use(a_i)..         -       -               -
 749          S3: a_1 = ..use(a_2)..         -       -               -
 750          S4: a_0 = ..use(a_1)..         -       -               -
 751          S5: ... = ..use(a_0)..         -       -               -
 752
 753    Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
 754    represented by a single stmt. We then:
 755    - create a new stmt S6 that will replace the pattern.
 756    - insert the new stmt S6 before the last stmt in the pattern
 757    - fill in the STMT_VINFO fields as follows:
 758
 759                                   in_pattern_p  related_stmt    vec_stmt
 760          S1: a_i = ....                 -       -               -
 761          S2: a_2 = ..use(a_i)..         -       -               -
 762          S3: a_1 = ..use(a_2)..         -       -               -
 763        > S6: a_new = ....               -       S4              -
 764          S4: a_0 = ..use(a_1)..         true    S6              -
 765          S5: ... = ..use(a_0)..         -       -               -
 766
 767    (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
 768     to each other through the RELATED_STMT field).
 769
 770    S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
 771    of S4 because it will replace all its uses.  Stmts {S1,S2,S3} will
 772    remain irrelevant unless used by stmts other than S4.
 773
 774    If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
 775    (because they are marked as irrelevant). It will vectorize S6, and record
 776    a pointer to the new vector stmt VS6 both from S6 (as usual), and also
 777    from S4. We do that so that when we get to vectorizing stmts that use the
 778    def of S4 (like S5 that uses a_0), we'll know where to take the relevant
 779    vector-def from. S4 will be skipped, and S5 will be vectorized as usual:
 780
 781                                   in_pattern_p  related_stmt    vec_stmt
 782          S1: a_i = ....                 -       -               -
 783          S2: a_2 = ..use(a_i)..         -       -               -
 784          S3: a_1 = ..use(a_2)..         -       -               -
 785        > VS6: va_new = ....             -       -               -
 786          S6: a_new = ....               -       S4              VS6
 787          S4: a_0 = ..use(a_1)..         true    S6              VS6
 788        > VS5: ... = ..vuse(va_new)..    -       -               -
 789          S5: ... = ..use(a_0)..         -       -               -
 790
 791    DCE could then get rid of {S1,S2,S3,S4,S5,S6} (if their defs are not used
 792    elsewhere), and we'll end up with:
 793
 794         VS6: va_new = ....
 795         VS5: ... = ..vuse(va_new)..
 796
 797    If vectorization does not succeed, DCE will clean S6 away (its def is
 798    not used), and we'll end up with the original sequence.
 799 */
 800
 801 void
 802 vect_pattern_recog (loop_vec_info loop_vinfo)
 803 {
 804   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
 805   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
 806   unsigned int nbbs = loop->num_nodes;
 807   block_stmt_iterator si;
 808   tree stmt;
 809   unsigned int i, j;
 810   tree (* vect_recog_func_ptr) (tree, tree *, tree *);
 811
 812   if (vect_print_dump_info (REPORT_DETAILS))
 813     fprintf (vect_dump, "=== vect_pattern_recog ===");
 814
 815   /* Scan through the loop stmts, applying the pattern recognition
 816      functions starting at each stmt visited:  */
 817   for (i = 0; i < nbbs; i++)
 818     {
 819       basic_block bb = bbs[i];
 820       for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
 821         {
 822           stmt = bsi_stmt (si);
 823
 824           /* Scan over all generic vect_recog_xxx_pattern functions.  */
 825           for (j = 0; j < NUM_PATTERNS; j++)
 826             {
 827               vect_recog_func_ptr = vect_vect_recog_func_ptrs[j];
 828               vect_pattern_recog_1 (vect_recog_func_ptr, si);
 829             }
 830         }
 831     }
 832 }