gcc/vector-builder.h

   1 /* A class for building vector constant patterns.
   2    Copyright (C) 2017-2019 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it under
   7 the terms of the GNU General Public License as published by the Free
   8 Software Foundation; either version 3, or (at your option) any later
   9 version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 #ifndef GCC_VECTOR_BUILDER_H
  21 #define GCC_VECTOR_BUILDER_H
  22
  23 /* This class is a wrapper around auto_vec<T> for building vectors of T.
  24    It aims to encode each vector as npatterns interleaved patterns,
  25    where each pattern represents a sequence:
  26
  27      { BASE0, BASE1, BASE1 + STEP, BASE1 + STEP*2, BASE1 + STEP*3, ... }
  28
  29    The first three elements in each pattern provide enough information
  30    to derive the other elements.  If all patterns have a STEP of zero,
  31    we only need to encode the first two elements in each pattern.
  32    If BASE1 is also equal to BASE0 for all patterns, we only need to
  33    encode the first element in each pattern.  The number of encoded
  34    elements per pattern is given by nelts_per_pattern.
  35
  36    The class can be used in two ways:
  37
  38    1. It can be used to build a full image of the vector, which is then
  39       canonicalized by finalize ().  In this case npatterns is initially
  40       the number of elements in the vector and nelts_per_pattern is
  41       initially 1.
  42
  43    2. It can be used to build a vector that already has a known encoding.
  44       This is preferred since it is more efficient and copes with
  45       variable-length vectors.  finalize () then canonicalizes the encoding
  46       to a simpler form if possible.
  47
  48    Shape is the type that specifies the number of elements in the vector
  49    and (where relevant) the type of each element.
  50
  51    The derived class Derived provides the functionality of this class
  52    for specific Ts.  Derived needs to provide the following interface:
  53
  54       bool equal_p (T elt1, T elt2) const;
  55
  56           Return true if elements ELT1 and ELT2 are equal.
  57
  58       bool allow_steps_p () const;
  59
  60           Return true if a stepped representation is OK.  We don't allow
  61           linear series for anything other than integers, to avoid problems
  62           with rounding.
  63
  64       bool integral_p (T elt) const;
  65
  66           Return true if element ELT can be interpreted as an integer.
  67
  68       StepType step (T elt1, T elt2) const;
  69
  70           Return the value of element ELT2 minus the value of element ELT1,
  71           given integral_p (ELT1) && integral_p (ELT2).  There is no fixed
  72           choice of StepType.
  73
  74       T apply_step (T base, unsigned int factor, StepType step) const;
  75
  76           Return a vector element with the value BASE + FACTOR * STEP.
  77
  78       bool can_elide_p (T elt) const;
  79
  80           Return true if we can drop element ELT, even if the retained
  81           elements are different.  This is provided for TREE_OVERFLOW
  82           handling.
  83
  84       void note_representative (T *elt1_ptr, T elt2);
  85
  86           Record that ELT2 is being elided, given that ELT1_PTR points to
  87           the last encoded element for the containing pattern.  This is
  88           again provided for TREE_OVERFLOW handling.
  89
  90       static poly_uint64 shape_nelts (Shape shape);
  91
  92           Return the number of elements in SHAPE.
  93
  94     The class provides additional functionality for the case in which
  95     T can describe a vector constant as well as an individual element.
  96     This functionality requires:
  97
  98       static poly_uint64 nelts_of (T x);
  99
 100           Return the number of elements in vector constant X.
 101
 102       static unsigned int npatterns_of (T x);
 103
 104           Return the number of patterns used to encode vector constant X.
 105
 106       static unsigned int nelts_per_pattern_of (T x);
 107
 108           Return the number of elements used to encode each pattern
 109           in vector constant X.  */
 110
 111 template<typename T, typename Shape, typename Derived>
 112 class vector_builder : public auto_vec<T, 32>
 113 {
 114 public:
 115   vector_builder ();
 116
 117   poly_uint64 full_nelts () const { return m_full_nelts; }
 118   unsigned int npatterns () const { return m_npatterns; }
 119   unsigned int nelts_per_pattern () const { return m_nelts_per_pattern; }
 120   unsigned int encoded_nelts () const;
 121   bool encoded_full_vector_p () const;
 122   T elt (unsigned int) const;
 123   unsigned int count_dups (int, int, int) const;
 124
 125   bool operator == (const Derived &) const;
 126   bool operator != (const Derived &x) const { return !operator == (x); }
 127
 128   bool new_unary_operation (Shape, T, bool);
 129   bool new_binary_operation (Shape, T, T, bool);
 130
 131   void finalize ();
 132
 133   static unsigned int binary_encoded_nelts (T, T);
 134
 135 protected:
 136   void new_vector (poly_uint64, unsigned int, unsigned int);
 137   void reshape (unsigned int, unsigned int);
 138   bool repeating_sequence_p (unsigned int, unsigned int, unsigned int);
 139   bool stepped_sequence_p (unsigned int, unsigned int, unsigned int);
 140   bool try_npatterns (unsigned int);
 141
 142 private:
 143   vector_builder (const vector_builder &);
 144   vector_builder &operator= (const vector_builder &);
 145   Derived *derived () { return static_cast<Derived *> (this); }
 146   const Derived *derived () const;
 147
 148   poly_uint64 m_full_nelts;
 149   unsigned int m_npatterns;
 150   unsigned int m_nelts_per_pattern;
 151 };
 152
 153 template<typename T, typename Shape, typename Derived>
 154 inline const Derived *
 155 vector_builder<T, Shape, Derived>::derived () const
 156 {
 157   return static_cast<const Derived *> (this);
 158 }
 159
 160 template<typename T, typename Shape, typename Derived>
 161 inline
 162 vector_builder<T, Shape, Derived>::vector_builder ()
 163   : m_full_nelts (0),
 164     m_npatterns (0),
 165     m_nelts_per_pattern (0)
 166 {}
 167
 168 /* Return the number of elements that are explicitly encoded.  The vec
 169    starts with these explicitly-encoded elements and may contain additional
 170    elided elements.  */
 171
 172 template<typename T, typename Shape, typename Derived>
 173 inline unsigned int
 174 vector_builder<T, Shape, Derived>::encoded_nelts () const
 175 {
 176   return m_npatterns * m_nelts_per_pattern;
 177 }
 178
 179 /* Return true if every element of the vector is explicitly encoded.  */
 180
 181 template<typename T, typename Shape, typename Derived>
 182 inline bool
 183 vector_builder<T, Shape, Derived>::encoded_full_vector_p () const
 184 {
 185   return known_eq (m_npatterns * m_nelts_per_pattern, m_full_nelts);
 186 }
 187
 188 /* Start building a vector that has FULL_NELTS elements.  Initially
 189    encode it using NPATTERNS patterns with NELTS_PER_PATTERN each.  */
 190
 191 template<typename T, typename Shape, typename Derived>
 192 void
 193 vector_builder<T, Shape, Derived>::new_vector (poly_uint64 full_nelts,
 194                                                unsigned int npatterns,
 195                                                unsigned int nelts_per_pattern)
 196 {
 197   m_full_nelts = full_nelts;
 198   m_npatterns = npatterns;
 199   m_nelts_per_pattern = nelts_per_pattern;
 200   this->reserve (encoded_nelts ());
 201   this->truncate (0);
 202 }
 203
 204 /* Return true if this vector and OTHER have the same elements and
 205    are encoded in the same way.  */
 206
 207 template<typename T, typename Shape, typename Derived>
 208 bool
 209 vector_builder<T, Shape, Derived>::operator == (const Derived &other) const
 210 {
 211   if (maybe_ne (m_full_nelts, other.m_full_nelts)
 212       || m_npatterns != other.m_npatterns
 213       || m_nelts_per_pattern != other.m_nelts_per_pattern)
 214     return false;
 215
 216   unsigned int nelts = encoded_nelts ();
 217   for (unsigned int i = 0; i < nelts; ++i)
 218     if (!derived ()->equal_p ((*this)[i], other[i]))
 219       return false;
 220
 221   return true;
 222 }
 223
 224 /* Return the value of vector element I, which might or might not be
 225    encoded explicitly.  */
 226
 227 template<typename T, typename Shape, typename Derived>
 228 T
 229 vector_builder<T, Shape, Derived>::elt (unsigned int i) const
 230 {
 231   /* First handle elements that are already present in the underlying
 232      vector, regardless of whether they're part of the encoding or not.  */
 233   if (i < this->length ())
 234     return (*this)[i];
 235
 236   /* Extrapolation is only possible if the encoding has been fully
 237      populated.  */
 238   gcc_checking_assert (encoded_nelts () <= this->length ());
 239
 240   /* Identify the pattern that contains element I and work out the index of
 241      the last encoded element for that pattern.  */
 242   unsigned int pattern = i % m_npatterns;
 243   unsigned int count = i / m_npatterns;
 244   unsigned int final_i = encoded_nelts () - m_npatterns + pattern;
 245   T final = (*this)[final_i];
 246
 247   /* If there are no steps, the final encoded value is the right one.  */
 248   if (m_nelts_per_pattern <= 2)
 249     return final;
 250
 251   /* Otherwise work out the value from the last two encoded elements.  */
 252   T prev = (*this)[final_i - m_npatterns];
 253   return derived ()->apply_step (final, count - 2,
 254                                  derived ()->step (prev, final));
 255 }
 256
 257 /* Try to start building a new vector of shape SHAPE that holds the result of
 258    a unary operation on vector constant VEC.  ALLOW_STEPPED_P is true if the
 259    operation can handle stepped encodings directly, without having to expand
 260    the full sequence.
 261
 262    Return true if the operation is possible, which it always is when
 263    ALLOW_STEPPED_P is true.  Leave the builder unchanged otherwise.  */
 264
 265 template<typename T, typename Shape, typename Derived>
 266 bool
 267 vector_builder<T, Shape, Derived>::new_unary_operation (Shape shape, T vec,
 268                                                         bool allow_stepped_p)
 269 {
 270   poly_uint64 full_nelts = Derived::shape_nelts (shape);
 271   gcc_assert (known_eq (full_nelts, Derived::nelts_of (vec)));
 272   unsigned int npatterns = Derived::npatterns_of (vec);
 273   unsigned int nelts_per_pattern = Derived::nelts_per_pattern_of (vec);
 274   if (!allow_stepped_p && nelts_per_pattern > 2)
 275     {
 276       if (!full_nelts.is_constant ())
 277         return false;
 278       npatterns = full_nelts.to_constant ();
 279       nelts_per_pattern = 1;
 280     }
 281   derived ()->new_vector (shape, npatterns, nelts_per_pattern);
 282   return true;
 283 }
 284
 285 /* Try to start building a new vector of shape SHAPE that holds the result of
 286    a binary operation on vector constants VEC1 and VEC2.  ALLOW_STEPPED_P is
 287    true if the operation can handle stepped encodings directly, without
 288    having to expand the full sequence.
 289
 290    Return true if the operation is possible.  Leave the builder unchanged
 291    otherwise.  */
 292
 293 template<typename T, typename Shape, typename Derived>
 294 bool
 295 vector_builder<T, Shape, Derived>::new_binary_operation (Shape shape,
 296                                                          T vec1, T vec2,
 297                                                          bool allow_stepped_p)
 298 {
 299   poly_uint64 full_nelts = Derived::shape_nelts (shape);
 300   gcc_assert (known_eq (full_nelts, Derived::nelts_of (vec1))
 301               && known_eq (full_nelts, Derived::nelts_of (vec2)));
 302   /* Conceptually we split the patterns in VEC1 and VEC2 until we have
 303      an equal number for both.  Each split pattern requires the same
 304      number of elements per pattern as the original.  E.g. splitting:
 305
 306        { 1, 2, 3, ... }
 307
 308      into two gives:
 309
 310        { 1, 3, 5, ... }
 311        { 2, 4, 6, ... }
 312
 313      while splitting:
 314
 315        { 1, 0, ... }
 316
 317      into two gives:
 318
 319        { 1, 0, ... }
 320        { 0, 0, ... }.  */
 321   unsigned int npatterns
 322     = least_common_multiple (Derived::npatterns_of (vec1),
 323                              Derived::npatterns_of (vec2));
 324   unsigned int nelts_per_pattern
 325     = MAX (Derived::nelts_per_pattern_of (vec1),
 326            Derived::nelts_per_pattern_of (vec2));
 327   if (!allow_stepped_p && nelts_per_pattern > 2)
 328     {
 329       if (!full_nelts.is_constant ())
 330         return false;
 331       npatterns = full_nelts.to_constant ();
 332       nelts_per_pattern = 1;
 333     }
 334   derived ()->new_vector (shape, npatterns, nelts_per_pattern);
 335   return true;
 336 }
 337
 338 /* Return the number of elements that the caller needs to operate on in
 339    order to handle a binary operation on vector constants VEC1 and VEC2.
 340    This static function is used instead of new_binary_operation if the
 341    result of the operation is not a constant vector.  */
 342
 343 template<typename T, typename Shape, typename Derived>
 344 unsigned int
 345 vector_builder<T, Shape, Derived>::binary_encoded_nelts (T vec1, T vec2)
 346 {
 347   poly_uint64 nelts = Derived::nelts_of (vec1);
 348   gcc_assert (known_eq (nelts, Derived::nelts_of (vec2)));
 349   /* See new_binary_operation for details.  */
 350   unsigned int npatterns
 351     = least_common_multiple (Derived::npatterns_of (vec1),
 352                              Derived::npatterns_of (vec2));
 353   unsigned int nelts_per_pattern
 354     = MAX (Derived::nelts_per_pattern_of (vec1),
 355            Derived::nelts_per_pattern_of (vec2));
 356   unsigned HOST_WIDE_INT const_nelts;
 357   if (nelts.is_constant (&const_nelts))
 358     return MIN (npatterns * nelts_per_pattern, const_nelts);
 359   return npatterns * nelts_per_pattern;
 360 }
 361
 362 /* Return the number of leading duplicate elements in the range
 363    [START:END:STEP].  The value is always at least 1.  */
 364
 365 template<typename T, typename Shape, typename Derived>
 366 unsigned int
 367 vector_builder<T, Shape, Derived>::count_dups (int start, int end,
 368                                                int step) const
 369 {
 370   gcc_assert ((end - start) % step == 0);
 371
 372   unsigned int ndups = 1;
 373   for (int i = start + step;
 374        i != end && derived ()->equal_p (elt (i), elt (start));
 375        i += step)
 376     ndups++;
 377   return ndups;
 378 }
 379
 380 /* Change the encoding to NPATTERNS patterns of NELTS_PER_PATTERN each,
 381    but without changing the underlying vector.  */
 382
 383 template<typename T, typename Shape, typename Derived>
 384 void
 385 vector_builder<T, Shape, Derived>::reshape (unsigned int npatterns,
 386                                             unsigned int nelts_per_pattern)
 387 {
 388   unsigned int old_encoded_nelts = encoded_nelts ();
 389   unsigned int new_encoded_nelts = npatterns * nelts_per_pattern;
 390   gcc_checking_assert (new_encoded_nelts <= old_encoded_nelts);
 391   unsigned int next = new_encoded_nelts - npatterns;
 392   for (unsigned int i = new_encoded_nelts; i < old_encoded_nelts; ++i)
 393     {
 394       derived ()->note_representative (&(*this)[next], (*this)[i]);
 395       next += 1;
 396       if (next == new_encoded_nelts)
 397         next -= npatterns;
 398     }
 399   m_npatterns = npatterns;
 400   m_nelts_per_pattern = nelts_per_pattern;
 401 }
 402
 403 /* Return true if elements [START, END) contain a repeating sequence of
 404    STEP elements.  */
 405
 406 template<typename T, typename Shape, typename Derived>
 407 bool
 408 vector_builder<T, Shape, Derived>::repeating_sequence_p (unsigned int start,
 409                                                          unsigned int end,
 410                                                          unsigned int step)
 411 {
 412   for (unsigned int i = start; i < end - step; ++i)
 413     if (!derived ()->equal_p ((*this)[i], (*this)[i + step]))
 414       return false;
 415   return true;
 416 }
 417
 418 /* Return true if elements [START, END) contain STEP interleaved linear
 419    series.  */
 420
 421 template<typename T, typename Shape, typename Derived>
 422 bool
 423 vector_builder<T, Shape, Derived>::stepped_sequence_p (unsigned int start,
 424                                                        unsigned int end,
 425                                                        unsigned int step)
 426 {
 427   if (!derived ()->allow_steps_p ())
 428     return false;
 429
 430   for (unsigned int i = start + step * 2; i < end; ++i)
 431     {
 432       T elt1 = (*this)[i - step * 2];
 433       T elt2 = (*this)[i - step];
 434       T elt3 = (*this)[i];
 435
 436       if (!derived ()->integral_p (elt1)
 437           || !derived ()->integral_p (elt2)
 438           || !derived ()->integral_p (elt3))
 439         return false;
 440
 441       if (maybe_ne (derived ()->step (elt1, elt2),
 442                     derived ()->step (elt2, elt3)))
 443         return false;
 444
 445       if (!derived ()->can_elide_p (elt3))
 446         return false;
 447     }
 448   return true;
 449 }
 450
 451 /* Try to change the number of encoded patterns to NPATTERNS, returning
 452    true on success.  */
 453
 454 template<typename T, typename Shape, typename Derived>
 455 bool
 456 vector_builder<T, Shape, Derived>::try_npatterns (unsigned int npatterns)
 457 {
 458   if (m_nelts_per_pattern == 1)
 459     {
 460       /* See whether NPATTERNS is valid with the current 1-element-per-pattern
 461          encoding.  */
 462       if (repeating_sequence_p (0, encoded_nelts (), npatterns))
 463         {
 464           reshape (npatterns, 1);
 465           return true;
 466         }
 467
 468       /* We can only increase the number of elements per pattern if all
 469          elements are still encoded explicitly.  */
 470       if (!encoded_full_vector_p ())
 471         return false;
 472     }
 473
 474   if (m_nelts_per_pattern <= 2)
 475     {
 476       /* See whether NPATTERNS is valid with a 2-element-per-pattern
 477          encoding.  */
 478       if (repeating_sequence_p (npatterns, encoded_nelts (), npatterns))
 479         {
 480           reshape (npatterns, 2);
 481           return true;
 482         }
 483
 484       /* We can only increase the number of elements per pattern if all
 485          elements are still encoded explicitly.  */
 486       if (!encoded_full_vector_p ())
 487         return false;
 488     }
 489
 490   if (m_nelts_per_pattern <= 3)
 491     {
 492       /* See whether we have NPATTERNS interleaved linear series,
 493          giving a 3-element-per-pattern encoding.  */
 494       if (stepped_sequence_p (npatterns, encoded_nelts (), npatterns))
 495         {
 496           reshape (npatterns, 3);
 497           return true;
 498         }
 499       return false;
 500     }
 501
 502   gcc_unreachable ();
 503 }
 504
 505 /* Replace the current encoding with the canonical form.  */
 506
 507 template<typename T, typename Shape, typename Derived>
 508 void
 509 vector_builder<T, Shape, Derived>::finalize ()
 510 {
 511   /* The encoding requires the same number of elements to come from each
 512      pattern.  */
 513   gcc_assert (multiple_p (m_full_nelts, m_npatterns));
 514
 515   /* Allow the caller to build more elements than necessary.  For example,
 516      it's often convenient to build a stepped vector from the natural
 517      encoding of three elements even if the vector itself only has two.  */
 518   unsigned HOST_WIDE_INT const_full_nelts;
 519   if (m_full_nelts.is_constant (&const_full_nelts)
 520       && const_full_nelts <= encoded_nelts ())
 521     {
 522       m_npatterns = const_full_nelts;
 523       m_nelts_per_pattern = 1;
 524     }
 525
 526   /* Try to whittle down the number of elements per pattern.  That is:
 527
 528      1. If we have stepped patterns whose steps are all 0, reduce the
 529         number of elements per pattern from 3 to 2.
 530
 531      2. If we have background fill values that are the same as the
 532         foreground values, reduce the number of elements per pattern
 533         from 2 to 1.  */
 534   while (m_nelts_per_pattern > 1
 535          && repeating_sequence_p (encoded_nelts () - m_npatterns * 2,
 536                                   encoded_nelts (), m_npatterns))
 537     /* The last two sequences of M_NPATTERNS elements are equal,
 538        so remove the last one.  */
 539     reshape (m_npatterns, m_nelts_per_pattern - 1);
 540
 541   if (pow2p_hwi (m_npatterns))
 542     {
 543       /* Try to halve the number of patterns while doing so gives a
 544          valid pattern.  This approach is linear in the number of
 545          elements, whereas searcing from 1 up would be O(n*log(n)).
 546
 547          Each halving step tries to keep the number of elements per pattern
 548          the same.  If that isn't possible, and if all elements are still
 549          explicitly encoded, the halving step can instead increase the number
 550          of elements per pattern.
 551
 552          E.g. for:
 553
 554              { 0, 2, 3, 4, 5, 6, 7, 8 }  npatterns == 8  full_nelts == 8
 555
 556          we first realize that the second half of the sequence is not
 557          equal to the first, so we cannot maintain 1 element per pattern
 558          for npatterns == 4.  Instead we halve the number of patterns
 559          and double the number of elements per pattern, treating this
 560          as a "foreground" { 0, 2, 3, 4 } against a "background" of
 561          { 5, 6, 7, 8 | 5, 6, 7, 8 ... }:
 562
 563              { 0, 2, 3, 4 | 5, 6, 7, 8 }  npatterns == 4
 564
 565          Next we realize that this is *not* a foreround of { 0, 2 }
 566          against a background of { 3, 4 | 3, 4 ... }, so the only
 567          remaining option for reducing the number of patterns is
 568          to use a foreground of { 0, 2 } against a stepped background
 569          of { 1, 2 | 3, 4 | 5, 6 ... }.  This is valid because we still
 570          haven't elided any elements:
 571
 572              { 0, 2 | 3, 4 | 5, 6 }  npatterns == 2
 573
 574          This in turn can be reduced to a foreground of { 0 } against a
 575          stepped background of { 1 | 2 | 3 ... }:
 576
 577              { 0 | 2 | 3 }  npatterns == 1
 578
 579          This last step would not have been possible for:
 580
 581              { 0, 0 | 3, 4 | 5, 6 }  npatterns == 2.  */
 582       while ((m_npatterns & 1) == 0 && try_npatterns (m_npatterns / 2))
 583         continue;
 584
 585       /* Builders of arbitrary fixed-length vectors can use:
 586
 587              new_vector (x, x, 1)
 588
 589          so that every element is specified explicitly.  Handle cases
 590          that are actually wrapping series, like { 0, 1, 2, 3, 0, 1, 2, 3 }
 591          would be for 2-bit elements.  We'll have treated them as
 592          duplicates in the loop above.  */
 593       if (m_nelts_per_pattern == 1
 594           && m_full_nelts.is_constant (&const_full_nelts)
 595           && this->length () >= const_full_nelts
 596           && (m_npatterns & 3) == 0
 597           && stepped_sequence_p (m_npatterns / 4, const_full_nelts,
 598                                  m_npatterns / 4))
 599         {
 600           reshape (m_npatterns / 4, 3);
 601           while ((m_npatterns & 1) == 0 && try_npatterns (m_npatterns / 2))
 602             continue;
 603         }
 604     }
 605   else
 606     /* For the non-power-of-2 case, do a simple search up from 1.  */
 607     for (unsigned int i = 1; i <= m_npatterns / 2; ++i)
 608       if (m_npatterns % i == 0 && try_npatterns (i))
 609         break;
 610 }
 611
 612 #endif