gcc/vec-perm-indices.cc

   1 /* A representation of vector permutation indices.
   2    Copyright (C) 2017-2024 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it under
   7 the terms of the GNU General Public License as published by the Free
   8 Software Foundation; either version 3, or (at your option) any later
   9 version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 #include "config.h"
  21 #include "system.h"
  22 #include "coretypes.h"
  23 #include "vec-perm-indices.h"
  24 #include "tree.h"
  25 #include "fold-const.h"
  26 #include "tree-vector-builder.h"
  27 #include "backend.h"
  28 #include "rtl.h"
  29 #include "memmodel.h"
  30 #include "emit-rtl.h"
  31 #include "selftest.h"
  32 #include "rtx-vector-builder.h"
  33
  34 /* Switch to a new permutation vector that selects between NINPUTS vector
  35    inputs that have NELTS_PER_INPUT elements each.  Take the elements of the
  36    new permutation vector from ELEMENTS, clamping each one to be in range.  */
  37
  38 void
  39 vec_perm_indices::new_vector (const vec_perm_builder &elements,
  40                               unsigned int ninputs,
  41                               poly_uint64 nelts_per_input)
  42 {
  43   m_ninputs = ninputs;
  44   m_nelts_per_input = nelts_per_input;
  45   /* If the vector has a constant number of elements, expand the
  46      encoding and clamp each element.  E.g. { 0, 2, 4, ... } might
  47      wrap halfway if there is only one vector input, and we want
  48      the wrapped form to be the canonical one.
  49
  50      If the vector has a variable number of elements, just copy
  51      the encoding.  In that case the unwrapped form is canonical
  52      and there is no way of representing the wrapped form.  */
  53   poly_uint64 full_nelts = elements.full_nelts ();
  54   unsigned HOST_WIDE_INT copy_nelts;
  55   if (full_nelts.is_constant (&copy_nelts))
  56     m_encoding.new_vector (full_nelts, copy_nelts, 1);
  57   else
  58     {
  59       copy_nelts = elements.encoded_nelts ();
  60       m_encoding.new_vector (full_nelts, elements.npatterns (),
  61                              elements.nelts_per_pattern ());
  62     }
  63   unsigned int npatterns = m_encoding.npatterns ();
  64   for (unsigned int i = 0; i < npatterns; ++i)
  65     m_encoding.quick_push (clamp (elements.elt (i)));
  66   /* Use the fact that:
  67
  68         (a + b) % c == ((a % c) + (b % c)) % c
  69
  70      to simplify the clamping of variable-length vectors.  */
  71   for (unsigned int i = npatterns; i < copy_nelts; ++i)
  72     {
  73       element_type step = clamp (elements.elt (i)
  74                                  - elements.elt (i - npatterns));
  75       m_encoding.quick_push (clamp (m_encoding[i - npatterns] + step));
  76     }
  77   m_encoding.finalize ();
  78 }
  79
  80 /* Switch to a new permutation vector that selects the same input elements
  81    as ORIG, but with each element split into FACTOR pieces.  For example,
  82    if ORIG is { 1, 2, 0, 3 } and FACTOR is 2, the new permutation is
  83    { 2, 3, 4, 5, 0, 1, 6, 7 }.  */
  84
  85 void
  86 vec_perm_indices::new_expanded_vector (const vec_perm_indices &orig,
  87                                        unsigned int factor)
  88 {
  89   m_ninputs = orig.m_ninputs;
  90   m_nelts_per_input = orig.m_nelts_per_input * factor;
  91   m_encoding.new_vector (orig.m_encoding.full_nelts () * factor,
  92                          orig.m_encoding.npatterns () * factor,
  93                          orig.m_encoding.nelts_per_pattern ());
  94   unsigned int encoded_nelts = orig.m_encoding.encoded_nelts ();
  95   for (unsigned int i = 0; i < encoded_nelts; ++i)
  96     {
  97       element_type base = orig.m_encoding[i] * factor;
  98       for (unsigned int j = 0; j < factor; ++j)
  99         m_encoding.quick_push (base + j);
 100     }
 101   m_encoding.finalize ();
 102 }
 103
 104 /* Check whether we can switch to a new permutation vector that
 105    selects the same input elements as ORIG, but with each element
 106    built up from FACTOR pieces.  Return true if yes, otherwise
 107    return false.  Every FACTOR permutation indexes should be
 108    continuous separately and the first one of each batch should
 109    be able to exactly modulo FACTOR.  For example, if ORIG is
 110    { 2, 3, 4, 5, 0, 1, 6, 7 } and FACTOR is 2, the new permutation
 111    is { 1, 2, 0, 3 }.  */
 112
 113 bool
 114 vec_perm_indices::new_shrunk_vector (const vec_perm_indices &orig,
 115                                      unsigned int factor)
 116 {
 117   gcc_assert (factor > 0);
 118
 119   if (maybe_lt (orig.m_nelts_per_input, factor))
 120     return false;
 121
 122   poly_uint64 nelts;
 123   /* Invalid if vector units number isn't multiple of factor.  */
 124   if (!multiple_p (orig.m_nelts_per_input, factor, &nelts))
 125     return false;
 126
 127   /* Only handle the case that npatterns is multiple of factor.
 128      FIXME: Try to see whether we can reshape it by factor npatterns.  */
 129   if (orig.m_encoding.npatterns () % factor != 0)
 130     return false;
 131
 132   unsigned int encoded_nelts = orig.m_encoding.encoded_nelts ();
 133   auto_vec<element_type, 32> encoding (encoded_nelts);
 134   /* Separate all encoded elements into batches by size factor,
 135      then ensure the first element of each batch is multiple of
 136      factor and all elements in each batch is consecutive from
 137      the first one.  */
 138   for (unsigned int i = 0; i < encoded_nelts; i += factor)
 139     {
 140       element_type first = orig.m_encoding[i];
 141       element_type new_index;
 142       if (!multiple_p (first, factor, &new_index))
 143         return false;
 144       for (unsigned int j = 1; j < factor; ++j)
 145         if (maybe_ne (first + j, orig.m_encoding[i + j]))
 146           return false;
 147       encoding.quick_push (new_index);
 148     }
 149
 150   m_ninputs = orig.m_ninputs;
 151   m_nelts_per_input = nelts;
 152   poly_uint64 full_nelts = exact_div (orig.m_encoding.full_nelts (), factor);
 153   unsigned int npatterns = orig.m_encoding.npatterns () / factor;
 154
 155   m_encoding.new_vector (full_nelts, npatterns,
 156                          orig.m_encoding.nelts_per_pattern ());
 157   m_encoding.splice (encoding);
 158   m_encoding.finalize ();
 159
 160   return true;
 161 }
 162
 163 /* Rotate the inputs of the permutation right by DELTA inputs.  This changes
 164    the values of the permutation vector but it doesn't change the way that
 165    the elements are encoded.  */
 166
 167 void
 168 vec_perm_indices::rotate_inputs (int delta)
 169 {
 170   element_type element_delta = delta * m_nelts_per_input;
 171   for (unsigned int i = 0; i < m_encoding.length (); ++i)
 172     m_encoding[i] = clamp (m_encoding[i] + element_delta);
 173 }
 174
 175 /* Return true if index OUT_BASE + I * OUT_STEP selects input
 176    element IN_BASE + I * IN_STEP.  For example, the call to test
 177    whether a permute reverses a vector of N elements would be:
 178
 179      series_p (0, 1, N - 1, -1)
 180
 181    which would return true for { N - 1, N - 2, N - 3, ... }.
 182    The calls to test for an interleaving of elements starting
 183    at N1 and N2 would be:
 184
 185      series_p (0, 2, N1, 1) && series_p (1, 2, N2, 1).
 186
 187    which would return true for { N1, N2, N1 + 1, N2 + 1, ... }.  */
 188
 189 bool
 190 vec_perm_indices::series_p (unsigned int out_base, unsigned int out_step,
 191                             element_type in_base, element_type in_step) const
 192 {
 193   /* Check the base value.  */
 194   if (maybe_ne (clamp (m_encoding.elt (out_base)), clamp (in_base)))
 195     return false;
 196
 197   element_type full_nelts = m_encoding.full_nelts ();
 198   unsigned int npatterns = m_encoding.npatterns ();
 199
 200   /* Calculate which multiple of OUT_STEP elements we need to get
 201      back to the same pattern.  */
 202   unsigned int cycle_length = least_common_multiple (out_step, npatterns);
 203
 204   /* Check the steps.  */
 205   in_step = clamp (in_step);
 206   out_base += out_step;
 207   unsigned int limit = 0;
 208   for (;;)
 209     {
 210       /* Succeed if we've checked all the elements in the vector.  */
 211       if (known_ge (out_base, full_nelts))
 212         return true;
 213
 214       if (out_base >= npatterns)
 215         {
 216           /* We've got to the end of the "foreground" values.  Check
 217              2 elements from each pattern in the "background" values.  */
 218           if (limit == 0)
 219             limit = out_base + cycle_length * 2;
 220           else if (out_base >= limit)
 221             return true;
 222         }
 223
 224       element_type v0 = m_encoding.elt (out_base - out_step);
 225       element_type v1 = m_encoding.elt (out_base);
 226       if (maybe_ne (clamp (v1 - v0), in_step))
 227         return false;
 228
 229       out_base += out_step;
 230     }
 231 }
 232
 233 /* Return true if all elements of the permutation vector are in the range
 234    [START, START + SIZE).  */
 235
 236 bool
 237 vec_perm_indices::all_in_range_p (element_type start, element_type size) const
 238 {
 239   /* Check the first two elements of each pattern.  */
 240   unsigned int npatterns = m_encoding.npatterns ();
 241   unsigned int nelts_per_pattern = m_encoding.nelts_per_pattern ();
 242   unsigned int base_nelts = npatterns * MIN (nelts_per_pattern, 2);
 243   for (unsigned int i = 0; i < base_nelts; ++i)
 244     if (!known_in_range_p (m_encoding[i], start, size))
 245       return false;
 246
 247   /* For stepped encodings, check the full range of the series.  */
 248   if (nelts_per_pattern == 3)
 249     {
 250       element_type limit = input_nelts ();
 251
 252       /* The number of elements in each pattern beyond the first two
 253          that we checked above.  */
 254       poly_int64 step_nelts = exact_div (m_encoding.full_nelts (),
 255                                          npatterns) - 2;
 256       for (unsigned int i = 0; i < npatterns; ++i)
 257         {
 258           /* BASE1 has been checked but BASE2 hasn't.   */
 259           element_type base1 = m_encoding[i + npatterns];
 260           element_type base2 = m_encoding[i + base_nelts];
 261
 262           /* The step to add to get from BASE1 to each subsequent value.  */
 263           element_type step = clamp (base2 - base1);
 264
 265           /* STEP has no inherent sign, so a value near LIMIT can
 266              act as a negative step.  The series is in range if it
 267              is in range according to one of the two interpretations.
 268
 269              Since we're dealing with clamped values, ELEMENT_TYPE is
 270              wide enough for overflow not to be a problem.  */
 271           element_type headroom_down = base1 - start;
 272           element_type headroom_up = size - headroom_down - 1;
 273           HOST_WIDE_INT diff;
 274           if ((!step.is_constant (&diff)
 275                || maybe_lt (headroom_up, diff * step_nelts))
 276               && (!(limit - step).is_constant (&diff)
 277                   || maybe_lt (headroom_down, diff * step_nelts)))
 278             return false;
 279         }
 280     }
 281   return true;
 282 }
 283
 284 /* Try to read the contents of VECTOR_CST CST as a constant permutation
 285    vector.  Return true and add the elements to BUILDER on success,
 286    otherwise return false without modifying BUILDER.  */
 287
 288 bool
 289 tree_to_vec_perm_builder (vec_perm_builder *builder, tree cst)
 290 {
 291   unsigned int encoded_nelts = vector_cst_encoded_nelts (cst);
 292   for (unsigned int i = 0; i < encoded_nelts; ++i)
 293     if (!tree_fits_poly_int64_p (VECTOR_CST_ENCODED_ELT (cst, i)))
 294       return false;
 295
 296   builder->new_vector (TYPE_VECTOR_SUBPARTS (TREE_TYPE (cst)),
 297                        VECTOR_CST_NPATTERNS (cst),
 298                        VECTOR_CST_NELTS_PER_PATTERN (cst));
 299   for (unsigned int i = 0; i < encoded_nelts; ++i)
 300     builder->quick_push (tree_to_poly_int64 (VECTOR_CST_ENCODED_ELT (cst, i)));
 301   return true;
 302 }
 303
 304 /* Return a VECTOR_CST of type TYPE for the permutation vector in INDICES.  */
 305
 306 tree
 307 vec_perm_indices_to_tree (tree type, const vec_perm_indices &indices)
 308 {
 309   gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (type), indices.length ()));
 310   tree_vector_builder sel (type, indices.encoding ().npatterns (),
 311                            indices.encoding ().nelts_per_pattern ());
 312   unsigned int encoded_nelts = sel.encoded_nelts ();
 313   for (unsigned int i = 0; i < encoded_nelts; i++)
 314     sel.quick_push (build_int_cst (TREE_TYPE (type), indices[i]));
 315   return sel.build ();
 316 }
 317
 318 /* Return a CONST_VECTOR of mode MODE that contains the elements of
 319    INDICES.  */
 320
 321 rtx
 322 vec_perm_indices_to_rtx (machine_mode mode, const vec_perm_indices &indices)
 323 {
 324   gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
 325               && known_eq (GET_MODE_NUNITS (mode), indices.length ()));
 326   rtx_vector_builder sel (mode, indices.encoding ().npatterns (),
 327                           indices.encoding ().nelts_per_pattern ());
 328   unsigned int encoded_nelts = sel.encoded_nelts ();
 329   for (unsigned int i = 0; i < encoded_nelts; i++)
 330     sel.quick_push (gen_int_mode (indices[i], GET_MODE_INNER (mode)));
 331   return sel.build ();
 332 }
 333
 334 #if CHECKING_P
 335
 336 namespace selftest {
 337
 338 /* Test a 12-element vector.  */
 339
 340 static void
 341 test_vec_perm_12 (void)
 342 {
 343   vec_perm_builder builder (12, 12, 1);
 344   for (unsigned int i = 0; i < 4; ++i)
 345     {
 346       builder.quick_push (i * 5);
 347       builder.quick_push (3 + i);
 348       builder.quick_push (2 + 3 * i);
 349     }
 350   vec_perm_indices indices (builder, 1, 12);
 351   ASSERT_TRUE (indices.series_p (0, 3, 0, 5));
 352   ASSERT_FALSE (indices.series_p (0, 3, 3, 5));
 353   ASSERT_FALSE (indices.series_p (0, 3, 0, 8));
 354   ASSERT_TRUE (indices.series_p (1, 3, 3, 1));
 355   ASSERT_TRUE (indices.series_p (2, 3, 2, 3));
 356
 357   ASSERT_TRUE (indices.series_p (0, 4, 0, 4));
 358   ASSERT_FALSE (indices.series_p (1, 4, 3, 4));
 359
 360   ASSERT_TRUE (indices.series_p (0, 6, 0, 10));
 361   ASSERT_FALSE (indices.series_p (0, 6, 0, 100));
 362
 363   ASSERT_FALSE (indices.series_p (1, 10, 3, 7));
 364   ASSERT_TRUE (indices.series_p (1, 10, 3, 8));
 365
 366   ASSERT_TRUE (indices.series_p (0, 12, 0, 10));
 367   ASSERT_TRUE (indices.series_p (0, 12, 0, 11));
 368   ASSERT_TRUE (indices.series_p (0, 12, 0, 100));
 369 }
 370
 371 /* Run selftests for this file.  */
 372
 373 void
 374 vec_perm_indices_cc_tests ()
 375 {
 376   test_vec_perm_12 ();
 377 }
 378
 379 } // namespace selftest
 380
 381 #endif