Daily bump.
[official-gcc.git] / gcc / vec-perm-indices.c
blob31b32ea05896c4228457819f903de28943f98dc8
1 /* A representation of vector permutation indices.
2 Copyright (C) 2017-2021 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "vec-perm-indices.h"
24 #include "tree.h"
25 #include "fold-const.h"
26 #include "tree-vector-builder.h"
27 #include "backend.h"
28 #include "rtl.h"
29 #include "memmodel.h"
30 #include "emit-rtl.h"
31 #include "selftest.h"
32 #include "rtx-vector-builder.h"
34 /* Switch to a new permutation vector that selects between NINPUTS vector
35 inputs that have NELTS_PER_INPUT elements each. Take the elements of the
36 new permutation vector from ELEMENTS, clamping each one to be in range. */
38 void
39 vec_perm_indices::new_vector (const vec_perm_builder &elements,
40 unsigned int ninputs,
41 poly_uint64 nelts_per_input)
43 m_ninputs = ninputs;
44 m_nelts_per_input = nelts_per_input;
45 /* If the vector has a constant number of elements, expand the
46 encoding and clamp each element. E.g. { 0, 2, 4, ... } might
47 wrap halfway if there is only one vector input, and we want
48 the wrapped form to be the canonical one.
50 If the vector has a variable number of elements, just copy
51 the encoding. In that case the unwrapped form is canonical
52 and there is no way of representing the wrapped form. */
53 poly_uint64 full_nelts = elements.full_nelts ();
54 unsigned HOST_WIDE_INT copy_nelts;
55 if (full_nelts.is_constant (&copy_nelts))
56 m_encoding.new_vector (full_nelts, copy_nelts, 1);
57 else
59 copy_nelts = elements.encoded_nelts ();
60 m_encoding.new_vector (full_nelts, elements.npatterns (),
61 elements.nelts_per_pattern ());
63 unsigned int npatterns = m_encoding.npatterns ();
64 for (unsigned int i = 0; i < npatterns; ++i)
65 m_encoding.quick_push (clamp (elements.elt (i)));
66 /* Use the fact that:
68 (a + b) % c == ((a % c) + (b % c)) % c
70 to simplify the clamping of variable-length vectors. */
71 for (unsigned int i = npatterns; i < copy_nelts; ++i)
73 element_type step = clamp (elements.elt (i)
74 - elements.elt (i - npatterns));
75 m_encoding.quick_push (clamp (m_encoding[i - npatterns] + step));
77 m_encoding.finalize ();
80 /* Switch to a new permutation vector that selects the same input elements
81 as ORIG, but with each element split into FACTOR pieces. For example,
82 if ORIG is { 1, 2, 0, 3 } and FACTOR is 2, the new permutation is
83 { 2, 3, 4, 5, 0, 1, 6, 7 }. */
85 void
86 vec_perm_indices::new_expanded_vector (const vec_perm_indices &orig,
87 unsigned int factor)
89 m_ninputs = orig.m_ninputs;
90 m_nelts_per_input = orig.m_nelts_per_input * factor;
91 m_encoding.new_vector (orig.m_encoding.full_nelts () * factor,
92 orig.m_encoding.npatterns () * factor,
93 orig.m_encoding.nelts_per_pattern ());
94 unsigned int encoded_nelts = orig.m_encoding.encoded_nelts ();
95 for (unsigned int i = 0; i < encoded_nelts; ++i)
97 element_type base = orig.m_encoding[i] * factor;
98 for (unsigned int j = 0; j < factor; ++j)
99 m_encoding.quick_push (base + j);
101 m_encoding.finalize ();
104 /* Check whether we can switch to a new permutation vector that
105 selects the same input elements as ORIG, but with each element
106 built up from FACTOR pieces. Return true if yes, otherwise
107 return false. Every FACTOR permutation indexes should be
108 continuous separately and the first one of each batch should
109 be able to exactly modulo FACTOR. For example, if ORIG is
110 { 2, 3, 4, 5, 0, 1, 6, 7 } and FACTOR is 2, the new permutation
111 is { 1, 2, 0, 3 }. */
113 bool
114 vec_perm_indices::new_shrunk_vector (const vec_perm_indices &orig,
115 unsigned int factor)
117 gcc_assert (factor > 0);
119 if (maybe_lt (orig.m_nelts_per_input, factor))
120 return false;
122 poly_uint64 nelts;
123 /* Invalid if vector units number isn't multiple of factor. */
124 if (!multiple_p (orig.m_nelts_per_input, factor, &nelts))
125 return false;
127 /* Only handle the case that npatterns is multiple of factor.
128 FIXME: Try to see whether we can reshape it by factor npatterns. */
129 if (orig.m_encoding.npatterns () % factor != 0)
130 return false;
132 unsigned int encoded_nelts = orig.m_encoding.encoded_nelts ();
133 auto_vec<element_type, 32> encoding (encoded_nelts);
134 /* Separate all encoded elements into batches by size factor,
135 then ensure the first element of each batch is multiple of
136 factor and all elements in each batch is consecutive from
137 the first one. */
138 for (unsigned int i = 0; i < encoded_nelts; i += factor)
140 element_type first = orig.m_encoding[i];
141 element_type new_index;
142 if (!multiple_p (first, factor, &new_index))
143 return false;
144 for (unsigned int j = 1; j < factor; ++j)
145 if (maybe_ne (first + j, orig.m_encoding[i + j]))
146 return false;
147 encoding.quick_push (new_index);
150 m_ninputs = orig.m_ninputs;
151 m_nelts_per_input = nelts;
152 poly_uint64 full_nelts = exact_div (orig.m_encoding.full_nelts (), factor);
153 unsigned int npatterns = orig.m_encoding.npatterns () / factor;
155 m_encoding.new_vector (full_nelts, npatterns,
156 orig.m_encoding.nelts_per_pattern ());
157 m_encoding.splice (encoding);
158 m_encoding.finalize ();
160 return true;
163 /* Rotate the inputs of the permutation right by DELTA inputs. This changes
164 the values of the permutation vector but it doesn't change the way that
165 the elements are encoded. */
167 void
168 vec_perm_indices::rotate_inputs (int delta)
170 element_type element_delta = delta * m_nelts_per_input;
171 for (unsigned int i = 0; i < m_encoding.length (); ++i)
172 m_encoding[i] = clamp (m_encoding[i] + element_delta);
175 /* Return true if index OUT_BASE + I * OUT_STEP selects input
176 element IN_BASE + I * IN_STEP. For example, the call to test
177 whether a permute reverses a vector of N elements would be:
179 series_p (0, 1, N - 1, -1)
181 which would return true for { N - 1, N - 2, N - 3, ... }.
182 The calls to test for an interleaving of elements starting
183 at N1 and N2 would be:
185 series_p (0, 2, N1, 1) && series_p (1, 2, N2, 1).
187 which would return true for { N1, N2, N1 + 1, N2 + 1, ... }. */
189 bool
190 vec_perm_indices::series_p (unsigned int out_base, unsigned int out_step,
191 element_type in_base, element_type in_step) const
193 /* Check the base value. */
194 if (maybe_ne (clamp (m_encoding.elt (out_base)), clamp (in_base)))
195 return false;
197 element_type full_nelts = m_encoding.full_nelts ();
198 unsigned int npatterns = m_encoding.npatterns ();
200 /* Calculate which multiple of OUT_STEP elements we need to get
201 back to the same pattern. */
202 unsigned int cycle_length = least_common_multiple (out_step, npatterns);
204 /* Check the steps. */
205 in_step = clamp (in_step);
206 out_base += out_step;
207 unsigned int limit = 0;
208 for (;;)
210 /* Succeed if we've checked all the elements in the vector. */
211 if (known_ge (out_base, full_nelts))
212 return true;
214 if (out_base >= npatterns)
216 /* We've got to the end of the "foreground" values. Check
217 2 elements from each pattern in the "background" values. */
218 if (limit == 0)
219 limit = out_base + cycle_length * 2;
220 else if (out_base >= limit)
221 return true;
224 element_type v0 = m_encoding.elt (out_base - out_step);
225 element_type v1 = m_encoding.elt (out_base);
226 if (maybe_ne (clamp (v1 - v0), in_step))
227 return false;
229 out_base += out_step;
231 return true;
234 /* Return true if all elements of the permutation vector are in the range
235 [START, START + SIZE). */
237 bool
238 vec_perm_indices::all_in_range_p (element_type start, element_type size) const
240 /* Check the first two elements of each pattern. */
241 unsigned int npatterns = m_encoding.npatterns ();
242 unsigned int nelts_per_pattern = m_encoding.nelts_per_pattern ();
243 unsigned int base_nelts = npatterns * MIN (nelts_per_pattern, 2);
244 for (unsigned int i = 0; i < base_nelts; ++i)
245 if (!known_in_range_p (m_encoding[i], start, size))
246 return false;
248 /* For stepped encodings, check the full range of the series. */
249 if (nelts_per_pattern == 3)
251 element_type limit = input_nelts ();
253 /* The number of elements in each pattern beyond the first two
254 that we checked above. */
255 poly_int64 step_nelts = exact_div (m_encoding.full_nelts (),
256 npatterns) - 2;
257 for (unsigned int i = 0; i < npatterns; ++i)
259 /* BASE1 has been checked but BASE2 hasn't. */
260 element_type base1 = m_encoding[i + npatterns];
261 element_type base2 = m_encoding[i + base_nelts];
263 /* The step to add to get from BASE1 to each subsequent value. */
264 element_type step = clamp (base2 - base1);
266 /* STEP has no inherent sign, so a value near LIMIT can
267 act as a negative step. The series is in range if it
268 is in range according to one of the two interpretations.
270 Since we're dealing with clamped values, ELEMENT_TYPE is
271 wide enough for overflow not to be a problem. */
272 element_type headroom_down = base1 - start;
273 element_type headroom_up = size - headroom_down - 1;
274 HOST_WIDE_INT diff;
275 if ((!step.is_constant (&diff)
276 || maybe_lt (headroom_up, diff * step_nelts))
277 && (!(limit - step).is_constant (&diff)
278 || maybe_lt (headroom_down, diff * step_nelts)))
279 return false;
282 return true;
285 /* Try to read the contents of VECTOR_CST CST as a constant permutation
286 vector. Return true and add the elements to BUILDER on success,
287 otherwise return false without modifying BUILDER. */
289 bool
290 tree_to_vec_perm_builder (vec_perm_builder *builder, tree cst)
292 unsigned int encoded_nelts = vector_cst_encoded_nelts (cst);
293 for (unsigned int i = 0; i < encoded_nelts; ++i)
294 if (!tree_fits_poly_int64_p (VECTOR_CST_ENCODED_ELT (cst, i)))
295 return false;
297 builder->new_vector (TYPE_VECTOR_SUBPARTS (TREE_TYPE (cst)),
298 VECTOR_CST_NPATTERNS (cst),
299 VECTOR_CST_NELTS_PER_PATTERN (cst));
300 for (unsigned int i = 0; i < encoded_nelts; ++i)
301 builder->quick_push (tree_to_poly_int64 (VECTOR_CST_ENCODED_ELT (cst, i)));
302 return true;
305 /* Return a VECTOR_CST of type TYPE for the permutation vector in INDICES. */
307 tree
308 vec_perm_indices_to_tree (tree type, const vec_perm_indices &indices)
310 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (type), indices.length ()));
311 tree_vector_builder sel (type, indices.encoding ().npatterns (),
312 indices.encoding ().nelts_per_pattern ());
313 unsigned int encoded_nelts = sel.encoded_nelts ();
314 for (unsigned int i = 0; i < encoded_nelts; i++)
315 sel.quick_push (build_int_cst (TREE_TYPE (type), indices[i]));
316 return sel.build ();
319 /* Return a CONST_VECTOR of mode MODE that contains the elements of
320 INDICES. */
323 vec_perm_indices_to_rtx (machine_mode mode, const vec_perm_indices &indices)
325 gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
326 && known_eq (GET_MODE_NUNITS (mode), indices.length ()));
327 rtx_vector_builder sel (mode, indices.encoding ().npatterns (),
328 indices.encoding ().nelts_per_pattern ());
329 unsigned int encoded_nelts = sel.encoded_nelts ();
330 for (unsigned int i = 0; i < encoded_nelts; i++)
331 sel.quick_push (gen_int_mode (indices[i], GET_MODE_INNER (mode)));
332 return sel.build ();
335 #if CHECKING_P
337 namespace selftest {
339 /* Test a 12-element vector. */
341 static void
342 test_vec_perm_12 (void)
344 vec_perm_builder builder (12, 12, 1);
345 for (unsigned int i = 0; i < 4; ++i)
347 builder.quick_push (i * 5);
348 builder.quick_push (3 + i);
349 builder.quick_push (2 + 3 * i);
351 vec_perm_indices indices (builder, 1, 12);
352 ASSERT_TRUE (indices.series_p (0, 3, 0, 5));
353 ASSERT_FALSE (indices.series_p (0, 3, 3, 5));
354 ASSERT_FALSE (indices.series_p (0, 3, 0, 8));
355 ASSERT_TRUE (indices.series_p (1, 3, 3, 1));
356 ASSERT_TRUE (indices.series_p (2, 3, 2, 3));
358 ASSERT_TRUE (indices.series_p (0, 4, 0, 4));
359 ASSERT_FALSE (indices.series_p (1, 4, 3, 4));
361 ASSERT_TRUE (indices.series_p (0, 6, 0, 10));
362 ASSERT_FALSE (indices.series_p (0, 6, 0, 100));
364 ASSERT_FALSE (indices.series_p (1, 10, 3, 7));
365 ASSERT_TRUE (indices.series_p (1, 10, 3, 8));
367 ASSERT_TRUE (indices.series_p (0, 12, 0, 10));
368 ASSERT_TRUE (indices.series_p (0, 12, 0, 11));
369 ASSERT_TRUE (indices.series_p (0, 12, 0, 100));
372 /* Run selftests for this file. */
374 void
375 vec_perm_indices_c_tests ()
377 test_vec_perm_12 ();
380 } // namespace selftest
382 #endif