Objective-C, NeXT, v2: Correct a regression in code-gen.
[official-gcc.git] / gcc / vec-perm-indices.cc
blob9543970f389a7e8c789407f420370a66e90c822b
1 /* A representation of vector permutation indices.
2 Copyright (C) 2017-2024 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "vec-perm-indices.h"
24 #include "tree.h"
25 #include "fold-const.h"
26 #include "tree-vector-builder.h"
27 #include "backend.h"
28 #include "rtl.h"
29 #include "memmodel.h"
30 #include "emit-rtl.h"
31 #include "selftest.h"
32 #include "rtx-vector-builder.h"
34 /* Switch to a new permutation vector that selects between NINPUTS vector
35 inputs that have NELTS_PER_INPUT elements each. Take the elements of the
36 new permutation vector from ELEMENTS, clamping each one to be in range. */
38 void
39 vec_perm_indices::new_vector (const vec_perm_builder &elements,
40 unsigned int ninputs,
41 poly_uint64 nelts_per_input)
43 m_ninputs = ninputs;
44 m_nelts_per_input = nelts_per_input;
45 /* If the vector has a constant number of elements, expand the
46 encoding and clamp each element. E.g. { 0, 2, 4, ... } might
47 wrap halfway if there is only one vector input, and we want
48 the wrapped form to be the canonical one.
50 If the vector has a variable number of elements, just copy
51 the encoding. In that case the unwrapped form is canonical
52 and there is no way of representing the wrapped form. */
53 poly_uint64 full_nelts = elements.full_nelts ();
54 unsigned HOST_WIDE_INT copy_nelts;
55 if (full_nelts.is_constant (&copy_nelts))
56 m_encoding.new_vector (full_nelts, copy_nelts, 1);
57 else
59 copy_nelts = elements.encoded_nelts ();
60 m_encoding.new_vector (full_nelts, elements.npatterns (),
61 elements.nelts_per_pattern ());
63 unsigned int npatterns = m_encoding.npatterns ();
64 for (unsigned int i = 0; i < npatterns; ++i)
65 m_encoding.quick_push (clamp (elements.elt (i)));
66 /* Use the fact that:
68 (a + b) % c == ((a % c) + (b % c)) % c
70 to simplify the clamping of variable-length vectors. */
71 for (unsigned int i = npatterns; i < copy_nelts; ++i)
73 element_type step = clamp (elements.elt (i)
74 - elements.elt (i - npatterns));
75 m_encoding.quick_push (clamp (m_encoding[i - npatterns] + step));
77 m_encoding.finalize ();
80 /* Switch to a new permutation vector that selects the same input elements
81 as ORIG, but with each element split into FACTOR pieces. For example,
82 if ORIG is { 1, 2, 0, 3 } and FACTOR is 2, the new permutation is
83 { 2, 3, 4, 5, 0, 1, 6, 7 }. */
85 void
86 vec_perm_indices::new_expanded_vector (const vec_perm_indices &orig,
87 unsigned int factor)
89 m_ninputs = orig.m_ninputs;
90 m_nelts_per_input = orig.m_nelts_per_input * factor;
91 m_encoding.new_vector (orig.m_encoding.full_nelts () * factor,
92 orig.m_encoding.npatterns () * factor,
93 orig.m_encoding.nelts_per_pattern ());
94 unsigned int encoded_nelts = orig.m_encoding.encoded_nelts ();
95 for (unsigned int i = 0; i < encoded_nelts; ++i)
97 element_type base = orig.m_encoding[i] * factor;
98 for (unsigned int j = 0; j < factor; ++j)
99 m_encoding.quick_push (base + j);
101 m_encoding.finalize ();
104 /* Check whether we can switch to a new permutation vector that
105 selects the same input elements as ORIG, but with each element
106 built up from FACTOR pieces. Return true if yes, otherwise
107 return false. Every FACTOR permutation indexes should be
108 continuous separately and the first one of each batch should
109 be able to exactly modulo FACTOR. For example, if ORIG is
110 { 2, 3, 4, 5, 0, 1, 6, 7 } and FACTOR is 2, the new permutation
111 is { 1, 2, 0, 3 }. */
113 bool
114 vec_perm_indices::new_shrunk_vector (const vec_perm_indices &orig,
115 unsigned int factor)
117 gcc_assert (factor > 0);
119 if (maybe_lt (orig.m_nelts_per_input, factor))
120 return false;
122 poly_uint64 nelts;
123 /* Invalid if vector units number isn't multiple of factor. */
124 if (!multiple_p (orig.m_nelts_per_input, factor, &nelts))
125 return false;
127 /* Only handle the case that npatterns is multiple of factor.
128 FIXME: Try to see whether we can reshape it by factor npatterns. */
129 if (orig.m_encoding.npatterns () % factor != 0)
130 return false;
132 unsigned int encoded_nelts = orig.m_encoding.encoded_nelts ();
133 auto_vec<element_type, 32> encoding (encoded_nelts);
134 /* Separate all encoded elements into batches by size factor,
135 then ensure the first element of each batch is multiple of
136 factor and all elements in each batch is consecutive from
137 the first one. */
138 for (unsigned int i = 0; i < encoded_nelts; i += factor)
140 element_type first = orig.m_encoding[i];
141 element_type new_index;
142 if (!multiple_p (first, factor, &new_index))
143 return false;
144 for (unsigned int j = 1; j < factor; ++j)
145 if (maybe_ne (first + j, orig.m_encoding[i + j]))
146 return false;
147 encoding.quick_push (new_index);
150 m_ninputs = orig.m_ninputs;
151 m_nelts_per_input = nelts;
152 poly_uint64 full_nelts = exact_div (orig.m_encoding.full_nelts (), factor);
153 unsigned int npatterns = orig.m_encoding.npatterns () / factor;
155 m_encoding.new_vector (full_nelts, npatterns,
156 orig.m_encoding.nelts_per_pattern ());
157 m_encoding.splice (encoding);
158 m_encoding.finalize ();
160 return true;
163 /* Rotate the inputs of the permutation right by DELTA inputs. This changes
164 the values of the permutation vector but it doesn't change the way that
165 the elements are encoded. */
167 void
168 vec_perm_indices::rotate_inputs (int delta)
170 element_type element_delta = delta * m_nelts_per_input;
171 for (unsigned int i = 0; i < m_encoding.length (); ++i)
172 m_encoding[i] = clamp (m_encoding[i] + element_delta);
175 /* Return true if index OUT_BASE + I * OUT_STEP selects input
176 element IN_BASE + I * IN_STEP. For example, the call to test
177 whether a permute reverses a vector of N elements would be:
179 series_p (0, 1, N - 1, -1)
181 which would return true for { N - 1, N - 2, N - 3, ... }.
182 The calls to test for an interleaving of elements starting
183 at N1 and N2 would be:
185 series_p (0, 2, N1, 1) && series_p (1, 2, N2, 1).
187 which would return true for { N1, N2, N1 + 1, N2 + 1, ... }. */
189 bool
190 vec_perm_indices::series_p (unsigned int out_base, unsigned int out_step,
191 element_type in_base, element_type in_step) const
193 /* Check the base value. */
194 if (maybe_ne (clamp (m_encoding.elt (out_base)), clamp (in_base)))
195 return false;
197 element_type full_nelts = m_encoding.full_nelts ();
198 unsigned int npatterns = m_encoding.npatterns ();
200 /* Calculate which multiple of OUT_STEP elements we need to get
201 back to the same pattern. */
202 unsigned int cycle_length = least_common_multiple (out_step, npatterns);
204 /* Check the steps. */
205 in_step = clamp (in_step);
206 out_base += out_step;
207 unsigned int limit = 0;
208 for (;;)
210 /* Succeed if we've checked all the elements in the vector. */
211 if (known_ge (out_base, full_nelts))
212 return true;
214 if (out_base >= npatterns)
216 /* We've got to the end of the "foreground" values. Check
217 2 elements from each pattern in the "background" values. */
218 if (limit == 0)
219 limit = out_base + cycle_length * 2;
220 else if (out_base >= limit)
221 return true;
224 element_type v0 = m_encoding.elt (out_base - out_step);
225 element_type v1 = m_encoding.elt (out_base);
226 if (maybe_ne (clamp (v1 - v0), in_step))
227 return false;
229 out_base += out_step;
233 /* Return true if all elements of the permutation vector are in the range
234 [START, START + SIZE). */
236 bool
237 vec_perm_indices::all_in_range_p (element_type start, element_type size) const
239 /* Check the first two elements of each pattern. */
240 unsigned int npatterns = m_encoding.npatterns ();
241 unsigned int nelts_per_pattern = m_encoding.nelts_per_pattern ();
242 unsigned int base_nelts = npatterns * MIN (nelts_per_pattern, 2);
243 for (unsigned int i = 0; i < base_nelts; ++i)
244 if (!known_in_range_p (m_encoding[i], start, size))
245 return false;
247 /* For stepped encodings, check the full range of the series. */
248 if (nelts_per_pattern == 3)
250 element_type limit = input_nelts ();
252 /* The number of elements in each pattern beyond the first two
253 that we checked above. */
254 poly_int64 step_nelts = exact_div (m_encoding.full_nelts (),
255 npatterns) - 2;
256 for (unsigned int i = 0; i < npatterns; ++i)
258 /* BASE1 has been checked but BASE2 hasn't. */
259 element_type base1 = m_encoding[i + npatterns];
260 element_type base2 = m_encoding[i + base_nelts];
262 /* The step to add to get from BASE1 to each subsequent value. */
263 element_type step = clamp (base2 - base1);
265 /* STEP has no inherent sign, so a value near LIMIT can
266 act as a negative step. The series is in range if it
267 is in range according to one of the two interpretations.
269 Since we're dealing with clamped values, ELEMENT_TYPE is
270 wide enough for overflow not to be a problem. */
271 element_type headroom_down = base1 - start;
272 element_type headroom_up = size - headroom_down - 1;
273 HOST_WIDE_INT diff;
274 if ((!step.is_constant (&diff)
275 || maybe_lt (headroom_up, diff * step_nelts))
276 && (!(limit - step).is_constant (&diff)
277 || maybe_lt (headroom_down, diff * step_nelts)))
278 return false;
281 return true;
284 /* Try to read the contents of VECTOR_CST CST as a constant permutation
285 vector. Return true and add the elements to BUILDER on success,
286 otherwise return false without modifying BUILDER. */
288 bool
289 tree_to_vec_perm_builder (vec_perm_builder *builder, tree cst)
291 unsigned int encoded_nelts = vector_cst_encoded_nelts (cst);
292 for (unsigned int i = 0; i < encoded_nelts; ++i)
293 if (!tree_fits_poly_int64_p (VECTOR_CST_ENCODED_ELT (cst, i)))
294 return false;
296 builder->new_vector (TYPE_VECTOR_SUBPARTS (TREE_TYPE (cst)),
297 VECTOR_CST_NPATTERNS (cst),
298 VECTOR_CST_NELTS_PER_PATTERN (cst));
299 for (unsigned int i = 0; i < encoded_nelts; ++i)
300 builder->quick_push (tree_to_poly_int64 (VECTOR_CST_ENCODED_ELT (cst, i)));
301 return true;
304 /* Return a VECTOR_CST of type TYPE for the permutation vector in INDICES. */
306 tree
307 vec_perm_indices_to_tree (tree type, const vec_perm_indices &indices)
309 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (type), indices.length ()));
310 tree_vector_builder sel (type, indices.encoding ().npatterns (),
311 indices.encoding ().nelts_per_pattern ());
312 unsigned int encoded_nelts = sel.encoded_nelts ();
313 for (unsigned int i = 0; i < encoded_nelts; i++)
314 sel.quick_push (build_int_cst (TREE_TYPE (type), indices[i]));
315 return sel.build ();
318 /* Return a CONST_VECTOR of mode MODE that contains the elements of
319 INDICES. */
322 vec_perm_indices_to_rtx (machine_mode mode, const vec_perm_indices &indices)
324 gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
325 && known_eq (GET_MODE_NUNITS (mode), indices.length ()));
326 rtx_vector_builder sel (mode, indices.encoding ().npatterns (),
327 indices.encoding ().nelts_per_pattern ());
328 unsigned int encoded_nelts = sel.encoded_nelts ();
329 for (unsigned int i = 0; i < encoded_nelts; i++)
330 sel.quick_push (gen_int_mode (indices[i], GET_MODE_INNER (mode)));
331 return sel.build ();
334 #if CHECKING_P
336 namespace selftest {
338 /* Test a 12-element vector. */
340 static void
341 test_vec_perm_12 (void)
343 vec_perm_builder builder (12, 12, 1);
344 for (unsigned int i = 0; i < 4; ++i)
346 builder.quick_push (i * 5);
347 builder.quick_push (3 + i);
348 builder.quick_push (2 + 3 * i);
350 vec_perm_indices indices (builder, 1, 12);
351 ASSERT_TRUE (indices.series_p (0, 3, 0, 5));
352 ASSERT_FALSE (indices.series_p (0, 3, 3, 5));
353 ASSERT_FALSE (indices.series_p (0, 3, 0, 8));
354 ASSERT_TRUE (indices.series_p (1, 3, 3, 1));
355 ASSERT_TRUE (indices.series_p (2, 3, 2, 3));
357 ASSERT_TRUE (indices.series_p (0, 4, 0, 4));
358 ASSERT_FALSE (indices.series_p (1, 4, 3, 4));
360 ASSERT_TRUE (indices.series_p (0, 6, 0, 10));
361 ASSERT_FALSE (indices.series_p (0, 6, 0, 100));
363 ASSERT_FALSE (indices.series_p (1, 10, 3, 7));
364 ASSERT_TRUE (indices.series_p (1, 10, 3, 8));
366 ASSERT_TRUE (indices.series_p (0, 12, 0, 10));
367 ASSERT_TRUE (indices.series_p (0, 12, 0, 11));
368 ASSERT_TRUE (indices.series_p (0, 12, 0, 100));
371 /* Run selftests for this file. */
373 void
374 vec_perm_indices_cc_tests ()
376 test_vec_perm_12 ();
379 } // namespace selftest
381 #endif