fwprop: Fix single_use_p calculation
[official-gcc.git] / gcc / vec-perm-indices.c
blobede590dc5c9fa3253f7bbea19e4ad9866e0f0204
1 /* A representation of vector permutation indices.
2 Copyright (C) 2017-2021 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "vec-perm-indices.h"
24 #include "tree.h"
25 #include "fold-const.h"
26 #include "tree-vector-builder.h"
27 #include "backend.h"
28 #include "rtl.h"
29 #include "memmodel.h"
30 #include "emit-rtl.h"
31 #include "selftest.h"
32 #include "rtx-vector-builder.h"
34 /* Switch to a new permutation vector that selects between NINPUTS vector
35 inputs that have NELTS_PER_INPUT elements each. Take the elements of the
36 new permutation vector from ELEMENTS, clamping each one to be in range. */
38 void
39 vec_perm_indices::new_vector (const vec_perm_builder &elements,
40 unsigned int ninputs,
41 poly_uint64 nelts_per_input)
43 m_ninputs = ninputs;
44 m_nelts_per_input = nelts_per_input;
45 /* If the vector has a constant number of elements, expand the
46 encoding and clamp each element. E.g. { 0, 2, 4, ... } might
47 wrap halfway if there is only one vector input, and we want
48 the wrapped form to be the canonical one.
50 If the vector has a variable number of elements, just copy
51 the encoding. In that case the unwrapped form is canonical
52 and there is no way of representing the wrapped form. */
53 poly_uint64 full_nelts = elements.full_nelts ();
54 unsigned HOST_WIDE_INT copy_nelts;
55 if (full_nelts.is_constant (&copy_nelts))
56 m_encoding.new_vector (full_nelts, copy_nelts, 1);
57 else
59 copy_nelts = elements.encoded_nelts ();
60 m_encoding.new_vector (full_nelts, elements.npatterns (),
61 elements.nelts_per_pattern ());
63 unsigned int npatterns = m_encoding.npatterns ();
64 for (unsigned int i = 0; i < npatterns; ++i)
65 m_encoding.quick_push (clamp (elements.elt (i)));
66 /* Use the fact that:
68 (a + b) % c == ((a % c) + (b % c)) % c
70 to simplify the clamping of variable-length vectors. */
71 for (unsigned int i = npatterns; i < copy_nelts; ++i)
73 element_type step = clamp (elements.elt (i)
74 - elements.elt (i - npatterns));
75 m_encoding.quick_push (clamp (m_encoding[i - npatterns] + step));
77 m_encoding.finalize ();
80 /* Switch to a new permutation vector that selects the same input elements
81 as ORIG, but with each element split into FACTOR pieces. For example,
82 if ORIG is { 1, 2, 0, 3 } and FACTOR is 2, the new permutation is
83 { 2, 3, 4, 5, 0, 1, 6, 7 }. */
85 void
86 vec_perm_indices::new_expanded_vector (const vec_perm_indices &orig,
87 unsigned int factor)
89 m_ninputs = orig.m_ninputs;
90 m_nelts_per_input = orig.m_nelts_per_input * factor;
91 m_encoding.new_vector (orig.m_encoding.full_nelts () * factor,
92 orig.m_encoding.npatterns () * factor,
93 orig.m_encoding.nelts_per_pattern ());
94 unsigned int encoded_nelts = orig.m_encoding.encoded_nelts ();
95 for (unsigned int i = 0; i < encoded_nelts; ++i)
97 element_type base = orig.m_encoding[i] * factor;
98 for (unsigned int j = 0; j < factor; ++j)
99 m_encoding.quick_push (base + j);
101 m_encoding.finalize ();
104 /* Rotate the inputs of the permutation right by DELTA inputs. This changes
105 the values of the permutation vector but it doesn't change the way that
106 the elements are encoded. */
108 void
109 vec_perm_indices::rotate_inputs (int delta)
111 element_type element_delta = delta * m_nelts_per_input;
112 for (unsigned int i = 0; i < m_encoding.length (); ++i)
113 m_encoding[i] = clamp (m_encoding[i] + element_delta);
116 /* Return true if index OUT_BASE + I * OUT_STEP selects input
117 element IN_BASE + I * IN_STEP. For example, the call to test
118 whether a permute reverses a vector of N elements would be:
120 series_p (0, 1, N - 1, -1)
122 which would return true for { N - 1, N - 2, N - 3, ... }.
123 The calls to test for an interleaving of elements starting
124 at N1 and N2 would be:
126 series_p (0, 2, N1, 1) && series_p (1, 2, N2, 1).
128 which would return true for { N1, N2, N1 + 1, N2 + 1, ... }. */
130 bool
131 vec_perm_indices::series_p (unsigned int out_base, unsigned int out_step,
132 element_type in_base, element_type in_step) const
134 /* Check the base value. */
135 if (maybe_ne (clamp (m_encoding.elt (out_base)), clamp (in_base)))
136 return false;
138 element_type full_nelts = m_encoding.full_nelts ();
139 unsigned int npatterns = m_encoding.npatterns ();
141 /* Calculate which multiple of OUT_STEP elements we need to get
142 back to the same pattern. */
143 unsigned int cycle_length = least_common_multiple (out_step, npatterns);
145 /* Check the steps. */
146 in_step = clamp (in_step);
147 out_base += out_step;
148 unsigned int limit = 0;
149 for (;;)
151 /* Succeed if we've checked all the elements in the vector. */
152 if (known_ge (out_base, full_nelts))
153 return true;
155 if (out_base >= npatterns)
157 /* We've got to the end of the "foreground" values. Check
158 2 elements from each pattern in the "background" values. */
159 if (limit == 0)
160 limit = out_base + cycle_length * 2;
161 else if (out_base >= limit)
162 return true;
165 element_type v0 = m_encoding.elt (out_base - out_step);
166 element_type v1 = m_encoding.elt (out_base);
167 if (maybe_ne (clamp (v1 - v0), in_step))
168 return false;
170 out_base += out_step;
172 return true;
175 /* Return true if all elements of the permutation vector are in the range
176 [START, START + SIZE). */
178 bool
179 vec_perm_indices::all_in_range_p (element_type start, element_type size) const
181 /* Check the first two elements of each pattern. */
182 unsigned int npatterns = m_encoding.npatterns ();
183 unsigned int nelts_per_pattern = m_encoding.nelts_per_pattern ();
184 unsigned int base_nelts = npatterns * MIN (nelts_per_pattern, 2);
185 for (unsigned int i = 0; i < base_nelts; ++i)
186 if (!known_in_range_p (m_encoding[i], start, size))
187 return false;
189 /* For stepped encodings, check the full range of the series. */
190 if (nelts_per_pattern == 3)
192 element_type limit = input_nelts ();
194 /* The number of elements in each pattern beyond the first two
195 that we checked above. */
196 poly_int64 step_nelts = exact_div (m_encoding.full_nelts (),
197 npatterns) - 2;
198 for (unsigned int i = 0; i < npatterns; ++i)
200 /* BASE1 has been checked but BASE2 hasn't. */
201 element_type base1 = m_encoding[i + npatterns];
202 element_type base2 = m_encoding[i + base_nelts];
204 /* The step to add to get from BASE1 to each subsequent value. */
205 element_type step = clamp (base2 - base1);
207 /* STEP has no inherent sign, so a value near LIMIT can
208 act as a negative step. The series is in range if it
209 is in range according to one of the two interpretations.
211 Since we're dealing with clamped values, ELEMENT_TYPE is
212 wide enough for overflow not to be a problem. */
213 element_type headroom_down = base1 - start;
214 element_type headroom_up = size - headroom_down - 1;
215 HOST_WIDE_INT diff;
216 if ((!step.is_constant (&diff)
217 || maybe_lt (headroom_up, diff * step_nelts))
218 && (!(limit - step).is_constant (&diff)
219 || maybe_lt (headroom_down, diff * step_nelts)))
220 return false;
223 return true;
226 /* Try to read the contents of VECTOR_CST CST as a constant permutation
227 vector. Return true and add the elements to BUILDER on success,
228 otherwise return false without modifying BUILDER. */
230 bool
231 tree_to_vec_perm_builder (vec_perm_builder *builder, tree cst)
233 unsigned int encoded_nelts = vector_cst_encoded_nelts (cst);
234 for (unsigned int i = 0; i < encoded_nelts; ++i)
235 if (!tree_fits_poly_int64_p (VECTOR_CST_ENCODED_ELT (cst, i)))
236 return false;
238 builder->new_vector (TYPE_VECTOR_SUBPARTS (TREE_TYPE (cst)),
239 VECTOR_CST_NPATTERNS (cst),
240 VECTOR_CST_NELTS_PER_PATTERN (cst));
241 for (unsigned int i = 0; i < encoded_nelts; ++i)
242 builder->quick_push (tree_to_poly_int64 (VECTOR_CST_ENCODED_ELT (cst, i)));
243 return true;
246 /* Return a VECTOR_CST of type TYPE for the permutation vector in INDICES. */
248 tree
249 vec_perm_indices_to_tree (tree type, const vec_perm_indices &indices)
251 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (type), indices.length ()));
252 tree_vector_builder sel (type, indices.encoding ().npatterns (),
253 indices.encoding ().nelts_per_pattern ());
254 unsigned int encoded_nelts = sel.encoded_nelts ();
255 for (unsigned int i = 0; i < encoded_nelts; i++)
256 sel.quick_push (build_int_cst (TREE_TYPE (type), indices[i]));
257 return sel.build ();
260 /* Return a CONST_VECTOR of mode MODE that contains the elements of
261 INDICES. */
264 vec_perm_indices_to_rtx (machine_mode mode, const vec_perm_indices &indices)
266 gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
267 && known_eq (GET_MODE_NUNITS (mode), indices.length ()));
268 rtx_vector_builder sel (mode, indices.encoding ().npatterns (),
269 indices.encoding ().nelts_per_pattern ());
270 unsigned int encoded_nelts = sel.encoded_nelts ();
271 for (unsigned int i = 0; i < encoded_nelts; i++)
272 sel.quick_push (gen_int_mode (indices[i], GET_MODE_INNER (mode)));
273 return sel.build ();
276 #if CHECKING_P
278 namespace selftest {
280 /* Test a 12-element vector. */
282 static void
283 test_vec_perm_12 (void)
285 vec_perm_builder builder (12, 12, 1);
286 for (unsigned int i = 0; i < 4; ++i)
288 builder.quick_push (i * 5);
289 builder.quick_push (3 + i);
290 builder.quick_push (2 + 3 * i);
292 vec_perm_indices indices (builder, 1, 12);
293 ASSERT_TRUE (indices.series_p (0, 3, 0, 5));
294 ASSERT_FALSE (indices.series_p (0, 3, 3, 5));
295 ASSERT_FALSE (indices.series_p (0, 3, 0, 8));
296 ASSERT_TRUE (indices.series_p (1, 3, 3, 1));
297 ASSERT_TRUE (indices.series_p (2, 3, 2, 3));
299 ASSERT_TRUE (indices.series_p (0, 4, 0, 4));
300 ASSERT_FALSE (indices.series_p (1, 4, 3, 4));
302 ASSERT_TRUE (indices.series_p (0, 6, 0, 10));
303 ASSERT_FALSE (indices.series_p (0, 6, 0, 100));
305 ASSERT_FALSE (indices.series_p (1, 10, 3, 7));
306 ASSERT_TRUE (indices.series_p (1, 10, 3, 8));
308 ASSERT_TRUE (indices.series_p (0, 12, 0, 10));
309 ASSERT_TRUE (indices.series_p (0, 12, 0, 11));
310 ASSERT_TRUE (indices.series_p (0, 12, 0, 100));
313 /* Run selftests for this file. */
315 void
316 vec_perm_indices_c_tests ()
318 test_vec_perm_12 ();
321 } // namespace selftest
323 #endif