1 /* A class for building vector constant patterns.
2 Copyright (C) 2017-2023 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #ifndef GCC_VECTOR_BUILDER_H
21 #define GCC_VECTOR_BUILDER_H
23 /* This class is a wrapper around auto_vec<T> for building vectors of T.
24 It aims to encode each vector as npatterns interleaved patterns,
25 where each pattern represents a sequence:
27 { BASE0, BASE1, BASE1 + STEP, BASE1 + STEP*2, BASE1 + STEP*3, ... }
29 The first three elements in each pattern provide enough information
30 to derive the other elements. If all patterns have a STEP of zero,
31 we only need to encode the first two elements in each pattern.
32 If BASE1 is also equal to BASE0 for all patterns, we only need to
33 encode the first element in each pattern. The number of encoded
34 elements per pattern is given by nelts_per_pattern.
36 The class can be used in two ways:
38 1. It can be used to build a full image of the vector, which is then
39 canonicalized by finalize (). In this case npatterns is initially
40 the number of elements in the vector and nelts_per_pattern is
43 2. It can be used to build a vector that already has a known encoding.
44 This is preferred since it is more efficient and copes with
45 variable-length vectors. finalize () then canonicalizes the encoding
46 to a simpler form if possible.
48 Shape is the type that specifies the number of elements in the vector
49 and (where relevant) the type of each element.
51 The derived class Derived provides the functionality of this class
52 for specific Ts. Derived needs to provide the following interface:
54 bool equal_p (T elt1, T elt2) const;
56 Return true if elements ELT1 and ELT2 are equal.
58 bool allow_steps_p () const;
60 Return true if a stepped representation is OK. We don't allow
61 linear series for anything other than integers, to avoid problems
64 bool integral_p (T elt) const;
66 Return true if element ELT can be interpreted as an integer.
68 StepType step (T elt1, T elt2) const;
70 Return the value of element ELT2 minus the value of element ELT1,
71 given integral_p (ELT1) && integral_p (ELT2). There is no fixed
74 T apply_step (T base, unsigned int factor, StepType step) const;
76 Return a vector element with the value BASE + FACTOR * STEP.
78 bool can_elide_p (T elt) const;
80 Return true if we can drop element ELT, even if the retained
81 elements are different. This is provided for TREE_OVERFLOW
84 void note_representative (T *elt1_ptr, T elt2);
86 Record that ELT2 is being elided, given that ELT1_PTR points to
87 the last encoded element for the containing pattern. This is
88 again provided for TREE_OVERFLOW handling.
90 static poly_uint64 shape_nelts (Shape shape);
92 Return the number of elements in SHAPE.
94 The class provides additional functionality for the case in which
95 T can describe a vector constant as well as an individual element.
96 This functionality requires:
98 static poly_uint64 nelts_of (T x);
100 Return the number of elements in vector constant X.
102 static unsigned int npatterns_of (T x);
104 Return the number of patterns used to encode vector constant X.
106 static unsigned int nelts_per_pattern_of (T x);
108 Return the number of elements used to encode each pattern
109 in vector constant X. */
111 template<typename T
, typename Shape
, typename Derived
>
112 class vector_builder
: public auto_vec
<T
, 32>
117 poly_uint64
full_nelts () const { return m_full_nelts
; }
118 unsigned int npatterns () const { return m_npatterns
; }
119 unsigned int nelts_per_pattern () const { return m_nelts_per_pattern
; }
120 unsigned int encoded_nelts () const;
121 bool encoded_full_vector_p () const;
122 T
elt (unsigned int) const;
123 unsigned int count_dups (int, int, int) const;
125 bool operator == (const Derived
&) const;
126 bool operator != (const Derived
&x
) const { return !operator == (x
); }
128 bool new_unary_operation (Shape
, T
, bool);
129 bool new_binary_operation (Shape
, T
, T
, bool);
133 static unsigned int binary_encoded_nelts (T
, T
);
136 void new_vector (poly_uint64
, unsigned int, unsigned int);
137 void reshape (unsigned int, unsigned int);
138 bool repeating_sequence_p (unsigned int, unsigned int, unsigned int);
139 bool stepped_sequence_p (unsigned int, unsigned int, unsigned int);
140 bool try_npatterns (unsigned int);
143 vector_builder (const vector_builder
&);
144 vector_builder
&operator= (const vector_builder
&);
145 Derived
*derived () { return static_cast<Derived
*> (this); }
146 const Derived
*derived () const;
148 poly_uint64 m_full_nelts
;
149 unsigned int m_npatterns
;
150 unsigned int m_nelts_per_pattern
;
153 template<typename T
, typename Shape
, typename Derived
>
154 inline const Derived
*
155 vector_builder
<T
, Shape
, Derived
>::derived () const
157 return static_cast<const Derived
*> (this);
160 template<typename T
, typename Shape
, typename Derived
>
162 vector_builder
<T
, Shape
, Derived
>::vector_builder ()
165 m_nelts_per_pattern (0)
168 /* Return the number of elements that are explicitly encoded. The vec
169 starts with these explicitly-encoded elements and may contain additional
172 template<typename T
, typename Shape
, typename Derived
>
174 vector_builder
<T
, Shape
, Derived
>::encoded_nelts () const
176 return m_npatterns
* m_nelts_per_pattern
;
179 /* Return true if every element of the vector is explicitly encoded. */
181 template<typename T
, typename Shape
, typename Derived
>
183 vector_builder
<T
, Shape
, Derived
>::encoded_full_vector_p () const
185 return known_eq (m_npatterns
* m_nelts_per_pattern
, m_full_nelts
);
188 /* Start building a vector that has FULL_NELTS elements. Initially
189 encode it using NPATTERNS patterns with NELTS_PER_PATTERN each. */
191 template<typename T
, typename Shape
, typename Derived
>
193 vector_builder
<T
, Shape
, Derived
>::new_vector (poly_uint64 full_nelts
,
194 unsigned int npatterns
,
195 unsigned int nelts_per_pattern
)
197 m_full_nelts
= full_nelts
;
198 m_npatterns
= npatterns
;
199 m_nelts_per_pattern
= nelts_per_pattern
;
200 this->reserve (encoded_nelts ());
204 /* Return true if this vector and OTHER have the same elements and
205 are encoded in the same way. */
207 template<typename T
, typename Shape
, typename Derived
>
209 vector_builder
<T
, Shape
, Derived
>::operator == (const Derived
&other
) const
211 if (maybe_ne (m_full_nelts
, other
.m_full_nelts
)
212 || m_npatterns
!= other
.m_npatterns
213 || m_nelts_per_pattern
!= other
.m_nelts_per_pattern
)
216 unsigned int nelts
= encoded_nelts ();
217 for (unsigned int i
= 0; i
< nelts
; ++i
)
218 if (!derived ()->equal_p ((*this)[i
], other
[i
]))
224 /* Return the value of vector element I, which might or might not be
225 encoded explicitly. */
227 template<typename T
, typename Shape
, typename Derived
>
229 vector_builder
<T
, Shape
, Derived
>::elt (unsigned int i
) const
231 /* First handle elements that are already present in the underlying
232 vector, regardless of whether they're part of the encoding or not. */
233 if (i
< this->length ())
236 /* Extrapolation is only possible if the encoding has been fully
238 gcc_checking_assert (encoded_nelts () <= this->length ());
240 /* Identify the pattern that contains element I and work out the index of
241 the last encoded element for that pattern. */
242 unsigned int pattern
= i
% m_npatterns
;
243 unsigned int count
= i
/ m_npatterns
;
244 unsigned int final_i
= encoded_nelts () - m_npatterns
+ pattern
;
245 T final
= (*this)[final_i
];
247 /* If there are no steps, the final encoded value is the right one. */
248 if (m_nelts_per_pattern
<= 2)
251 /* Otherwise work out the value from the last two encoded elements. */
252 T prev
= (*this)[final_i
- m_npatterns
];
253 return derived ()->apply_step (final
, count
- 2,
254 derived ()->step (prev
, final
));
257 /* Try to start building a new vector of shape SHAPE that holds the result of
258 a unary operation on vector constant VEC. ALLOW_STEPPED_P is true if the
259 operation can handle stepped encodings directly, without having to expand
262 Return true if the operation is possible, which it always is when
263 ALLOW_STEPPED_P is true. Leave the builder unchanged otherwise. */
265 template<typename T
, typename Shape
, typename Derived
>
267 vector_builder
<T
, Shape
, Derived
>::new_unary_operation (Shape shape
, T vec
,
268 bool allow_stepped_p
)
270 poly_uint64 full_nelts
= Derived::shape_nelts (shape
);
271 gcc_assert (known_eq (full_nelts
, Derived::nelts_of (vec
)));
272 unsigned int npatterns
= Derived::npatterns_of (vec
);
273 unsigned int nelts_per_pattern
= Derived::nelts_per_pattern_of (vec
);
274 if (!allow_stepped_p
&& nelts_per_pattern
> 2)
276 if (!full_nelts
.is_constant ())
278 npatterns
= full_nelts
.to_constant ();
279 nelts_per_pattern
= 1;
281 derived ()->new_vector (shape
, npatterns
, nelts_per_pattern
);
285 /* Try to start building a new vector of shape SHAPE that holds the result of
286 a binary operation on vector constants VEC1 and VEC2. ALLOW_STEPPED_P is
287 true if the operation can handle stepped encodings directly, without
288 having to expand the full sequence.
290 Return true if the operation is possible. Leave the builder unchanged
293 template<typename T
, typename Shape
, typename Derived
>
295 vector_builder
<T
, Shape
, Derived
>::new_binary_operation (Shape shape
,
297 bool allow_stepped_p
)
299 poly_uint64 full_nelts
= Derived::shape_nelts (shape
);
300 gcc_assert (known_eq (full_nelts
, Derived::nelts_of (vec1
))
301 && known_eq (full_nelts
, Derived::nelts_of (vec2
)));
302 /* Conceptually we split the patterns in VEC1 and VEC2 until we have
303 an equal number for both. Each split pattern requires the same
304 number of elements per pattern as the original. E.g. splitting:
321 unsigned int npatterns
322 = least_common_multiple (Derived::npatterns_of (vec1
),
323 Derived::npatterns_of (vec2
));
324 unsigned int nelts_per_pattern
325 = MAX (Derived::nelts_per_pattern_of (vec1
),
326 Derived::nelts_per_pattern_of (vec2
));
327 if (!allow_stepped_p
&& nelts_per_pattern
> 2)
329 if (!full_nelts
.is_constant ())
331 npatterns
= full_nelts
.to_constant ();
332 nelts_per_pattern
= 1;
334 derived ()->new_vector (shape
, npatterns
, nelts_per_pattern
);
338 /* Return the number of elements that the caller needs to operate on in
339 order to handle a binary operation on vector constants VEC1 and VEC2.
340 This static function is used instead of new_binary_operation if the
341 result of the operation is not a constant vector. */
343 template<typename T
, typename Shape
, typename Derived
>
345 vector_builder
<T
, Shape
, Derived
>::binary_encoded_nelts (T vec1
, T vec2
)
347 poly_uint64 nelts
= Derived::nelts_of (vec1
);
348 gcc_assert (known_eq (nelts
, Derived::nelts_of (vec2
)));
349 /* See new_binary_operation for details. */
350 unsigned int npatterns
351 = least_common_multiple (Derived::npatterns_of (vec1
),
352 Derived::npatterns_of (vec2
));
353 unsigned int nelts_per_pattern
354 = MAX (Derived::nelts_per_pattern_of (vec1
),
355 Derived::nelts_per_pattern_of (vec2
));
356 unsigned HOST_WIDE_INT const_nelts
;
357 if (nelts
.is_constant (&const_nelts
))
358 return MIN (npatterns
* nelts_per_pattern
, const_nelts
);
359 return npatterns
* nelts_per_pattern
;
362 /* Return the number of leading duplicate elements in the range
363 [START:END:STEP]. The value is always at least 1. */
365 template<typename T
, typename Shape
, typename Derived
>
367 vector_builder
<T
, Shape
, Derived
>::count_dups (int start
, int end
,
370 gcc_assert ((end
- start
) % step
== 0);
372 unsigned int ndups
= 1;
373 for (int i
= start
+ step
;
374 i
!= end
&& derived ()->equal_p (elt (i
), elt (start
));
380 /* Change the encoding to NPATTERNS patterns of NELTS_PER_PATTERN each,
381 but without changing the underlying vector. */
383 template<typename T
, typename Shape
, typename Derived
>
385 vector_builder
<T
, Shape
, Derived
>::reshape (unsigned int npatterns
,
386 unsigned int nelts_per_pattern
)
388 unsigned int old_encoded_nelts
= encoded_nelts ();
389 unsigned int new_encoded_nelts
= npatterns
* nelts_per_pattern
;
390 gcc_checking_assert (new_encoded_nelts
<= old_encoded_nelts
);
391 unsigned int next
= new_encoded_nelts
- npatterns
;
392 for (unsigned int i
= new_encoded_nelts
; i
< old_encoded_nelts
; ++i
)
394 derived ()->note_representative (&(*this)[next
], (*this)[i
]);
396 if (next
== new_encoded_nelts
)
399 m_npatterns
= npatterns
;
400 m_nelts_per_pattern
= nelts_per_pattern
;
403 /* Return true if elements [START, END) contain a repeating sequence of
406 template<typename T
, typename Shape
, typename Derived
>
408 vector_builder
<T
, Shape
, Derived
>::repeating_sequence_p (unsigned int start
,
412 for (unsigned int i
= start
; i
< end
- step
; ++i
)
413 if (!derived ()->equal_p ((*this)[i
], (*this)[i
+ step
]))
418 /* Return true if elements [START, END) contain STEP interleaved linear
421 template<typename T
, typename Shape
, typename Derived
>
423 vector_builder
<T
, Shape
, Derived
>::stepped_sequence_p (unsigned int start
,
427 if (!derived ()->allow_steps_p ())
430 for (unsigned int i
= start
+ step
* 2; i
< end
; ++i
)
432 T elt1
= (*this)[i
- step
* 2];
433 T elt2
= (*this)[i
- step
];
436 if (!derived ()->integral_p (elt1
)
437 || !derived ()->integral_p (elt2
)
438 || !derived ()->integral_p (elt3
))
441 if (maybe_ne (derived ()->step (elt1
, elt2
),
442 derived ()->step (elt2
, elt3
)))
445 if (!derived ()->can_elide_p (elt3
))
451 /* Try to change the number of encoded patterns to NPATTERNS, returning
454 template<typename T
, typename Shape
, typename Derived
>
456 vector_builder
<T
, Shape
, Derived
>::try_npatterns (unsigned int npatterns
)
458 if (m_nelts_per_pattern
== 1)
460 /* See whether NPATTERNS is valid with the current 1-element-per-pattern
462 if (repeating_sequence_p (0, encoded_nelts (), npatterns
))
464 reshape (npatterns
, 1);
468 /* We can only increase the number of elements per pattern if all
469 elements are still encoded explicitly. */
470 if (!encoded_full_vector_p ())
474 if (m_nelts_per_pattern
<= 2)
476 /* See whether NPATTERNS is valid with a 2-element-per-pattern
478 if (repeating_sequence_p (npatterns
, encoded_nelts (), npatterns
))
480 reshape (npatterns
, 2);
484 /* We can only increase the number of elements per pattern if all
485 elements are still encoded explicitly. */
486 if (!encoded_full_vector_p ())
490 if (m_nelts_per_pattern
<= 3)
492 /* See whether we have NPATTERNS interleaved linear series,
493 giving a 3-element-per-pattern encoding. */
494 if (stepped_sequence_p (npatterns
, encoded_nelts (), npatterns
))
496 reshape (npatterns
, 3);
505 /* Replace the current encoding with the canonical form. */
507 template<typename T
, typename Shape
, typename Derived
>
509 vector_builder
<T
, Shape
, Derived
>::finalize ()
511 /* The encoding requires the same number of elements to come from each
513 gcc_assert (multiple_p (m_full_nelts
, m_npatterns
));
515 /* Allow the caller to build more elements than necessary. For example,
516 it's often convenient to build a stepped vector from the natural
517 encoding of three elements even if the vector itself only has two. */
518 unsigned HOST_WIDE_INT const_full_nelts
;
519 if (m_full_nelts
.is_constant (&const_full_nelts
)
520 && const_full_nelts
<= encoded_nelts ())
522 m_npatterns
= const_full_nelts
;
523 m_nelts_per_pattern
= 1;
526 /* Try to whittle down the number of elements per pattern. That is:
528 1. If we have stepped patterns whose steps are all 0, reduce the
529 number of elements per pattern from 3 to 2.
531 2. If we have background fill values that are the same as the
532 foreground values, reduce the number of elements per pattern
534 while (m_nelts_per_pattern
> 1
535 && repeating_sequence_p (encoded_nelts () - m_npatterns
* 2,
536 encoded_nelts (), m_npatterns
))
537 /* The last two sequences of M_NPATTERNS elements are equal,
538 so remove the last one. */
539 reshape (m_npatterns
, m_nelts_per_pattern
- 1);
541 if (pow2p_hwi (m_npatterns
))
543 /* Try to halve the number of patterns while doing so gives a
544 valid pattern. This approach is linear in the number of
545 elements, whereas searcing from 1 up would be O(n*log(n)).
547 Each halving step tries to keep the number of elements per pattern
548 the same. If that isn't possible, and if all elements are still
549 explicitly encoded, the halving step can instead increase the number
550 of elements per pattern.
554 { 0, 2, 3, 4, 5, 6, 7, 8 } npatterns == 8 full_nelts == 8
556 we first realize that the second half of the sequence is not
557 equal to the first, so we cannot maintain 1 element per pattern
558 for npatterns == 4. Instead we halve the number of patterns
559 and double the number of elements per pattern, treating this
560 as a "foreground" { 0, 2, 3, 4 } against a "background" of
561 { 5, 6, 7, 8 | 5, 6, 7, 8 ... }:
563 { 0, 2, 3, 4 | 5, 6, 7, 8 } npatterns == 4
565 Next we realize that this is *not* a foreround of { 0, 2 }
566 against a background of { 3, 4 | 3, 4 ... }, so the only
567 remaining option for reducing the number of patterns is
568 to use a foreground of { 0, 2 } against a stepped background
569 of { 1, 2 | 3, 4 | 5, 6 ... }. This is valid because we still
570 haven't elided any elements:
572 { 0, 2 | 3, 4 | 5, 6 } npatterns == 2
574 This in turn can be reduced to a foreground of { 0 } against a
575 stepped background of { 1 | 2 | 3 ... }:
577 { 0 | 2 | 3 } npatterns == 1
579 This last step would not have been possible for:
581 { 0, 0 | 3, 4 | 5, 6 } npatterns == 2. */
582 while ((m_npatterns
& 1) == 0 && try_npatterns (m_npatterns
/ 2))
585 /* Builders of arbitrary fixed-length vectors can use:
589 so that every element is specified explicitly. Handle cases
590 that are actually wrapping series, like { 0, 1, 2, 3, 0, 1, 2, 3 }
591 would be for 2-bit elements. We'll have treated them as
592 duplicates in the loop above. */
593 if (m_nelts_per_pattern
== 1
594 && m_full_nelts
.is_constant (&const_full_nelts
)
595 && this->length () >= const_full_nelts
596 && (m_npatterns
& 3) == 0
597 && stepped_sequence_p (m_npatterns
/ 4, const_full_nelts
,
600 reshape (m_npatterns
/ 4, 3);
601 while ((m_npatterns
& 1) == 0 && try_npatterns (m_npatterns
/ 2))
606 /* For the non-power-of-2 case, do a simple search up from 1. */
607 for (unsigned int i
= 1; i
<= m_npatterns
/ 2; ++i
)
608 if (m_npatterns
% i
== 0 && try_npatterns (i
))