Intrinsicify SpanHelpers.IndexOf(char) (dotnet/coreclr#22505)
[mono-project.git] / netcore / System.Private.CoreLib / shared / System / SpanHelpers.Char.cs
blobcfa2750328c0cb431e608fb35346c8b3e9e5b2b2
1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
5 using System.Diagnostics;
6 using System.Numerics;
7 using System.Runtime.CompilerServices;
8 using System.Runtime.Intrinsics;
9 using System.Runtime.Intrinsics.X86;
11 using Internal.Runtime.CompilerServices;
13 #if BIT64
14 using nuint = System.UInt64;
15 using nint = System.Int64;
16 #else
17 using nuint = System.UInt32;
18 using nint = System.Int32;
19 #endif
21 namespace System
23 internal static partial class SpanHelpers // .Char
25 public static int IndexOf(ref char searchSpace, int searchSpaceLength, ref char value, int valueLength)
27 Debug.Assert(searchSpaceLength >= 0);
28 Debug.Assert(valueLength >= 0);
30 if (valueLength == 0)
31 return 0; // A zero-length sequence is always treated as "found" at the start of the search space.
33 char valueHead = value;
34 ref char valueTail = ref Unsafe.Add(ref value, 1);
35 int valueTailLength = valueLength - 1;
36 int remainingSearchSpaceLength = searchSpaceLength - valueTailLength;
38 int index = 0;
39 while (remainingSearchSpaceLength > 0)
41 // Do a quick search for the first element of "value".
42 int relativeIndex = IndexOf(ref Unsafe.Add(ref searchSpace, index), valueHead, remainingSearchSpaceLength);
43 if (relativeIndex == -1)
44 break;
46 remainingSearchSpaceLength -= relativeIndex;
47 index += relativeIndex;
49 if (remainingSearchSpaceLength <= 0)
50 break; // The unsearched portion is now shorter than the sequence we're looking for. So it can't be there.
52 // Found the first element of "value". See if the tail matches.
53 if (SequenceEqual(
54 ref Unsafe.As<char, byte>(ref Unsafe.Add(ref searchSpace, index + 1)),
55 ref Unsafe.As<char, byte>(ref valueTail),
56 (nuint)valueTailLength * 2))
58 return index; // The tail matched. Return a successful find.
61 remainingSearchSpaceLength--;
62 index++;
64 return -1;
67 [MethodImpl(MethodImplOptions.AggressiveOptimization)]
68 public static unsafe int SequenceCompareTo(ref char first, int firstLength, ref char second, int secondLength)
70 Debug.Assert(firstLength >= 0);
71 Debug.Assert(secondLength >= 0);
73 int lengthDelta = firstLength - secondLength;
75 if (Unsafe.AreSame(ref first, ref second))
76 goto Equal;
78 IntPtr minLength = (IntPtr)((firstLength < secondLength) ? firstLength : secondLength);
79 IntPtr i = (IntPtr)0; // Use IntPtr for arithmetic to avoid unnecessary 64->32->64 truncations
81 if ((byte*)minLength >= (byte*)(sizeof(UIntPtr) / sizeof(char)))
83 if (Vector.IsHardwareAccelerated && (byte*)minLength >= (byte*)Vector<ushort>.Count)
85 IntPtr nLength = minLength - Vector<ushort>.Count;
88 if (Unsafe.ReadUnaligned<Vector<ushort>>(ref Unsafe.As<char, byte>(ref Unsafe.Add(ref first, i))) !=
89 Unsafe.ReadUnaligned<Vector<ushort>>(ref Unsafe.As<char, byte>(ref Unsafe.Add(ref second, i))))
91 break;
93 i += Vector<ushort>.Count;
95 while ((byte*)nLength >= (byte*)i);
98 while ((byte*)minLength >= (byte*)(i + sizeof(UIntPtr) / sizeof(char)))
100 if (Unsafe.ReadUnaligned<UIntPtr>(ref Unsafe.As<char, byte>(ref Unsafe.Add(ref first, i))) !=
101 Unsafe.ReadUnaligned<UIntPtr>(ref Unsafe.As<char, byte>(ref Unsafe.Add(ref second, i))))
103 break;
105 i += sizeof(UIntPtr) / sizeof(char);
109 if (sizeof(UIntPtr) > sizeof(int) && (byte*)minLength >= (byte*)(i + sizeof(int) / sizeof(char)))
111 if (Unsafe.ReadUnaligned<int>(ref Unsafe.As<char, byte>(ref Unsafe.Add(ref first, i))) ==
112 Unsafe.ReadUnaligned<int>(ref Unsafe.As<char, byte>(ref Unsafe.Add(ref second, i))))
114 i += sizeof(int) / sizeof(char);
118 while ((byte*)i < (byte*)minLength)
120 int result = Unsafe.Add(ref first, i).CompareTo(Unsafe.Add(ref second, i));
121 if (result != 0)
122 return result;
123 i += 1;
126 Equal:
127 return lengthDelta;
130 // Adapted from IndexOf(...)
131 [MethodImpl(MethodImplOptions.AggressiveOptimization)]
132 public static unsafe bool Contains(ref char searchSpace, char value, int length)
134 Debug.Assert(length >= 0);
136 fixed (char* pChars = &searchSpace)
138 char* pCh = pChars;
139 char* pEndCh = pCh + length;
141 if (Vector.IsHardwareAccelerated && length >= Vector<ushort>.Count * 2)
143 // Figure out how many characters to read sequentially until we are vector aligned
144 // This is equivalent to:
145 // unaligned = ((int)pCh % Unsafe.SizeOf<Vector<ushort>>()) / elementsPerByte
146 // length = (Vector<ushort>.Count - unaligned) % Vector<ushort>.Count
147 const int elementsPerByte = sizeof(ushort) / sizeof(byte);
148 int unaligned = ((int)pCh & (Unsafe.SizeOf<Vector<ushort>>() - 1)) / elementsPerByte;
149 length = (Vector<ushort>.Count - unaligned) & (Vector<ushort>.Count - 1);
152 SequentialScan:
153 while (length >= 4)
155 length -= 4;
157 if (value == *pCh ||
158 value == *(pCh + 1) ||
159 value == *(pCh + 2) ||
160 value == *(pCh + 3))
162 goto Found;
165 pCh += 4;
168 while (length > 0)
170 length -= 1;
172 if (value == *pCh)
173 goto Found;
175 pCh += 1;
178 // We get past SequentialScan only if IsHardwareAccelerated is true. However, we still have the redundant check to allow
179 // the JIT to see that the code is unreachable and eliminate it when the platform does not have hardware accelerated.
180 if (Vector.IsHardwareAccelerated && pCh < pEndCh)
182 // Get the highest multiple of Vector<ushort>.Count that is within the search space.
183 // That will be how many times we iterate in the loop below.
184 // This is equivalent to: length = Vector<ushort>.Count * ((int)(pEndCh - pCh) / Vector<ushort>.Count)
185 length = (int)((pEndCh - pCh) & ~(Vector<ushort>.Count - 1));
187 // Get comparison Vector
188 Vector<ushort> vComparison = new Vector<ushort>(value);
190 while (length > 0)
192 // Using Unsafe.Read instead of ReadUnaligned since the search space is pinned and pCh is always vector aligned
193 Debug.Assert(((int)pCh & (Unsafe.SizeOf<Vector<ushort>>() - 1)) == 0);
194 Vector<ushort> vMatches = Vector.Equals(vComparison, Unsafe.Read<Vector<ushort>>(pCh));
195 if (Vector<ushort>.Zero.Equals(vMatches))
197 pCh += Vector<ushort>.Count;
198 length -= Vector<ushort>.Count;
199 continue;
202 goto Found;
205 if (pCh < pEndCh)
207 length = (int)(pEndCh - pCh);
208 goto SequentialScan;
212 return false;
214 Found:
215 return true;
219 [MethodImpl(MethodImplOptions.AggressiveOptimization)]
220 public static unsafe int IndexOf(ref char searchSpace, char value, int length)
222 Debug.Assert(length >= 0);
224 nint offset = 0;
225 nint lengthToExamine = length;
227 if (((int)Unsafe.AsPointer(ref searchSpace) & 1) != 0)
229 // Input isn't char aligned, we won't be able to align it to a Vector
231 else if (Sse2.IsSupported)
233 // Avx2 branch also operates on Sse2 sizes, so check is combined.
234 // Needs to be double length to allow us to align the data first.
235 if (length >= Vector128<ushort>.Count * 2)
237 lengthToExamine = UnalignedCountVector128(ref searchSpace);
240 else if (Vector.IsHardwareAccelerated)
242 // Needs to be double length to allow us to align the data first.
243 if (length >= Vector<ushort>.Count * 2)
245 lengthToExamine = UnalignedCountVector(ref searchSpace);
249 SequentialScan:
250 // In the non-vector case lengthToExamine is the total length.
251 // In the vector case lengthToExamine first aligns to Vector,
252 // then in a second pass after the Vector lengths is the
253 // remaining data that is shorter than a Vector length.
254 while (lengthToExamine >= 4)
256 ref char current = ref Add(ref searchSpace, offset);
258 if (value == current)
259 goto Found;
260 if (value == Add(ref current, 1))
261 goto Found1;
262 if (value == Add(ref current, 2))
263 goto Found2;
264 if (value == Add(ref current, 3))
265 goto Found3;
267 offset += 4;
268 lengthToExamine -= 4;
271 while (lengthToExamine > 0)
273 if (value == Add(ref searchSpace, offset))
274 goto Found;
276 offset += 1;
277 lengthToExamine -= 1;
280 // We get past SequentialScan only if IsHardwareAccelerated or intrinsic .IsSupported is true. However, we still have the redundant check to allow
281 // the JIT to see that the code is unreachable and eliminate it when the platform does not have hardware accelerated.
282 if (Avx2.IsSupported)
284 if (offset < length)
286 Debug.Assert(length - offset >= Vector128<ushort>.Count);
287 if (((nint)Unsafe.AsPointer(ref Unsafe.Add(ref searchSpace, (IntPtr)offset)) & (nint)(Vector256<byte>.Count - 1)) != 0)
289 // Not currently aligned to Vector256 (is aligned to Vector128); this can cause a problem for searches
290 // with no upper bound e.g. String.wcslen. Start with a check on Vector128 to align to Vector256,
291 // before moving to processing Vector256.
293 // If the input searchSpan has been fixed or pinned, this ensures we do not fault across memory pages
294 // while searching for an end of string. Specifically that this assumes that the length is either correct
295 // or that the data is pinned otherwise it may cause an AccessViolation from crossing a page boundary into an
296 // unowned page. If the search is unbounded (e.g. null terminator in wcslen) and the search value is not found,
297 // again this will likely cause an AccessViolation. However, correctly bounded searches will return -1 rather
298 // than ever causing an AV.
300 // If the searchSpan has not been fixed or pinned the GC can relocate it during the execution of this
301 // method, so the alignment only acts as best endeavour. The GC cost is likely to dominate over
302 // the misalignment that may occur after; to we default to giving the GC a free hand to relocate and
303 // its up to the caller whether they are operating over fixed data.
304 Vector128<ushort> values = Vector128.Create((ushort)value);
305 Vector128<ushort> search = LoadVector128(ref searchSpace, offset);
307 // Same method as below
308 int matches = Sse2.MoveMask(Sse2.CompareEqual(values, search).AsByte());
309 if (matches == 0)
311 // Zero flags set so no matches
312 offset += Vector128<ushort>.Count;
314 else
316 // Find bitflag offset of first match and add to current offset
317 return (int)(offset + (BitOperations.TrailingZeroCount(matches) / sizeof(char)));
321 lengthToExamine = GetCharVector256SpanLength(offset, length);
322 if (lengthToExamine > 0)
324 Vector256<ushort> values = Vector256.Create((ushort)value);
327 Debug.Assert(lengthToExamine >= Vector256<ushort>.Count);
329 Vector256<ushort> search = LoadVector256(ref searchSpace, offset);
330 int matches = Avx2.MoveMask(Avx2.CompareEqual(values, search).AsByte());
331 // Note that MoveMask has converted the equal vector elements into a set of bit flags,
332 // So the bit position in 'matches' corresponds to the element offset.
333 if (matches == 0)
335 // Zero flags set so no matches
336 offset += Vector256<ushort>.Count;
337 lengthToExamine -= Vector256<ushort>.Count;
338 continue;
341 // Find bitflag offset of first match and add to current offset,
342 // flags are in bytes so divide for chars
343 return (int)(offset + (BitOperations.TrailingZeroCount(matches) / sizeof(char)));
344 } while (lengthToExamine > 0);
347 lengthToExamine = GetCharVector128SpanLength(offset, length);
348 if (lengthToExamine > 0)
350 Debug.Assert(lengthToExamine >= Vector128<ushort>.Count);
352 Vector128<ushort> values = Vector128.Create((ushort)value);
353 Vector128<ushort> search = LoadVector128(ref searchSpace, offset);
355 // Same method as above
356 int matches = Sse2.MoveMask(Sse2.CompareEqual(values, search).AsByte());
357 if (matches == 0)
359 // Zero flags set so no matches
360 offset += Vector128<ushort>.Count;
361 // Don't need to change lengthToExamine here as we don't use its current value again.
363 else
365 // Find bitflag offset of first match and add to current offset,
366 // flags are in bytes so divide for chars
367 return (int)(offset + (BitOperations.TrailingZeroCount(matches) / sizeof(char)));
371 if (offset < length)
373 lengthToExamine = length - offset;
374 goto SequentialScan;
378 else if (Sse2.IsSupported)
380 if (offset < length)
382 Debug.Assert(length - offset >= Vector128<ushort>.Count);
384 lengthToExamine = GetCharVector128SpanLength(offset, length);
385 if (lengthToExamine > 0)
387 Vector128<ushort> values = Vector128.Create((ushort)value);
390 Debug.Assert(lengthToExamine >= Vector128<ushort>.Count);
392 Vector128<ushort> search = LoadVector128(ref searchSpace, offset);
394 // Same method as above
395 int matches = Sse2.MoveMask(Sse2.CompareEqual(values, search).AsByte());
396 if (matches == 0)
398 // Zero flags set so no matches
399 offset += Vector128<ushort>.Count;
400 lengthToExamine -= Vector128<ushort>.Count;
401 continue;
404 // Find bitflag offset of first match and add to current offset,
405 // flags are in bytes so divide for chars
406 return (int)(offset + (BitOperations.TrailingZeroCount(matches) / sizeof(char)));
407 } while (lengthToExamine > 0);
410 if (offset < length)
412 lengthToExamine = length - offset;
413 goto SequentialScan;
417 else if (Vector.IsHardwareAccelerated)
419 if (offset < length)
421 Debug.Assert(length - offset >= Vector<ushort>.Count);
423 lengthToExamine = GetCharVectorSpanLength(offset, length);
425 if (lengthToExamine > 0)
427 Vector<ushort> values = new Vector<ushort>((ushort)value);
430 Debug.Assert(lengthToExamine >= Vector<ushort>.Count);
432 var matches = Vector.Equals(values, LoadVector(ref searchSpace, offset));
433 if (Vector<ushort>.Zero.Equals(matches))
435 offset += Vector<ushort>.Count;
436 lengthToExamine -= Vector<ushort>.Count;
437 continue;
440 // Find offset of first match
441 return (int)(offset + LocateFirstFoundChar(matches));
442 } while (lengthToExamine > 0);
445 if (offset < length)
447 lengthToExamine = length - offset;
448 goto SequentialScan;
452 return -1;
453 Found3:
454 return (int)(offset + 3);
455 Found2:
456 return (int)(offset + 2);
457 Found1:
458 return (int)(offset + 1);
459 Found:
460 return (int)(offset);
463 [MethodImpl(MethodImplOptions.AggressiveOptimization)]
464 public static unsafe int IndexOfAny(ref char searchSpace, char value0, char value1, int length)
466 Debug.Assert(length >= 0);
468 fixed (char* pChars = &searchSpace)
470 char* pCh = pChars;
471 char* pEndCh = pCh + length;
473 if (Vector.IsHardwareAccelerated && length >= Vector<ushort>.Count * 2)
475 // Figure out how many characters to read sequentially until we are vector aligned
476 // This is equivalent to:
477 // unaligned = ((int)pCh % Unsafe.SizeOf<Vector<ushort>>()) / elementsPerByte
478 // length = (Vector<ushort>.Count - unaligned) % Vector<ushort>.Count
479 const int elementsPerByte = sizeof(ushort) / sizeof(byte);
480 int unaligned = ((int)pCh & (Unsafe.SizeOf<Vector<ushort>>() - 1)) / elementsPerByte;
481 length = (Vector<ushort>.Count - unaligned) & (Vector<ushort>.Count - 1);
484 SequentialScan:
485 while (length >= 4)
487 length -= 4;
489 if (pCh[0] == value0 || pCh[0] == value1)
490 goto Found;
491 if (pCh[1] == value0 || pCh[1] == value1)
492 goto Found1;
493 if (pCh[2] == value0 || pCh[2] == value1)
494 goto Found2;
495 if (pCh[3] == value0 || pCh[3] == value1)
496 goto Found3;
498 pCh += 4;
501 while (length > 0)
503 length--;
505 if (pCh[0] == value0 || pCh[0] == value1)
506 goto Found;
508 pCh++;
511 // We get past SequentialScan only if IsHardwareAccelerated is true. However, we still have the redundant check to allow
512 // the JIT to see that the code is unreachable and eliminate it when the platform does not have hardware accelerated.
513 if (Vector.IsHardwareAccelerated && pCh < pEndCh)
515 // Get the highest multiple of Vector<ushort>.Count that is within the search space.
516 // That will be how many times we iterate in the loop below.
517 // This is equivalent to: length = Vector<ushort>.Count * ((int)(pEndCh - pCh) / Vector<ushort>.Count)
518 length = (int)((pEndCh - pCh) & ~(Vector<ushort>.Count - 1));
520 // Get comparison Vector
521 Vector<ushort> values0 = new Vector<ushort>(value0);
522 Vector<ushort> values1 = new Vector<ushort>(value1);
524 while (length > 0)
526 // Using Unsafe.Read instead of ReadUnaligned since the search space is pinned and pCh is always vector aligned
527 Debug.Assert(((int)pCh & (Unsafe.SizeOf<Vector<ushort>>() - 1)) == 0);
528 Vector<ushort> vData = Unsafe.Read<Vector<ushort>>(pCh);
529 var vMatches = Vector.BitwiseOr(
530 Vector.Equals(vData, values0),
531 Vector.Equals(vData, values1));
532 if (Vector<ushort>.Zero.Equals(vMatches))
534 pCh += Vector<ushort>.Count;
535 length -= Vector<ushort>.Count;
536 continue;
538 // Find offset of first match
539 return (int)(pCh - pChars) + LocateFirstFoundChar(vMatches);
542 if (pCh < pEndCh)
544 length = (int)(pEndCh - pCh);
545 goto SequentialScan;
549 return -1;
550 Found3:
551 pCh++;
552 Found2:
553 pCh++;
554 Found1:
555 pCh++;
556 Found:
557 return (int)(pCh - pChars);
561 [MethodImpl(MethodImplOptions.AggressiveOptimization)]
562 public static unsafe int IndexOfAny(ref char searchSpace, char value0, char value1, char value2, int length)
564 Debug.Assert(length >= 0);
566 fixed (char* pChars = &searchSpace)
568 char* pCh = pChars;
569 char* pEndCh = pCh + length;
571 if (Vector.IsHardwareAccelerated && length >= Vector<ushort>.Count * 2)
573 // Figure out how many characters to read sequentially until we are vector aligned
574 // This is equivalent to:
575 // unaligned = ((int)pCh % Unsafe.SizeOf<Vector<ushort>>()) / elementsPerByte
576 // length = (Vector<ushort>.Count - unaligned) % Vector<ushort>.Count
577 const int elementsPerByte = sizeof(ushort) / sizeof(byte);
578 int unaligned = ((int)pCh & (Unsafe.SizeOf<Vector<ushort>>() - 1)) / elementsPerByte;
579 length = (Vector<ushort>.Count - unaligned) & (Vector<ushort>.Count - 1);
582 SequentialScan:
583 while (length >= 4)
585 length -= 4;
587 if (pCh[0] == value0 || pCh[0] == value1 || pCh[0] == value2)
588 goto Found;
589 if (pCh[1] == value0 || pCh[1] == value1 || pCh[1] == value2)
590 goto Found1;
591 if (pCh[2] == value0 || pCh[2] == value1 || pCh[2] == value2)
592 goto Found2;
593 if (pCh[3] == value0 || pCh[3] == value1 || pCh[3] == value2)
594 goto Found3;
596 pCh += 4;
599 while (length > 0)
601 length--;
603 if (pCh[0] == value0 || pCh[0] == value1 || pCh[0] == value2)
604 goto Found;
606 pCh++;
609 // We get past SequentialScan only if IsHardwareAccelerated is true. However, we still have the redundant check to allow
610 // the JIT to see that the code is unreachable and eliminate it when the platform does not have hardware accelerated.
611 if (Vector.IsHardwareAccelerated && pCh < pEndCh)
613 // Get the highest multiple of Vector<ushort>.Count that is within the search space.
614 // That will be how many times we iterate in the loop below.
615 // This is equivalent to: length = Vector<ushort>.Count * ((int)(pEndCh - pCh) / Vector<ushort>.Count)
616 length = (int)((pEndCh - pCh) & ~(Vector<ushort>.Count - 1));
618 // Get comparison Vector
619 Vector<ushort> values0 = new Vector<ushort>(value0);
620 Vector<ushort> values1 = new Vector<ushort>(value1);
621 Vector<ushort> values2 = new Vector<ushort>(value2);
623 while (length > 0)
625 // Using Unsafe.Read instead of ReadUnaligned since the search space is pinned and pCh is always vector aligned
626 Debug.Assert(((int)pCh & (Unsafe.SizeOf<Vector<ushort>>() - 1)) == 0);
627 Vector<ushort> vData = Unsafe.Read<Vector<ushort>>(pCh);
628 var vMatches = Vector.BitwiseOr(
629 Vector.BitwiseOr(
630 Vector.Equals(vData, values0),
631 Vector.Equals(vData, values1)),
632 Vector.Equals(vData, values2));
634 if (Vector<ushort>.Zero.Equals(vMatches))
636 pCh += Vector<ushort>.Count;
637 length -= Vector<ushort>.Count;
638 continue;
640 // Find offset of first match
641 return (int)(pCh - pChars) + LocateFirstFoundChar(vMatches);
644 if (pCh < pEndCh)
646 length = (int)(pEndCh - pCh);
647 goto SequentialScan;
650 return -1;
651 Found3:
652 pCh++;
653 Found2:
654 pCh++;
655 Found1:
656 pCh++;
657 Found:
658 return (int)(pCh - pChars);
662 [MethodImpl(MethodImplOptions.AggressiveOptimization)]
663 public static unsafe int IndexOfAny(ref char searchSpace, char value0, char value1, char value2, char value3, int length)
665 Debug.Assert(length >= 0);
667 fixed (char* pChars = &searchSpace)
669 char* pCh = pChars;
670 char* pEndCh = pCh + length;
672 if (Vector.IsHardwareAccelerated && length >= Vector<ushort>.Count * 2)
674 // Figure out how many characters to read sequentially until we are vector aligned
675 // This is equivalent to:
676 // unaligned = ((int)pCh % Unsafe.SizeOf<Vector<ushort>>()) / elementsPerByte
677 // length = (Vector<ushort>.Count - unaligned) % Vector<ushort>.Count
678 const int elementsPerByte = sizeof(ushort) / sizeof(byte);
679 int unaligned = ((int)pCh & (Unsafe.SizeOf<Vector<ushort>>() - 1)) / elementsPerByte;
680 length = (Vector<ushort>.Count - unaligned) & (Vector<ushort>.Count - 1);
683 SequentialScan:
684 while (length >= 4)
686 length -= 4;
688 if (pCh[0] == value0 || pCh[0] == value1 || pCh[0] == value2 || pCh[0] == value3)
689 goto Found;
690 if (pCh[1] == value0 || pCh[1] == value1 || pCh[1] == value2 || pCh[1] == value3)
691 goto Found1;
692 if (pCh[2] == value0 || pCh[2] == value1 || pCh[2] == value2 || pCh[2] == value3)
693 goto Found2;
694 if (pCh[3] == value0 || pCh[3] == value1 || pCh[3] == value2 || pCh[3] == value3)
695 goto Found3;
697 pCh += 4;
700 while (length > 0)
702 length--;
704 if (pCh[0] == value0 || pCh[0] == value1 || pCh[0] == value2 || pCh[0] == value3)
705 goto Found;
707 pCh++;
710 // We get past SequentialScan only if IsHardwareAccelerated is true. However, we still have the redundant check to allow
711 // the JIT to see that the code is unreachable and eliminate it when the platform does not have hardware accelerated.
712 if (Vector.IsHardwareAccelerated && pCh < pEndCh)
714 // Get the highest multiple of Vector<ushort>.Count that is within the search space.
715 // That will be how many times we iterate in the loop below.
716 // This is equivalent to: length = Vector<ushort>.Count * ((int)(pEndCh - pCh) / Vector<ushort>.Count)
717 length = (int)((pEndCh - pCh) & ~(Vector<ushort>.Count - 1));
719 // Get comparison Vector
720 Vector<ushort> values0 = new Vector<ushort>(value0);
721 Vector<ushort> values1 = new Vector<ushort>(value1);
722 Vector<ushort> values2 = new Vector<ushort>(value2);
723 Vector<ushort> values3 = new Vector<ushort>(value3);
725 while (length > 0)
727 // Using Unsafe.Read instead of ReadUnaligned since the search space is pinned and pCh is always vector aligned
728 Debug.Assert(((int)pCh & (Unsafe.SizeOf<Vector<ushort>>() - 1)) == 0);
729 Vector<ushort> vData = Unsafe.Read<Vector<ushort>>(pCh);
730 var vMatches = Vector.BitwiseOr(
731 Vector.BitwiseOr(
732 Vector.BitwiseOr(Vector.Equals(vData, values0), Vector.Equals(vData, values1)),
733 Vector.Equals(vData, values2)),
734 Vector.Equals(vData, values3));
736 if (Vector<ushort>.Zero.Equals(vMatches))
738 pCh += Vector<ushort>.Count;
739 length -= Vector<ushort>.Count;
740 continue;
742 // Find offset of first match
743 return (int)(pCh - pChars) + LocateFirstFoundChar(vMatches);
746 if (pCh < pEndCh)
748 length = (int)(pEndCh - pCh);
749 goto SequentialScan;
753 return -1;
754 Found3:
755 pCh++;
756 Found2:
757 pCh++;
758 Found1:
759 pCh++;
760 Found:
761 return (int)(pCh - pChars);
765 [MethodImpl(MethodImplOptions.AggressiveOptimization)]
766 public static unsafe int IndexOfAny(ref char searchSpace, char value0, char value1, char value2, char value3, char value4, int length)
768 Debug.Assert(length >= 0);
770 fixed (char* pChars = &searchSpace)
772 char* pCh = pChars;
773 char* pEndCh = pCh + length;
775 if (Vector.IsHardwareAccelerated && length >= Vector<ushort>.Count * 2)
777 // Figure out how many characters to read sequentially until we are vector aligned
778 // This is equivalent to:
779 // unaligned = ((int)pCh % Unsafe.SizeOf<Vector<ushort>>()) / elementsPerByte
780 // length = (Vector<ushort>.Count - unaligned) % Vector<ushort>.Count
781 const int elementsPerByte = sizeof(ushort) / sizeof(byte);
782 int unaligned = ((int)pCh & (Unsafe.SizeOf<Vector<ushort>>() - 1)) / elementsPerByte;
783 length = (Vector<ushort>.Count - unaligned) & (Vector<ushort>.Count - 1);
786 SequentialScan:
787 while (length >= 4)
789 length -= 4;
791 if (pCh[0] == value0 || pCh[0] == value1 || pCh[0] == value2 || pCh[0] == value3 || pCh[0] == value4)
792 goto Found;
793 if (pCh[1] == value0 || pCh[1] == value1 || pCh[1] == value2 || pCh[1] == value3 || pCh[1] == value4)
794 goto Found1;
795 if (pCh[2] == value0 || pCh[2] == value1 || pCh[2] == value2 || pCh[2] == value3 || pCh[2] == value4)
796 goto Found2;
797 if (pCh[3] == value0 || pCh[3] == value1 || pCh[3] == value2 || pCh[3] == value3 || pCh[3] == value4)
798 goto Found3;
800 pCh += 4;
803 while (length > 0)
805 length--;
807 if (pCh[0] == value0 || pCh[0] == value1 || pCh[0] == value2 || pCh[0] == value3 || pCh[0] == value4)
808 goto Found;
810 pCh++;
813 // We get past SequentialScan only if IsHardwareAccelerated is true. However, we still have the redundant check to allow
814 // the JIT to see that the code is unreachable and eliminate it when the platform does not have hardware accelerated.
815 if (Vector.IsHardwareAccelerated && pCh < pEndCh)
817 // Get the highest multiple of Vector<ushort>.Count that is within the search space.
818 // That will be how many times we iterate in the loop below.
819 // This is equivalent to: length = Vector<ushort>.Count * ((int)(pEndCh - pCh) / Vector<ushort>.Count)
820 length = (int)((pEndCh - pCh) & ~(Vector<ushort>.Count - 1));
822 // Get comparison Vector
823 Vector<ushort> values0 = new Vector<ushort>(value0);
824 Vector<ushort> values1 = new Vector<ushort>(value1);
825 Vector<ushort> values2 = new Vector<ushort>(value2);
826 Vector<ushort> values3 = new Vector<ushort>(value3);
827 Vector<ushort> values4 = new Vector<ushort>(value4);
829 while (length > 0)
831 // Using Unsafe.Read instead of ReadUnaligned since the search space is pinned and pCh is always vector aligned
832 Debug.Assert(((int)pCh & (Unsafe.SizeOf<Vector<ushort>>() - 1)) == 0);
833 Vector<ushort> vData = Unsafe.Read<Vector<ushort>>(pCh);
834 var vMatches = Vector.BitwiseOr(
835 Vector.BitwiseOr(
836 Vector.BitwiseOr(
837 Vector.BitwiseOr(Vector.Equals(vData, values0), Vector.Equals(vData, values1)),
838 Vector.Equals(vData, values2)),
839 Vector.Equals(vData, values3)),
840 Vector.Equals(vData, values4));
842 if (Vector<ushort>.Zero.Equals(vMatches))
844 pCh += Vector<ushort>.Count;
845 length -= Vector<ushort>.Count;
846 continue;
848 // Find offset of first match
849 return (int)(pCh - pChars) + LocateFirstFoundChar(vMatches);
852 if (pCh < pEndCh)
854 length = (int)(pEndCh - pCh);
855 goto SequentialScan;
859 return -1;
860 Found3:
861 pCh++;
862 Found2:
863 pCh++;
864 Found1:
865 pCh++;
866 Found:
867 return (int)(pCh - pChars);
871 [MethodImpl(MethodImplOptions.AggressiveOptimization)]
872 public static unsafe int LastIndexOf(ref char searchSpace, char value, int length)
874 Debug.Assert(length >= 0);
876 fixed (char* pChars = &searchSpace)
878 char* pCh = pChars + length;
879 char* pEndCh = pChars;
881 if (Vector.IsHardwareAccelerated && length >= Vector<ushort>.Count * 2)
883 // Figure out how many characters to read sequentially from the end until we are vector aligned
884 // This is equivalent to: length = ((int)pCh % Unsafe.SizeOf<Vector<ushort>>()) / elementsPerByte
885 const int elementsPerByte = sizeof(ushort) / sizeof(byte);
886 length = ((int)pCh & (Unsafe.SizeOf<Vector<ushort>>() - 1)) / elementsPerByte;
889 SequentialScan:
890 while (length >= 4)
892 length -= 4;
893 pCh -= 4;
895 if (*(pCh + 3) == value)
896 goto Found3;
897 if (*(pCh + 2) == value)
898 goto Found2;
899 if (*(pCh + 1) == value)
900 goto Found1;
901 if (*pCh == value)
902 goto Found;
905 while (length > 0)
907 length -= 1;
908 pCh -= 1;
910 if (*pCh == value)
911 goto Found;
914 // We get past SequentialScan only if IsHardwareAccelerated is true. However, we still have the redundant check to allow
915 // the JIT to see that the code is unreachable and eliminate it when the platform does not have hardware accelerated.
916 if (Vector.IsHardwareAccelerated && pCh > pEndCh)
918 // Get the highest multiple of Vector<ushort>.Count that is within the search space.
919 // That will be how many times we iterate in the loop below.
920 // This is equivalent to: length = Vector<ushort>.Count * ((int)(pCh - pEndCh) / Vector<ushort>.Count)
921 length = (int)((pCh - pEndCh) & ~(Vector<ushort>.Count - 1));
923 // Get comparison Vector
924 Vector<ushort> vComparison = new Vector<ushort>(value);
926 while (length > 0)
928 char* pStart = pCh - Vector<ushort>.Count;
929 // Using Unsafe.Read instead of ReadUnaligned since the search space is pinned and pCh (and hence pSart) is always vector aligned
930 Debug.Assert(((int)pStart & (Unsafe.SizeOf<Vector<ushort>>() - 1)) == 0);
931 Vector<ushort> vMatches = Vector.Equals(vComparison, Unsafe.Read<Vector<ushort>>(pStart));
932 if (Vector<ushort>.Zero.Equals(vMatches))
934 pCh -= Vector<ushort>.Count;
935 length -= Vector<ushort>.Count;
936 continue;
938 // Find offset of last match
939 return (int)(pStart - pEndCh) + LocateLastFoundChar(vMatches);
942 if (pCh > pEndCh)
944 length = (int)(pCh - pEndCh);
945 goto SequentialScan;
949 return -1;
950 Found:
951 return (int)(pCh - pEndCh);
952 Found1:
953 return (int)(pCh - pEndCh) + 1;
954 Found2:
955 return (int)(pCh - pEndCh) + 2;
956 Found3:
957 return (int)(pCh - pEndCh) + 3;
961 // Vector sub-search adapted from https://github.com/aspnet/KestrelHttpServer/pull/1138
962 [MethodImpl(MethodImplOptions.AggressiveInlining)]
963 private static int LocateFirstFoundChar(Vector<ushort> match)
965 var vector64 = Vector.AsVectorUInt64(match);
966 ulong candidate = 0;
967 int i = 0;
968 // Pattern unrolled by jit https://github.com/dotnet/coreclr/pull/8001
969 for (; i < Vector<ulong>.Count; i++)
971 candidate = vector64[i];
972 if (candidate != 0)
974 break;
978 // Single LEA instruction with jitted const (using function result)
979 return i * 4 + LocateFirstFoundChar(candidate);
982 [MethodImpl(MethodImplOptions.AggressiveInlining)]
983 private static int LocateFirstFoundChar(ulong match)
985 // TODO: Arm variants
986 if (Bmi1.X64.IsSupported)
988 return (int)(Bmi1.X64.TrailingZeroCount(match) >> 4);
990 else
992 unchecked
994 // Flag least significant power of two bit
995 var powerOfTwoFlag = match ^ (match - 1);
996 // Shift all powers of two into the high byte and extract
997 return (int)((powerOfTwoFlag * XorPowerOfTwoToHighChar) >> 49);
1002 private const ulong XorPowerOfTwoToHighChar = (0x03ul |
1003 0x02ul << 16 |
1004 0x01ul << 32) + 1;
1006 // Vector sub-search adapted from https://github.com/aspnet/KestrelHttpServer/pull/1138
1007 [MethodImpl(MethodImplOptions.AggressiveInlining)]
1008 private static int LocateLastFoundChar(Vector<ushort> match)
1010 var vector64 = Vector.AsVectorUInt64(match);
1011 ulong candidate = 0;
1012 int i = Vector<ulong>.Count - 1;
1013 // Pattern unrolled by jit https://github.com/dotnet/coreclr/pull/8001
1014 for (; i >= 0; i--)
1016 candidate = vector64[i];
1017 if (candidate != 0)
1019 break;
1023 // Single LEA instruction with jitted const (using function result)
1024 return i * 4 + LocateLastFoundChar(candidate);
1027 [MethodImpl(MethodImplOptions.AggressiveInlining)]
1028 private static int LocateLastFoundChar(ulong match)
1030 return 3 - (BitOperations.LeadingZeroCount(match) >> 4);
1033 [MethodImpl(MethodImplOptions.AggressiveInlining)]
1034 public static ref char Add(ref char source, nint elementOffset)
1035 => ref Unsafe.Add(ref source, (IntPtr)elementOffset);
1037 [MethodImpl(MethodImplOptions.AggressiveInlining)]
1038 private static unsafe Vector<ushort> LoadVector(ref char start, nint offset)
1039 => Unsafe.ReadUnaligned<Vector<ushort>>(ref Unsafe.As<char, byte>(ref Unsafe.Add(ref start, (IntPtr)offset)));
1041 [MethodImpl(MethodImplOptions.AggressiveInlining)]
1042 private static unsafe Vector128<ushort> LoadVector128(ref char start, nint offset)
1043 => Unsafe.ReadUnaligned<Vector128<ushort>>(ref Unsafe.As<char, byte>(ref Unsafe.Add(ref start, (IntPtr)offset)));
1045 [MethodImpl(MethodImplOptions.AggressiveInlining)]
1046 private static unsafe Vector256<ushort> LoadVector256(ref char start, nint offset)
1047 => Unsafe.ReadUnaligned<Vector256<ushort>>(ref Unsafe.As<char, byte>(ref Unsafe.Add(ref start, (IntPtr)offset)));
1049 [MethodImpl(MethodImplOptions.AggressiveInlining)]
1050 private static unsafe UIntPtr LoadUIntPtr(ref char start, nint offset)
1051 => Unsafe.ReadUnaligned<UIntPtr>(ref Unsafe.As<char, byte>(ref Unsafe.Add(ref start, (IntPtr)offset)));
1053 [MethodImpl(MethodImplOptions.AggressiveInlining)]
1054 private static unsafe nint GetCharVectorSpanLength(nint offset, nint length)
1055 => ((length - offset) & ~(Vector<ushort>.Count - 1));
1057 [MethodImpl(MethodImplOptions.AggressiveInlining)]
1058 private static unsafe nint GetCharVector128SpanLength(nint offset, nint length)
1059 => ((length - offset) & ~(Vector128<ushort>.Count - 1));
1061 [MethodImpl(MethodImplOptions.AggressiveInlining)]
1062 private static nint GetCharVector256SpanLength(nint offset, nint length)
1063 => ((length - offset) & ~(Vector256<ushort>.Count - 1));
1065 [MethodImpl(MethodImplOptions.AggressiveInlining)]
1066 private static unsafe nint UnalignedCountVector(ref char searchSpace)
1068 const int ElementsPerByte = sizeof(ushort) / sizeof(byte);
1069 // Figure out how many characters to read sequentially until we are vector aligned
1070 // This is equivalent to:
1071 // unaligned = ((int)pCh % Unsafe.SizeOf<Vector<ushort>>()) / ElementsPerByte
1072 // length = (Vector<ushort>.Count - unaligned) % Vector<ushort>.Count
1074 // This alignment is only valid if the GC does not relocate; so we use ReadUnaligned to get the data.
1075 // If a GC does occur and alignment is lost, the GC cost will outweigh any gains from alignment so it
1076 // isn't too important to pin to maintain the alignment.
1077 return (nint)(uint)(-(int)Unsafe.AsPointer(ref searchSpace) / ElementsPerByte ) & (Vector<ushort>.Count - 1);
1080 [MethodImpl(MethodImplOptions.AggressiveInlining)]
1081 private static unsafe nint UnalignedCountVector128(ref char searchSpace)
1083 const int ElementsPerByte = sizeof(ushort) / sizeof(byte);
1084 // This alignment is only valid if the GC does not relocate; so we use ReadUnaligned to get the data.
1085 // If a GC does occur and alignment is lost, the GC cost will outweigh any gains from alignment so it
1086 // isn't too important to pin to maintain the alignment.
1087 return (nint)(uint)(-(int)Unsafe.AsPointer(ref searchSpace) / ElementsPerByte ) & (Vector128<ushort>.Count - 1);