1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 // This file should only be compiled if you're on x86 or x86_64. Additionally,
8 // you'll need to compile this file with -msse2 if you're using gcc.
10 #include <emmintrin.h>
12 #include "nsAlgorithm.h"
13 #include "nsTextFragmentImpl.h"
16 namespace mozilla::SSE2
{
// Returns true iff all 128 bits of |x| are zero.
static inline bool is_zero(__m128i x) {
  // Compare each of the sixteen bytes with zero. _mm_movemask_epi8 gathers the
  // top bit of every byte of the comparison result, so 0xffff means that all
  // sixteen bytes compared equal to zero.
  const __m128i bytesEqualZero = _mm_cmpeq_epi8(x, _mm_setzero_si128());
  return _mm_movemask_epi8(bytesEqualZero) == 0xffff;
}
22 int32_t FirstNon8Bit(const char16_t
* str
, const char16_t
* end
) {
23 const uint32_t numUnicharsPerVector
= 8;
24 using p
= Non8BitParameters
<sizeof(size_t)>;
25 const size_t mask
= p::mask();
26 const uint32_t numUnicharsPerWord
= p::numUnicharsPerWord();
27 const int32_t len
= end
- str
;
30 // Align ourselves to a 16-byte boundary, as required by _mm_load_si128
32 int32_t alignLen
= std::min(
33 len
, int32_t(((-NS_PTR_TO_INT32(str
)) & 0xf) / sizeof(char16_t
)));
34 for (; i
< alignLen
; i
++) {
35 if (str
[i
] > 255) return i
;
38 // Check one XMM register (16 bytes) at a time.
39 const int32_t vectWalkEnd
=
40 ((len
- i
) / numUnicharsPerVector
) * numUnicharsPerVector
;
41 const uint16_t shortMask
= 0xff00;
42 __m128i vectmask
= _mm_set1_epi16(static_cast<int16_t>(shortMask
));
43 for (; i
< vectWalkEnd
; i
+= numUnicharsPerVector
) {
44 const __m128i vect
= *reinterpret_cast<const __m128i
*>(str
+ i
);
45 if (!is_zero(_mm_and_si128(vect
, vectmask
))) return i
;
48 // Check one word at a time.
49 const int32_t wordWalkEnd
=
50 ((len
- i
) / numUnicharsPerWord
) * numUnicharsPerWord
;
51 for (; i
< wordWalkEnd
; i
+= numUnicharsPerWord
) {
52 const size_t word
= *reinterpret_cast<const size_t*>(str
+ i
);
53 if (word
& mask
) return i
;
56 // Take care of the remainder one character at a time.
57 for (; i
< len
; i
++) {
66 } // namespace mozilla::SSE2