1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "base/utf_offset_string_conversions.h"
9 #include "base/memory/scoped_ptr.h"
10 #include "base/string_piece.h"
11 #include "base/utf_string_conversion_utils.h"
13 using base::PrepareForUTF16Or32Output
;
14 using base::ReadUnicodeCharacter
;
15 using base::WriteUnicodeCharacter
;
17 // Converts the given source Unicode character type to the given destination
18 // Unicode character type as a STL string. The given input buffer and size
19 // determine the source, and the given output STL string will be replaced by
21 bool ConvertUnicode(const char* src
,
24 std::vector
<size_t>* offsets_for_adjustment
) {
25 if (offsets_for_adjustment
) {
26 std::for_each(offsets_for_adjustment
->begin(),
27 offsets_for_adjustment
->end(),
28 LimitOffset
<string16
>(src_len
));
31 // ICU requires 32-bit numbers.
33 OffsetAdjuster
offset_adjuster(offsets_for_adjustment
);
34 int32 src_len32
= static_cast<int32
>(src_len
);
35 for (int32 i
= 0; i
< src_len32
; i
++) {
37 size_t original_i
= i
;
38 size_t chars_written
= 0;
39 if (ReadUnicodeCharacter(src
, src_len32
, &i
, &code_point
)) {
40 chars_written
= WriteUnicodeCharacter(code_point
, output
);
42 chars_written
= WriteUnicodeCharacter(0xFFFD, output
);
45 if (offsets_for_adjustment
) {
46 // NOTE: ReadUnicodeCharacter() adjusts |i| to point _at_ the last
47 // character read, not after it (so that incrementing it in the loop
48 // increment will place it at the right location), so we need to account
49 // for that in determining the amount that was read.
50 offset_adjuster
.Add(OffsetAdjuster::Adjustment(original_i
,
51 i
- original_i
+ 1, chars_written
));
57 bool UTF8ToUTF16AndAdjustOffset(const char* src
,
60 size_t* offset_for_adjustment
) {
61 std::vector
<size_t> offsets
;
62 if (offset_for_adjustment
)
63 offsets
.push_back(*offset_for_adjustment
);
64 PrepareForUTF16Or32Output(src
, src_len
, output
);
65 bool ret
= ConvertUnicode(src
, src_len
, output
, &offsets
);
66 if (offset_for_adjustment
)
67 *offset_for_adjustment
= offsets
[0];
71 bool UTF8ToUTF16AndAdjustOffsets(const char* src
,
74 std::vector
<size_t>* offsets_for_adjustment
) {
75 PrepareForUTF16Or32Output(src
, src_len
, output
);
76 return ConvertUnicode(src
, src_len
, output
, offsets_for_adjustment
);
79 string16
UTF8ToUTF16AndAdjustOffset(const base::StringPiece
& utf8
,
80 size_t* offset_for_adjustment
) {
81 std::vector
<size_t> offsets
;
82 if (offset_for_adjustment
)
83 offsets
.push_back(*offset_for_adjustment
);
85 UTF8ToUTF16AndAdjustOffsets(utf8
.data(), utf8
.length(), &result
,
87 if (offset_for_adjustment
)
88 *offset_for_adjustment
= offsets
[0];
92 string16
UTF8ToUTF16AndAdjustOffsets(
93 const base::StringPiece
& utf8
,
94 std::vector
<size_t>* offsets_for_adjustment
) {
96 UTF8ToUTF16AndAdjustOffsets(utf8
.data(), utf8
.length(), &result
,
97 offsets_for_adjustment
);
101 OffsetAdjuster::Adjustment::Adjustment(size_t original_offset
,
102 size_t original_length
,
103 size_t output_length
)
104 : original_offset(original_offset
),
105 original_length(original_length
),
106 output_length(output_length
) {
109 OffsetAdjuster::OffsetAdjuster(std::vector
<size_t>* offsets_for_adjustment
)
110 : offsets_for_adjustment_(offsets_for_adjustment
) {
113 OffsetAdjuster::~OffsetAdjuster() {
114 if (!offsets_for_adjustment_
|| adjustments_
.empty())
116 for (std::vector
<size_t>::iterator
i(offsets_for_adjustment_
->begin());
117 i
!= offsets_for_adjustment_
->end(); ++i
)
121 void OffsetAdjuster::Add(const Adjustment
& adjustment
) {
122 adjustments_
.push_back(adjustment
);
125 void OffsetAdjuster::AdjustOffset(std::vector
<size_t>::iterator offset
) {
126 if (*offset
== string16::npos
)
128 size_t adjustment
= 0;
129 for (std::vector
<Adjustment
>::const_iterator i
= adjustments_
.begin();
130 i
!= adjustments_
.end(); ++i
) {
131 if (*offset
== i
->original_offset
&& i
->output_length
== 0) {
132 *offset
= string16::npos
;
135 if (*offset
<= i
->original_offset
)
137 if (*offset
< (i
->original_offset
+ i
->original_length
)) {
138 *offset
= string16::npos
;
141 adjustment
+= (i
->original_length
- i
->output_length
);
143 *offset
-= adjustment
;