1 // This file is part of the ustl library, an STL implementation.
3 // Copyright (C) 2005 by Mike Sharov <msharov@users.sourceforge.net>
4 // This file is free software, distributed under the MIT License.
8 // STL basic_string equivalent functionality.
15 #include <stdio.h> // for vsnprintf (in string::format)
19 //----------------------------------------------------------------------
// Out-of-class definitions of the string class constants. No values are given
// here; presumably they are initialized in the class declaration -- TODO confirm.
21 const uoff_t
string::npos
;
22 const string::size_type
string::size_Terminator
;
23 const string::value_type
string::c_Terminator
;
// Static empty C string; the array holds size_Terminator chars.
24 const char string::empty_string
[string::size_Terminator
] = "";
26 //----------------------------------------------------------------------
28 /// Creates an empty string.
/// The new string is attached, through relink, to the static empty_string
/// array. NOTE(review): the -1 presumably excludes the terminator from the
/// linked size -- confirm against the VectorBlock macro.
32 relink (VectorBlock(empty_string
)-1);
35 /// Copy constructor; takes on the value of string \p s.
/// The base block size is (s.size() + size_Terminator) masked with
/// (s.is_linked()-1): when is_linked() is true the mask is zero, when it is
/// false the mask is all ones and the full size is kept.
/// NOTE(review): the two relink paths below are presumably selected by
/// s.is_linked() -- confirm.
36 string::string (const string
& s
)
37 : memblock ((s
.size() + size_Terminator
) & (s
.is_linked()-1)) // Allocate with terminator if not linked (can't call virtuals from base ctor)
// Path that attaches to the character data of s.
40 relink (s
.c_str(), s
.size());
// Path that copies size() bytes from s, then takes the terminator out of the size.
42 copy_n (s
.begin(), size(), begin());
43 relink (begin(), size() - size_Terminator
); // --m_Size
/// Creates a string from the zero-terminated C string \p s.
/// The data is attached with relink, using strlen (s) as the length.
/// NOTE(review): relink presumably references \p s without copying it, in
/// which case \p s must outlive this string -- confirm.
48 string::string (const_pointer s
)
53 relink (s
, strlen(s
));
56 /// Creates a string of length \p n filled with character \p c.
/// The base block is created with room for \p n characters plus the
/// terminator; relink then reduces the size by size_Terminator.
57 string::string (size_type n
, value_type c
)
58 : memblock (n
+ size_Terminator
) // because base ctor can't call virtuals of this class
60 relink (begin(), size() - size_Terminator
); // --m_Size
61 fill_n (begin(), n
, c
);
65 /// Resizes the string to \p n characters. The contents of new space are undefined.
/// When both \p n and memblock::capacity() are zero, the string is instead
/// relinked to the static empty_string and the function returns.
66 void string::resize (size_type n
)
// n | capacity is zero only when both operands are zero.
68 if (!(n
| memblock::capacity()))
69 return (relink (VectorBlock(empty_string
)-1));
74 /// Assigns itself the value of the zero-terminated string \p s.
/// Forwards to assign (s, strlen (s)).
75 void string::assign (const_pointer s
)
79 assign (s
, strlen (s
));
82 /// Assigns itself the value of string \p s of length \p len.
/// NOTE(review): the loop below tests for c_Terminator characters at the end
/// of the \p len bytes, presumably to trim them from the length -- confirm.
83 void string::assign (const_pointer s
, size_type len
)
85 while (len
&& s
[len
- 1] == c_Terminator
)
91 /// Appends to itself the value of the zero-terminated string \p s.
/// Forwards to append (s, strlen (s)).
92 void string::append (const_pointer s
)
96 append (s
, strlen (s
));
99 /// Appends to itself the value of string \p s of length \p len.
/// The string grows by \p len through resize, and \p len bytes of \p s are
/// copied into the newly added tail.
/// NOTE(review): the first loop tests for c_Terminator characters at the end
/// of the \p len bytes, presumably to trim them from the length -- confirm.
100 void string::append (const_pointer s
, size_type len
)
102 while (len
&& s
[len
- 1] == c_Terminator
)
104 resize (size() + len
);
// end() - len is the start of the space just added by resize.
105 copy_n (s
, len
, end() - len
);
108 /// Appends to itself \p n characters of value \p c.
/// fill_n writes \p c into the last \p n characters of the string.
/// NOTE(review): the string is presumably grown by \p n before the fill -- confirm.
109 void string::append (size_type n
, value_type c
)
112 fill_n (end() - n
, n
, c
);
115 /// Copies characters of this string, beginning at \p start, into buffer \p p of size \p n.
/// At most min (n - size_Terminator, size()) characters are copied, and a
/// c_Terminator is written after them.
/// \return the number of characters copied plus size_Terminator.
/// NOTE(review): the count is limited by size(), not by the distance from
/// \p start to end(); presumably callers pass \p start == begin() -- confirm.
/// NOTE(review): assumes \p n >= size_Terminator, otherwise the subtraction
/// would wrap if size_type is unsigned -- confirm.
116 string::size_type
string::copyto (pointer p
, size_type n
, const_iterator start
) const
// btc: number of characters to copy, leaving room in p for the terminator.
121 const size_type btc
= min(n
- size_Terminator
, size());
122 copy_n (start
, btc
, p
);
123 p
[btc
] = c_Terminator
;
124 return (btc
+ size_Terminator
);
127 /// Compares the range [\p first1, \p last1) with the range [\p first2, \p last2).
128 /// The return value is:
129 /// \li positive if the first range is greater (by value, not length) than the second
130 /// \li 0 if both ranges have equal contents and equal length
131 /// \li negative if the first range is less than the second
/// The common prefix is compared with memcmp and a nonzero result is returned
/// unchanged, so its magnitude is not necessarily 1. On a tie the sign of the
/// length difference is returned.
133 /*static*/ int string::compare (const_iterator first1
, const_iterator last1
, const_iterator first2
, const_iterator last2
)
135 assert (first1
<= last1
&& (first2
<= last2
|| !last2
) && "Negative ranges result in memory allocation errors.");
136 const size_type len1
= distance (first1
, last1
), len2
= distance (first2
, last2
);
// Tie-breaker used when the common prefix is identical.
137 const int rvbylen
= sign (int(len1
- len2
));
138 int rv
= memcmp (first1
, first2
, min (len1
, len2
));
139 return (rv
? rv
: rvbylen
);
142 /// Returns true if this string is equal to the zero-terminated string \p s.
/// Equality requires size() == strlen (s) and identical bytes (memcmp over
/// size() bytes).
143 bool string::operator== (const_pointer s
) const
147 return (size() == strlen(s
) && 0 == memcmp (c_str(), s
, size()));
150 /// Returns the beginning of character \p i.
/// A UTF-8 input iterator is started at begin() and its base() position is
/// returned. NOTE(review): the iterator is presumably advanced by \p i
/// characters before base() is taken -- confirm.
151 string::const_iterator
string::wiat (uoff_t i
) const
153 utf8in_iterator
<string::const_iterator
> cfinder (begin());
155 return (cfinder
.base());
158 /// Inserts wide character \p c at \p ipo \p n times as a UTF-8 string.
160 /// \p ipo is a byte position, not a character position, and is intended
161 /// to be obtained from one of the find functions. Generally you are not
162 /// able to know the character position in a localized string; different
163 /// languages will have different character counts, so use find instead.
/// Room for n * Utf8Bytes(c) bytes is opened at \p ipo, filled through a
/// UTF-8 output iterator, and the terminator is rewritten at end().
165 void string::insert (const uoff_t ipo
, wchar_t c
, size_type n
)
167 iterator
ip (iat(ipo
));
// ip is reassigned from the result of memblock::insert.
168 ip
= iterator (memblock::insert (memblock::iterator(ip
), n
* Utf8Bytes(c
)));
169 fill_n (utf8out (ip
), n
, c
);
170 *end() = c_Terminator
;
173 /// Inserts the sequence of wide characters [\p first, \p last) at \p ipo (byte position from a find call), \p n times.
/// The UTF-8 byte length of one copy of the sequence is summed first, then
/// n times that many bytes are opened at \p ipo and the terminator is
/// rewritten at end().
/// NOTE(review): the nested loops presumably write each character through
/// the UTF-8 output iterator -- confirm.
174 void string::insert (const uoff_t ipo
, const wchar_t* first
, const wchar_t* last
, const size_type n
)
176 iterator
ip (iat(ipo
));
// nti: number of wide characters in one copy; bti: their total UTF-8 byte count.
177 size_type nti
= distance (first
, last
), bti
= 0;
178 for (uoff_t i
= 0; i
< nti
; ++ i
)
179 bti
+= Utf8Bytes(first
[i
]);
180 ip
= iterator (memblock::insert (memblock::iterator(ip
), n
* bti
));
181 utf8out_iterator
<string::iterator
> uout (utf8out (ip
));
// Outer loop: one pass per copy; inner loop: one step per wide character.
182 for (uoff_t j
= 0; j
< n
; ++ j
)
183 for (uoff_t k
= 0; k
< nti
; ++ k
, ++ uout
)
185 *end() = c_Terminator
;
188 /// Inserts \p n copies of character \p c into this string at \p start.
/// \p start is reassigned from the result of memblock::insert, the \p n new
/// bytes are filled with \p c, and the terminator is rewritten at end().
189 string::iterator
string::insert (iterator start
, const_reference c
, size_type n
)
191 start
= iterator (memblock::insert (memblock::iterator(start
), n
));
192 fill_n (start
, n
, c
);
193 *end() = c_Terminator
;
197 /// Inserts \p n instances of the zero-terminated string \p s at \p start.
/// Forwards to insert (start, s, s + strlen(s), n) and returns its result.
198 string::iterator
string::insert (iterator start
, const_pointer s
, size_type n
)
202 return (insert (start
, s
, s
+ strlen(s
), n
));
205 /// Inserts the range [\p first, \p last) at \p start, \p n times.
/// \p start must lie within [begin(), end()]. A source range that lies
/// inside this string is accepted only if the result fits in the current
/// capacity (see the third assert).
/// distance(first, last) * n bytes are opened at \p start, filled with \p n
/// copies of the range, and the terminator is rewritten at end().
206 string::iterator
string::insert (iterator start
, const_pointer first
, const_pointer last
, size_type n
)
208 assert (first
<= last
);
209 assert (begin() <= start
&& end() >= start
);
210 assert ((first
< begin() || first
>= end() || size() + abs_distance(first
,last
) < capacity()) && "Insertion of self with autoresize is not supported");
211 start
= iterator (memblock::insert (memblock::iterator(start
), distance(first
, last
) * n
));
212 fill (memblock::iterator(start
), first
, distance(first
, last
), n
);
213 *end() = c_Terminator
;
217 /// Erases \p n bytes at \p ep.
/// The erase is done by memblock::erase, whose result is kept in rv, and the
/// terminator is rewritten at the new end().
218 string::iterator
string::erase (iterator ep
, size_type n
)
220 string::iterator rv
= memblock::erase (memblock::iterator(ep
), n
);
221 *end() = c_Terminator
;
225 /// Erases \p n bytes at byte offset \p epo.
226 void string::erase (uoff_t epo
, size_type n
)
231 /// Replaces the range [\p first, \p last) with the zero-terminated string \p s.
/// Forwards to replace (first, last, s, s + strlen(s)).
232 void string::replace (iterator first
, iterator last
, const_pointer s
)
236 replace (first
, last
, s
, s
+ strlen(s
));
239 /// Replaces the range [\p first, \p last) with \p n instances of the range [\p i1, \p i2).
/// [\p first, \p last) must lie inside this string, and \p n and the length
/// of the replaced range must not both be zero. A source range that lies
/// inside this string is accepted only if the result fits in the current
/// capacity (see the fourth assert).
/// NOTE(review): the erase and insert calls below are presumably
/// alternatives, chosen by whether the replacement is shorter or longer than
/// the replaced range -- confirm.
240 void string::replace (iterator first
, iterator last
, const_pointer i1
, const_pointer i2
, size_type n
)
242 assert (first
<= last
);
243 assert (n
|| distance(first
, last
));
244 assert (first
>= begin() && first
<= end() && last
>= first
&& last
<= end());
245 assert ((i1
< begin() || i1
>= end() || abs_distance(i1
,i2
) * n
+ size() < capacity()) && "Replacement by self can not autoresize");
// bte: bytes in the replaced range; bti: bytes in the replacement.
246 const size_type bte
= distance(first
, last
), bti
= distance(i1
, i2
) * n
;
// Removes the surplus bte - bti bytes.
248 first
= iterator (memblock::erase (memblock::iterator(first
), bte
- bti
));
// Opens the missing bti - bte bytes.
250 first
= iterator (memblock::insert (memblock::iterator(first
), bti
- bte
));
251 fill (memblock::iterator(first
), i1
, distance(i1
, i2
), n
);
252 *end() = c_Terminator
;
255 /// Returns the offset of the first occurrence of \p c at or after \p pos.
/// The search runs from iat(pos) to end(); the result is the distance from
/// begin(), or npos when \p c is not found.
256 uoff_t
string::find (const_reference c
, uoff_t pos
) const
258 const_iterator found
= ::ustl::find (iat(pos
), end(), c
);
259 return (found
< end() ? distance(begin(),found
) : npos
);
262 /// Returns the offset of the first occurrence of substring \p s at or after \p pos.
/// The search looks for the last character of \p s and, at each hit, compares
/// the s.size() bytes ending there with \p s using memcmp. On a match the
/// offset of the start of the match is returned.
/// NOTE(review): an empty \p s, or one longer than size() - pos, is tested
/// for first; the result in that case is presumably npos -- confirm.
/// NOTE(review): if the offset types are unsigned, size() - pos wraps when
/// \p pos > size() -- confirm callers never pass such a position.
263 uoff_t
string::find (const string
& s
, uoff_t pos
) const
265 if (s
.empty() || s
.size() > size() - pos
)
// endi: index of the last character of s; endchar: that character.
267 const uoff_t endi
= s
.size() - 1;
268 const_reference endchar
= s
[endi
];
// Scans s backward from endi for an earlier occurrence of endchar.
269 uoff_t lastPos
= endi
;
270 while (lastPos
-- && s
[lastPos
] != endchar
) ;
// skip: step applied to i after a failed comparison.
271 const size_type skip
= endi
- lastPos
;
// i tracks where the last character of a candidate match would be.
272 const_iterator i
= iat(pos
) + endi
;
273 for (; i
< end() && (i
= ::ustl::find (i
, end(), endchar
)) < end(); i
+= skip
)
274 if (memcmp (i
- endi
, s
.c_str(), s
.size()) == 0)
275 return (distance (begin(), i
) - endi
);
279 /// Returns the offset of the last occurrence of character \p c at or before \p pos.
/// The scan index starts at min (pos, size()-1) and counts down to 0.
/// NOTE(review): the start value is converted to int; confirm the behavior
/// for an empty string and for \p pos == npos.
280 uoff_t
string::rfind (const_reference c
, uoff_t pos
) const
282 for (int i
= min(pos
,size()-1); i
>= 0; --i
)
288 /// Returns the offset of the last occurrence of substring \p s before \p pos.
/// d walks backward from iat(pos) - 1; m points at the last character of
/// \p s. Returns an offset computed from d, or npos when d has reached sp.
/// NOTE(review): the inner loop presumably compares \p s backward from m with
/// this string backward from d and stops at the first mismatch -- confirm.
/// NOTE(review): scanning and the final test both require d > sp, so matches
/// at the very start of the string look unreachable -- verify with a match
/// at offset 0.
289 uoff_t
string::rfind (const string
& s
, uoff_t pos
) const
291 const_iterator d
= iat(pos
) - 1;
// sp: position s.size() - 1 past begin(); the scan stops when d reaches it.
292 const_iterator sp
= begin() + s
.size() - 1;
293 const_iterator m
= s
.end() - 1;
// The outer loop ends when d reaches sp or when i has run through all of s.
294 for (long int i
= 0; d
> sp
&& size_type(i
) < s
.size(); -- d
)
295 for (i
= 0; size_type(i
) < s
.size(); ++ i
)
298 return (d
> sp
? distance (begin(), d
+ 2 - s
.size()) : npos
);
301 /// Returns the offset of the first occurrence of one of the characters in \p s at or after \p pos.
/// Each character from min (pos, size()) up to size() is looked up in \p s
/// with s.find.
/// NOTE(review): the result when no character matches is presumably npos -- confirm.
302 uoff_t
string::find_first_of (const string
& s
, uoff_t pos
) const
304 for (uoff_t i
= min(pos
,size()); i
< size(); ++ i
)
305 if (s
.find (at(i
)) != npos
)
310 /// Returns the offset of the first character not contained in \p s at or after \p pos.
/// Each character from min (pos, size()) up to size() is looked up in \p s
/// with s.find; a lookup that yields npos marks a hit.
/// NOTE(review): the result when every character is in \p s is presumably npos -- confirm.
311 uoff_t
string::find_first_not_of (const string
& s
, uoff_t pos
) const
313 for (uoff_t i
= min(pos
,size()); i
< size(); ++ i
)
314 if (s
.find (at(i
)) == npos
)
319 /// Returns the offset of the last occurrence of one of the characters in \p s at or before \p pos.
/// The scan index starts at min (pos, size()-1) and counts down to 0; each
/// character is looked up in \p s with s.find.
/// NOTE(review): the start value is converted to int; confirm the behavior
/// for an empty string and for \p pos == npos.
320 uoff_t
string::find_last_of (const string
& s
, uoff_t pos
) const
322 for (int i
= min(pos
,size()-1); i
>= 0; -- i
)
323 if (s
.find (at(i
)) != npos
)
328 /// Returns the offset of the last character not contained in \p s at or before \p pos.
/// The scan index starts at min (pos, size()-1) and counts down to 0; a
/// lookup in \p s that yields npos marks a hit.
/// NOTE(review): the start value is converted to int; confirm the behavior
/// for an empty string and for \p pos == npos.
329 uoff_t
string::find_last_not_of (const string
& s
, uoff_t pos
) const
331 for (int i
= min(pos
,size()-1); i
>= 0; -- i
)
332 if (s
.find (at(i
)) == npos
)
337 /// Equivalent to a vsprintf into this string.
/// The text is formatted with vsnprintf directly into data(), limited to
/// memblock::capacity() bytes. The attempt is repeated while the clamped
/// result exceeds capacity(), and the string is finally resized to
/// min (rv, capacity()).
/// NOTE(review): the loop presumably enlarges the buffer before each attempt
/// -- confirm.
338 int string::vformat (const char* fmt
, va_list args
)
// This definition makes __va_copy expand to nothing.
// NOTE(review): presumably a fallback for platforms without va_copy -- confirm.
345 #define __va_copy(x,y)
350 __va_copy (args2
, args
);
351 rv
= vsnprintf (data(), memblock::capacity(), fmt
, args2
);
// Clamps the vsnprintf result to the size of the buffer it was given.
352 rv
= min (rv
, memblock::capacity());
353 } while (rv
> capacity());
354 resize (min (rv
, capacity()));
358 /// Equivalent to a sprintf into this string.
/// Starts a va_list after \p fmt and passes it to vformat, keeping the result in rv.
359 int string::format (const char* fmt
, ...)
362 va_start (args
, fmt
);
363 const int rv
= vformat (fmt
, args
);
368 /// Returns the number of bytes required to write this object to a stream.
/// That is Utf8Bytes(size()) plus size().
/// NOTE(review): the first term is presumably the UTF-8 encoded length prefix -- confirm.
369 size_t string::stream_size (void) const
371 return (Utf8Bytes(size()) + size());
374 /// Reads the object from stream \p is
/// The first byte in szbuf determines how many more size bytes follow
/// (Utf8SequenceBytes (szbuf[0]) - 1); those are read into szbuf + 1. After
/// checking that \p n bytes remain in the stream, size() bytes are read into
/// data().
/// NOTE(review): \p n is presumably the length decoded from szbuf, and the
/// string is presumably resized to it before the final read -- confirm.
375 void string::read (istream
& is
)
// szsz: count of size bytes still to read after the first one.
379 size_t szsz (Utf8SequenceBytes (szbuf
[0]) - 1), n
= 0;
380 is
.verify_remaining ("read", "ustl::string", szsz
);
381 is
.read (szbuf
+ 1, szsz
);
383 is
.verify_remaining ("read", "ustl::string", n
);
385 is
.read (data(), size());
388 /// Writes the object to stream \p os
/// Output is the szsz bytes held in szbuf followed by sz bytes of string
/// data; the stream is checked for szsz + sz bytes of room first.
/// NOTE(review): szbuf presumably receives the size encoded through the
/// UTF-8 output iterator -- confirm.
389 void string::write (ostream
& os
) const
// The assert catches a size() that does not fit in written_size_type.
391 const written_size_type
sz (size());
392 assert (sz
== size() && "No support for writing strings larger than 4G");
395 utf8out_iterator
<char*> szout (szbuf
);
// szsz: number of bytes between szbuf and the output iterator's position.
397 size_t szsz
= distance (szbuf
, szout
.base());
399 os
.verify_remaining ("write", "ustl::string", szsz
+ sz
);
400 os
.write (szbuf
, szsz
);
401 os
.write (cdata(), sz
);
404 /// Returns a hash value for [first, last)
/// For every byte, h is rotated left by 7 bits within the width of
/// hashvalue_t and the byte value is added.
/// NOTE(review): h is presumably started at zero and returned after the loop -- confirm.
405 /*static*/ hashvalue_t
string::hash (const char* first
, const char* last
)
408 // This has the bits flowing into each other from both sides of the number
409 for (; first
< last
; ++ first
)
410 h
= *first
+ ((h
<< 7) | (h
>> (BitsInType(hashvalue_t
) - 7)));