1 // This file is part of the ustl library, an STL implementation.
3 // Copyright (C) 2005 by Mike Sharov <msharov@users.sourceforge.net>
4 // This file is free software, distributed under the MIT License.
9 #ifndef USTRING_H_1249CB7A098A9010763AAC6D37B133CF
10 #define USTRING_H_1249CB7A098A9010763AAC6D37B133CF
14 #include <stdarg.h> // for va_list, va_start, and va_end (in string::format)
/// \class string ustring.h ustl.h
/// \ingroup Sequences
///
/// \brief STL basic_string<char> equivalent.
///
/// An STL container for text string manipulation.
/// Differences from the C++ standard:
///     - string is a class, not a template. Wide characters are assumed to be
///       encoded with utf8 at all times except when rendering or editing,
///       where you would use a utf8 iterator.
///     - format member function - you can, of course, use an \ref ostringstream,
///       which also has format functions, but most of the time this way
///       is more convenient. Because uSTL does not implement locales,
///       format is the only way to create localized strings.
///     - const char* cast operator. It is much clearer to use this than having
///       to type .c_str() every time.
///     - length returns the number of _characters_, not bytes.
///       This function is O(N), so use wisely.
///
/// An additional note is in order regarding the use of indexes. All indexes
/// passed in as arguments or returned by find are byte offsets, not character
/// offsets. Likewise, sizes are specified in bytes, not characters. The
/// rationale is that there is no way for you to know what is in the string.
/// There is no way for you to know how many characters are needed to express
/// one thing or another. The only thing you can do to a localized string is
/// search for delimiters and modify text between them as opaque blocks. If you
/// do anything else, you are hardcoding yourself into a locale! So stop it!
46 class string
: public memblock
{
48 typedef char value_type
;
49 typedef value_type
* pointer
;
50 typedef const value_type
* const_pointer
;
51 typedef wchar_t wvalue_type
;
52 typedef wvalue_type
* wpointer
;
53 typedef const wvalue_type
* const_wpointer
;
54 typedef pointer iterator
;
55 typedef const_pointer const_iterator
;
56 typedef value_type
& reference
;
57 typedef value_type const_reference
;
58 typedef ::ustl::reverse_iterator
<iterator
> reverse_iterator
;
59 typedef ::ustl::reverse_iterator
<const_iterator
> const_reverse_iterator
;
60 typedef utf8in_iterator
<const_iterator
> utf8_iterator
;
62 static const uoff_t npos
= static_cast<uoff_t
>(-1); ///< Value that means the end of string.
63 static const value_type c_Terminator
= 0; ///< String terminator
64 static const size_type size_Terminator
= sizeof(c_Terminator
); ///< Most systems terminate strings with '\\0'
65 static const char empty_string
[size_Terminator
]; ///< An empty string.
68 string (const string
& s
);
69 inline string (const string
& s
, uoff_t o
, size_type n
);
70 inline explicit string (const cmemlink
& l
);
71 string (const_pointer s
);
72 inline string (const_pointer s
, size_type len
);
73 inline string (const_pointer s1
, const_pointer s2
);
74 explicit string (size_type n
, value_type c
= c_Terminator
);
75 inline pointer
data (void) { return (string::pointer (memblock::data())); }
76 inline const_pointer
c_str (void) const { return (string::const_pointer (memblock::cdata())); }
77 inline size_type
max_size (void) const { size_type
s (memblock::max_size()); return (s
- !!s
); }
78 inline size_type
capacity (void) const { size_type
c (memblock::capacity()); return (c
- !!c
); }
79 void resize (size_type n
);
80 inline void clear (void) { resize (0); }
81 inline const_iterator
begin (void) const { return (const_iterator (memblock::begin())); }
82 inline iterator
begin (void) { return (iterator (memblock::begin())); }
83 inline const_iterator
end (void) const { return (const_iterator (memblock::end())); }
84 inline iterator
end (void) { return (iterator (memblock::end())); }
85 inline const_reverse_iterator
rbegin (void) const { return (const_reverse_iterator (end())); }
86 inline reverse_iterator
rbegin (void) { return (reverse_iterator (end())); }
87 inline const_reverse_iterator
rend (void) const { return (const_reverse_iterator (begin())); }
88 inline reverse_iterator
rend (void) { return (reverse_iterator (begin())); }
89 inline utf8_iterator
utf8_begin (void) const { return (utf8_iterator (begin())); }
90 inline utf8_iterator
utf8_end (void) const { return (utf8_iterator (end())); }
91 inline const_reference
at (uoff_t pos
) const { assert (pos
<= size() && begin()); return (begin()[pos
]); }
92 inline reference
at (uoff_t pos
) { assert (pos
<= size() && begin()); return (begin()[pos
]); }
93 inline const_iterator
iat (uoff_t pos
) const { return (begin() + min (pos
, size())); }
94 inline iterator
iat (uoff_t pos
) { return (begin() + min (pos
, size())); }
95 const_iterator
wiat (uoff_t i
) const;
96 inline iterator
wiat (uoff_t i
) { return (const_cast<iterator
>(const_cast<const string
*>(this)->wiat(i
))); }
97 inline const_reference
back (void) const { return (at(size()-1)); }
98 inline reference
back (void) { return (at(size()-1)); }
99 inline size_type
length (void) const { return (distance (utf8_begin(), utf8_end())); }
100 inline void append (const_iterator i1
, const_iterator i2
) { append (i1
, distance (i1
, i2
)); }
101 void append (const_pointer s
, size_type len
);
102 void append (const_pointer s
);
103 void append (size_type n
, const_reference c
);
104 inline void append (size_type n
, wvalue_type c
) { insert (size(), c
, n
); }
105 inline void append (const_wpointer s1
, const_wpointer s2
) { insert (size(), s1
, s2
); }
106 inline void append (const_wpointer s
) { const_wpointer
se (s
); for (;se
&&*se
;++se
) ; append (s
, se
); }
107 inline void append (const string
& s
) { append (s
.begin(), s
.end()); }
108 inline void append (const string
& s
, uoff_t o
, size_type n
) { append (s
.iat(o
), s
.iat(o
+n
)); }
109 inline void assign (const_iterator i1
, const_iterator i2
) { assign (i1
, distance (i1
, i2
)); }
110 void assign (const_pointer s
, size_type len
);
111 void assign (const_pointer s
);
112 inline void assign (const_wpointer s1
, const_wpointer s2
) { clear(); append (s1
, s2
); }
113 inline void assign (const_wpointer s1
) { clear(); append (s1
); }
114 inline void assign (const string
& s
) { assign (s
.begin(), s
.end()); }
115 inline void assign (const string
& s
, uoff_t o
, size_type n
) { assign (s
.iat(o
), s
.iat(o
+n
)); }
116 size_type
copyto (pointer p
, size_type n
, const_iterator start
= NULL
) const;
117 inline int compare (const string
& s
) const { return (compare (begin(), end(), s
.begin(), s
.end())); }
118 inline int compare (const_pointer s
) const { return (compare (begin(), end(), s
, s
+ strlen(s
))); }
119 static int compare (const_iterator first1
, const_iterator last1
, const_iterator first2
, const_iterator last2
);
120 inline operator const value_type
* (void) const;
121 inline operator value_type
* (void);
122 inline const string
& operator= (const string
& s
) { assign (s
.begin(), s
.end()); return (*this); }
123 inline const string
& operator= (const_reference c
) { assign (&c
, 1); return (*this); }
124 inline const string
& operator= (const_pointer s
) { assign (s
); return (*this); }
125 inline const string
& operator= (const_wpointer s
) { assign (s
); return (*this); }
126 inline const string
& operator+= (const string
& s
) { append (s
.begin(), s
.size()); return (*this); }
127 inline const string
& operator+= (const_reference c
) { append (1, c
); return (*this); }
128 inline const string
& operator+= (const_pointer s
) { append (s
); return (*this); }
129 inline const string
& operator+= (wvalue_type c
) { append (1, c
); return (*this); }
130 inline const string
& operator+= (const_wpointer s
) { append (s
); return (*this); }
131 inline string
operator+ (const string
& s
) const;
132 inline bool operator== (const string
& s
) const { return (memblock::operator== (s
)); }
133 bool operator== (const_pointer s
) const;
134 inline bool operator== (const_reference c
) const { return (size() == 1 && c
== at(0)); }
135 inline bool operator!= (const string
& s
) const { return (!operator== (s
)); }
136 inline bool operator!= (const_pointer s
) const { return (!operator== (s
)); }
137 inline bool operator!= (const_reference c
) const { return (!operator== (c
)); }
138 inline bool operator< (const string
& s
) const { return (0 > compare (s
)); }
139 inline bool operator< (const_pointer s
) const { return (0 > compare (s
)); }
140 inline bool operator< (const_reference c
) const { return (0 > compare (begin(), end(), &c
, &c
+ 1)); }
141 inline bool operator> (const_pointer s
) const { return (0 < compare (s
)); }
142 void insert (const uoff_t ip
, wvalue_type c
, size_type n
= 1);
143 void insert (const uoff_t ip
, const_wpointer first
, const_wpointer last
, const size_type n
= 1);
144 iterator
insert (iterator start
, const_reference c
, size_type n
= 1);
145 iterator
insert (iterator start
, const_pointer s
, size_type n
= 1);
146 iterator
insert (iterator start
, const_pointer first
, const_iterator last
, size_type n
= 1);
147 inline void insert (uoff_t ip
, const_pointer s
, size_type nlen
) { insert (iat(ip
), s
, s
+ nlen
); }
148 inline void insert (uoff_t ip
, size_type n
, value_type c
) { insert (iat(ip
), c
, n
); }
149 inline void insert (uoff_t ip
, const string
& s
, uoff_t sp
, size_type slen
) { insert (iat(ip
), s
.iat(sp
), s
.iat(sp
+ slen
)); }
150 iterator
erase (iterator epo
, size_type n
= 1);
151 void erase (uoff_t epo
, size_type n
= 1);
152 inline iterator
erase (iterator first
, const_iterator last
) { return (erase (first
, size_type(distance(first
,last
)))); }
153 inline void eraser (uoff_t first
, uoff_t last
) { erase (iat(first
), iat(last
)); }
154 inline void push_back (const_reference c
) { append (1, c
); }
155 inline void push_back (wvalue_type c
) { append (1, c
); }
156 inline void pop_back (void) { resize (size() - 1); }
157 void replace (iterator first
, iterator last
, const_pointer s
);
158 void replace (iterator first
, iterator last
, const_pointer i1
, const_pointer i2
, size_type n
= 1);
159 inline void replace (iterator first
, iterator last
, const string
& s
) { replace (first
, last
, s
.begin(), s
.end()); }
160 inline void replace (iterator first
, iterator last
, const_pointer s
, size_type slen
) { replace (first
, last
, s
, s
+ slen
); }
161 inline void replace (iterator first
, iterator last
, size_type n
, value_type c
) { replace (first
, last
, &c
, &c
+ 1, n
); }
162 inline void replace (uoff_t rp
, size_type n
, const string
& s
) { replace (iat(rp
), iat(rp
+ n
), s
); }
163 inline void replace (uoff_t rp
, size_type n
, const string
& s
, uoff_t sp
, size_type slen
) { replace (iat(rp
), iat(rp
+ n
), s
.iat(sp
), s
.iat(sp
+ slen
)); }
164 inline void replace (uoff_t rp
, size_type n
, const_pointer s
, size_type slen
) { replace (iat(rp
), iat(rp
+ n
), s
, s
+ slen
); }
165 inline void replace (uoff_t rp
, size_type n
, const_pointer s
) { replace (iat(rp
), iat(rp
+ n
), string(s
)); }
166 inline void replace (uoff_t rp
, size_type n
, size_type count
, value_type c
) { replace (iat(rp
), iat(rp
+ n
), count
, c
); }
167 inline string
substr (uoff_t o
, size_type n
) const { return (string (*this, o
, n
)); }
168 uoff_t
find (const_reference c
, uoff_t pos
= 0) const;
169 uoff_t
find (const string
& s
, uoff_t pos
= 0) const;
170 uoff_t
rfind (const_reference c
, uoff_t pos
= npos
) const;
171 uoff_t
rfind (const string
& s
, uoff_t pos
= npos
) const;
172 uoff_t
find_first_of (const string
& s
, uoff_t pos
= 0) const;
173 uoff_t
find_first_not_of (const string
& s
, uoff_t pos
= 0) const;
174 uoff_t
find_last_of (const string
& s
, uoff_t pos
= npos
) const;
175 uoff_t
find_last_not_of (const string
& s
, uoff_t pos
= npos
) const;
176 int vformat (const char* fmt
, va_list args
);
177 int format (const char* fmt
, ...) __attribute__((__format__(__printf__
, 2, 3)));
178 void read (istream
&);
179 void write (ostream
& os
) const;
180 size_t stream_size (void) const;
181 static hashvalue_t
hash (const char* f1
, const char* l1
);
183 inline virtual size_type
minimumFreeCapacity (void) const { return (size_Terminator
); }
186 //----------------------------------------------------------------------
188 /// Assigns itself the value of string \p s
189 inline string::string (const cmemlink
& s
)
192 assign (const_iterator (s
.begin()), s
.size());
195 /// Assigns itself a [o,o+n) substring of \p s.
196 inline string::string (const string
& s
, uoff_t o
, size_type n
)
202 /// Copies the value of \p s of length \p len into itself.
203 inline string::string (const_pointer s
, size_type len
)
209 /// Copies into itself the string data between \p s1 and \p s2
210 inline string::string (const_pointer s1
, const_pointer s2
)
213 assert (s1
<= s2
&& "Negative ranges result in memory allocation errors.");
217 /// Returns the pointer to the first character.
218 inline string::operator const string::value_type
* (void) const
220 assert ((!end() || *end() == c_Terminator
) && "This string is linked to data that is not 0-terminated. This may cause serious security problems. Please assign the data instead of linking.");
224 /// Returns the pointer to the first character.
225 inline string::operator string::value_type
* (void)
227 assert ((end() && *end() == c_Terminator
) && "This string is linked to data that is not 0-terminated. This may cause serious security problems. Please assign the data instead of linking.");
231 /// Concatenates itself with \p s
232 inline string
string::operator+ (const string
& s
) const
234 string
result (*this);
239 //----------------------------------------------------------------------
240 // Operators needed to avoid comparing pointer to pointer
242 #define PTR_STRING_CMP(op, impl) \
243 inline bool op (const char* s1, const string& s2) { return impl; }
244 PTR_STRING_CMP (operator==, (s2
== s1
))
245 PTR_STRING_CMP (operator!=, (s2
!= s1
))
246 PTR_STRING_CMP (operator<, (s2
> s1
))
247 PTR_STRING_CMP (operator<=, (s2
>= s1
))
248 PTR_STRING_CMP (operator>, (s2
< s1
))
249 PTR_STRING_CMP (operator>=, (s2
<= s1
))
250 #undef PTR_STRING_CMP
252 //----------------------------------------------------------------------
254 template <typename T
>
255 inline hashvalue_t
hash_value (const T
& v
)
256 { return (string::hash (v
.begin(), v
.end())); }
259 inline hashvalue_t
hash_value (const string::const_pointer
& v
)
260 { return (string::hash (v
, v
+ strlen(v
))); }
263 inline hashvalue_t
hash_value (const string::pointer
& v
)
264 { return (string::hash (v
, v
+ strlen(v
))); }
266 //----------------------------------------------------------------------
270 // Specialization for stream alignment
271 ALIGNOF (ustl::string
, alignof (string::value_type()))