1 ------------------------------------------------------------------------------
3 -- GNAT RUN-TIME COMPONENTS --
5 -- G N A T . E N C O D E _ S T R I N G --
9 -- Copyright (C) 2007, AdaCore --
11 -- GNAT is free software; you can redistribute it and/or modify it under --
12 -- terms of the GNU General Public License as published by the Free Soft- --
13 -- ware Foundation; either version 2, or (at your option) any later ver- --
14 -- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
15 -- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
16 -- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License --
17 -- for more details. You should have received a copy of the GNU General --
18 -- Public License distributed with GNAT; see file COPYING. If not, write --
19 -- to the Free Software Foundation, 51 Franklin Street, Fifth Floor, --
20 -- Boston, MA 02110-1301, USA. --
22 -- As a special exception, if other files instantiate generics from this --
23 -- unit, or you link this unit with other files to produce an executable, --
24 -- this unit does not by itself cause the resulting executable to be --
25 -- covered by the GNU General Public License. This exception does not --
26 -- however invalidate any other reasons why the executable file might be --
27 -- covered by the GNU Public License. --
29 -- GNAT was originally developed by the GNAT team at New York University. --
30 -- Extensive contributions were provided by Ada Core Technologies Inc. --
32 ------------------------------------------------------------------------------
34 with Interfaces
; use Interfaces
;
36 with System
.WCh_Con
; use System
.WCh_Con
;
37 with System
.WCh_Cnv
; use System
.WCh_Cnv
;
39 package body GNAT
.Encode_String
is
41 -----------------------
42 -- Local Subprograms --
43 -----------------------
46 pragma No_Return
(Bad
);
47 -- Raise error for bad character code
50 pragma No_Return
(Past_End
);
51 -- Raise error for off end of string
59 raise Constraint_Error
with
60 "character cannot be encoded with given Encoding_Method";
63 ------------------------
64 -- Encode_Wide_String --
65 ------------------------
67 function Encode_Wide_String
(S
: Wide_String) return String is
68 Long
: constant Natural := WC_Longest_Sequences
(Encoding_Method
);
69 Result
: String (1 .. S
'Length * Long
);
72 Encode_Wide_String
(S
, Result
, Length
);
73 return Result
(1 .. Length
);
74 end Encode_Wide_String
;
76 procedure Encode_Wide_String
86 Encode_Wide_Character
(S
(J
), Result
, Ptr
);
89 Length
:= Ptr
- S
'First;
90 end Encode_Wide_String
;
92 -----------------------------
93 -- Encode_Wide_Wide_String --
94 -----------------------------
96 function Encode_Wide_Wide_String
(S
: Wide_Wide_String
) return String is
97 Long
: constant Natural := WC_Longest_Sequences
(Encoding_Method
);
98 Result
: String (1 .. S
'Length * Long
);
101 Encode_Wide_Wide_String
(S
, Result
, Length
);
102 return Result
(1 .. Length
);
103 end Encode_Wide_Wide_String
;
105 procedure Encode_Wide_Wide_String
106 (S
: Wide_Wide_String
;
108 Length
: out Natural)
114 for J
in S
'Range loop
115 Encode_Wide_Wide_Character
(S
(J
), Result
, Ptr
);
118 Length
:= Ptr
- S
'First;
119 end Encode_Wide_Wide_String
;
121 ---------------------------
122 -- Encode_Wide_Character --
123 ---------------------------
125 procedure Encode_Wide_Character
126 (Char
: Wide_Character;
127 Result
: in out String;
128 Ptr
: in out Natural)
131 Encode_Wide_Wide_Character
132 (Wide_Wide_Character
'Val (Wide_Character'Pos (Char
)), Result
, Ptr
);
135 when Constraint_Error
=>
137 end Encode_Wide_Character
;
139 --------------------------------
140 -- Encode_Wide_Wide_Character --
141 --------------------------------
143 procedure Encode_Wide_Wide_Character
144 (Char
: Wide_Wide_Character
;
145 Result
: in out String;
146 Ptr
: in out Natural)
150 procedure Out_Char
(C
: Character);
151 pragma Inline
(Out_Char
);
152 -- Procedure to store one character for instantiation below
158 procedure Out_Char
(C
: Character) is
160 if Ptr
> Result
'Last then
168 -- Start of processing for Encode_Wide_Wide_Character;
171 -- Efficient code for UTF-8 case
173 if Encoding_Method
= WCEM_UTF8
then
175 -- Note: for details of UTF8 encoding see RFC 3629
177 U
:= Unsigned_32
(Wide_Wide_Character
'Pos (Char
));
179 -- 16#00_0000#-16#00_007F#: 0xxxxxxx
181 if U
<= 16#
00_007F#
then
182 Out_Char
(Character'Val (U
));
184 -- 16#00_0080#-16#00_07FF#: 110xxxxx 10xxxxxx
186 elsif U
<= 16#
00_07FF#
then
187 Out_Char
(Character'Val (2#
11000000#
or Shift_Right
(U
, 6)));
188 Out_Char
(Character'Val (2#
10000000#
or (U
and 2#
00111111#
)));
190 -- 16#00_0800#-16#00_FFFF#: 1110xxxx 10xxxxxx 10xxxxxx
192 elsif U
<= 16#
00_FFFF#
then
193 Out_Char
(Character'Val (2#
11100000#
or Shift_Right
(U
, 12)));
194 Out_Char
(Character'Val (2#
10000000#
or (Shift_Right
(U
, 6)
196 Out_Char
(Character'Val (2#
10000000#
or (U
and 2#
00111111#
)));
198 -- 16#01_0000#-16#10_FFFF#: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
200 elsif U
<= 16#
10_FFFF#
then
201 Out_Char
(Character'Val (2#
11110000#
or Shift_Right
(U
, 18)));
202 Out_Char
(Character'Val (2#
10000000#
or (Shift_Right
(U
, 12)
204 Out_Char
(Character'Val (2#
10000000#
or (Shift_Right
(U
, 6)
206 Out_Char
(Character'Val (2#
10000000#
or (U
and 2#
00111111#
)));
208 -- 16#0020_0000#-16#03FF_FFFF#: 111110xx 10xxxxxx 10xxxxxx
211 elsif U
<= 16#
03FF_FFFF#
then
212 Out_Char
(Character'Val (2#
11111000#
or Shift_Right
(U
, 24)));
213 Out_Char
(Character'Val (2#
10000000#
or (Shift_Right
(U
, 18)
215 Out_Char
(Character'Val (2#
10000000#
or (Shift_Right
(U
, 12)
217 Out_Char
(Character'Val (2#
10000000#
or (Shift_Right
(U
, 6)
219 Out_Char
(Character'Val (2#
10000000#
or (U
and 2#
00111111#
)));
221 -- All other cases are invalid character codes, not this includes:
223 -- 16#0400_0000#-16#7FFF_FFFF#: 1111110x 10xxxxxx 10xxxxxx
224 -- 10xxxxxx 10xxxxxx 10xxxxxx
226 -- since Wide_Wide_Character values cannot exceed 16#3F_FFFF#
232 -- All encoding methods other than UTF-8
236 procedure UTF_32_To_String
is
237 new UTF_32_To_Char_Sequence
(Out_Char
);
238 -- Instantiate conversion procedure with above Out_Char routine
242 (UTF_32_Code
(Wide_Wide_Character
'Pos (Char
)), Encoding_Method
);
245 when Constraint_Error
=>
249 end Encode_Wide_Wide_Character
;
255 procedure Past_End
is
257 raise Constraint_Error
with "past end of string";
260 end GNAT
.Encode_String
;