PR middle-end/77674
[official-gcc.git] / gcc / ada / g-encstr.adb
blob80ca6d04d1816d314a68aee0ed483c84aa95a668
1 ------------------------------------------------------------------------------
2 -- --
3 -- GNAT RUN-TIME COMPONENTS --
4 -- --
5 -- G N A T . E N C O D E _ S T R I N G --
6 -- --
7 -- B o d y --
8 -- --
9 -- Copyright (C) 2007-2012, Free Software Foundation, Inc. --
10 -- --
11 -- GNAT is free software; you can redistribute it and/or modify it under --
12 -- terms of the GNU General Public License as published by the Free Soft- --
13 -- ware Foundation; either version 3, or (at your option) any later ver- --
14 -- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
15 -- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
16 -- or FITNESS FOR A PARTICULAR PURPOSE. --
17 -- --
18 -- As a special exception under Section 7 of GPL version 3, you are granted --
19 -- additional permissions described in the GCC Runtime Library Exception, --
20 -- version 3.1, as published by the Free Software Foundation. --
21 -- --
22 -- You should have received a copy of the GNU General Public License and --
23 -- a copy of the GCC Runtime Library Exception along with this program; --
24 -- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see --
25 -- <http://www.gnu.org/licenses/>. --
26 -- --
27 -- GNAT was originally developed by the GNAT team at New York University. --
28 -- Extensive contributions were provided by Ada Core Technologies Inc. --
29 -- --
30 ------------------------------------------------------------------------------
32 with Interfaces; use Interfaces;
34 with System.WCh_Con; use System.WCh_Con;
35 with System.WCh_Cnv; use System.WCh_Cnv;
37 package body GNAT.Encode_String is
39 -----------------------
40 -- Local Subprograms --
41 -----------------------
43 procedure Bad;
44 pragma No_Return (Bad);
45 -- Raise error for bad character code
47 procedure Past_End;
48 pragma No_Return (Past_End);
49 -- Raise error for off end of string
51 ---------
52 -- Bad --
53 ---------
55 procedure Bad is
56 begin
57 raise Constraint_Error with
58 "character cannot be encoded with given Encoding_Method";
59 end Bad;
61 ------------------------
62 -- Encode_Wide_String --
63 ------------------------
65 function Encode_Wide_String (S : Wide_String) return String is
66 Long : constant Natural := WC_Longest_Sequences (Encoding_Method);
67 Result : String (1 .. S'Length * Long);
68 Length : Natural;
69 begin
70 Encode_Wide_String (S, Result, Length);
71 return Result (1 .. Length);
72 end Encode_Wide_String;
74 procedure Encode_Wide_String
75 (S : Wide_String;
76 Result : out String;
77 Length : out Natural)
79 Ptr : Natural;
81 begin
82 Ptr := S'First;
83 for J in S'Range loop
84 Encode_Wide_Character (S (J), Result, Ptr);
85 end loop;
87 Length := Ptr - S'First;
88 end Encode_Wide_String;
90 -----------------------------
91 -- Encode_Wide_Wide_String --
92 -----------------------------
94 function Encode_Wide_Wide_String (S : Wide_Wide_String) return String is
95 Long : constant Natural := WC_Longest_Sequences (Encoding_Method);
96 Result : String (1 .. S'Length * Long);
97 Length : Natural;
98 begin
99 Encode_Wide_Wide_String (S, Result, Length);
100 return Result (1 .. Length);
101 end Encode_Wide_Wide_String;
103 procedure Encode_Wide_Wide_String
104 (S : Wide_Wide_String;
105 Result : out String;
106 Length : out Natural)
108 Ptr : Natural;
110 begin
111 Ptr := S'First;
112 for J in S'Range loop
113 Encode_Wide_Wide_Character (S (J), Result, Ptr);
114 end loop;
116 Length := Ptr - S'First;
117 end Encode_Wide_Wide_String;
119 ---------------------------
120 -- Encode_Wide_Character --
121 ---------------------------
123 procedure Encode_Wide_Character
124 (Char : Wide_Character;
125 Result : in out String;
126 Ptr : in out Natural)
128 begin
129 Encode_Wide_Wide_Character
130 (Wide_Wide_Character'Val (Wide_Character'Pos (Char)), Result, Ptr);
132 exception
133 when Constraint_Error =>
134 Bad;
135 end Encode_Wide_Character;
137 --------------------------------
138 -- Encode_Wide_Wide_Character --
139 --------------------------------
141 procedure Encode_Wide_Wide_Character
142 (Char : Wide_Wide_Character;
143 Result : in out String;
144 Ptr : in out Natural)
146 U : Unsigned_32;
148 procedure Out_Char (C : Character);
149 pragma Inline (Out_Char);
150 -- Procedure to store one character for instantiation below
152 --------------
153 -- Out_Char --
154 --------------
156 procedure Out_Char (C : Character) is
157 begin
158 if Ptr > Result'Last then
159 Past_End;
160 else
161 Result (Ptr) := C;
162 Ptr := Ptr + 1;
163 end if;
164 end Out_Char;
166 -- Start of processing for Encode_Wide_Wide_Character;
168 begin
169 -- Efficient code for UTF-8 case
171 if Encoding_Method = WCEM_UTF8 then
173 -- Note: for details of UTF8 encoding see RFC 3629
175 U := Unsigned_32 (Wide_Wide_Character'Pos (Char));
177 -- 16#00_0000#-16#00_007F#: 0xxxxxxx
179 if U <= 16#00_007F# then
180 Out_Char (Character'Val (U));
182 -- 16#00_0080#-16#00_07FF#: 110xxxxx 10xxxxxx
184 elsif U <= 16#00_07FF# then
185 Out_Char (Character'Val (2#11000000# or Shift_Right (U, 6)));
186 Out_Char (Character'Val (2#10000000# or (U and 2#00111111#)));
188 -- 16#00_0800#-16#00_FFFF#: 1110xxxx 10xxxxxx 10xxxxxx
190 elsif U <= 16#00_FFFF# then
191 Out_Char (Character'Val (2#11100000# or Shift_Right (U, 12)));
192 Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 6)
193 and 2#00111111#)));
194 Out_Char (Character'Val (2#10000000# or (U and 2#00111111#)));
196 -- 16#01_0000#-16#10_FFFF#: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
198 elsif U <= 16#10_FFFF# then
199 Out_Char (Character'Val (2#11110000# or Shift_Right (U, 18)));
200 Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 12)
201 and 2#00111111#)));
202 Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 6)
203 and 2#00111111#)));
204 Out_Char (Character'Val (2#10000000# or (U and 2#00111111#)));
206 -- 16#0020_0000#-16#03FF_FFFF#: 111110xx 10xxxxxx 10xxxxxx
207 -- 10xxxxxx 10xxxxxx
209 elsif U <= 16#03FF_FFFF# then
210 Out_Char (Character'Val (2#11111000# or Shift_Right (U, 24)));
211 Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 18)
212 and 2#00111111#)));
213 Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 12)
214 and 2#00111111#)));
215 Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 6)
216 and 2#00111111#)));
217 Out_Char (Character'Val (2#10000000# or (U and 2#00111111#)));
219 -- All other cases are invalid character codes, not this includes:
221 -- 16#0400_0000#-16#7FFF_FFFF#: 1111110x 10xxxxxx 10xxxxxx
222 -- 10xxxxxx 10xxxxxx 10xxxxxx
224 -- since Wide_Wide_Character values cannot exceed 16#3F_FFFF#
226 else
227 Bad;
228 end if;
230 -- All encoding methods other than UTF-8
232 else
233 Non_UTF8 : declare
234 procedure UTF_32_To_String is
235 new UTF_32_To_Char_Sequence (Out_Char);
236 -- Instantiate conversion procedure with above Out_Char routine
238 begin
239 UTF_32_To_String
240 (UTF_32_Code (Wide_Wide_Character'Pos (Char)), Encoding_Method);
242 exception
243 when Constraint_Error =>
244 Bad;
245 end Non_UTF8;
246 end if;
247 end Encode_Wide_Wide_Character;
249 --------------
250 -- Past_End --
251 --------------
253 procedure Past_End is
254 begin
255 raise Constraint_Error with "past end of string";
256 end Past_End;
258 end GNAT.Encode_String;