PR c++/54038
[official-gcc.git] / gcc / ada / a-stuten.adb
blobfc669b56ee431c0bc136fa34ffc2672bd18d85c5
1 ------------------------------------------------------------------------------
2 -- --
3 -- GNAT RUN-TIME COMPONENTS --
4 -- --
5 -- A D A . S T R I N G S . U T F _ E N C O D I N G --
6 -- --
7 -- B o d y --
8 -- --
9 -- Copyright (C) 2010, Free Software Foundation, Inc. --
10 -- --
11 -- GNAT is free software; you can redistribute it and/or modify it under --
12 -- terms of the GNU General Public License as published by the Free Soft- --
13 -- ware Foundation; either version 3, or (at your option) any later ver- --
14 -- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
15 -- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
16 -- or FITNESS FOR A PARTICULAR PURPOSE. --
17 -- --
18 -- As a special exception under Section 7 of GPL version 3, you are granted --
19 -- additional permissions described in the GCC Runtime Library Exception, --
20 -- version 3.1, as published by the Free Software Foundation. --
21 -- --
22 -- You should have received a copy of the GNU General Public License and --
23 -- a copy of the GCC Runtime Library Exception along with this program; --
24 -- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see --
25 -- <http://www.gnu.org/licenses/>. --
26 -- --
27 -- GNAT was originally developed by the GNAT team at New York University. --
28 -- Extensive contributions were provided by Ada Core Technologies Inc. --
29 -- --
30 ------------------------------------------------------------------------------
32 package body Ada.Strings.UTF_Encoding is
33 use Interfaces;
35 --------------
36 -- Encoding --
37 --------------
39 function Encoding
40 (Item : UTF_String;
41 Default : Encoding_Scheme := UTF_8) return Encoding_Scheme
43 begin
44 if Item'Length >= 2 then
45 if Item (Item'First .. Item'First + 1) = BOM_16BE then
46 return UTF_16BE;
48 elsif Item (Item'First .. Item'First + 1) = BOM_16LE then
49 return UTF_16LE;
51 elsif Item'Length >= 3
52 and then Item (Item'First .. Item'First + 2) = BOM_8
53 then
54 return UTF_8;
55 end if;
56 end if;
58 return Default;
59 end Encoding;
61 -----------------
62 -- From_UTF_16 --
63 -----------------
65 function From_UTF_16
66 (Item : UTF_16_Wide_String;
67 Output_Scheme : UTF_XE_Encoding;
68 Output_BOM : Boolean := False) return UTF_String
70 BSpace : constant Natural := 2 * Boolean'Pos (Output_BOM);
71 Result : UTF_String (1 .. 2 * Item'Length + BSpace);
72 Len : Natural;
73 C : Unsigned_16;
74 Iptr : Natural;
76 begin
77 if Output_BOM then
78 Result (1 .. 2) :=
79 (if Output_Scheme = UTF_16BE then BOM_16BE else BOM_16LE);
80 Len := 2;
81 else
82 Len := 0;
83 end if;
85 -- Skip input BOM
87 Iptr := Item'First;
89 if Iptr <= Item'Last and then Item (Iptr) = BOM_16 (1) then
90 Iptr := Iptr + 1;
91 end if;
93 -- UTF-16BE case
95 if Output_Scheme = UTF_16BE then
96 while Iptr <= Item'Last loop
97 C := To_Unsigned_16 (Item (Iptr));
98 Result (Len + 1) := Character'Val (Shift_Right (C, 8));
99 Result (Len + 2) := Character'Val (C and 16#00_FF#);
100 Len := Len + 2;
101 Iptr := Iptr + 1;
102 end loop;
104 -- UTF-16LE case
106 else
107 while Iptr <= Item'Last loop
108 C := To_Unsigned_16 (Item (Iptr));
109 Result (Len + 1) := Character'Val (C and 16#00_FF#);
110 Result (Len + 2) := Character'Val (Shift_Right (C, 8));
111 Len := Len + 2;
112 Iptr := Iptr + 1;
113 end loop;
114 end if;
116 return Result (1 .. Len);
117 end From_UTF_16;
119 --------------------------
120 -- Raise_Encoding_Error --
121 --------------------------
123 procedure Raise_Encoding_Error (Index : Natural) is
124 Val : constant String := Index'Img;
125 begin
126 raise Encoding_Error with
127 "bad input at Item (" & Val (Val'First + 1 .. Val'Last) & ')';
128 end Raise_Encoding_Error;
130 ---------------
131 -- To_UTF_16 --
132 ---------------
134 function To_UTF_16
135 (Item : UTF_String;
136 Input_Scheme : UTF_XE_Encoding;
137 Output_BOM : Boolean := False) return UTF_16_Wide_String
139 Result : UTF_16_Wide_String (1 .. Item'Length / 2 + 1);
140 Len : Natural;
141 Iptr : Natural;
143 begin
144 if Item'Length mod 2 /= 0 then
145 raise Encoding_Error with "UTF-16BE/LE string has odd length";
146 end if;
148 -- Deal with input BOM, skip if OK, error if bad BOM
150 Iptr := Item'First;
152 if Item'Length >= 2 then
153 if Item (Iptr .. Iptr + 1) = BOM_16BE then
154 if Input_Scheme = UTF_16BE then
155 Iptr := Iptr + 2;
156 else
157 Raise_Encoding_Error (Iptr);
158 end if;
160 elsif Item (Iptr .. Iptr + 1) = BOM_16LE then
161 if Input_Scheme = UTF_16LE then
162 Iptr := Iptr + 2;
163 else
164 Raise_Encoding_Error (Iptr);
165 end if;
167 elsif Item'Length >= 3 and then Item (Iptr .. Iptr + 2) = BOM_8 then
168 Raise_Encoding_Error (Iptr);
169 end if;
170 end if;
172 -- Output BOM if specified
174 if Output_BOM then
175 Result (1) := BOM_16 (1);
176 Len := 1;
177 else
178 Len := 0;
179 end if;
181 -- UTF-16BE case
183 if Input_Scheme = UTF_16BE then
184 while Iptr < Item'Last loop
185 Len := Len + 1;
186 Result (Len) :=
187 Wide_Character'Val
188 (Character'Pos (Item (Iptr)) * 256 +
189 Character'Pos (Item (Iptr + 1)));
190 Iptr := Iptr + 2;
191 end loop;
193 -- UTF-16LE case
195 else
196 while Iptr < Item'Last loop
197 Len := Len + 1;
198 Result (Len) :=
199 Wide_Character'Val
200 (Character'Pos (Item (Iptr)) +
201 Character'Pos (Item (Iptr + 1)) * 256);
202 Iptr := Iptr + 2;
203 end loop;
204 end if;
206 return Result (1 .. Len);
207 end To_UTF_16;
209 end Ada.Strings.UTF_Encoding;