fixing pr42337
[official-gcc.git] / gcc / ada / g-byorma.adb
blob27138b463f404335232ff8a6115a41165f7cc9b7
1 ------------------------------------------------------------------------------
2 -- --
3 -- GNAT COMPILER COMPONENTS --
4 -- --
5 -- G N A T . B Y T E _ O R D E R _ M A R K --
6 -- --
7 -- B o d y --
8 -- --
9 -- Copyright (C) 2006-2008, AdaCore --
10 -- --
11 -- GNAT is free software; you can redistribute it and/or modify it under --
12 -- terms of the GNU General Public License as published by the Free Soft- --
13 -- ware Foundation; either version 2, or (at your option) any later ver- --
14 -- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
15 -- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
16 -- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License --
17 -- for more details. You should have received a copy of the GNU General --
18 -- Public License distributed with GNAT; see file COPYING. If not, write --
19 -- to the Free Software Foundation, 51 Franklin Street, Fifth Floor, --
20 -- Boston, MA 02110-1301, USA. --
21 -- --
22 -- As a special exception, if other files instantiate generics from this --
23 -- unit, or you link this unit with other files to produce an executable, --
24 -- this unit does not by itself cause the resulting executable to be --
25 -- covered by the GNU General Public License. This exception does not --
26 -- however invalidate any other reasons why the executable file might be --
27 -- covered by the GNU Public License. --
28 -- --
29 -- GNAT was originally developed by the GNAT team at New York University. --
30 -- Extensive contributions were provided by Ada Core Technologies Inc. --
31 -- --
32 ------------------------------------------------------------------------------
34 pragma Compiler_Unit;
package body GNAT.Byte_Order_Mark is

   --------------
   -- Read_BOM --
   --------------

   --  Examine the leading characters of Str and classify them.
   --
   --  When Str starts with one of the byte order marks tested below, BOM
   --  is set to the corresponding kind and Len to the number of characters
   --  occupied by the mark (2, 3 or 4).
   --
   --  When XML_Support is True and no mark was found, the first four
   --  characters are additionally compared with the encodings of the start
   --  of an XML declaration ("<", "<?" or "<?xm" depending on the width of
   --  a code unit). In these cases BOM reports the deduced encoding but
   --  Len is 0, since the characters matched are not a mark.
   --
   --  In every other case Len is 0 and BOM is Unknown.

   procedure Read_BOM
     (Str         : String;
      Len         : out Natural;
      BOM         : out BOM_Kind;
      XML_Support : Boolean := False)
   is
      --  Byte values appearing in the recognized sequences

      X00 : constant Character := Character'Val (16#00#);
      X3C : constant Character := Character'Val (16#3C#);  --  '<'
      X3F : constant Character := Character'Val (16#3F#);  --  '?'
      X6D : constant Character := Character'Val (16#6D#);  --  'm'
      X78 : constant Character := Character'Val (16#78#);  --  'x'
      XBB : constant Character := Character'Val (16#BB#);
      XBF : constant Character := Character'Val (16#BF#);
      XEF : constant Character := Character'Val (16#EF#);
      XFE : constant Character := Character'Val (16#FE#);
      XFF : constant Character := Character'Val (16#FF#);

      function Starts_With (Prefix : String) return Boolean;
      --  True if Str has at least Prefix'Length characters and the first
      --  Prefix'Length of them are equal to Prefix. Prefix is expected to
      --  be non-empty (all calls below pass 2 to 4 characters).

      -----------------
      -- Starts_With --
      -----------------

      function Starts_With (Prefix : String) return Boolean is
      begin
         --  The length test comes first so that the slice bounds are
         --  always within Str. The upper bound is written as
         --  First + (Length - 1) so that no intermediate value exceeds
         --  Str'Last.

         return Str'Length >= Prefix'Length
           and then
             Str (Str'First .. Str'First + (Prefix'Length - 1)) = Prefix;
      end Starts_With;

   begin
      --  Note: the order of these tests is important, because in some cases
      --  one sequence is a prefix of a longer sequence, and we must test for
      --  the longer sequence first (the UTF-32 little-endian mark FF FE 00 00
      --  begins with the UTF-16 little-endian mark FF FE).

      --  UTF-32 (big-endian)

      if Starts_With (X00 & X00 & XFE & XFF) then
         Len := 4;
         BOM := UTF32_BE;

      --  UTF-32 (little-endian)

      elsif Starts_With (XFF & XFE & X00 & X00) then
         Len := 4;
         BOM := UTF32_LE;

      --  UTF-16 (big-endian)

      elsif Starts_With (XFE & XFF) then
         Len := 2;
         BOM := UTF16_BE;

      --  UTF-16 (little-endian)

      elsif Starts_With (XFF & XFE) then
         Len := 2;
         BOM := UTF16_LE;

      --  UTF-8 (endian-independent)

      elsif Starts_With (XEF & XBB & XBF) then
         Len := 3;
         BOM := UTF8_All;

      --  UCS-4 (big-endian) XML case

      elsif XML_Support and then Starts_With (X00 & X00 & X00 & X3C) then
         Len := 0;
         BOM := UCS4_BE;

      --  UCS-4 (little-endian) XML case

      elsif XML_Support and then Starts_With (X3C & X00 & X00 & X00) then
         Len := 0;
         BOM := UCS4_LE;

      --  UCS-4 (unusual byte order 2143) XML case

      elsif XML_Support and then Starts_With (X00 & X00 & X3C & X00) then
         Len := 0;
         BOM := UCS4_2143;

      --  UCS-4 (unusual byte order 3412) XML case

      elsif XML_Support and then Starts_With (X00 & X3C & X00 & X00) then
         Len := 0;
         BOM := UCS4_3412;

      --  UTF-16 (big-endian) XML case

      elsif XML_Support and then Starts_With (X00 & X3C & X00 & X3F) then
         Len := 0;
         BOM := UTF16_BE;

      --  UTF-16 (little-endian) XML case

      elsif XML_Support and then Starts_With (X3C & X00 & X3F & X00) then
         Len := 0;
         BOM := UTF16_LE;

      --  Unrecognized special encodings XML case ("<?xm" in single bytes)

      elsif XML_Support and then Starts_With (X3C & X3F & X78 & X6D) then

         --  UTF-8, ASCII, some part of ISO8859, Shift-JIS, EUC,...
         --  The result is deliberately the same as when nothing matches.

         Len := 0;
         BOM := Unknown;

      --  No BOM recognized

      else
         Len := 0;
         BOM := Unknown;
      end if;
   end Read_BOM;

end GNAT.Byte_Order_Mark;