2015-05-22 Eric Botcazou <ebotcazou@adacore.com>
[official-gcc.git] / gcc / ada / g-byorma.adb
blob0b389f5d4032dc834a1f6e9aed3c08303c39066e
1 ------------------------------------------------------------------------------
2 -- --
3 -- GNAT COMPILER COMPONENTS --
4 -- --
5 -- G N A T . B Y T E _ O R D E R _ M A R K --
6 -- --
7 -- B o d y --
8 -- --
9 -- Copyright (C) 2006-2013, AdaCore --
10 -- --
11 -- GNAT is free software; you can redistribute it and/or modify it under --
12 -- terms of the GNU General Public License as published by the Free Soft- --
13 -- ware Foundation; either version 3, or (at your option) any later ver- --
14 -- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
15 -- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
16 -- or FITNESS FOR A PARTICULAR PURPOSE. --
17 -- --
18 -- As a special exception under Section 7 of GPL version 3, you are granted --
19 -- additional permissions described in the GCC Runtime Library Exception, --
20 -- version 3.1, as published by the Free Software Foundation. --
21 -- --
22 -- You should have received a copy of the GNU General Public License and --
23 -- a copy of the GCC Runtime Library Exception along with this program; --
24 -- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see --
25 -- <http://www.gnu.org/licenses/>. --
26 -- --
27 -- GNAT was originally developed by the GNAT team at New York University. --
28 -- Extensive contributions were provided by Ada Core Technologies Inc. --
29 -- --
30 ------------------------------------------------------------------------------
32 pragma Compiler_Unit_Warning;
34 package body GNAT.Byte_Order_Mark is
36 --------------
37 -- Read_BOM --
38 --------------
40 procedure Read_BOM
41 (Str : String;
42 Len : out Natural;
43 BOM : out BOM_Kind;
44 XML_Support : Boolean := False)
46 begin
47 -- Note: the order of these tests is important, because in some cases
48 -- one sequence is a prefix of a longer sequence, and we must test for
49 -- the longer sequence first
51 -- UTF-32 (big-endian)
53 if Str'Length >= 4
54 and then Str (Str'First) = Character'Val (16#00#)
55 and then Str (Str'First + 1) = Character'Val (16#00#)
56 and then Str (Str'First + 2) = Character'Val (16#FE#)
57 and then Str (Str'First + 3) = Character'Val (16#FF#)
58 then
59 Len := 4;
60 BOM := UTF32_BE;
62 -- UTF-32 (little-endian)
64 elsif Str'Length >= 4
65 and then Str (Str'First) = Character'Val (16#FF#)
66 and then Str (Str'First + 1) = Character'Val (16#FE#)
67 and then Str (Str'First + 2) = Character'Val (16#00#)
68 and then Str (Str'First + 3) = Character'Val (16#00#)
69 then
70 Len := 4;
71 BOM := UTF32_LE;
73 -- UTF-16 (big-endian)
75 elsif Str'Length >= 2
76 and then Str (Str'First) = Character'Val (16#FE#)
77 and then Str (Str'First + 1) = Character'Val (16#FF#)
78 then
79 Len := 2;
80 BOM := UTF16_BE;
82 -- UTF-16 (little-endian)
84 elsif Str'Length >= 2
85 and then Str (Str'First) = Character'Val (16#FF#)
86 and then Str (Str'First + 1) = Character'Val (16#FE#)
87 then
88 Len := 2;
89 BOM := UTF16_LE;
91 -- UTF-8 (endian-independent)
93 elsif Str'Length >= 3
94 and then Str (Str'First) = Character'Val (16#EF#)
95 and then Str (Str'First + 1) = Character'Val (16#BB#)
96 and then Str (Str'First + 2) = Character'Val (16#BF#)
97 then
98 Len := 3;
99 BOM := UTF8_All;
101 -- UCS-4 (big-endian) XML only
103 elsif XML_Support
104 and then Str'Length >= 4
105 and then Str (Str'First) = Character'Val (16#00#)
106 and then Str (Str'First + 1) = Character'Val (16#00#)
107 and then Str (Str'First + 2) = Character'Val (16#00#)
108 and then Str (Str'First + 3) = Character'Val (16#3C#)
109 then
110 Len := 0;
111 BOM := UCS4_BE;
113 -- UCS-4 (little-endian) XML case
115 elsif XML_Support
116 and then Str'Length >= 4
117 and then Str (Str'First) = Character'Val (16#3C#)
118 and then Str (Str'First + 1) = Character'Val (16#00#)
119 and then Str (Str'First + 2) = Character'Val (16#00#)
120 and then Str (Str'First + 3) = Character'Val (16#00#)
121 then
122 Len := 0;
123 BOM := UCS4_LE;
125 -- UCS-4 (unusual byte order 2143) XML case
127 elsif XML_Support
128 and then Str'Length >= 4
129 and then Str (Str'First) = Character'Val (16#00#)
130 and then Str (Str'First + 1) = Character'Val (16#00#)
131 and then Str (Str'First + 2) = Character'Val (16#3C#)
132 and then Str (Str'First + 3) = Character'Val (16#00#)
133 then
134 Len := 0;
135 BOM := UCS4_2143;
137 -- UCS-4 (unusual byte order 3412) XML case
139 elsif XML_Support
140 and then Str'Length >= 4
141 and then Str (Str'First) = Character'Val (16#00#)
142 and then Str (Str'First + 1) = Character'Val (16#3C#)
143 and then Str (Str'First + 2) = Character'Val (16#00#)
144 and then Str (Str'First + 3) = Character'Val (16#00#)
145 then
146 Len := 0;
147 BOM := UCS4_3412;
149 -- UTF-16 (big-endian) XML case
151 elsif XML_Support
152 and then Str'Length >= 4
153 and then Str (Str'First) = Character'Val (16#00#)
154 and then Str (Str'First + 1) = Character'Val (16#3C#)
155 and then Str (Str'First + 2) = Character'Val (16#00#)
156 and then Str (Str'First + 3) = Character'Val (16#3F#)
157 then
158 Len := 0;
159 BOM := UTF16_BE;
161 -- UTF-32 (little-endian) XML case
163 elsif XML_Support
164 and then Str'Length >= 4
165 and then Str (Str'First) = Character'Val (16#3C#)
166 and then Str (Str'First + 1) = Character'Val (16#00#)
167 and then Str (Str'First + 2) = Character'Val (16#3F#)
168 and then Str (Str'First + 3) = Character'Val (16#00#)
169 then
170 Len := 0;
171 BOM := UTF16_LE;
173 -- Unrecognized special encodings XML only
175 elsif XML_Support
176 and then Str'Length >= 4
177 and then Str (Str'First) = Character'Val (16#3C#)
178 and then Str (Str'First + 1) = Character'Val (16#3F#)
179 and then Str (Str'First + 2) = Character'Val (16#78#)
180 and then Str (Str'First + 3) = Character'Val (16#6D#)
181 then
182 -- UTF-8, ASCII, some part of ISO8859, Shift-JIS, EUC,...
184 Len := 0;
185 BOM := Unknown;
187 -- No BOM recognized
189 else
190 Len := 0;
191 BOM := Unknown;
192 end if;
193 end Read_BOM;
195 end GNAT.Byte_Order_Mark;