Merge from mainline (167278:168000).
[official-gcc/graphite-test-results.git] / gcc / ada / a-stuten.ads
blob5299c6f88e238a0dae5a1744c27f75d0f7e8902b
1 ------------------------------------------------------------------------------
2 -- --
3 -- GNAT RUN-TIME COMPONENTS --
4 -- --
5 -- A D A . S T R I N G S . U T F _ E N C O D I N G --
6 -- --
7 -- S p e c --
8 -- --
9 -- This specification is derived from the Ada Reference Manual for use with --
10 -- GNAT. The copyright notice above, and the license provisions that follow --
11 -- apply solely to the contents of the part following the private keyword. --
12 -- --
13 -- GNAT is free software; you can redistribute it and/or modify it under --
14 -- terms of the GNU General Public License as published by the Free Soft- --
15 -- ware Foundation; either version 2, or (at your option) any later ver- --
16 -- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
17 -- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
18 -- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License --
19 -- for more details. You should have received a copy of the GNU General --
20 -- Public License distributed with GNAT; see file COPYING. If not, write --
21 -- to the Free Software Foundation, 51 Franklin Street, Fifth Floor, --
22 -- Boston, MA 02110-1301, USA. --
23 -- --
24 -- As a special exception, if other files instantiate generics from this --
25 -- unit, or you link this unit with other files to produce an executable, --
26 -- this unit does not by itself cause the resulting executable to be --
27 -- covered by the GNU General Public License. This exception does not --
28 -- however invalidate any other reasons why the executable file might be --
29 -- covered by the GNU Public License. --
30 -- --
31 -- GNAT was originally developed by the GNAT team at New York University. --
32 -- Extensive contributions were provided by Ada Core Technologies Inc. --
33 -- --
34 ------------------------------------------------------------------------------
36 -- This is one of the Ada 2012 packages defined in AI05-0137-1. It is a parent
37 -- package that contains declarations used in the child packages for handling
38 -- UTF encoded strings. Note: this package is consistent with Ada 95, and may
39 -- be used in Ada 95 or Ada 2005 mode.
41 with Interfaces;
42 with Unchecked_Conversion;
44 package Ada.Strings.UTF_Encoding is
45 pragma Pure (UTF_Encoding);
47 subtype UTF_String is String;
48 -- Used to represent a string of 8-bit values containing a sequence of
49 -- values encoded in one of three ways (UTF-8, UTF-16BE, or UTF-16LE).
50 -- Typically used in connection with a Scheme parameter indicating which
51 -- of the encodings applies. This is not strictly a String value in the
52 -- sense defined in the Ada RM, but in practice type String accommodates
53 -- all possible 256 codes, and can be used to hold any sequence of 8-bit
54 -- codes. We use String directly rather than create a new type so that
55 -- all existing facilities for manipulating type String (e.g. the child
56 -- packages of Ada.Strings) are available for manipulation of UTF_Strings.
58 type Encoding_Scheme is (UTF_8, UTF_16BE, UTF_16LE);
59 -- Used to specify which of three possible encodings applies to a UTF_String
61 subtype UTF_8_String is String;
62 -- Similar to UTF_String but specifically represents a UTF-8 encoded string
64 subtype UTF_16_Wide_String is Wide_String;
65 -- This is similar to UTF_8_String but is used to represent a Wide_String
66 -- value which is a sequence of 16-bit values encoded using UTF-16. Again
67 -- this is not strictly a Wide_String in the sense of the Ada RM, but the
68 -- type Wide_String can be used to represent a sequence of arbitrary 16-bit
69 -- values, and it is more convenient to use Wide_String than a new type.
71 Encoding_Error : exception;
72 -- This exception is raised in the following situations:
73 -- a) A UTF encoded string contains an invalid encoding sequence
74 -- b) A UTF-16BE or UTF-16LE input string has an odd length
75 -- c) An incorrect character value is present in the Input string
76 -- d) The result for a Wide_Character output exceeds 16#FFFF#
77 -- The exception message has the index value where the error occurred.
79 -- The BOM (BYTE_ORDER_MARK) values defined here are used at the start of
80 -- a string to indicate the encoding. The convention in this package is
81 -- that on input a correct BOM is ignored and an incorrect BOM causes an
82 -- Encoding_Error exception. On output, the output string may or may not
83 -- include a BOM depending on the setting of Output_BOM.
85 BOM_8 : constant UTF_8_String :=
86 Character'Val (16#EF#) &
87 Character'Val (16#BB#) &
88 Character'Val (16#BF#);
90 BOM_16BE : constant UTF_String :=
91 Character'Val (16#FE#) &
92 Character'Val (16#FF#);
94 BOM_16LE : constant UTF_String :=
95 Character'Val (16#FF#) &
96 Character'Val (16#FE#);
98 BOM_16 : constant UTF_16_Wide_String :=
99 (1 => Wide_Character'Val (16#FEFF#));
-- BOM_8, BOM_16BE and BOM_16LE are sequences of 8-bit values (three, two
-- and two Characters respectively). BOM_16 is a single 16-bit value held
-- in a one-element UTF_16_Wide_String.
101 function Encoding
102 (Item : UTF_String;
103 Default : Encoding_Scheme := UTF_8) return Encoding_Scheme;
104 -- This function inspects a UTF_String value to determine whether it
105 -- starts with a BOM for UTF-8, UTF-16BE, or UTF-16LE. If so, the result
106 -- is the scheme corresponding to the BOM. If no valid BOM is present
107 -- then the result is the specified Default value.
-- Default is UTF_8 when the caller does not supply it.
109 private
110 function To_Unsigned_8 is new
111 Unchecked_Conversion (Character, Interfaces.Unsigned_8);
113 function To_Unsigned_16 is new
114 Unchecked_Conversion (Wide_Character, Interfaces.Unsigned_16);
116 function To_Unsigned_32 is new
117 Unchecked_Conversion (Wide_Wide_Character, Interfaces.Unsigned_32);
-- To_Unsigned_8/16/32 are unchecked conversions that reinterpret a
-- Character, Wide_Character or Wide_Wide_Character value as the
-- Interfaces unsigned type named in the instantiation. Being declared in
-- the private part, they are visible only to this package's body and to
-- its child units.
119 subtype UTF_XE_Encoding is Encoding_Scheme range UTF_16BE .. UTF_16LE;
120 -- Subtype containing only UTF_16BE and UTF_16LE entries
122 -- Utility routines for converting between UTF-16 and UTF-16LE/BE
124 function From_UTF_16
125 (Item : UTF_16_Wide_String;
126 Output_Scheme : UTF_XE_Encoding;
127 Output_BOM : Boolean := False) return UTF_String;
128 -- The input string Item is encoded in UTF-16. The output is encoded using
129 -- Output_Scheme (which is either UTF-16LE or UTF-16BE). There are no error
130 -- cases. The output starts with BOM_16BE/LE if Output_BOM is True.
132 function To_UTF_16
133 (Item : UTF_String;
134 Input_Scheme : UTF_XE_Encoding;
135 Output_BOM : Boolean := False) return UTF_16_Wide_String;
136 -- The input string Item is encoded using Input_Scheme which is either
137 -- UTF-16LE or UTF-16BE. The output is the corresponding UTF-16 wide
138 -- string. Encoding_Error is raised if the length of the input is odd.
139 -- The output starts with BOM_16 if Output_BOM is True.
141 procedure Raise_Encoding_Error (Index : Natural);
142 pragma No_Return (Raise_Encoding_Error);
143 -- Raise Encoding_Error exception for bad encoding in input item. The
144 -- parameter Index is the index of the location in Item for the error.
-- The No_Return pragma declares that this procedure never returns
-- normally to its caller.
146 end Ada.Strings.UTF_Encoding;