4 * This file is part of LyX, the document processor.
5 * Licence details can be found in the file COPYING.
7 * \author Lars Gullik Bjønnes
8 * \author Jean-Marc Lasgouttes
10 * Full author contact details are available in file CREDITS.
16 #include "support/docstring.h"
17 #include "support/types.h"
/// Forward declaration of support::FileName. This header only uses the type
/// through const references (see Encodings::read), so the full definition
/// is not required here.
24 namespace support
{ class FileName
; }
/// Exception type derived from std::exception that carries one character
/// (failed_char).
/// NOTE(review): presumably raised when a character cannot be represented
/// in the target encoding -- confirm against the throw sites.
28 class EncodingException
: public std::exception
{
/// Construct the exception for character \p c.
/// NOTE(review): presumably \p c is stored in failed_char; the definition
/// is not part of this header, so verify there.
30 EncodingException(char_type c
);
/// Destructor, declared with an empty throw() specification.
31 virtual ~EncodingException() throw() {}
/// Overrides std::exception::what(); declared with an empty throw()
/// specification.
32 virtual const char * what() const throw();
/// The character associated with this exception.
34 char_type failed_char
;
43 /// Which LaTeX package handles this encoding?
/// Construct an encoding description.
/// NOTE(review): the parameter meanings below are inferred from the
/// accessor names of this class and must be confirmed against the
/// constructor definition.
/// \param n presumably the LyX name (see name())
/// \param l presumably the LaTeX name (see latexName())
/// \param i presumably the iconv name (see iconvName())
/// \param f presumably the fixed-width flag
/// \param p presumably the LaTeX package that handles the encoding (see package())
52 Encoding(std::string
const & n
, std::string
const & l
,
53 std::string
const & i
, bool f
, Package p
);
/// \return a const reference to the Name_ member (no copy is made).
57 std::string
const & name() const { return Name_
; }
/// \return a const reference to the LatexName_ member (no copy is made).
59 std::string
const & latexName() const { return LatexName_
; }
/// \return a const reference to the iconvName_ member (no copy is made).
61 std::string
const & iconvName() const { return iconvName_
; }
63 * Convert \p c to something that LaTeX can understand.
64 * This is either the character itself (if it is representable
65 * in this encoding), or a LaTeX macro.
66 * If the character is not representable in this encoding, but no
67 * LaTeX macro is known, a warning is printed to lyxerr, and the
68 * character is returned.
/// \param c the character to convert.
/// \return the converted text as a docstring, returned by value.
70 docstring
const latexChar(char_type c
) const;
71 /// Which LaTeX package handles this encoding?
/// \return the package_ member, by value.
72 Package
package() const { return package_
; }
73 /// A list of all characters usable in this encoding
/// \return the characters as a std::set<char_type>, returned by value.
74 std::set
<char_type
> getSymbolsList() const;
/// Value returned by latexName().
79 std::string LatexName_
;
/// Value returned by iconvName().
81 std::string iconvName_
;
82 /// Is this a fixed width encoding?
/// Set type holding char_type values; used for the encodable_ member.
85 typedef std::set
<char_type
> CharSet
;
86 /// Set of UCS4 characters that we can encode (for singlebyte encodings).
/// Declared mutable, so const member functions are allowed to modify it.
88 mutable CharSet encodable_
;
89 /// All code points below this are encodable. This helps us to avoid
90 /// lookup of ASCII characters in encodable_ and gives about 1 sec
91 /// speedup on export of the Userguide.
/// Declared mutable, so const member functions are allowed to modify it.
92 mutable char_type start_encodable_
;
93 /// Which LaTeX package handles this encoding?
96 * If this is true the stored information about the encoding covers
97 * all encodable characters. We set this to false initially so that
98 * we only need to query iconv for the actually used encodings.
99 * This is needed especially for the multibyte encodings, if we
100 * complete all encoding info on startup it takes 2-3 minutes.
/// Completeness flag for the stored encoding information.
/// Declared mutable, so const member functions are allowed to modify it.
102 mutable bool complete_
;
/// Container type for the known encodings: a std::map from a std::string
/// key to an Encoding value.
/// NOTE(review): the key is presumably the LyX name of the encoding --
/// confirm where the map is filled.
108 typedef std::map
<std::string
, Encoding
> EncodingList
;
109 /// iterator to iterate over all encodings.
110 /// We hide the fact that our encoding list is implemented as a map.
/// Derives from the map's own const_iterator and redefines the dereference
/// operators so that they yield the mapped Encoding instead of the
/// (key, value) pair.
111 class const_iterator
: public EncodingList::const_iterator
{
/// Shorthand for the underlying map iterator type.
112 typedef EncodingList::const_iterator base
;
/// Default-constructs the underlying map iterator.
114 const_iterator() : base() {}
/// Wraps an existing map iterator \p b (non-explicit, so map iterators
/// convert implicitly).
115 const_iterator(base
const & b
) : base(b
) {}
/// \return a reference to the Encoding stored in the current map entry
/// (the pair's .second).
116 Encoding
const & operator*() const { return base::operator*().second
; }
/// \return the address of the Encoding stored in the current map entry.
117 Encoding
const * operator->() const { return &(base::operator*().second
); }
121 /// Read the encodings.
122 /// \param encfile encodings definition file
123 /// \param symbolsfile unicode->LaTeX mapping file
/// Both files are passed by const reference. This is a non-const member.
/// NOTE(review): presumably it fills encodinglist -- confirm in the
/// definition.
124 void read(support::FileName
const & encfile
,
125 support::FileName
const & symbolsfile
);
126 /// Get encoding from LyX name \p name
/// \return a pointer to a const Encoding.
/// NOTE(review): presumably a null pointer when \p name is unknown --
/// confirm in the definition before dereferencing unconditionally.
127 Encoding
const * getFromLyXName(std::string
const & name
) const;
128 /// Get encoding from LaTeX name \p name
/// \return a pointer to a const Encoding.
/// NOTE(review): presumably a null pointer when \p name is unknown --
/// confirm in the definition before dereferencing unconditionally.
129 Encoding
const * getFromLaTeXName(std::string
const & name
) const;
/// \return encodinglist.begin(), converted to the wrapping const_iterator.
132 const_iterator
begin() const { return encodinglist
.begin(); }
/// \return encodinglist.end(), converted to the wrapping const_iterator.
134 const_iterator
end() const { return encodinglist
.end(); }
/// Static predicate on a single character \p c.
/// NOTE(review): judging by the name, tests whether \p c is a Hebrew
/// composing character -- confirm against the definition.
148 static bool isComposeChar_hebrew(char_type c
);
/// Static predicate on a single character \p c.
/// NOTE(review): judging by the name, tests whether \p c is an Arabic
/// composing character -- confirm against the definition.
150 static bool isComposeChar_arabic(char_type c
);
/// Static predicate on a single character \p c.
/// NOTE(review): which Arabic characters count as "special" is not visible
/// in this header -- see the definition.
152 static bool is_arabic_special(char_type c
);
/// Static predicate on a single character \p c.
/// NOTE(review): judging by the name, tests whether \p c is an Arabic
/// character -- confirm the exact range in the definition.
154 static bool is_arabic(char_type c
);
/// Static helper mapping character \p c and a Letter_Form \p form to a
/// char_type result.
/// NOTE(review): presumably selects the contextual glyph of \p c for the
/// requested letter form -- confirm against the definition.
156 static char_type
transformChar(char_type c
, Letter_Form form
);
157 /// Is this a combining char?
/// \param c the character to test.
158 static bool isCombiningChar(char_type c
);
160 * Is this a known char from some language?
161 * If \p preamble is empty and code point \p c is known to belong
162 * to a supported script, true is returned and \p preamble is set
163 * to the corresponding entry in the unicodesymbols file.
164 * If \p preamble is not empty, a check is made whether code point
165 * \p c is a known character matching the preamble entry.
/// \param c code point to test.
/// \param preamble in/out parameter, taken by non-const reference: pass
/// it empty to have it filled in, or non-empty to check \p c against it.
167 static bool isKnownScriptChar(char_type
const c
, std::string
& preamble
);
169 * Add the preamble snippet needed for the output of \p c to \p features.
171 * This does not depend on the used encoding, since the inputenc
172 * package only maps the code point \p c to a command, it does not
173 * make this command available.
/// \param c code point whose requirements are being recorded.
/// \param features taken by non-const reference, so it can be updated by
/// the call.
175 static void validate(char_type c
, LaTeXFeatures
& features
);
/// The map of encodings that begin() and end() iterate over.
179 EncodingList encodinglist
;
/// Declaration (not definition) of the global Encodings object named
/// `encodings`; the object itself is defined in another translation unit.
182 extern Encodings encodings
;