2 +----------------------------------------------------------------------+
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010- Facebook, Inc. (http://www.facebook.com) |
6 | Copyright (c) 1997-2010 The PHP Group |
7 +----------------------------------------------------------------------+
8 | This source file is subject to version 3.01 of the PHP license, |
9 | that is bundled with this package in the file LICENSE, and is |
10 | available through the world-wide-web at the following url: |
11 | http://www.php.net/license/3_01.txt |
12 | If you did not receive a copy of the PHP license and are unable to |
13 | obtain it through the world-wide-web, please send a note to |
14 | license@php.net so we can mail you a copy immediately. |
15 +----------------------------------------------------------------------+
18 #include "hphp/runtime/ext/ext_icu_ucsdet.h"
19 #include "unicode/unistr.h"
21 #include "hphp/system/lib/systemlib.h"
24 ///////////////////////////////////////////////////////////////////////////////
// Invokes the IMPLEMENT_DEFAULT_EXTENSION macro for the "icu_ucsdet"
// extension. The macro is defined outside this file.
// NOTE(review): presumably it emits the default extension registration
// boilerplate -- confirm against the macro's definition.
25 IMPLEMENT_DEFAULT_EXTENSION(icu_ucsdet
);
// Constructs an EncodingDetector: opens an ICU charset detector with
// ucsdet_open() and keeps the returned handle in m_encoding_detector.
// @param cb  class pointer supplied by the runtime for this object.
// @throws Exception carrying the ICU status code and its u_errorName()
//         text when ICU reports a failure status.
27 c_EncodingDetector::c_EncodingDetector(Class
* cb
) :
29 UErrorCode status
= U_ZERO_ERROR
;
30 m_encoding_detector
= ucsdet_open(&status
);
32 if (U_FAILURE(status
)) {
// The handle opened above is a charset (encoding) detector, so the message
// names that component rather than the unrelated spoof checker.
33 throw Exception("Could not open encoding detector, error %d (%s)",
34 status
, u_errorName(status
));
// Destructor: hands the ICU detector handle stored in m_encoding_detector
// (assigned from ucsdet_open() in the constructor) to ucsdet_close().
38 c_EncodingDetector::~c_EncodingDetector() {
39 ucsdet_close(m_encoding_detector
);
// t___construct(): takes no arguments and returns nothing; no statements of
// its body are visible in this listing.
// NOTE(review): presumably the script-level __construct hook, left empty
// because the ICU handle is opened in the C++ constructor -- confirm.
42 void c_EncodingDetector::t___construct() {
// Sets the text the detector should examine.
// @param text  the input string; it is also echoed (via c_str()) in the
//              failure message below.
// The ICU call that consumes `text` is not visible in this listing; only
// the status initialisation and the failure handling are.
// On a failing ICU status the message below is formatted with the text, the
// numeric status and u_errorName(status).
// NOTE(review): presumably raised as Exception, like the constructor's
// failure path -- confirm.
45 void c_EncodingDetector::t_settext(CStrRef text
) {
46 UErrorCode status
= U_ZERO_ERROR
;
53 if (U_FAILURE(status
)) {
55 "Could not set encoding detector text to [%s], error %d (%s)",
56 text
.c_str(), status
, u_errorName(status
));
// Tells the detector which encoding the caller believes the text is in.
// @param text  the declared encoding name; copied into m_declaredencoding
//              and echoed in the failure message.
// The member copy's data()/length() are what is handed to
// ucsdet_setDeclaredEncoding(); the remaining call arguments are not visible
// in this listing.
// On a failing ICU status the message below is formatted with the text, the
// numeric status and u_errorName(status).
60 void c_EncodingDetector::t_setdeclaredencoding(CStrRef text
) {
61 UErrorCode status
= U_ZERO_ERROR
;
// Keep a copy in the object and pass the member, not the parameter, to ICU.
// NOTE(review): presumably so the buffer outlives this call -- confirm
// whether ICU retains the pointer.
62 m_declaredencoding
= text
;
63 ucsdet_setDeclaredEncoding(
65 m_declaredencoding
.data(),
66 m_declaredencoding
.length(),
68 if (U_FAILURE(status
)) {
70 "Could not set encoding detector declared encoding to [%s], error %d (%s)",
71 text
.c_str(), status
, u_errorName(status
));
// Runs single-result detection: calls ucsdet_detect() and stores the
// resulting UCharsetMatch pointer in a freshly created c_EncodingMatch
// object (its m_encoding_match field).
// @return Object -- the return statement is not visible in this listing;
//         presumably the c_EncodingMatch built below (confirm).
// On a failing ICU status the "Could not detect encoding" message is
// formatted with the numeric status and u_errorName(status).
// NOTE(review): the ICU documentation describes a NULL result when no
// charset matches; the pointer is stored unchecked here, and
// c_EncodingMatch::validate()/t_isvalid() compare it against 0.
75 Object
c_EncodingDetector::t_detect() {
76 UErrorCode status
= U_ZERO_ERROR
;
77 const UCharsetMatch
* match
= ucsdet_detect(
80 if (U_FAILURE(status
)) {
82 "Could not detect encoding, error %d (%s)", status
, u_errorName(status
));
// Wrap the raw ICU match pointer in a new EncodingMatch object.
85 p_EncodingMatch matchobj
= NEWOBJ(c_EncodingMatch
)();
86 matchobj
->m_encoding_match
= match
;
// Runs multi-result detection: calls ucsdet_detectAll() and builds an Array
// holding one c_EncodingMatch object per entry of the returned
// UCharsetMatch pointer array, in index order 0 .. matchesFound-1.
// The declarations of `i` and `matchesFound`, the call's arguments and the
// return statement are not visible in this listing.
// On a failing ICU status the "Could not detect all encodings" message is
// formatted with the numeric status and u_errorName(status).
90 Array
c_EncodingDetector::t_detectall() {
92 UErrorCode status
= U_ZERO_ERROR
;
93 const UCharsetMatch
** matches
= ucsdet_detectAll(
97 if (U_FAILURE(status
)) {
99 "Could not detect all encodings, error %d (%s)", status
, u_errorName(status
));
// Start from an empty array and append one wrapper object per ICU match.
102 Array ret
= Array::Create();
104 for (i
= 0; i
< matchesFound
; i
++) {
105 p_EncodingMatch matchobj
= NEWOBJ(c_EncodingMatch
)();
106 matchobj
->m_encoding_match
= matches
[i
];
107 ret
.append(matchobj
);
112 ///////////////////////////////////////////////////////////////////////////////
// Constructs an EncodingMatch with no ICU match attached: forwards `cb` to
// the ExtObjectData base and initialises m_encoding_match to 0.
// The pointer is filled in later by c_EncodingDetector::t_detect() and
// t_detectall(), which assign m_encoding_match directly.
113 c_EncodingMatch::c_EncodingMatch(Class
* cb
) :
114 ExtObjectData(cb
), m_encoding_match(0) {
// Destructor; no statements of its body are visible in this listing.
// NOTE(review): m_encoding_match is presumably owned by the ICU detector
// that produced it and therefore not released here -- confirm against the
// ICU ucsdet documentation.
117 c_EncodingMatch::~c_EncodingMatch() {
// t___construct(): takes no arguments and returns nothing; no statements of
// its body are visible in this listing.
// NOTE(review): presumably the script-level __construct hook -- confirm.
120 void c_EncodingMatch::t___construct() {
// Guard helper: throws Exception when m_encoding_match is 0, i.e. when this
// object has no ICU match attached. Does nothing otherwise.
123 void c_EncodingMatch::validate() {
124 if (m_encoding_match
== 0) {
125 throw Exception("EncodingMatch object is not valid! Call isValid() before using.");
// Reports whether an ICU match is attached.
// @return true when m_encoding_match is non-zero, false otherwise.
129 bool c_EncodingMatch::t_isvalid() {
130 return m_encoding_match
!= 0;
// Returns the encoding name of this match.
// @return a String built from the C string returned by ucsdet_getName().
// The call's arguments and any statements before the status declaration are
// not visible in this listing.
// On a failing ICU status the "Could not get encoding for match" message is
// formatted with the numeric status and u_errorName(status).
133 String
c_EncodingMatch::t_getencoding() {
136 UErrorCode status
= U_ZERO_ERROR
;
137 const char* encoding
= ucsdet_getName(
140 if (U_FAILURE(status
)) {
142 "Could not get encoding for match, error %d (%s)",
143 status
, u_errorName(status
));
145 return String(encoding
);
// Returns the confidence value of this match.
// The value is obtained as an int32_t from ucsdet_getConfidence(); the
// method's declared return type is int64_t.
// The call's arguments and the return statement are not visible in this
// listing.
// On a failing ICU status the "Could not get confidence for match" message
// is formatted with the numeric status and u_errorName(status).
148 int64_t c_EncodingMatch::t_getconfidence() {
151 UErrorCode status
= U_ZERO_ERROR
;
152 int32_t confidence
= ucsdet_getConfidence(
155 if (U_FAILURE(status
)) {
157 "Could not get confidence for match, error %d (%s)",
158 status
, u_errorName(status
));
// Returns the language associated with this match.
// @return a String built from the C string returned by ucsdet_getLanguage().
// The call's arguments and any statements before the status declaration are
// not visible in this listing.
// On a failing ICU status the "Could not get language for match" message is
// formatted with the numeric status and u_errorName(status).
163 String
c_EncodingMatch::t_getlanguage() {
166 UErrorCode status
= U_ZERO_ERROR
;
167 const char* language
= ucsdet_getLanguage(
170 if (U_FAILURE(status
)) {
172 "Could not get language for match, error %d (%s)",
173 status
, u_errorName(status
));
175 return String(language
);
// Returns the matched text converted to UTF-8.
// The text is first fetched as UChars into an icu::UnicodeString through
// ucsdet_getUChars(), retrying while ICU reports U_BUFFER_OVERFLOW_ERROR,
// and then converted with UnicodeString::toUTF8String().
// @return a String built from the UTF-8 result (utf8str).
// The loop opener, the declarations of `status` and `utf8str`, and the
// ucsdet_getUChars() arguments are not visible in this listing.
// On a failing ICU status after the loop, the "Could not get UTF-8 for
// match" message is formatted with the numeric status and
// u_errorName(status).
178 String
c_EncodingMatch::t_getutf8() {
182 icu::UnicodeString ustr
;
// First attempt uses whatever capacity the empty UnicodeString already has.
183 int32_t ustrSize
= ustr
.getCapacity();
// Reset the status on every pass so an earlier overflow does not persist.
186 status
= U_ZERO_ERROR
;
// Borrow a writable buffer of at least ustrSize UChars from the string.
187 UChar
* buf
= ustr
.getBuffer(ustrSize
);
// ustrSize is overwritten with the call's result, so a retry requests that
// size from getBuffer().
// NOTE(review): presumably the full required length on overflow -- confirm
// against the ICU documentation.
188 ustrSize
= ucsdet_getUChars(
// Give the buffer back, then cut the string to the length ICU reported.
193 ustr
.releaseBuffer();
194 ustr
.truncate(ustrSize
);
195 } while (status
== U_BUFFER_OVERFLOW_ERROR
);
197 if (U_FAILURE(status
)) {
199 "Could not get UTF-8 for match, error %d (%s)",
200 status
, u_errorName(status
));
203 ustr
.toUTF8String(utf8str
);
204 return String(utf8str
);
207 ///////////////////////////////////////////////////////////////////////////////