3 * Copyright (C) Igor Sysoev
4 * Copyright (C) Nginx, Inc.
8 #include <ngx_config.h>
13 #define NGX_HTTP_CHARSET_OFF -2
14 #define NGX_HTTP_NO_CHARSET -3
15 #define NGX_HTTP_CHARSET_VAR 0x10000
17 /* 1 byte length and up to 3 bytes for the UTF-8 encoding of the UCS-2 */
20 #define NGX_HTML_ENTITY_LEN (sizeof("") - 1)
35 } ngx_http_charset_recode_t
;
43 } ngx_http_charset_tables_t
;
47 ngx_array_t charsets
; /* ngx_http_charset_t */
48 ngx_array_t tables
; /* ngx_http_charset_tables_t */
49 ngx_array_t recodes
; /* ngx_http_charset_recode_t */
50 } ngx_http_charset_main_conf_t
;
55 ngx_int_t source_charset
;
56 ngx_flag_t override_charset
;
59 ngx_array_t
*types_keys
;
60 } ngx_http_charset_loc_conf_t
;
66 ngx_str_t charset_name
;
69 ngx_chain_t
*free_bufs
;
70 ngx_chain_t
*free_buffers
;
73 u_char saved
[NGX_UTF_LEN
];
78 } ngx_http_charset_ctx_t
;
82 ngx_http_charset_tables_t
*table
;
83 ngx_http_charset_t
*charset
;
84 ngx_uint_t characters
;
85 } ngx_http_charset_conf_ctx_t
;
88 static ngx_int_t
ngx_http_destination_charset(ngx_http_request_t
*r
,
90 static ngx_int_t
ngx_http_main_request_charset(ngx_http_request_t
*r
,
92 static ngx_int_t
ngx_http_source_charset(ngx_http_request_t
*r
,
94 static ngx_int_t
ngx_http_get_charset(ngx_http_request_t
*r
, ngx_str_t
*name
);
95 static ngx_inline
void ngx_http_set_charset(ngx_http_request_t
*r
,
97 static ngx_int_t
ngx_http_charset_ctx(ngx_http_request_t
*r
,
98 ngx_http_charset_t
*charsets
, ngx_int_t charset
, ngx_int_t source_charset
);
99 static ngx_uint_t
ngx_http_charset_recode(ngx_buf_t
*b
, u_char
*table
);
100 static ngx_chain_t
*ngx_http_charset_recode_from_utf8(ngx_pool_t
*pool
,
101 ngx_buf_t
*buf
, ngx_http_charset_ctx_t
*ctx
);
102 static ngx_chain_t
*ngx_http_charset_recode_to_utf8(ngx_pool_t
*pool
,
103 ngx_buf_t
*buf
, ngx_http_charset_ctx_t
*ctx
);
105 static ngx_chain_t
*ngx_http_charset_get_buf(ngx_pool_t
*pool
,
106 ngx_http_charset_ctx_t
*ctx
);
107 static ngx_chain_t
*ngx_http_charset_get_buffer(ngx_pool_t
*pool
,
108 ngx_http_charset_ctx_t
*ctx
, size_t size
);
110 static char *ngx_http_charset_map_block(ngx_conf_t
*cf
, ngx_command_t
*cmd
,
112 static char *ngx_http_charset_map(ngx_conf_t
*cf
, ngx_command_t
*dummy
,
115 static char *ngx_http_set_charset_slot(ngx_conf_t
*cf
, ngx_command_t
*cmd
,
117 static ngx_int_t
ngx_http_add_charset(ngx_array_t
*charsets
, ngx_str_t
*name
);
119 static void *ngx_http_charset_create_main_conf(ngx_conf_t
*cf
);
120 static void *ngx_http_charset_create_loc_conf(ngx_conf_t
*cf
);
121 static char *ngx_http_charset_merge_loc_conf(ngx_conf_t
*cf
,
122 void *parent
, void *child
);
123 static ngx_int_t
ngx_http_charset_postconfiguration(ngx_conf_t
*cf
);
126 ngx_str_t ngx_http_charset_default_types
[] = {
127 ngx_string("text/html"),
128 ngx_string("text/xml"),
129 ngx_string("text/plain"),
130 ngx_string("text/vnd.wap.wml"),
131 ngx_string("application/x-javascript"),
132 ngx_string("application/rss+xml"),
137 static ngx_command_t ngx_http_charset_filter_commands
[] = {
139 { ngx_string("charset"),
140 NGX_HTTP_MAIN_CONF
|NGX_HTTP_SRV_CONF
|NGX_HTTP_LOC_CONF
141 |NGX_HTTP_LIF_CONF
|NGX_CONF_TAKE1
,
142 ngx_http_set_charset_slot
,
143 NGX_HTTP_LOC_CONF_OFFSET
,
144 offsetof(ngx_http_charset_loc_conf_t
, charset
),
147 { ngx_string("source_charset"),
148 NGX_HTTP_MAIN_CONF
|NGX_HTTP_SRV_CONF
|NGX_HTTP_LOC_CONF
149 |NGX_HTTP_LIF_CONF
|NGX_CONF_TAKE1
,
150 ngx_http_set_charset_slot
,
151 NGX_HTTP_LOC_CONF_OFFSET
,
152 offsetof(ngx_http_charset_loc_conf_t
, source_charset
),
155 { ngx_string("override_charset"),
156 NGX_HTTP_MAIN_CONF
|NGX_HTTP_SRV_CONF
|NGX_HTTP_LOC_CONF
157 |NGX_HTTP_LIF_CONF
|NGX_CONF_FLAG
,
158 ngx_conf_set_flag_slot
,
159 NGX_HTTP_LOC_CONF_OFFSET
,
160 offsetof(ngx_http_charset_loc_conf_t
, override_charset
),
163 { ngx_string("charset_types"),
164 NGX_HTTP_MAIN_CONF
|NGX_HTTP_SRV_CONF
|NGX_HTTP_LOC_CONF
|NGX_CONF_1MORE
,
166 NGX_HTTP_LOC_CONF_OFFSET
,
167 offsetof(ngx_http_charset_loc_conf_t
, types_keys
),
168 &ngx_http_charset_default_types
[0] },
170 { ngx_string("charset_map"),
171 NGX_HTTP_MAIN_CONF
|NGX_CONF_BLOCK
|NGX_CONF_TAKE2
,
172 ngx_http_charset_map_block
,
173 NGX_HTTP_MAIN_CONF_OFFSET
,
181 static ngx_http_module_t ngx_http_charset_filter_module_ctx
= {
182 NULL
, /* preconfiguration */
183 ngx_http_charset_postconfiguration
, /* postconfiguration */
185 ngx_http_charset_create_main_conf
, /* create main configuration */
186 NULL
, /* init main configuration */
188 NULL
, /* create server configuration */
189 NULL
, /* merge server configuration */
191 ngx_http_charset_create_loc_conf
, /* create location configuration */
192 ngx_http_charset_merge_loc_conf
/* merge location configuration */
196 ngx_module_t ngx_http_charset_filter_module
= {
198 &ngx_http_charset_filter_module_ctx
, /* module context */
199 ngx_http_charset_filter_commands
, /* module directives */
200 NGX_HTTP_MODULE
, /* module type */
201 NULL
, /* init master */
202 NULL
, /* init module */
203 NULL
, /* init process */
204 NULL
, /* init thread */
205 NULL
, /* exit thread */
206 NULL
, /* exit process */
207 NULL
, /* exit master */
208 NGX_MODULE_V1_PADDING
212 static ngx_http_output_header_filter_pt ngx_http_next_header_filter
;
213 static ngx_http_output_body_filter_pt ngx_http_next_body_filter
;
217 ngx_http_charset_header_filter(ngx_http_request_t
*r
)
219 ngx_int_t charset
, source_charset
;
221 ngx_http_charset_t
*charsets
;
222 ngx_http_charset_main_conf_t
*mcf
;
225 charset
= ngx_http_destination_charset(r
, &dst
);
228 charset
= ngx_http_main_request_charset(r
, &dst
);
231 if (charset
== NGX_ERROR
) {
235 if (charset
== NGX_DECLINED
) {
236 return ngx_http_next_header_filter(r
);
239 /* charset: charset index or NGX_HTTP_NO_CHARSET */
241 source_charset
= ngx_http_source_charset(r
, &src
);
243 if (source_charset
== NGX_ERROR
) {
248 * source_charset: charset index, NGX_HTTP_NO_CHARSET,
249 * or NGX_HTTP_CHARSET_OFF
252 ngx_log_debug2(NGX_LOG_DEBUG_HTTP
, r
->connection
->log
, 0,
253 "charset: \"%V\" > \"%V\"", &src
, &dst
);
255 if (source_charset
== NGX_HTTP_CHARSET_OFF
) {
256 ngx_http_set_charset(r
, &dst
);
258 return ngx_http_next_header_filter(r
);
261 if (charset
== NGX_HTTP_NO_CHARSET
262 || source_charset
== NGX_HTTP_NO_CHARSET
)
264 if (source_charset
!= charset
265 || ngx_strncasecmp(dst
.data
, src
.data
, dst
.len
) != 0)
270 ngx_http_set_charset(r
, &dst
);
272 return ngx_http_next_header_filter(r
);
275 mcf
= ngx_http_get_module_main_conf(r
, ngx_http_charset_filter_module
);
276 charsets
= mcf
->charsets
.elts
;
278 if (source_charset
!= charset
279 && (charsets
[source_charset
].tables
== NULL
280 || charsets
[source_charset
].tables
[charset
] == NULL
))
285 r
->headers_out
.content_type
.len
= r
->headers_out
.content_type_len
;
287 ngx_http_set_charset(r
, &dst
);
289 if (source_charset
!= charset
) {
290 return ngx_http_charset_ctx(r
, charsets
, charset
, source_charset
);
293 return ngx_http_next_header_filter(r
);
297 ngx_log_error(NGX_LOG_ERR
, r
->connection
->log
, 0,
298 "no \"charset_map\" between the charsets \"%V\" and \"%V\"",
301 return ngx_http_next_header_filter(r
);
306 ngx_http_destination_charset(ngx_http_request_t
*r
, ngx_str_t
*name
)
309 ngx_http_charset_t
*charsets
;
310 ngx_http_variable_value_t
*vv
;
311 ngx_http_charset_loc_conf_t
*mlcf
;
312 ngx_http_charset_main_conf_t
*mcf
;
314 if (!r
->ignore_content_encoding
315 && r
->headers_out
.content_encoding
316 && r
->headers_out
.content_encoding
->value
.len
)
321 if (r
->headers_out
.content_type
.len
== 0) {
325 if (r
->headers_out
.override_charset
326 && r
->headers_out
.override_charset
->len
)
328 *name
= *r
->headers_out
.override_charset
;
330 charset
= ngx_http_get_charset(r
, name
);
332 if (charset
!= NGX_HTTP_NO_CHARSET
) {
336 ngx_log_error(NGX_LOG_ERR
, r
->connection
->log
, 0,
337 "unknown charset \"%V\" to override", name
);
342 mlcf
= ngx_http_get_module_loc_conf(r
, ngx_http_charset_filter_module
);
343 charset
= mlcf
->charset
;
345 if (charset
== NGX_HTTP_CHARSET_OFF
) {
349 if (r
->headers_out
.charset
.len
) {
350 if (mlcf
->override_charset
== 0) {
355 if (ngx_http_test_content_type(r
, &mlcf
->types
) == NULL
) {
360 if (charset
< NGX_HTTP_CHARSET_VAR
) {
361 mcf
= ngx_http_get_module_main_conf(r
, ngx_http_charset_filter_module
);
362 charsets
= mcf
->charsets
.elts
;
363 *name
= charsets
[charset
].name
;
367 vv
= ngx_http_get_indexed_variable(r
, charset
- NGX_HTTP_CHARSET_VAR
);
369 if (vv
== NULL
|| vv
->not_found
) {
374 name
->data
= vv
->data
;
376 return ngx_http_get_charset(r
, name
);
381 ngx_http_main_request_charset(ngx_http_request_t
*r
, ngx_str_t
*src
)
384 ngx_str_t
*main_charset
;
385 ngx_http_charset_ctx_t
*ctx
;
387 ctx
= ngx_http_get_module_ctx(r
->main
, ngx_http_charset_filter_module
);
390 *src
= ctx
->charset_name
;
394 main_charset
= &r
->main
->headers_out
.charset
;
396 if (main_charset
->len
== 0) {
400 ctx
= ngx_pcalloc(r
->pool
, sizeof(ngx_http_charset_ctx_t
));
405 ngx_http_set_ctx(r
->main
, ctx
, ngx_http_charset_filter_module
);
407 charset
= ngx_http_get_charset(r
, main_charset
);
409 ctx
->charset
= charset
;
410 ctx
->charset_name
= *main_charset
;
411 *src
= *main_charset
;
418 ngx_http_source_charset(ngx_http_request_t
*r
, ngx_str_t
*name
)
421 ngx_http_charset_t
*charsets
;
422 ngx_http_variable_value_t
*vv
;
423 ngx_http_charset_loc_conf_t
*lcf
;
424 ngx_http_charset_main_conf_t
*mcf
;
426 if (r
->headers_out
.charset
.len
) {
427 *name
= r
->headers_out
.charset
;
428 return ngx_http_get_charset(r
, name
);
431 lcf
= ngx_http_get_module_loc_conf(r
, ngx_http_charset_filter_module
);
433 charset
= lcf
->source_charset
;
435 if (charset
== NGX_HTTP_CHARSET_OFF
) {
440 if (charset
< NGX_HTTP_CHARSET_VAR
) {
441 mcf
= ngx_http_get_module_main_conf(r
, ngx_http_charset_filter_module
);
442 charsets
= mcf
->charsets
.elts
;
443 *name
= charsets
[charset
].name
;
447 vv
= ngx_http_get_indexed_variable(r
, charset
- NGX_HTTP_CHARSET_VAR
);
449 if (vv
== NULL
|| vv
->not_found
) {
454 name
->data
= vv
->data
;
456 return ngx_http_get_charset(r
, name
);
461 ngx_http_get_charset(ngx_http_request_t
*r
, ngx_str_t
*name
)
464 ngx_http_charset_t
*charset
;
465 ngx_http_charset_main_conf_t
*mcf
;
467 mcf
= ngx_http_get_module_main_conf(r
, ngx_http_charset_filter_module
);
469 charset
= mcf
->charsets
.elts
;
470 n
= mcf
->charsets
.nelts
;
472 for (i
= 0; i
< n
; i
++) {
473 if (charset
[i
].name
.len
!= name
->len
) {
477 if (ngx_strncasecmp(charset
[i
].name
.data
, name
->data
, name
->len
) == 0) {
482 return NGX_HTTP_NO_CHARSET
;
486 static ngx_inline
void
487 ngx_http_set_charset(ngx_http_request_t
*r
, ngx_str_t
*charset
)
493 if (r
->headers_out
.status
== NGX_HTTP_MOVED_PERMANENTLY
494 || r
->headers_out
.status
== NGX_HTTP_MOVED_TEMPORARILY
)
497 * do not set charset for the redirect because NN 4.x
498 * use this charset instead of the next page charset
501 r
->headers_out
.charset
.len
= 0;
505 r
->headers_out
.charset
= *charset
;
510 ngx_http_charset_ctx(ngx_http_request_t
*r
, ngx_http_charset_t
*charsets
,
511 ngx_int_t charset
, ngx_int_t source_charset
)
513 ngx_http_charset_ctx_t
*ctx
;
515 ctx
= ngx_pcalloc(r
->pool
, sizeof(ngx_http_charset_ctx_t
));
520 ngx_http_set_ctx(r
, ctx
, ngx_http_charset_filter_module
);
522 ctx
->table
= charsets
[source_charset
].tables
[charset
];
523 ctx
->charset
= charset
;
524 ctx
->charset_name
= charsets
[charset
].name
;
525 ctx
->length
= charsets
[charset
].length
;
526 ctx
->from_utf8
= charsets
[source_charset
].utf8
;
527 ctx
->to_utf8
= charsets
[charset
].utf8
;
529 r
->filter_need_in_memory
= 1;
531 if ((ctx
->to_utf8
|| ctx
->from_utf8
) && r
== r
->main
) {
532 ngx_http_clear_content_length(r
);
535 r
->filter_need_temporary
= 1;
538 return ngx_http_next_header_filter(r
);
543 ngx_http_charset_body_filter(ngx_http_request_t
*r
, ngx_chain_t
*in
)
547 ngx_chain_t
*cl
, *out
, **ll
;
548 ngx_http_charset_ctx_t
*ctx
;
550 ctx
= ngx_http_get_module_ctx(r
, ngx_http_charset_filter_module
);
552 if (ctx
== NULL
|| ctx
->table
== NULL
) {
553 return ngx_http_next_body_filter(r
, in
);
556 if ((ctx
->to_utf8
|| ctx
->from_utf8
) || ctx
->busy
) {
561 for (cl
= in
; cl
; cl
= cl
->next
) {
564 if (ngx_buf_size(b
) == 0) {
566 *ll
= ngx_alloc_chain_link(r
->pool
);
580 *ll
= ngx_http_charset_recode_to_utf8(r
->pool
, b
, ctx
);
583 *ll
= ngx_http_charset_recode_from_utf8(r
->pool
, b
, ctx
);
595 rc
= ngx_http_next_body_filter(r
, out
);
598 if (ctx
->busy
== NULL
) {
602 for (cl
= ctx
->busy
; cl
->next
; cl
= cl
->next
) { /* void */ }
612 if (ngx_buf_size(b
) != 0) {
616 ctx
->busy
= cl
->next
;
618 if (b
->tag
!= (ngx_buf_tag_t
) &ngx_http_charset_filter_module
) {
623 b
->shadow
->pos
= b
->shadow
->last
;
627 cl
->next
= ctx
->free_buffers
;
628 ctx
->free_buffers
= cl
;
632 cl
->next
= ctx
->free_bufs
;
639 for (cl
= in
; cl
; cl
= cl
->next
) {
640 (void) ngx_http_charset_recode(cl
->buf
, ctx
->table
);
643 return ngx_http_next_body_filter(r
, in
);
648 ngx_http_charset_recode(ngx_buf_t
*b
, u_char
*table
)
654 for (p
= b
->pos
; p
< last
; p
++) {
656 if (*p
!= table
[*p
]) {
666 if (*p
!= table
[*p
]) {
681 ngx_http_charset_recode_from_utf8(ngx_pool_t
*pool
, ngx_buf_t
*buf
,
682 ngx_http_charset_ctx_t
*ctx
)
685 u_char c
, *p
, *src
, *dst
, *saved
, **table
;
689 ngx_chain_t
*out
, *cl
, **ll
;
693 if (ctx
->saved_len
== 0) {
695 for ( /* void */ ; src
< buf
->last
; src
++) {
701 len
= src
- buf
->pos
;
704 out
= ngx_http_charset_get_buf(pool
, ctx
);
711 b
->temporary
= buf
->temporary
;
712 b
->memory
= buf
->memory
;
714 b
->flush
= buf
->flush
;
722 size
= buf
->last
- src
;
725 n
= ngx_utf8_decode(&saved
, size
);
727 if (n
== 0xfffffffe) {
728 /* incomplete UTF-8 symbol */
730 ngx_memcpy(ctx
->saved
, src
, size
);
731 ctx
->saved_len
= size
;
740 size
= len
+ buf
->last
- src
;
744 if (size
< NGX_HTML_ENTITY_LEN
) {
745 size
+= NGX_HTML_ENTITY_LEN
;
748 cl
= ngx_http_charset_get_buffer(pool
, ctx
, size
);
766 out
= ngx_alloc_chain_link(pool
);
777 /* process incomplete UTF sequence from previous buffer */
779 ngx_log_debug1(NGX_LOG_DEBUG_HTTP
, pool
->log
, 0,
780 "http charset utf saved: %z", ctx
->saved_len
);
784 for (i
= ctx
->saved_len
; i
< NGX_UTF_LEN
; i
++) {
785 ctx
->saved
[i
] = *p
++;
787 if (p
== buf
->last
) {
793 n
= ngx_utf8_decode(&saved
, i
);
798 table
= (u_char
**) ctx
->table
;
805 } else if (n
== 0xfffffffe) {
807 /* incomplete UTF-8 symbol */
809 if (i
< NGX_UTF_LEN
) {
810 out
= ngx_http_charset_get_buf(pool
, ctx
);
822 ngx_memcpy(&ctx
->saved
[ctx
->saved_len
], src
, i
);
829 size
= buf
->last
- buf
->pos
;
831 if (size
< NGX_HTML_ENTITY_LEN
) {
832 size
+= NGX_HTML_ENTITY_LEN
;
835 cl
= ngx_http_charset_get_buffer(pool
, ctx
, size
);
848 } else if (n
== 0xfffffffe) {
851 ngx_log_debug0(NGX_LOG_DEBUG_HTTP
, pool
->log
, 0,
852 "http charset invalid utf 0");
854 saved
= &ctx
->saved
[NGX_UTF_LEN
];
856 } else if (n
> 0x10ffff) {
859 ngx_log_debug0(NGX_LOG_DEBUG_HTTP
, pool
->log
, 0,
860 "http charset invalid utf 1");
863 dst
= ngx_sprintf(dst
, "&#%uD;", n
);
866 src
+= (saved
- ctx
->saved
) - ctx
->saved_len
;
873 table
= (u_char
**) ctx
->table
;
875 while (src
< buf
->last
) {
877 if ((size_t) (b
->end
- dst
) < NGX_HTML_ENTITY_LEN
) {
880 size
= buf
->last
- src
+ NGX_HTML_ENTITY_LEN
;
882 cl
= ngx_http_charset_get_buffer(pool
, ctx
, size
);
899 len
= buf
->last
- src
;
901 n
= ngx_utf8_decode(&src
, len
);
916 dst
= ngx_sprintf(dst
, "&#%uD;", n
);
921 if (n
== 0xfffffffe) {
922 /* incomplete UTF-8 symbol */
924 ngx_memcpy(ctx
->saved
, src
, len
);
925 ctx
->saved_len
= len
;
938 ngx_log_debug0(NGX_LOG_DEBUG_HTTP
, pool
->log
, 0,
939 "http charset invalid utf 2");
946 dst
= ngx_sprintf(dst
, "&#%uD;", n
);
951 b
->last_buf
= buf
->last_buf
;
952 b
->last_in_chain
= buf
->last_in_chain
;
953 b
->flush
= buf
->flush
;
962 ngx_http_charset_recode_to_utf8(ngx_pool_t
*pool
, ngx_buf_t
*buf
,
963 ngx_http_charset_ctx_t
*ctx
)
966 u_char
*p
, *src
, *dst
, *table
;
968 ngx_chain_t
*out
, *cl
, **ll
;
972 for (src
= buf
->pos
; src
< buf
->last
; src
++) {
973 if (table
[*src
* NGX_UTF_LEN
] == '\1') {
980 out
= ngx_alloc_chain_link(pool
);
993 * we assume that there are about half of characters to be recoded,
994 * so we preallocate "size / 2 + size / 2 * ctx->length"
997 len
= src
- buf
->pos
;
1000 out
= ngx_http_charset_get_buf(pool
, ctx
);
1007 b
->temporary
= buf
->temporary
;
1008 b
->memory
= buf
->memory
;
1009 b
->mmap
= buf
->mmap
;
1010 b
->flush
= buf
->flush
;
1018 size
= buf
->last
- src
;
1019 size
= size
/ 2 + size
/ 2 * ctx
->length
;
1024 size
= buf
->last
- src
;
1025 size
= len
+ size
/ 2 + size
/ 2 * ctx
->length
;
1030 cl
= ngx_http_charset_get_buffer(pool
, ctx
, size
);
1047 while (src
< buf
->last
) {
1049 p
= &table
[*src
++ * NGX_UTF_LEN
];
1052 if ((size_t) (b
->end
- dst
) < len
) {
1055 size
= buf
->last
- src
;
1056 size
= len
+ size
/ 2 + size
/ 2 * ctx
->length
;
1058 cl
= ngx_http_charset_get_buffer(pool
, ctx
, size
);
1078 b
->last_buf
= buf
->last_buf
;
1079 b
->last_in_chain
= buf
->last_in_chain
;
1080 b
->flush
= buf
->flush
;
1088 static ngx_chain_t
*
1089 ngx_http_charset_get_buf(ngx_pool_t
*pool
, ngx_http_charset_ctx_t
*ctx
)
1093 cl
= ctx
->free_bufs
;
1096 ctx
->free_bufs
= cl
->next
;
1098 cl
->buf
->shadow
= NULL
;
1104 cl
= ngx_alloc_chain_link(pool
);
1109 cl
->buf
= ngx_calloc_buf(pool
);
1110 if (cl
->buf
== NULL
) {
1116 cl
->buf
->tag
= (ngx_buf_tag_t
) &ngx_http_charset_filter_module
;
1122 static ngx_chain_t
*
1123 ngx_http_charset_get_buffer(ngx_pool_t
*pool
, ngx_http_charset_ctx_t
*ctx
,
1127 ngx_chain_t
*cl
, **ll
;
1129 for (ll
= &ctx
->free_buffers
, cl
= ctx
->free_buffers
;
1131 ll
= &cl
->next
, cl
= cl
->next
)
1135 if ((size_t) (b
->end
- b
->start
) >= size
) {
1147 cl
= ngx_alloc_chain_link(pool
);
1152 cl
->buf
= ngx_create_temp_buf(pool
, size
);
1153 if (cl
->buf
== NULL
) {
1159 cl
->buf
->temporary
= 1;
1160 cl
->buf
->tag
= (ngx_buf_tag_t
) &ngx_http_charset_filter_module
;
1167 ngx_http_charset_map_block(ngx_conf_t
*cf
, ngx_command_t
*cmd
, void *conf
)
1169 ngx_http_charset_main_conf_t
*mcf
= conf
;
1172 u_char
*p
, *dst2src
, **pp
;
1177 ngx_http_charset_t
*charset
;
1178 ngx_http_charset_tables_t
*table
;
1179 ngx_http_charset_conf_ctx_t ctx
;
1181 value
= cf
->args
->elts
;
1183 src
= ngx_http_add_charset(&mcf
->charsets
, &value
[1]);
1184 if (src
== NGX_ERROR
) {
1185 return NGX_CONF_ERROR
;
1188 dst
= ngx_http_add_charset(&mcf
->charsets
, &value
[2]);
1189 if (dst
== NGX_ERROR
) {
1190 return NGX_CONF_ERROR
;
1194 ngx_conf_log_error(NGX_LOG_EMERG
, cf
, 0,
1195 "\"charset_map\" between the same charsets "
1196 "\"%V\" and \"%V\"", &value
[1], &value
[2]);
1197 return NGX_CONF_ERROR
;
1200 table
= mcf
->tables
.elts
;
1201 for (i
= 0; i
< mcf
->tables
.nelts
; i
++) {
1202 if ((src
== table
->src
&& dst
== table
->dst
)
1203 || (src
== table
->dst
&& dst
== table
->src
))
1205 ngx_conf_log_error(NGX_LOG_EMERG
, cf
, 0,
1206 "duplicate \"charset_map\" between "
1207 "\"%V\" and \"%V\"", &value
[1], &value
[2]);
1208 return NGX_CONF_ERROR
;
1212 table
= ngx_array_push(&mcf
->tables
);
1213 if (table
== NULL
) {
1214 return NGX_CONF_ERROR
;
1220 if (ngx_strcasecmp(value
[2].data
, (u_char
*) "utf-8") == 0) {
1221 table
->src2dst
= ngx_pcalloc(cf
->pool
, 256 * NGX_UTF_LEN
);
1222 if (table
->src2dst
== NULL
) {
1223 return NGX_CONF_ERROR
;
1226 table
->dst2src
= ngx_pcalloc(cf
->pool
, 256 * sizeof(void *));
1227 if (table
->dst2src
== NULL
) {
1228 return NGX_CONF_ERROR
;
1231 dst2src
= ngx_pcalloc(cf
->pool
, 256);
1232 if (dst2src
== NULL
) {
1233 return NGX_CONF_ERROR
;
1236 pp
= (u_char
**) &table
->dst2src
[0];
1239 for (i
= 0; i
< 128; i
++) {
1240 p
= &table
->src2dst
[i
* NGX_UTF_LEN
];
1243 dst2src
[i
] = (u_char
) i
;
1246 for (/* void */; i
< 256; i
++) {
1247 p
= &table
->src2dst
[i
* NGX_UTF_LEN
];
1253 table
->src2dst
= ngx_palloc(cf
->pool
, 256);
1254 if (table
->src2dst
== NULL
) {
1255 return NGX_CONF_ERROR
;
1258 table
->dst2src
= ngx_palloc(cf
->pool
, 256);
1259 if (table
->dst2src
== NULL
) {
1260 return NGX_CONF_ERROR
;
1263 for (i
= 0; i
< 128; i
++) {
1264 table
->src2dst
[i
] = (u_char
) i
;
1265 table
->dst2src
[i
] = (u_char
) i
;
1268 for (/* void */; i
< 256; i
++) {
1269 table
->src2dst
[i
] = '?';
1270 table
->dst2src
[i
] = '?';
1274 charset
= mcf
->charsets
.elts
;
1277 ctx
.charset
= &charset
[dst
];
1282 cf
->handler
= ngx_http_charset_map
;
1283 cf
->handler_conf
= conf
;
1285 rv
= ngx_conf_parse(cf
, NULL
);
1289 if (ctx
.characters
) {
1290 n
= ctx
.charset
->length
;
1291 ctx
.charset
->length
/= ctx
.characters
;
1293 if (((n
* 10) / ctx
.characters
) % 10 > 4) {
1294 ctx
.charset
->length
++;
1303 ngx_http_charset_map(ngx_conf_t
*cf
, ngx_command_t
*dummy
, void *conf
)
1305 u_char
*p
, *dst2src
, **pp
;
1310 ngx_http_charset_tables_t
*table
;
1311 ngx_http_charset_conf_ctx_t
*ctx
;
1313 if (cf
->args
->nelts
!= 2) {
1314 ngx_conf_log_error(NGX_LOG_EMERG
, cf
, 0, "invalid parameters number");
1315 return NGX_CONF_ERROR
;
1318 value
= cf
->args
->elts
;
1320 src
= ngx_hextoi(value
[0].data
, value
[0].len
);
1321 if (src
== NGX_ERROR
|| src
> 255) {
1322 ngx_conf_log_error(NGX_LOG_EMERG
, cf
, 0,
1323 "invalid value \"%V\"", &value
[0]);
1324 return NGX_CONF_ERROR
;
1330 if (ctx
->charset
->utf8
) {
1331 p
= &table
->src2dst
[src
* NGX_UTF_LEN
];
1333 *p
++ = (u_char
) (value
[1].len
/ 2);
1335 for (i
= 0; i
< value
[1].len
; i
+= 2) {
1336 dst
= ngx_hextoi(&value
[1].data
[i
], 2);
1337 if (dst
== NGX_ERROR
|| dst
> 255) {
1338 ngx_conf_log_error(NGX_LOG_EMERG
, cf
, 0,
1339 "invalid value \"%V\"", &value
[1]);
1340 return NGX_CONF_ERROR
;
1343 *p
++ = (u_char
) dst
;
1348 ctx
->charset
->length
+= i
;
1351 p
= &table
->src2dst
[src
* NGX_UTF_LEN
] + 1;
1353 n
= ngx_utf8_decode(&p
, i
);
1356 ngx_conf_log_error(NGX_LOG_EMERG
, cf
, 0,
1357 "invalid value \"%V\"", &value
[1]);
1358 return NGX_CONF_ERROR
;
1361 pp
= (u_char
**) &table
->dst2src
[0];
1363 dst2src
= pp
[n
>> 8];
1365 if (dst2src
== NULL
) {
1366 dst2src
= ngx_pcalloc(cf
->pool
, 256);
1367 if (dst2src
== NULL
) {
1368 return NGX_CONF_ERROR
;
1371 pp
[n
>> 8] = dst2src
;
1374 dst2src
[n
& 0xff] = (u_char
) src
;
1377 dst
= ngx_hextoi(value
[1].data
, value
[1].len
);
1378 if (dst
== NGX_ERROR
|| dst
> 255) {
1379 ngx_conf_log_error(NGX_LOG_EMERG
, cf
, 0,
1380 "invalid value \"%V\"", &value
[1]);
1381 return NGX_CONF_ERROR
;
1384 table
->src2dst
[src
] = (u_char
) dst
;
1385 table
->dst2src
[dst
] = (u_char
) src
;
1393 ngx_http_set_charset_slot(ngx_conf_t
*cf
, ngx_command_t
*cmd
, void *conf
)
1398 ngx_str_t
*value
, var
;
1399 ngx_http_charset_main_conf_t
*mcf
;
1401 cp
= (ngx_int_t
*) (p
+ cmd
->offset
);
1403 if (*cp
!= NGX_CONF_UNSET
) {
1404 return "is duplicate";
1407 value
= cf
->args
->elts
;
1409 if (cmd
->offset
== offsetof(ngx_http_charset_loc_conf_t
, charset
)
1410 && ngx_strcmp(value
[1].data
, "off") == 0)
1412 *cp
= NGX_HTTP_CHARSET_OFF
;
1417 if (value
[1].data
[0] == '$') {
1418 var
.len
= value
[1].len
- 1;
1419 var
.data
= value
[1].data
+ 1;
1421 *cp
= ngx_http_get_variable_index(cf
, &var
);
1423 if (*cp
== NGX_ERROR
) {
1424 return NGX_CONF_ERROR
;
1427 *cp
+= NGX_HTTP_CHARSET_VAR
;
1432 mcf
= ngx_http_conf_get_module_main_conf(cf
,
1433 ngx_http_charset_filter_module
);
1435 *cp
= ngx_http_add_charset(&mcf
->charsets
, &value
[1]);
1436 if (*cp
== NGX_ERROR
) {
1437 return NGX_CONF_ERROR
;
1445 ngx_http_add_charset(ngx_array_t
*charsets
, ngx_str_t
*name
)
1448 ngx_http_charset_t
*c
;
1451 for (i
= 0; i
< charsets
->nelts
; i
++) {
1452 if (name
->len
!= c
[i
].name
.len
) {
1456 if (ngx_strcasecmp(name
->data
, c
[i
].name
.data
) == 0) {
1461 if (i
< charsets
->nelts
) {
1465 c
= ngx_array_push(charsets
);
1474 if (ngx_strcasecmp(name
->data
, (u_char
*) "utf-8") == 0) {
1486 ngx_http_charset_create_main_conf(ngx_conf_t
*cf
)
1488 ngx_http_charset_main_conf_t
*mcf
;
1490 mcf
= ngx_pcalloc(cf
->pool
, sizeof(ngx_http_charset_main_conf_t
));
1495 if (ngx_array_init(&mcf
->charsets
, cf
->pool
, 2, sizeof(ngx_http_charset_t
))
1501 if (ngx_array_init(&mcf
->tables
, cf
->pool
, 1,
1502 sizeof(ngx_http_charset_tables_t
))
1508 if (ngx_array_init(&mcf
->recodes
, cf
->pool
, 2,
1509 sizeof(ngx_http_charset_recode_t
))
1520 ngx_http_charset_create_loc_conf(ngx_conf_t
*cf
)
1522 ngx_http_charset_loc_conf_t
*lcf
;
1524 lcf
= ngx_pcalloc(cf
->pool
, sizeof(ngx_http_charset_loc_conf_t
));
1530 * set by ngx_pcalloc():
1532 * lcf->types = { NULL };
1533 * lcf->types_keys = NULL;
1536 lcf
->charset
= NGX_CONF_UNSET
;
1537 lcf
->source_charset
= NGX_CONF_UNSET
;
1538 lcf
->override_charset
= NGX_CONF_UNSET
;
1545 ngx_http_charset_merge_loc_conf(ngx_conf_t
*cf
, void *parent
, void *child
)
1547 ngx_http_charset_loc_conf_t
*prev
= parent
;
1548 ngx_http_charset_loc_conf_t
*conf
= child
;
1551 ngx_http_charset_recode_t
*recode
;
1552 ngx_http_charset_main_conf_t
*mcf
;
1554 if (ngx_http_merge_types(cf
, &conf
->types_keys
, &conf
->types
,
1555 &prev
->types_keys
, &prev
->types
,
1556 ngx_http_charset_default_types
)
1559 return NGX_CONF_ERROR
;
1562 ngx_conf_merge_value(conf
->override_charset
, prev
->override_charset
, 0);
1563 ngx_conf_merge_value(conf
->charset
, prev
->charset
, NGX_HTTP_CHARSET_OFF
);
1564 ngx_conf_merge_value(conf
->source_charset
, prev
->source_charset
,
1565 NGX_HTTP_CHARSET_OFF
);
1567 if (conf
->charset
== NGX_HTTP_CHARSET_OFF
1568 || conf
->source_charset
== NGX_HTTP_CHARSET_OFF
1569 || conf
->charset
== conf
->source_charset
)
1574 if (conf
->source_charset
>= NGX_HTTP_CHARSET_VAR
1575 || conf
->charset
>= NGX_HTTP_CHARSET_VAR
)
1580 mcf
= ngx_http_conf_get_module_main_conf(cf
,
1581 ngx_http_charset_filter_module
);
1582 recode
= mcf
->recodes
.elts
;
1583 for (i
= 0; i
< mcf
->recodes
.nelts
; i
++) {
1584 if (conf
->source_charset
== recode
[i
].src
1585 && conf
->charset
== recode
[i
].dst
)
1591 recode
= ngx_array_push(&mcf
->recodes
);
1592 if (recode
== NULL
) {
1593 return NGX_CONF_ERROR
;
1596 recode
->src
= conf
->source_charset
;
1597 recode
->dst
= conf
->charset
;
1604 ngx_http_charset_postconfiguration(ngx_conf_t
*cf
)
1606 u_char
**src
, **dst
;
1609 ngx_http_charset_t
*charset
;
1610 ngx_http_charset_recode_t
*recode
;
1611 ngx_http_charset_tables_t
*tables
;
1612 ngx_http_charset_main_conf_t
*mcf
;
1614 mcf
= ngx_http_conf_get_module_main_conf(cf
,
1615 ngx_http_charset_filter_module
);
1617 recode
= mcf
->recodes
.elts
;
1618 tables
= mcf
->tables
.elts
;
1619 charset
= mcf
->charsets
.elts
;
1621 for (i
= 0; i
< mcf
->recodes
.nelts
; i
++) {
1625 for (t
= 0; t
< mcf
->tables
.nelts
; t
++) {
1627 if (c
== tables
[t
].src
&& recode
[i
].dst
== tables
[t
].dst
) {
1631 if (c
== tables
[t
].dst
&& recode
[i
].dst
== tables
[t
].src
) {
1636 ngx_log_error(NGX_LOG_EMERG
, cf
->log
, 0,
1637 "no \"charset_map\" between the charsets \"%V\" and \"%V\"",
1638 &charset
[c
].name
, &charset
[recode
[i
].dst
].name
);
1646 for (t
= 0; t
< mcf
->tables
.nelts
; t
++) {
1648 src
= charset
[tables
[t
].src
].tables
;
1651 src
= ngx_pcalloc(cf
->pool
, sizeof(u_char
*) * mcf
->charsets
.nelts
);
1656 charset
[tables
[t
].src
].tables
= src
;
1659 dst
= charset
[tables
[t
].dst
].tables
;
1662 dst
= ngx_pcalloc(cf
->pool
, sizeof(u_char
*) * mcf
->charsets
.nelts
);
1667 charset
[tables
[t
].dst
].tables
= dst
;
1670 src
[tables
[t
].dst
] = tables
[t
].src2dst
;
1671 dst
[tables
[t
].src
] = tables
[t
].dst2src
;
1674 ngx_http_next_header_filter
= ngx_http_top_header_filter
;
1675 ngx_http_top_header_filter
= ngx_http_charset_header_filter
;
1677 ngx_http_next_body_filter
= ngx_http_top_body_filter
;
1678 ngx_http_top_body_filter
= ngx_http_charset_body_filter
;