/*
 * Update and clean Tomato RAF files
 * [tomato.git] / release / src / router / nginx / src / http / modules / ngx_http_charset_filter_module.c
 * blob 27a00d09aeeed8f019c827c5ed8c927a97cc1af9
 */

/*
 * Copyright (C) Igor Sysoev
 * Copyright (C) Nginx, Inc.
 */

8 #include <ngx_config.h>
9 #include <ngx_core.h>
10 #include <ngx_http.h>
13 #define NGX_HTTP_CHARSET_OFF -2
14 #define NGX_HTTP_NO_CHARSET -3
15 #define NGX_HTTP_CHARSET_VAR 0x10000
17 /* 1 byte length and up to 3 bytes for the UTF-8 encoding of the UCS-2 */
18 #define NGX_UTF_LEN 4
20 #define NGX_HTML_ENTITY_LEN (sizeof("&#1114111;") - 1)
23 typedef struct {
24 u_char **tables;
25 ngx_str_t name;
27 unsigned length:16;
28 unsigned utf8:1;
29 } ngx_http_charset_t;
32 typedef struct {
33 ngx_int_t src;
34 ngx_int_t dst;
35 } ngx_http_charset_recode_t;
38 typedef struct {
39 ngx_int_t src;
40 ngx_int_t dst;
41 u_char *src2dst;
42 u_char *dst2src;
43 } ngx_http_charset_tables_t;
46 typedef struct {
47 ngx_array_t charsets; /* ngx_http_charset_t */
48 ngx_array_t tables; /* ngx_http_charset_tables_t */
49 ngx_array_t recodes; /* ngx_http_charset_recode_t */
50 } ngx_http_charset_main_conf_t;
53 typedef struct {
54 ngx_int_t charset;
55 ngx_int_t source_charset;
56 ngx_flag_t override_charset;
58 ngx_hash_t types;
59 ngx_array_t *types_keys;
60 } ngx_http_charset_loc_conf_t;
63 typedef struct {
64 u_char *table;
65 ngx_int_t charset;
66 ngx_str_t charset_name;
68 ngx_chain_t *busy;
69 ngx_chain_t *free_bufs;
70 ngx_chain_t *free_buffers;
72 size_t saved_len;
73 u_char saved[NGX_UTF_LEN];
75 unsigned length:16;
76 unsigned from_utf8:1;
77 unsigned to_utf8:1;
78 } ngx_http_charset_ctx_t;
81 typedef struct {
82 ngx_http_charset_tables_t *table;
83 ngx_http_charset_t *charset;
84 ngx_uint_t characters;
85 } ngx_http_charset_conf_ctx_t;
88 static ngx_int_t ngx_http_destination_charset(ngx_http_request_t *r,
89 ngx_str_t *name);
90 static ngx_int_t ngx_http_main_request_charset(ngx_http_request_t *r,
91 ngx_str_t *name);
92 static ngx_int_t ngx_http_source_charset(ngx_http_request_t *r,
93 ngx_str_t *name);
94 static ngx_int_t ngx_http_get_charset(ngx_http_request_t *r, ngx_str_t *name);
95 static ngx_inline void ngx_http_set_charset(ngx_http_request_t *r,
96 ngx_str_t *charset);
97 static ngx_int_t ngx_http_charset_ctx(ngx_http_request_t *r,
98 ngx_http_charset_t *charsets, ngx_int_t charset, ngx_int_t source_charset);
99 static ngx_uint_t ngx_http_charset_recode(ngx_buf_t *b, u_char *table);
100 static ngx_chain_t *ngx_http_charset_recode_from_utf8(ngx_pool_t *pool,
101 ngx_buf_t *buf, ngx_http_charset_ctx_t *ctx);
102 static ngx_chain_t *ngx_http_charset_recode_to_utf8(ngx_pool_t *pool,
103 ngx_buf_t *buf, ngx_http_charset_ctx_t *ctx);
105 static ngx_chain_t *ngx_http_charset_get_buf(ngx_pool_t *pool,
106 ngx_http_charset_ctx_t *ctx);
107 static ngx_chain_t *ngx_http_charset_get_buffer(ngx_pool_t *pool,
108 ngx_http_charset_ctx_t *ctx, size_t size);
110 static char *ngx_http_charset_map_block(ngx_conf_t *cf, ngx_command_t *cmd,
111 void *conf);
112 static char *ngx_http_charset_map(ngx_conf_t *cf, ngx_command_t *dummy,
113 void *conf);
115 static char *ngx_http_set_charset_slot(ngx_conf_t *cf, ngx_command_t *cmd,
116 void *conf);
117 static ngx_int_t ngx_http_add_charset(ngx_array_t *charsets, ngx_str_t *name);
119 static void *ngx_http_charset_create_main_conf(ngx_conf_t *cf);
120 static void *ngx_http_charset_create_loc_conf(ngx_conf_t *cf);
121 static char *ngx_http_charset_merge_loc_conf(ngx_conf_t *cf,
122 void *parent, void *child);
123 static ngx_int_t ngx_http_charset_postconfiguration(ngx_conf_t *cf);
126 ngx_str_t ngx_http_charset_default_types[] = {
127 ngx_string("text/html"),
128 ngx_string("text/xml"),
129 ngx_string("text/plain"),
130 ngx_string("text/vnd.wap.wml"),
131 ngx_string("application/x-javascript"),
132 ngx_string("application/rss+xml"),
133 ngx_null_string
137 static ngx_command_t ngx_http_charset_filter_commands[] = {
139 { ngx_string("charset"),
140 NGX_HTTP_MAIN_CONF|NGX_HTTP_SRV_CONF|NGX_HTTP_LOC_CONF
141 |NGX_HTTP_LIF_CONF|NGX_CONF_TAKE1,
142 ngx_http_set_charset_slot,
143 NGX_HTTP_LOC_CONF_OFFSET,
144 offsetof(ngx_http_charset_loc_conf_t, charset),
145 NULL },
147 { ngx_string("source_charset"),
148 NGX_HTTP_MAIN_CONF|NGX_HTTP_SRV_CONF|NGX_HTTP_LOC_CONF
149 |NGX_HTTP_LIF_CONF|NGX_CONF_TAKE1,
150 ngx_http_set_charset_slot,
151 NGX_HTTP_LOC_CONF_OFFSET,
152 offsetof(ngx_http_charset_loc_conf_t, source_charset),
153 NULL },
155 { ngx_string("override_charset"),
156 NGX_HTTP_MAIN_CONF|NGX_HTTP_SRV_CONF|NGX_HTTP_LOC_CONF
157 |NGX_HTTP_LIF_CONF|NGX_CONF_FLAG,
158 ngx_conf_set_flag_slot,
159 NGX_HTTP_LOC_CONF_OFFSET,
160 offsetof(ngx_http_charset_loc_conf_t, override_charset),
161 NULL },
163 { ngx_string("charset_types"),
164 NGX_HTTP_MAIN_CONF|NGX_HTTP_SRV_CONF|NGX_HTTP_LOC_CONF|NGX_CONF_1MORE,
165 ngx_http_types_slot,
166 NGX_HTTP_LOC_CONF_OFFSET,
167 offsetof(ngx_http_charset_loc_conf_t, types_keys),
168 &ngx_http_charset_default_types[0] },
170 { ngx_string("charset_map"),
171 NGX_HTTP_MAIN_CONF|NGX_CONF_BLOCK|NGX_CONF_TAKE2,
172 ngx_http_charset_map_block,
173 NGX_HTTP_MAIN_CONF_OFFSET,
175 NULL },
177 ngx_null_command
181 static ngx_http_module_t ngx_http_charset_filter_module_ctx = {
182 NULL, /* preconfiguration */
183 ngx_http_charset_postconfiguration, /* postconfiguration */
185 ngx_http_charset_create_main_conf, /* create main configuration */
186 NULL, /* init main configuration */
188 NULL, /* create server configuration */
189 NULL, /* merge server configuration */
191 ngx_http_charset_create_loc_conf, /* create location configuration */
192 ngx_http_charset_merge_loc_conf /* merge location configuration */
196 ngx_module_t ngx_http_charset_filter_module = {
197 NGX_MODULE_V1,
198 &ngx_http_charset_filter_module_ctx, /* module context */
199 ngx_http_charset_filter_commands, /* module directives */
200 NGX_HTTP_MODULE, /* module type */
201 NULL, /* init master */
202 NULL, /* init module */
203 NULL, /* init process */
204 NULL, /* init thread */
205 NULL, /* exit thread */
206 NULL, /* exit process */
207 NULL, /* exit master */
208 NGX_MODULE_V1_PADDING
212 static ngx_http_output_header_filter_pt ngx_http_next_header_filter;
213 static ngx_http_output_body_filter_pt ngx_http_next_body_filter;
216 static ngx_int_t
217 ngx_http_charset_header_filter(ngx_http_request_t *r)
219 ngx_int_t charset, source_charset;
220 ngx_str_t dst, src;
221 ngx_http_charset_t *charsets;
222 ngx_http_charset_main_conf_t *mcf;
224 if (r == r->main) {
225 charset = ngx_http_destination_charset(r, &dst);
227 } else {
228 charset = ngx_http_main_request_charset(r, &dst);
231 if (charset == NGX_ERROR) {
232 return NGX_ERROR;
235 if (charset == NGX_DECLINED) {
236 return ngx_http_next_header_filter(r);
239 /* charset: charset index or NGX_HTTP_NO_CHARSET */
241 source_charset = ngx_http_source_charset(r, &src);
243 if (source_charset == NGX_ERROR) {
244 return NGX_ERROR;
248 * source_charset: charset index, NGX_HTTP_NO_CHARSET,
249 * or NGX_HTTP_CHARSET_OFF
252 ngx_log_debug2(NGX_LOG_DEBUG_HTTP, r->connection->log, 0,
253 "charset: \"%V\" > \"%V\"", &src, &dst);
255 if (source_charset == NGX_HTTP_CHARSET_OFF) {
256 ngx_http_set_charset(r, &dst);
258 return ngx_http_next_header_filter(r);
261 if (charset == NGX_HTTP_NO_CHARSET
262 || source_charset == NGX_HTTP_NO_CHARSET)
264 if (source_charset != charset
265 || ngx_strncasecmp(dst.data, src.data, dst.len) != 0)
267 goto no_charset_map;
270 ngx_http_set_charset(r, &dst);
272 return ngx_http_next_header_filter(r);
275 mcf = ngx_http_get_module_main_conf(r, ngx_http_charset_filter_module);
276 charsets = mcf->charsets.elts;
278 if (source_charset != charset
279 && (charsets[source_charset].tables == NULL
280 || charsets[source_charset].tables[charset] == NULL))
282 goto no_charset_map;
285 r->headers_out.content_type.len = r->headers_out.content_type_len;
287 ngx_http_set_charset(r, &dst);
289 if (source_charset != charset) {
290 return ngx_http_charset_ctx(r, charsets, charset, source_charset);
293 return ngx_http_next_header_filter(r);
295 no_charset_map:
297 ngx_log_error(NGX_LOG_ERR, r->connection->log, 0,
298 "no \"charset_map\" between the charsets \"%V\" and \"%V\"",
299 &src, &dst);
301 return ngx_http_next_header_filter(r);
305 static ngx_int_t
306 ngx_http_destination_charset(ngx_http_request_t *r, ngx_str_t *name)
308 ngx_int_t charset;
309 ngx_http_charset_t *charsets;
310 ngx_http_variable_value_t *vv;
311 ngx_http_charset_loc_conf_t *mlcf;
312 ngx_http_charset_main_conf_t *mcf;
314 if (!r->ignore_content_encoding
315 && r->headers_out.content_encoding
316 && r->headers_out.content_encoding->value.len)
318 return NGX_DECLINED;
321 if (r->headers_out.content_type.len == 0) {
322 return NGX_DECLINED;
325 if (r->headers_out.override_charset
326 && r->headers_out.override_charset->len)
328 *name = *r->headers_out.override_charset;
330 charset = ngx_http_get_charset(r, name);
332 if (charset != NGX_HTTP_NO_CHARSET) {
333 return charset;
336 ngx_log_error(NGX_LOG_ERR, r->connection->log, 0,
337 "unknown charset \"%V\" to override", name);
339 return NGX_DECLINED;
342 mlcf = ngx_http_get_module_loc_conf(r, ngx_http_charset_filter_module);
343 charset = mlcf->charset;
345 if (charset == NGX_HTTP_CHARSET_OFF) {
346 return NGX_DECLINED;
349 if (r->headers_out.charset.len) {
350 if (mlcf->override_charset == 0) {
351 return NGX_DECLINED;
354 } else {
355 if (ngx_http_test_content_type(r, &mlcf->types) == NULL) {
356 return NGX_DECLINED;
360 if (charset < NGX_HTTP_CHARSET_VAR) {
361 mcf = ngx_http_get_module_main_conf(r, ngx_http_charset_filter_module);
362 charsets = mcf->charsets.elts;
363 *name = charsets[charset].name;
364 return charset;
367 vv = ngx_http_get_indexed_variable(r, charset - NGX_HTTP_CHARSET_VAR);
369 if (vv == NULL || vv->not_found) {
370 return NGX_ERROR;
373 name->len = vv->len;
374 name->data = vv->data;
376 return ngx_http_get_charset(r, name);
380 static ngx_int_t
381 ngx_http_main_request_charset(ngx_http_request_t *r, ngx_str_t *src)
383 ngx_int_t charset;
384 ngx_str_t *main_charset;
385 ngx_http_charset_ctx_t *ctx;
387 ctx = ngx_http_get_module_ctx(r->main, ngx_http_charset_filter_module);
389 if (ctx) {
390 *src = ctx->charset_name;
391 return ctx->charset;
394 main_charset = &r->main->headers_out.charset;
396 if (main_charset->len == 0) {
397 return NGX_DECLINED;
400 ctx = ngx_pcalloc(r->pool, sizeof(ngx_http_charset_ctx_t));
401 if (ctx == NULL) {
402 return NGX_ERROR;
405 ngx_http_set_ctx(r->main, ctx, ngx_http_charset_filter_module);
407 charset = ngx_http_get_charset(r, main_charset);
409 ctx->charset = charset;
410 ctx->charset_name = *main_charset;
411 *src = *main_charset;
413 return charset;
417 static ngx_int_t
418 ngx_http_source_charset(ngx_http_request_t *r, ngx_str_t *name)
420 ngx_int_t charset;
421 ngx_http_charset_t *charsets;
422 ngx_http_variable_value_t *vv;
423 ngx_http_charset_loc_conf_t *lcf;
424 ngx_http_charset_main_conf_t *mcf;
426 if (r->headers_out.charset.len) {
427 *name = r->headers_out.charset;
428 return ngx_http_get_charset(r, name);
431 lcf = ngx_http_get_module_loc_conf(r, ngx_http_charset_filter_module);
433 charset = lcf->source_charset;
435 if (charset == NGX_HTTP_CHARSET_OFF) {
436 name->len = 0;
437 return charset;
440 if (charset < NGX_HTTP_CHARSET_VAR) {
441 mcf = ngx_http_get_module_main_conf(r, ngx_http_charset_filter_module);
442 charsets = mcf->charsets.elts;
443 *name = charsets[charset].name;
444 return charset;
447 vv = ngx_http_get_indexed_variable(r, charset - NGX_HTTP_CHARSET_VAR);
449 if (vv == NULL || vv->not_found) {
450 return NGX_ERROR;
453 name->len = vv->len;
454 name->data = vv->data;
456 return ngx_http_get_charset(r, name);
460 static ngx_int_t
461 ngx_http_get_charset(ngx_http_request_t *r, ngx_str_t *name)
463 ngx_uint_t i, n;
464 ngx_http_charset_t *charset;
465 ngx_http_charset_main_conf_t *mcf;
467 mcf = ngx_http_get_module_main_conf(r, ngx_http_charset_filter_module);
469 charset = mcf->charsets.elts;
470 n = mcf->charsets.nelts;
472 for (i = 0; i < n; i++) {
473 if (charset[i].name.len != name->len) {
474 continue;
477 if (ngx_strncasecmp(charset[i].name.data, name->data, name->len) == 0) {
478 return i;
482 return NGX_HTTP_NO_CHARSET;
486 static ngx_inline void
487 ngx_http_set_charset(ngx_http_request_t *r, ngx_str_t *charset)
489 if (r != r->main) {
490 return;
493 if (r->headers_out.status == NGX_HTTP_MOVED_PERMANENTLY
494 || r->headers_out.status == NGX_HTTP_MOVED_TEMPORARILY)
497 * do not set charset for the redirect because NN 4.x
498 * use this charset instead of the next page charset
501 r->headers_out.charset.len = 0;
502 return;
505 r->headers_out.charset = *charset;
509 static ngx_int_t
510 ngx_http_charset_ctx(ngx_http_request_t *r, ngx_http_charset_t *charsets,
511 ngx_int_t charset, ngx_int_t source_charset)
513 ngx_http_charset_ctx_t *ctx;
515 ctx = ngx_pcalloc(r->pool, sizeof(ngx_http_charset_ctx_t));
516 if (ctx == NULL) {
517 return NGX_ERROR;
520 ngx_http_set_ctx(r, ctx, ngx_http_charset_filter_module);
522 ctx->table = charsets[source_charset].tables[charset];
523 ctx->charset = charset;
524 ctx->charset_name = charsets[charset].name;
525 ctx->length = charsets[charset].length;
526 ctx->from_utf8 = charsets[source_charset].utf8;
527 ctx->to_utf8 = charsets[charset].utf8;
529 r->filter_need_in_memory = 1;
531 if ((ctx->to_utf8 || ctx->from_utf8) && r == r->main) {
532 ngx_http_clear_content_length(r);
534 } else {
535 r->filter_need_temporary = 1;
538 return ngx_http_next_header_filter(r);
542 static ngx_int_t
543 ngx_http_charset_body_filter(ngx_http_request_t *r, ngx_chain_t *in)
545 ngx_int_t rc;
546 ngx_buf_t *b;
547 ngx_chain_t *cl, *out, **ll;
548 ngx_http_charset_ctx_t *ctx;
550 ctx = ngx_http_get_module_ctx(r, ngx_http_charset_filter_module);
552 if (ctx == NULL || ctx->table == NULL) {
553 return ngx_http_next_body_filter(r, in);
556 if ((ctx->to_utf8 || ctx->from_utf8) || ctx->busy) {
558 out = NULL;
559 ll = &out;
561 for (cl = in; cl; cl = cl->next) {
562 b = cl->buf;
564 if (ngx_buf_size(b) == 0) {
566 *ll = ngx_alloc_chain_link(r->pool);
567 if (*ll == NULL) {
568 return NGX_ERROR;
571 (*ll)->buf = b;
572 (*ll)->next = NULL;
574 ll = &(*ll)->next;
576 continue;
579 if (ctx->to_utf8) {
580 *ll = ngx_http_charset_recode_to_utf8(r->pool, b, ctx);
582 } else {
583 *ll = ngx_http_charset_recode_from_utf8(r->pool, b, ctx);
586 if (*ll == NULL) {
587 return NGX_ERROR;
590 while (*ll) {
591 ll = &(*ll)->next;
595 rc = ngx_http_next_body_filter(r, out);
597 if (out) {
598 if (ctx->busy == NULL) {
599 ctx->busy = out;
601 } else {
602 for (cl = ctx->busy; cl->next; cl = cl->next) { /* void */ }
603 cl->next = out;
607 while (ctx->busy) {
609 cl = ctx->busy;
610 b = cl->buf;
612 if (ngx_buf_size(b) != 0) {
613 break;
616 ctx->busy = cl->next;
618 if (b->tag != (ngx_buf_tag_t) &ngx_http_charset_filter_module) {
619 continue;
622 if (b->shadow) {
623 b->shadow->pos = b->shadow->last;
626 if (b->pos) {
627 cl->next = ctx->free_buffers;
628 ctx->free_buffers = cl;
629 continue;
632 cl->next = ctx->free_bufs;
633 ctx->free_bufs = cl;
636 return rc;
639 for (cl = in; cl; cl = cl->next) {
640 (void) ngx_http_charset_recode(cl->buf, ctx->table);
643 return ngx_http_next_body_filter(r, in);
647 static ngx_uint_t
648 ngx_http_charset_recode(ngx_buf_t *b, u_char *table)
650 u_char *p, *last;
652 last = b->last;
654 for (p = b->pos; p < last; p++) {
656 if (*p != table[*p]) {
657 goto recode;
661 return 0;
663 recode:
665 do {
666 if (*p != table[*p]) {
667 *p = table[*p];
670 p++;
672 } while (p < last);
674 b->in_file = 0;
676 return 1;
680 static ngx_chain_t *
681 ngx_http_charset_recode_from_utf8(ngx_pool_t *pool, ngx_buf_t *buf,
682 ngx_http_charset_ctx_t *ctx)
684 size_t len, size;
685 u_char c, *p, *src, *dst, *saved, **table;
686 uint32_t n;
687 ngx_buf_t *b;
688 ngx_uint_t i;
689 ngx_chain_t *out, *cl, **ll;
691 src = buf->pos;
693 if (ctx->saved_len == 0) {
695 for ( /* void */ ; src < buf->last; src++) {
697 if (*src < 0x80) {
698 continue;
701 len = src - buf->pos;
703 if (len > 512) {
704 out = ngx_http_charset_get_buf(pool, ctx);
705 if (out == NULL) {
706 return NULL;
709 b = out->buf;
711 b->temporary = buf->temporary;
712 b->memory = buf->memory;
713 b->mmap = buf->mmap;
714 b->flush = buf->flush;
716 b->pos = buf->pos;
717 b->last = src;
719 out->buf = b;
720 out->next = NULL;
722 size = buf->last - src;
724 saved = src;
725 n = ngx_utf8_decode(&saved, size);
727 if (n == 0xfffffffe) {
728 /* incomplete UTF-8 symbol */
730 ngx_memcpy(ctx->saved, src, size);
731 ctx->saved_len = size;
733 b->shadow = buf;
735 return out;
738 } else {
739 out = NULL;
740 size = len + buf->last - src;
741 src = buf->pos;
744 if (size < NGX_HTML_ENTITY_LEN) {
745 size += NGX_HTML_ENTITY_LEN;
748 cl = ngx_http_charset_get_buffer(pool, ctx, size);
749 if (cl == NULL) {
750 return NULL;
753 if (out) {
754 out->next = cl;
756 } else {
757 out = cl;
760 b = cl->buf;
761 dst = b->pos;
763 goto recode;
766 out = ngx_alloc_chain_link(pool);
767 if (out == NULL) {
768 return NULL;
771 out->buf = buf;
772 out->next = NULL;
774 return out;
777 /* process incomplete UTF sequence from previous buffer */
779 ngx_log_debug1(NGX_LOG_DEBUG_HTTP, pool->log, 0,
780 "http charset utf saved: %z", ctx->saved_len);
782 p = src;
784 for (i = ctx->saved_len; i < NGX_UTF_LEN; i++) {
785 ctx->saved[i] = *p++;
787 if (p == buf->last) {
788 break;
792 saved = ctx->saved;
793 n = ngx_utf8_decode(&saved, i);
795 c = '\0';
797 if (n < 0x10000) {
798 table = (u_char **) ctx->table;
799 p = table[n >> 8];
801 if (p) {
802 c = p[n & 0xff];
805 } else if (n == 0xfffffffe) {
807 /* incomplete UTF-8 symbol */
809 if (i < NGX_UTF_LEN) {
810 out = ngx_http_charset_get_buf(pool, ctx);
811 if (out == NULL) {
812 return NULL;
815 b = out->buf;
817 b->pos = buf->pos;
818 b->last = buf->last;
819 b->sync = 1;
820 b->shadow = buf;
822 ngx_memcpy(&ctx->saved[ctx->saved_len], src, i);
823 ctx->saved_len += i;
825 return out;
829 size = buf->last - buf->pos;
831 if (size < NGX_HTML_ENTITY_LEN) {
832 size += NGX_HTML_ENTITY_LEN;
835 cl = ngx_http_charset_get_buffer(pool, ctx, size);
836 if (cl == NULL) {
837 return NULL;
840 out = cl;
842 b = cl->buf;
843 dst = b->pos;
845 if (c) {
846 *dst++ = c;
848 } else if (n == 0xfffffffe) {
849 *dst++ = '?';
851 ngx_log_debug0(NGX_LOG_DEBUG_HTTP, pool->log, 0,
852 "http charset invalid utf 0");
854 saved = &ctx->saved[NGX_UTF_LEN];
856 } else if (n > 0x10ffff) {
857 *dst++ = '?';
859 ngx_log_debug0(NGX_LOG_DEBUG_HTTP, pool->log, 0,
860 "http charset invalid utf 1");
862 } else {
863 dst = ngx_sprintf(dst, "&#%uD;", n);
866 src += (saved - ctx->saved) - ctx->saved_len;
867 ctx->saved_len = 0;
869 recode:
871 ll = &cl->next;
873 table = (u_char **) ctx->table;
875 while (src < buf->last) {
877 if ((size_t) (b->end - dst) < NGX_HTML_ENTITY_LEN) {
878 b->last = dst;
880 size = buf->last - src + NGX_HTML_ENTITY_LEN;
882 cl = ngx_http_charset_get_buffer(pool, ctx, size);
883 if (cl == NULL) {
884 return NULL;
887 *ll = cl;
888 ll = &cl->next;
890 b = cl->buf;
891 dst = b->pos;
894 if (*src < 0x80) {
895 *dst++ = *src++;
896 continue;
899 len = buf->last - src;
901 n = ngx_utf8_decode(&src, len);
903 if (n < 0x10000) {
905 p = table[n >> 8];
907 if (p) {
908 c = p[n & 0xff];
910 if (c) {
911 *dst++ = c;
912 continue;
916 dst = ngx_sprintf(dst, "&#%uD;", n);
918 continue;
921 if (n == 0xfffffffe) {
922 /* incomplete UTF-8 symbol */
924 ngx_memcpy(ctx->saved, src, len);
925 ctx->saved_len = len;
927 if (b->pos == dst) {
928 b->sync = 1;
929 b->temporary = 0;
932 break;
935 if (n > 0x10ffff) {
936 *dst++ = '?';
938 ngx_log_debug0(NGX_LOG_DEBUG_HTTP, pool->log, 0,
939 "http charset invalid utf 2");
941 continue;
944 /* n > 0xffff */
946 dst = ngx_sprintf(dst, "&#%uD;", n);
949 b->last = dst;
951 b->last_buf = buf->last_buf;
952 b->last_in_chain = buf->last_in_chain;
953 b->flush = buf->flush;
955 b->shadow = buf;
957 return out;
961 static ngx_chain_t *
962 ngx_http_charset_recode_to_utf8(ngx_pool_t *pool, ngx_buf_t *buf,
963 ngx_http_charset_ctx_t *ctx)
965 size_t len, size;
966 u_char *p, *src, *dst, *table;
967 ngx_buf_t *b;
968 ngx_chain_t *out, *cl, **ll;
970 table = ctx->table;
972 for (src = buf->pos; src < buf->last; src++) {
973 if (table[*src * NGX_UTF_LEN] == '\1') {
974 continue;
977 goto recode;
980 out = ngx_alloc_chain_link(pool);
981 if (out == NULL) {
982 return NULL;
985 out->buf = buf;
986 out->next = NULL;
988 return out;
990 recode:
993 * we assume that there are about half of characters to be recoded,
994 * so we preallocate "size / 2 + size / 2 * ctx->length"
997 len = src - buf->pos;
999 if (len > 512) {
1000 out = ngx_http_charset_get_buf(pool, ctx);
1001 if (out == NULL) {
1002 return NULL;
1005 b = out->buf;
1007 b->temporary = buf->temporary;
1008 b->memory = buf->memory;
1009 b->mmap = buf->mmap;
1010 b->flush = buf->flush;
1012 b->pos = buf->pos;
1013 b->last = src;
1015 out->buf = b;
1016 out->next = NULL;
1018 size = buf->last - src;
1019 size = size / 2 + size / 2 * ctx->length;
1021 } else {
1022 out = NULL;
1024 size = buf->last - src;
1025 size = len + size / 2 + size / 2 * ctx->length;
1027 src = buf->pos;
1030 cl = ngx_http_charset_get_buffer(pool, ctx, size);
1031 if (cl == NULL) {
1032 return NULL;
1035 if (out) {
1036 out->next = cl;
1038 } else {
1039 out = cl;
1042 ll = &cl->next;
1044 b = cl->buf;
1045 dst = b->pos;
1047 while (src < buf->last) {
1049 p = &table[*src++ * NGX_UTF_LEN];
1050 len = *p++;
1052 if ((size_t) (b->end - dst) < len) {
1053 b->last = dst;
1055 size = buf->last - src;
1056 size = len + size / 2 + size / 2 * ctx->length;
1058 cl = ngx_http_charset_get_buffer(pool, ctx, size);
1059 if (cl == NULL) {
1060 return NULL;
1063 *ll = cl;
1064 ll = &cl->next;
1066 b = cl->buf;
1067 dst = b->pos;
1070 while (len) {
1071 *dst++ = *p++;
1072 len--;
1076 b->last = dst;
1078 b->last_buf = buf->last_buf;
1079 b->last_in_chain = buf->last_in_chain;
1080 b->flush = buf->flush;
1082 b->shadow = buf;
1084 return out;
1088 static ngx_chain_t *
1089 ngx_http_charset_get_buf(ngx_pool_t *pool, ngx_http_charset_ctx_t *ctx)
1091 ngx_chain_t *cl;
1093 cl = ctx->free_bufs;
1095 if (cl) {
1096 ctx->free_bufs = cl->next;
1098 cl->buf->shadow = NULL;
1099 cl->next = NULL;
1101 return cl;
1104 cl = ngx_alloc_chain_link(pool);
1105 if (cl == NULL) {
1106 return NULL;
1109 cl->buf = ngx_calloc_buf(pool);
1110 if (cl->buf == NULL) {
1111 return NULL;
1114 cl->next = NULL;
1116 cl->buf->tag = (ngx_buf_tag_t) &ngx_http_charset_filter_module;
1118 return cl;
1122 static ngx_chain_t *
1123 ngx_http_charset_get_buffer(ngx_pool_t *pool, ngx_http_charset_ctx_t *ctx,
1124 size_t size)
1126 ngx_buf_t *b;
1127 ngx_chain_t *cl, **ll;
1129 for (ll = &ctx->free_buffers, cl = ctx->free_buffers;
1131 ll = &cl->next, cl = cl->next)
1133 b = cl->buf;
1135 if ((size_t) (b->end - b->start) >= size) {
1136 *ll = cl->next;
1137 cl->next = NULL;
1139 b->pos = b->start;
1140 b->temporary = 1;
1141 b->shadow = NULL;
1143 return cl;
1147 cl = ngx_alloc_chain_link(pool);
1148 if (cl == NULL) {
1149 return NULL;
1152 cl->buf = ngx_create_temp_buf(pool, size);
1153 if (cl->buf == NULL) {
1154 return NULL;
1157 cl->next = NULL;
1159 cl->buf->temporary = 1;
1160 cl->buf->tag = (ngx_buf_tag_t) &ngx_http_charset_filter_module;
1162 return cl;
1166 static char *
1167 ngx_http_charset_map_block(ngx_conf_t *cf, ngx_command_t *cmd, void *conf)
1169 ngx_http_charset_main_conf_t *mcf = conf;
1171 char *rv;
1172 u_char *p, *dst2src, **pp;
1173 ngx_int_t src, dst;
1174 ngx_uint_t i, n;
1175 ngx_str_t *value;
1176 ngx_conf_t pvcf;
1177 ngx_http_charset_t *charset;
1178 ngx_http_charset_tables_t *table;
1179 ngx_http_charset_conf_ctx_t ctx;
1181 value = cf->args->elts;
1183 src = ngx_http_add_charset(&mcf->charsets, &value[1]);
1184 if (src == NGX_ERROR) {
1185 return NGX_CONF_ERROR;
1188 dst = ngx_http_add_charset(&mcf->charsets, &value[2]);
1189 if (dst == NGX_ERROR) {
1190 return NGX_CONF_ERROR;
1193 if (src == dst) {
1194 ngx_conf_log_error(NGX_LOG_EMERG, cf, 0,
1195 "\"charset_map\" between the same charsets "
1196 "\"%V\" and \"%V\"", &value[1], &value[2]);
1197 return NGX_CONF_ERROR;
1200 table = mcf->tables.elts;
1201 for (i = 0; i < mcf->tables.nelts; i++) {
1202 if ((src == table->src && dst == table->dst)
1203 || (src == table->dst && dst == table->src))
1205 ngx_conf_log_error(NGX_LOG_EMERG, cf, 0,
1206 "duplicate \"charset_map\" between "
1207 "\"%V\" and \"%V\"", &value[1], &value[2]);
1208 return NGX_CONF_ERROR;
1212 table = ngx_array_push(&mcf->tables);
1213 if (table == NULL) {
1214 return NGX_CONF_ERROR;
1217 table->src = src;
1218 table->dst = dst;
1220 if (ngx_strcasecmp(value[2].data, (u_char *) "utf-8") == 0) {
1221 table->src2dst = ngx_pcalloc(cf->pool, 256 * NGX_UTF_LEN);
1222 if (table->src2dst == NULL) {
1223 return NGX_CONF_ERROR;
1226 table->dst2src = ngx_pcalloc(cf->pool, 256 * sizeof(void *));
1227 if (table->dst2src == NULL) {
1228 return NGX_CONF_ERROR;
1231 dst2src = ngx_pcalloc(cf->pool, 256);
1232 if (dst2src == NULL) {
1233 return NGX_CONF_ERROR;
1236 pp = (u_char **) &table->dst2src[0];
1237 pp[0] = dst2src;
1239 for (i = 0; i < 128; i++) {
1240 p = &table->src2dst[i * NGX_UTF_LEN];
1241 p[0] = '\1';
1242 p[1] = (u_char) i;
1243 dst2src[i] = (u_char) i;
1246 for (/* void */; i < 256; i++) {
1247 p = &table->src2dst[i * NGX_UTF_LEN];
1248 p[0] = '\1';
1249 p[1] = '?';
1252 } else {
1253 table->src2dst = ngx_palloc(cf->pool, 256);
1254 if (table->src2dst == NULL) {
1255 return NGX_CONF_ERROR;
1258 table->dst2src = ngx_palloc(cf->pool, 256);
1259 if (table->dst2src == NULL) {
1260 return NGX_CONF_ERROR;
1263 for (i = 0; i < 128; i++) {
1264 table->src2dst[i] = (u_char) i;
1265 table->dst2src[i] = (u_char) i;
1268 for (/* void */; i < 256; i++) {
1269 table->src2dst[i] = '?';
1270 table->dst2src[i] = '?';
1274 charset = mcf->charsets.elts;
1276 ctx.table = table;
1277 ctx.charset = &charset[dst];
1278 ctx.characters = 0;
1280 pvcf = *cf;
1281 cf->ctx = &ctx;
1282 cf->handler = ngx_http_charset_map;
1283 cf->handler_conf = conf;
1285 rv = ngx_conf_parse(cf, NULL);
1287 *cf = pvcf;
1289 if (ctx.characters) {
1290 n = ctx.charset->length;
1291 ctx.charset->length /= ctx.characters;
1293 if (((n * 10) / ctx.characters) % 10 > 4) {
1294 ctx.charset->length++;
1298 return rv;
1302 static char *
1303 ngx_http_charset_map(ngx_conf_t *cf, ngx_command_t *dummy, void *conf)
1305 u_char *p, *dst2src, **pp;
1306 uint32_t n;
1307 ngx_int_t src, dst;
1308 ngx_str_t *value;
1309 ngx_uint_t i;
1310 ngx_http_charset_tables_t *table;
1311 ngx_http_charset_conf_ctx_t *ctx;
1313 if (cf->args->nelts != 2) {
1314 ngx_conf_log_error(NGX_LOG_EMERG, cf, 0, "invalid parameters number");
1315 return NGX_CONF_ERROR;
1318 value = cf->args->elts;
1320 src = ngx_hextoi(value[0].data, value[0].len);
1321 if (src == NGX_ERROR || src > 255) {
1322 ngx_conf_log_error(NGX_LOG_EMERG, cf, 0,
1323 "invalid value \"%V\"", &value[0]);
1324 return NGX_CONF_ERROR;
1327 ctx = cf->ctx;
1328 table = ctx->table;
1330 if (ctx->charset->utf8) {
1331 p = &table->src2dst[src * NGX_UTF_LEN];
1333 *p++ = (u_char) (value[1].len / 2);
1335 for (i = 0; i < value[1].len; i += 2) {
1336 dst = ngx_hextoi(&value[1].data[i], 2);
1337 if (dst == NGX_ERROR || dst > 255) {
1338 ngx_conf_log_error(NGX_LOG_EMERG, cf, 0,
1339 "invalid value \"%V\"", &value[1]);
1340 return NGX_CONF_ERROR;
1343 *p++ = (u_char) dst;
1346 i /= 2;
1348 ctx->charset->length += i;
1349 ctx->characters++;
1351 p = &table->src2dst[src * NGX_UTF_LEN] + 1;
1353 n = ngx_utf8_decode(&p, i);
1355 if (n > 0xffff) {
1356 ngx_conf_log_error(NGX_LOG_EMERG, cf, 0,
1357 "invalid value \"%V\"", &value[1]);
1358 return NGX_CONF_ERROR;
1361 pp = (u_char **) &table->dst2src[0];
1363 dst2src = pp[n >> 8];
1365 if (dst2src == NULL) {
1366 dst2src = ngx_pcalloc(cf->pool, 256);
1367 if (dst2src == NULL) {
1368 return NGX_CONF_ERROR;
1371 pp[n >> 8] = dst2src;
1374 dst2src[n & 0xff] = (u_char) src;
1376 } else {
1377 dst = ngx_hextoi(value[1].data, value[1].len);
1378 if (dst == NGX_ERROR || dst > 255) {
1379 ngx_conf_log_error(NGX_LOG_EMERG, cf, 0,
1380 "invalid value \"%V\"", &value[1]);
1381 return NGX_CONF_ERROR;
1384 table->src2dst[src] = (u_char) dst;
1385 table->dst2src[dst] = (u_char) src;
1388 return NGX_CONF_OK;
1392 static char *
1393 ngx_http_set_charset_slot(ngx_conf_t *cf, ngx_command_t *cmd, void *conf)
1395 char *p = conf;
1397 ngx_int_t *cp;
1398 ngx_str_t *value, var;
1399 ngx_http_charset_main_conf_t *mcf;
1401 cp = (ngx_int_t *) (p + cmd->offset);
1403 if (*cp != NGX_CONF_UNSET) {
1404 return "is duplicate";
1407 value = cf->args->elts;
1409 if (cmd->offset == offsetof(ngx_http_charset_loc_conf_t, charset)
1410 && ngx_strcmp(value[1].data, "off") == 0)
1412 *cp = NGX_HTTP_CHARSET_OFF;
1413 return NGX_CONF_OK;
1417 if (value[1].data[0] == '$') {
1418 var.len = value[1].len - 1;
1419 var.data = value[1].data + 1;
1421 *cp = ngx_http_get_variable_index(cf, &var);
1423 if (*cp == NGX_ERROR) {
1424 return NGX_CONF_ERROR;
1427 *cp += NGX_HTTP_CHARSET_VAR;
1429 return NGX_CONF_OK;
1432 mcf = ngx_http_conf_get_module_main_conf(cf,
1433 ngx_http_charset_filter_module);
1435 *cp = ngx_http_add_charset(&mcf->charsets, &value[1]);
1436 if (*cp == NGX_ERROR) {
1437 return NGX_CONF_ERROR;
1440 return NGX_CONF_OK;
1444 static ngx_int_t
1445 ngx_http_add_charset(ngx_array_t *charsets, ngx_str_t *name)
1447 ngx_uint_t i;
1448 ngx_http_charset_t *c;
1450 c = charsets->elts;
1451 for (i = 0; i < charsets->nelts; i++) {
1452 if (name->len != c[i].name.len) {
1453 continue;
1456 if (ngx_strcasecmp(name->data, c[i].name.data) == 0) {
1457 break;
1461 if (i < charsets->nelts) {
1462 return i;
1465 c = ngx_array_push(charsets);
1466 if (c == NULL) {
1467 return NGX_ERROR;
1470 c->tables = NULL;
1471 c->name = *name;
1472 c->length = 0;
1474 if (ngx_strcasecmp(name->data, (u_char *) "utf-8") == 0) {
1475 c->utf8 = 1;
1477 } else {
1478 c->utf8 = 0;
1481 return i;
1485 static void *
1486 ngx_http_charset_create_main_conf(ngx_conf_t *cf)
1488 ngx_http_charset_main_conf_t *mcf;
1490 mcf = ngx_pcalloc(cf->pool, sizeof(ngx_http_charset_main_conf_t));
1491 if (mcf == NULL) {
1492 return NULL;
1495 if (ngx_array_init(&mcf->charsets, cf->pool, 2, sizeof(ngx_http_charset_t))
1496 != NGX_OK)
1498 return NULL;
1501 if (ngx_array_init(&mcf->tables, cf->pool, 1,
1502 sizeof(ngx_http_charset_tables_t))
1503 != NGX_OK)
1505 return NULL;
1508 if (ngx_array_init(&mcf->recodes, cf->pool, 2,
1509 sizeof(ngx_http_charset_recode_t))
1510 != NGX_OK)
1512 return NULL;
1515 return mcf;
1519 static void *
1520 ngx_http_charset_create_loc_conf(ngx_conf_t *cf)
1522 ngx_http_charset_loc_conf_t *lcf;
1524 lcf = ngx_pcalloc(cf->pool, sizeof(ngx_http_charset_loc_conf_t));
1525 if (lcf == NULL) {
1526 return NULL;
1530 * set by ngx_pcalloc():
1532 * lcf->types = { NULL };
1533 * lcf->types_keys = NULL;
1536 lcf->charset = NGX_CONF_UNSET;
1537 lcf->source_charset = NGX_CONF_UNSET;
1538 lcf->override_charset = NGX_CONF_UNSET;
1540 return lcf;
1544 static char *
1545 ngx_http_charset_merge_loc_conf(ngx_conf_t *cf, void *parent, void *child)
1547 ngx_http_charset_loc_conf_t *prev = parent;
1548 ngx_http_charset_loc_conf_t *conf = child;
1550 ngx_uint_t i;
1551 ngx_http_charset_recode_t *recode;
1552 ngx_http_charset_main_conf_t *mcf;
1554 if (ngx_http_merge_types(cf, &conf->types_keys, &conf->types,
1555 &prev->types_keys, &prev->types,
1556 ngx_http_charset_default_types)
1557 != NGX_OK)
1559 return NGX_CONF_ERROR;
1562 ngx_conf_merge_value(conf->override_charset, prev->override_charset, 0);
1563 ngx_conf_merge_value(conf->charset, prev->charset, NGX_HTTP_CHARSET_OFF);
1564 ngx_conf_merge_value(conf->source_charset, prev->source_charset,
1565 NGX_HTTP_CHARSET_OFF);
1567 if (conf->charset == NGX_HTTP_CHARSET_OFF
1568 || conf->source_charset == NGX_HTTP_CHARSET_OFF
1569 || conf->charset == conf->source_charset)
1571 return NGX_CONF_OK;
1574 if (conf->source_charset >= NGX_HTTP_CHARSET_VAR
1575 || conf->charset >= NGX_HTTP_CHARSET_VAR)
1577 return NGX_CONF_OK;
1580 mcf = ngx_http_conf_get_module_main_conf(cf,
1581 ngx_http_charset_filter_module);
1582 recode = mcf->recodes.elts;
1583 for (i = 0; i < mcf->recodes.nelts; i++) {
1584 if (conf->source_charset == recode[i].src
1585 && conf->charset == recode[i].dst)
1587 return NGX_CONF_OK;
1591 recode = ngx_array_push(&mcf->recodes);
1592 if (recode == NULL) {
1593 return NGX_CONF_ERROR;
1596 recode->src = conf->source_charset;
1597 recode->dst = conf->charset;
1599 return NGX_CONF_OK;
1603 static ngx_int_t
1604 ngx_http_charset_postconfiguration(ngx_conf_t *cf)
1606 u_char **src, **dst;
1607 ngx_int_t c;
1608 ngx_uint_t i, t;
1609 ngx_http_charset_t *charset;
1610 ngx_http_charset_recode_t *recode;
1611 ngx_http_charset_tables_t *tables;
1612 ngx_http_charset_main_conf_t *mcf;
1614 mcf = ngx_http_conf_get_module_main_conf(cf,
1615 ngx_http_charset_filter_module);
1617 recode = mcf->recodes.elts;
1618 tables = mcf->tables.elts;
1619 charset = mcf->charsets.elts;
1621 for (i = 0; i < mcf->recodes.nelts; i++) {
1623 c = recode[i].src;
1625 for (t = 0; t < mcf->tables.nelts; t++) {
1627 if (c == tables[t].src && recode[i].dst == tables[t].dst) {
1628 goto next;
1631 if (c == tables[t].dst && recode[i].dst == tables[t].src) {
1632 goto next;
1636 ngx_log_error(NGX_LOG_EMERG, cf->log, 0,
1637 "no \"charset_map\" between the charsets \"%V\" and \"%V\"",
1638 &charset[c].name, &charset[recode[i].dst].name);
1639 return NGX_ERROR;
1641 next:
1642 continue;
1646 for (t = 0; t < mcf->tables.nelts; t++) {
1648 src = charset[tables[t].src].tables;
1650 if (src == NULL) {
1651 src = ngx_pcalloc(cf->pool, sizeof(u_char *) * mcf->charsets.nelts);
1652 if (src == NULL) {
1653 return NGX_ERROR;
1656 charset[tables[t].src].tables = src;
1659 dst = charset[tables[t].dst].tables;
1661 if (dst == NULL) {
1662 dst = ngx_pcalloc(cf->pool, sizeof(u_char *) * mcf->charsets.nelts);
1663 if (dst == NULL) {
1664 return NGX_ERROR;
1667 charset[tables[t].dst].tables = dst;
1670 src[tables[t].dst] = tables[t].src2dst;
1671 dst[tables[t].src] = tables[t].dst2src;
1674 ngx_http_next_header_filter = ngx_http_top_header_filter;
1675 ngx_http_top_header_filter = ngx_http_charset_header_filter;
1677 ngx_http_next_body_filter = ngx_http_top_body_filter;
1678 ngx_http_top_body_filter = ngx_http_charset_body_filter;
1680 return NGX_OK;