1 /************************ tell Emacs this is a -*-C-*- file *************
6 * Copyright (C) 2018 SIPE Project <http://sipe.sourceforge.net/>
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 * Simple parser to extract plain text from RTF and transform it to HTML
26 /* generated module */
27 %option outfile="sipe-rtf.c"
28 %option prefix="sipe_rtf_lexer_"
30 /* flex configuration options */
49 #include "sipe-common.h"
50 #include "sipe-backend.h"
54 * small string buffer to avoid memory allocations
 * Must be length of longest interesting keyword + 1, plus one more byte
 * for the terminating NUL. Currently the longest keyword is "par",
 * hence 3 + 1 + 1.
/* parenthesized so the sum stays intact inside larger expressions */
#define SIPE_RTF_LEXER_KEYWORD_SIZE (3 + 1 + 1)
61 /* lexer token value type */
62 struct parser_lval_type {
64 gchar keyword_buffer[SIPE_RTF_LEXER_KEYWORD_SIZE];
66 #define YYSTYPE struct parser_lval_type
70 #define KEYWORD_PARAMETER 257
71 #define KEYWORD_END 258
72 #define LEXER_ERROR 259
77 guint unicode_ignore_length;
79 struct parser_lval_type lval;
82 static void sipe_rtf_add_char(struct parser_state *state, gchar c);
83 static void sipe_rtf_add_text(struct parser_state *state, const gchar *text);
84 static void sipe_rtf_add_unichar(struct parser_state *state, gunichar c);
94 /* stuff that we simply throw away */
95 [\r\n]+ /* line endings */
96 "{"\\\*[^}]*"}" /* comments */
97 /* font definitions */
98 "{"\\f{DIGIT}+\\[^;]+;"}"
99 "{" /* section start */
100 "}" /* section end */
102 /* pass plain text to output buffer */
103 /* escaped special characters */
104 \\\\ { sipe_rtf_add_char(yyextra, '\\'); }
105 \\"{" { sipe_rtf_add_char(yyextra, '{'); }
106 \\"}" { sipe_rtf_add_char(yyextra, '}'); }
107 \\\'{HEX}{2} { /* 2 digit hex to 8-bit character */
109 sscanf(yytext + 2, "%x", &c);
110 sipe_rtf_add_char(yyextra, c);
\\u-?{DIGIT}+           { /* Unicode character */
                          /*
                           * RTF writes the \u parameter as a signed 16-bit
                           * number: code points above 32767 show up as
                           * negative values and have to be shifted back
                           * by 65536. The optional minus sign in the
                           * pattern makes sure such sequences are matched
                           * by this rule at all.
                           */
                          int c = 0;
                          sscanf(yytext + 2, "%d", &c);
                          if (c < 0)
                              c += 65536;
                          /* still negative: value was out of range, drop it */
                          if (c >= 0)
                              sipe_rtf_add_unichar(yyextra, (gunichar) c);
                        }
117 /* all other plain text */
118 [^{}\\\n\r]+ { sipe_rtf_add_text(yyextra, yytext); }
120 /* stuff passed to parser for further processing */
123 g_strlcpy(yylval->keyword_buffer,
125 SIPE_RTF_LEXER_KEYWORD_SIZE);
128 <RTF_KEYWORD>{DIGIT}+ {
129 sscanf(yytext, "%d", &yylval->number);
130 return(KEYWORD_PARAMETER);
132 <RTF_KEYWORD>(;|[^0-9][^;\\]*;|[ ])? {
                          /* reset <RTF_KEYWORD> start condition and throw the matched text away */
137 <RTF_KEYWORD>.|\n { /* reset <RTF_KEYWORD> start condition */
143 /* indicate anything else as error to parser */
144 <INITIAL,RTF_KEYWORD>.|\n {
149 /* memory allocation for flex code */
150 void *sipe_rtf_lexer_alloc(yy_size_t size,
151 SIPE_UNUSED_PARAMETER yyscan_t yyscanner)
153 return g_malloc(size);
156 void *sipe_rtf_lexer_realloc(void *ptr, yy_size_t size,
157 SIPE_UNUSED_PARAMETER yyscan_t yyscanner)
159 return g_realloc(ptr, size);
162 void sipe_rtf_lexer_free(void *ptr,
163 SIPE_UNUSED_PARAMETER yyscan_t yyscanner)
168 /* add text to buffer */
169 static void sipe_rtf_add_char(struct parser_state *state, gchar c)
171 /* ignored characters after unicode sequence */
175 g_string_append_c(state->text, c);
179 static void sipe_rtf_add_text(struct parser_state *state, const gchar *text)
181 /* ignored characters after unicode sequence */
183 while (*text && state->ignore--) text++;
188 /* add the remainder to the text buffer */
189 g_string_append(state->text, text);
192 static void sipe_rtf_add_unichar(struct parser_state *state, gunichar c)
194 /* ignored characters after unicode sequence */
195 state->ignore = state->unicode_ignore_length;
197 g_string_append_unichar(state->text, c);
200 static void sipe_rtf_parse_keyword(struct parser_state *state,
201 const gchar *keyword) {
202 if (strcmp(keyword, "par") == 0) {
203 sipe_rtf_add_text(state, "<br/>");
207 static void sipe_rtf_parse_keyword_parameter(struct parser_state *state,
208 const gchar *keyword,
209 unsigned int parameter) {
210 if (strcmp(keyword, "uc") == 0) {
211 state->unicode_ignore_length = parameter;
215 /****************************************************************************
219 * based on Bison parser
221 * %output "sipe-rtf-parser.c"
222 * %defines "sipe-rtf-parser.h"
224 * %define api.pure full
225 * %define api.prefix {sipe_rtf_parser_}
227 * %param {yyscan_t scanner}
228 * %parse-param {struct parser_state *state}
235 * gchar keyword_buffer[SIPE_RTF_LEXER_KEYWORD_SIZE];
239 * %token <keyword_buffer> KEYWORD
240 * %token <number> KEYWORD_PARAMETER
247 * | sequence KEYWORD KEYWORD_PARAMETER KEYWORD_END {
248 * const char *keyword = $2;
249 * unsigned int parameter = $3;
250 * sipe_rtf_parse_keyword_parameter(state, keyword, parameter);
252 * | sequence KEYWORD KEYWORD_END {
253 * const char *keyword = $2;
254 * sipe_rtf_parse_keyword(state, keyword);
256 * | sequence LEXER_ERROR {
257 * yyerror(scanner, state, "lexer error");
263 static void sipe_rtf_parser_error(const gchar *msg)
265 SIPE_DEBUG_ERROR("sipe_rtf_parser_error: %s", msg);
268 static int sipe_rtf_parser_get_token(yyscan_t scanner,
269 struct parser_state *state,
272 int token = sipe_rtf_lexer_lex(&state->lval, scanner);
274 if (required && (token < 1)) {
275 sipe_rtf_parser_error("unexpected end of RTF");
282 static gboolean sipe_rtf_parser(yyscan_t scanner,
283 struct parser_state *state)
285 struct parser_lval_type *lval = &state->lval;
	/* read tokens from the lexer until it returns EOF */
290 if ((token = sipe_rtf_parser_get_token(scanner,
296 case 0: /* parse succeeded */
302 const gchar *keyword = lval->keyword_buffer;
304 if ((token = sipe_rtf_parser_get_token(scanner,
311 sipe_rtf_parse_keyword(state, keyword);
314 case KEYWORD_PARAMETER:
316 guint parameter = lval->number;
318 if ((token = sipe_rtf_parser_get_token(scanner,
322 } else if (token == KEYWORD_END) {
323 sipe_rtf_parse_keyword_parameter(state,
327 sipe_rtf_parser_error("unexpected token");
334 sipe_rtf_parser_error("broken keyword");
342 sipe_rtf_parser_error("unexpected token");
348 gchar *sipe_rtf_to_html(const gchar *rtf)
352 (void)sipe_rtf_parser;