From 91770dc5bd7abc55510757b5ff8d652619fd5f38 Mon Sep 17 00:00:00 2001 From: ketmar Date: Sun, 25 Aug 2013 10:20:58 +0300 Subject: [PATCH] cosmetix fixes in regexp parser --- src/libre9/re9.c | 58 +++++++++++++++++++++++++++++++++----------------------- 1 file changed, 34 insertions(+), 24 deletions(-) diff --git a/src/libre9/re9.c b/src/libre9/re9.c index 10b976f..55c3218 100644 --- a/src/libre9/re9.c +++ b/src/libre9/re9.c @@ -475,19 +475,29 @@ static void add_to_class_two (re9_compiler_t *ci, re9_rune r0, re9_rune r1) { static int nextc (re9_compiler_t *ci, re9_rune *rp) { if (ci->expr >= ci->expr_eol) { *rp = RE9_RUNE_SPEC_EOL; return 1; } if ((ci->flags&RE9_FLAG_NONUTF8) == 0) { - if (ci->expr[0] == '\\' && ci->expr+1 < ci->expr_eol) { - ci->expr += re9_char2rune(rp, ci->expr+1, ci->expr_eol)+1; + if (ci->expr[0] == '\\') { + if (ci->expr+1 < ci->expr_eol) { + ci->expr += re9_char2rune(rp, ci->expr+1, ci->expr_eol)+1; + } else { + ++ci->expr; + *rp = '\\'; + } return 1; } else { ci->expr += re9_char2rune(rp, ci->expr, ci->expr_eol); } } else { - if (ci->expr[0] == '\\' && ci->expr+1 < ci->expr_eol) { - *rp = (unsigned char)(ci->expr[1]); - ci->expr += 2; + if (ci->expr[0] == '\\') { + if (ci->expr+1 < ci->expr_eol) { + *rp = (unsigned char)(ci->expr[1]); + ci->expr += 2; + } else { + ++ci->expr; + *rp = '\\'; + } return 1; } else { - *rp = (uint8_t)(*ci->expr++); + *rp = (unsigned char)(*ci->expr++); if (ci->expr > ci->expr_eol) ci->expr = ci->expr_eol; } } @@ -685,22 +695,22 @@ static int bldcclass (re9_compiler_t *ci) { break; } #ifndef RE9_DISABLE_POSIX_CLASSES - } else if ((ci->yyc_used&0x01) == 0 && !quoted && rune == '[' && ci->expr+6 < ci->expr_eol && ci->expr[0] == ':') { - if (ci->expr+7 < ci->expr_eol) { - if (strncmp(ci->expr, ":alnum:]", 8) == 0) { ci->expr += 8; addmeta_alpha(ci); addmeta_digit(ci, 0); } - else if (strncmp(ci->expr, ":alpha:]", 8) == 0) { ci->expr += 8; addmeta_alpha(ci); } - else if (strncmp(ci->expr, ":ascii:]", 8) == 0) { ci->expr += 8; addmeta_ascii(ci); } - else if (strncmp(ci->expr, ":blank:]", 8) == 0) { ci->expr += 8; addmeta_blank(ci); } - else if (strncmp(ci->expr, ":cntrl:]", 8) == 0) { ci->expr += 8; addmeta_ctrl(ci); } - else if (strncmp(ci->expr, ":digit:]", 8) == 0) { ci->expr += 8; addmeta_digit(ci, 0); } - else if (strncmp(ci->expr, ":graph:]", 8) == 0) { ci->expr += 8; addmeta_graph(ci); } - else if (strncmp(ci->expr, ":lower:]", 8) == 0) { ci->expr += 8; addmeta_lower(ci); } - else if (strncmp(ci->expr, ":print:]", 8) == 0) { ci->expr += 8; addmeta_print(ci); } - else if (strncmp(ci->expr, ":punct:]", 8) == 0) { ci->expr += 8; addmeta_punct(ci); } - else if (strncmp(ci->expr, ":space:]", 8) == 0) { ci->expr += 8; addmeta_space(ci, 0); } - else if (strncmp(ci->expr, ":upper:]", 8) == 0) { ci->expr += 8; addmeta_upper(ci); } - } else if (strncmp(ci->expr, ":word:]", 7) == 0) { ci->expr += 7; addmeta_word(ci, 0); } + } else if ((ci->yyc_used&0x01) == 0 && !quoted && rune == '[' && ci->expr+7 < ci->expr_eol && ci->expr[0] == ':') { + if (strncmp(ci->expr, ":alnum:]", 8) == 0) { ci->expr += 8; addmeta_alpha(ci); addmeta_digit(ci, 0); } + else if (strncmp(ci->expr, ":alpha:]", 8) == 0) { ci->expr += 8; addmeta_alpha(ci); } + else if (strncmp(ci->expr, ":ascii:]", 8) == 0) { ci->expr += 8; addmeta_ascii(ci); } + else if (strncmp(ci->expr, ":blank:]", 8) == 0) { ci->expr += 8; addmeta_blank(ci); } + else if (strncmp(ci->expr, ":cntrl:]", 8) == 0) { ci->expr += 8; addmeta_ctrl(ci); } + else if (strncmp(ci->expr, ":digit:]", 8) == 0) { ci->expr += 8; addmeta_digit(ci, 0); } + else if (strncmp(ci->expr, ":graph:]", 8) == 0) { ci->expr += 8; addmeta_graph(ci); } + else if (strncmp(ci->expr, ":lower:]", 8) == 0) { ci->expr += 8; addmeta_lower(ci); } + else if (strncmp(ci->expr, ":print:]", 8) == 0) { ci->expr += 8; addmeta_print(ci); } + else if (strncmp(ci->expr, ":punct:]", 8) == 0) { ci->expr += 8; addmeta_punct(ci); } + else if (strncmp(ci->expr, ":space:]", 8) == 0) { ci->expr += 8; addmeta_space(ci, 0); } + else if (strncmp(ci->expr, ":upper:]", 8) == 0) { ci->expr += 8; addmeta_upper(ci); } + else if (strncmp(ci->expr, ":wordc:]", 8) == 0) { ci->expr += 7; addmeta_word(ci, 0); } /*non-standard!*/ else if (ci->expr+8 < ci->expr_eol && strncmp(ci->expr, ":xdigit:]", 9) == 0) { ci->expr += 9; addmeta_xdigit(ci); } + else rcerror(ci, "invalid POSIX range"); #endif } else { if (ci->yyc_used&0x01 && rune < ci->yyclass[ci->yyc_used-1]) rcerror(ci, "invalid range in '[]'"); @@ -1531,7 +1541,7 @@ static int regexec1 (const re9_prog_t *progp, re9_sub_t *mp, int ms, re9_ljunk_t case PRG_STARTS_WITH_CLASS: opc = (const vmop_class_t *)(progp->code+progp->startrange); while (s < j->eol) { - r = *(const uint8_t *)s; + r = *(const unsigned char *)s; rune_size = (r < RE9_RUNE_SELF || (flags&RE9_FLAG_NONUTF8) ? 1 : re9_char2rune(&r, s, j->eol)); if (flags&RE9_FLAG_CASEINSENS) r = UPPER(r); for (f = 0; f < opc->spi_count; f += 2) if (r >= opc->spi[f] && r <= opc->spi[f+1]) break; @@ -1552,7 +1562,7 @@ static int regexec1 (const re9_prog_t *progp, re9_sub_t *mp, int ms, re9_ljunk_t if (progp->startflags&PRG_STARTS_WITH_EOL) break; } if (progp->startflags&(PRG_STARTS_WITH_RUNE|PRG_STARTS_WITH_CLASS)) { - r = *(const uint8_t *)s; + r = *(const unsigned char *)s; rune_size = (r < RE9_RUNE_SELF || (flags&RE9_FLAG_NONUTF8) ? 1 : re9_char2rune(&r, s, j->eol)); if (flags&RE9_FLAG_CASEINSENS) r = UPPER(r); if (progp->startflags&PRG_STARTS_WITH_RUNE) { @@ -1574,7 +1584,7 @@ difficult_found: } #endif if (s < j->eol) { - r = *(const uint8_t *)s; + r = *(const unsigned char *)s; rune_size = (r < RE9_RUNE_SELF || (flags&RE9_FLAG_NONUTF8) ? 1 : re9_char2rune(&r, s, j->eol)); if (flags&RE9_FLAG_CASEINSENS) r = UPPER(r); } else { -- 2.11.4.GIT