From 38c389a29e8ff582ba32c7737bb90f0f0df12078 Mon Sep 17 00:00:00 2001 From: Steve Bennett Date: Sun, 27 Oct 2019 20:46:56 +1000 Subject: [PATCH] regexp, regsub: -start is a character index Not a byte index Reported-by: dbohdan Signed-off-by: Steve Bennett --- jim-regexp.c | 5 ++++- tests/regexp2.test | 7 ++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/jim-regexp.c b/jim-regexp.c index 771773a..81f3207 100644 --- a/jim-regexp.c +++ b/jim-regexp.c @@ -55,6 +55,7 @@ #include <regex.h> #endif #include "jim.h" +#include "utf8.h" static void FreeRegexpInternalRep(Jim_Interp *interp, Jim_Obj *objPtr) { @@ -226,7 +227,7 @@ int Jim_RegexpCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv) source_str += source_len; } else if (offset > 0) { - source_str += offset; + source_str += utf8_index(source_str, offset); } eflags |= REG_NOTBOL; } @@ -441,6 +442,8 @@ int Jim_RegsubCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv) offset = 0; } } + /* Convert from character offset to byte offset */ + offset = utf8_index(source_str, offset); /* Copy the part before -start */ Jim_AppendString(interp, resultObj, source_str, offset); diff --git a/tests/regexp2.test b/tests/regexp2.test index f7cf516..76735e6 100644 --- a/tests/regexp2.test +++ b/tests/regexp2.test @@ -627,7 +627,12 @@ test regexpComp-16.3 {regsub -start} { # lappend out [regsub -start 0 -all {\A(\w)} {abcde} {/\1} x] $x # lappend out [regsub -start 2 -all {\A(\w)} {abcde} {/\1} x] $x #} {5 /a/b/c/d/e 3 ab/c/d/e} - +test regexpComp-16.5 {regexp -start with utf8} utf8 { + regexp -inline -start 1 . \u0442\u0435\u0441\u0442 +} \u0435 +test regexpComp-16.6 {regexp -start with utf8} utf8 { + regsub -start 1 . \u0442\u0435\u0441\u0442 x +} \u0442x\u0441\u0442 test regexpComp-17.1 {regexp -inline} { regexp -inline b ababa } {b} -- 2.11.4.GIT