From 3324591e6cb3af729bad654b1772e3bc34d2986e Mon Sep 17 00:00:00 2001 From: Pieter de Bie Date: Mon, 14 Sep 2009 13:02:36 +0200 Subject: [PATCH] Fix UTF-8 bug in NSString_RegEx This class would use the location information provided by regex(3) as the range for a substring. However, the information regex(3) returns is byte-based, while NSString works on characters. This can cause a problem when there are multi-byte UTF-8 characters in the string, as the wrong substring will be returned. This is fixed by taking the UTF-8 byte sequence, and extracting a substring from that, rather than using NSString's own substring method. --- NSString_RegEx.m | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/NSString_RegEx.m b/NSString_RegEx.m index ee0e9bd..38d3848 100644 --- a/NSString_RegEx.m +++ b/NSString_RegEx.m @@ -57,7 +57,9 @@ break; NSRange range = NSMakeRange(pmatch[i].rm_so, pmatch[i].rm_eo - pmatch[i].rm_so); - NSString * substring = [self substringWithRange:range]; + NSString * substring = [[[NSString alloc] initWithBytes:[self UTF8String] + range.location + length:range.length + encoding:NSUTF8StringEncoding] autorelease]; [outMatches addObject:substring]; if (ranges) -- 2.11.4.GIT