# Copyright 2007 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Encoding related utilities."""
# Text-string types whose iteration yields 1-character strings.  Python 2 has
# `basestring` (str and unicode); Python 3 removed it, leaving only `str`.
try:
    _STRING_TYPES = (basestring,)
except NameError:
    _STRING_TYPES = (str,)

# Lookup table used when as_utf8 is true: every byte value maps to the
# character with the same code, except for the six characters below that
# always get a backslash escape.
_cescape_utf8_to_str = [chr(i) for i in range(0, 256)]
_cescape_utf8_to_str[9] = r'\t'   # optional escape
_cescape_utf8_to_str[10] = r'\n'  # optional escape
_cescape_utf8_to_str[13] = r'\r'  # optional escape
_cescape_utf8_to_str[39] = r"\'"  # optional escape
_cescape_utf8_to_str[34] = r'\"'  # necessary escape
_cescape_utf8_to_str[92] = r'\\'  # necessary escape

# Lookup table used when as_utf8 is false: codes 32..126 map to themselves,
# every other code maps to a three-digit octal escape, and the same six
# characters as above get their short backslash escape.
_cescape_byte_to_str = ([r'\%03o' % i for i in range(0, 32)] +
                        [chr(i) for i in range(32, 127)] +
                        [r'\%03o' % i for i in range(127, 256)])
_cescape_byte_to_str[9] = r'\t'   # optional escape
_cescape_byte_to_str[10] = r'\n'  # optional escape
_cescape_byte_to_str[13] = r'\r'  # optional escape
_cescape_byte_to_str[39] = r"\'"  # optional escape
_cescape_byte_to_str[34] = r'\"'  # necessary escape
_cescape_byte_to_str[92] = r'\\'  # necessary escape


def CEscape(text, as_utf8):
    r"""Escape a bytes string for use in an ascii protocol buffer.

    text.encode('string_escape') does not seem to satisfy our needs as it
    encodes unprintable characters using two-digit hex escapes whereas our
    C++ unescaping function allows hex escapes to be any length.  So,
    "\0011".encode('string_escape') ends up being "\\x011", which will be
    decoded in C++ as a single-character string with char code 0x11.

    Args:
      text: A byte string to be escaped.  Every element must have a code in
        the range 0..255, because the lookup tables have 256 entries.
      as_utf8: Specifies if result should be returned in UTF-8 encoding.
        When true, only tab, newline, carriage return, both quote characters
        and the backslash are escaped; everything else passes through.  When
        false, every code outside 32..126 becomes a three-digit octal escape
        as well.

    Returns:
      The escaped string.
    """
    # Iterating a text string yields 1-character strings that need ord();
    # iterating anything else (e.g. Python 3 bytes) already yields ints.
    if isinstance(text, _STRING_TYPES):
        to_code = ord
    else:
        to_code = lambda x: x
    table = _cescape_utf8_to_str if as_utf8 else _cescape_byte_to_str
    return ''.join(table[to_code(c)] for c in text)
# Matches a hex escape that has exactly one hex digit (e.g. "\xf"), together
# with the whole run of backslashes in front of the "x".  The negative
# look-ahead rejects escapes that already carry two or more hex digits.
_CUNESCAPE_HEX = re.compile(r'(\\+)x([0-9a-fA-F])(?![0-9a-fA-F])')

# Maps character codes 0..126 to themselves and 127..255 to a three-digit
# octal escape, so the text can be encoded as pure ASCII before decoding.
_cescape_highbit_to_str = ([chr(i) for i in range(0, 127)] +
                           [r'\%03o' % i for i in range(127, 256)])


def CUnescape(text):
    """Unescape a text string with C-style escape sequences to UTF-8 bytes.

    Args:
      text: The escaped text.  On Python 3 every character must have a code
        below 256, because the high-bit lookup table has 256 entries.

    Returns:
      On Python 2, the result of decoding with the 'string_escape' codec.
      On Python 3, a bytes object holding the unescaped data.
    """

    def ReplaceHex(m):
        # Only pad the escape when the number of leading backslashes is odd,
        # i.e. when the backslash in front of the "x" is not itself escaped.
        if len(m.group(1)) & 1:
            return m.group(1) + 'x0' + m.group(2)
        # Otherwise leave the match untouched.  Returning None here would
        # make re.sub drop the matched text.
        return m.group(0)

    # Widen single-digit hex escapes (like '\\xf') to two digits, because the
    # escape codecs used below do not accept the one-digit form.
    result = _CUNESCAPE_HEX.sub(ReplaceHex, text)

    if sys.version_info[0] < 3:
        return result.decode('string_escape')
    # Re-express characters 127..255 as octal escapes so the string is ASCII.
    result = ''.join(_cescape_highbit_to_str[ord(c)] for c in result)
    return (result.encode('ascii')  # Make it bytes to allow decode.
            .decode('unicode_escape')
            # Make it bytes again to return the proper type.
            .encode('raw_unicode_escape'))