App Engine Python SDK version 1.8.9
[gae.git] / python / google / net / proto2 / python / public / text_encoding.py
blob418141ef748004f79c6986155c32b201394117d1
#!/usr/bin/env python
#
# Copyright 2007 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
19 """Encoding related utilities."""
21 import re
22 import sys
# Types whose elements are one-character strings and therefore need ord() to
# become table indices.  ``basestring`` only exists on Python 2; on Python 3
# the equivalent text type is ``str``.
try:
    _STRING_TYPES = basestring
except NameError:
    _STRING_TYPES = str

# Lookup table used when as_utf8 is true: index i maps to chr(i), so every
# value passes through unchanged except the few characters overridden below.
# ``range`` (not ``xrange``) keeps the module importable on Python 2 and 3.
_cescape_utf8_to_str = [chr(i) for i in range(256)]
_cescape_utf8_to_str[9] = r'\t'
_cescape_utf8_to_str[10] = r'\n'
_cescape_utf8_to_str[13] = r'\r'
_cescape_utf8_to_str[39] = r"\'"
_cescape_utf8_to_str[34] = r'\"'
_cescape_utf8_to_str[92] = r'\\'

# Lookup table used when as_utf8 is false: values 32..126 map to themselves,
# everything else becomes a three-digit octal escape, and the same named
# escapes as above override their slots.
_cescape_byte_to_str = ([r'\%03o' % i for i in range(32)] +
                        [chr(i) for i in range(32, 127)] +
                        [r'\%03o' % i for i in range(127, 256)])
_cescape_byte_to_str[9] = r'\t'
_cescape_byte_to_str[10] = r'\n'
_cescape_byte_to_str[13] = r'\r'
_cescape_byte_to_str[39] = r"\'"
_cescape_byte_to_str[34] = r'\"'
_cescape_byte_to_str[92] = r'\\'


def CEscape(text, as_utf8):
    r"""Escape a bytes string for use in an ascii protocol buffer.

    text.encode('string_escape') does not seem to satisfy our needs as it
    encodes unprintable characters using two-digit hex escapes whereas our
    C++ unescaping function allows hex escapes to be any length.  So,
    "\0011".encode('string_escape') ends up being "\x011", which will be
    decoded in C++ as a single-character string with char code 0x11.

    Args:
      text: A byte string to be escaped.  Either a sequence of one-character
        strings (a text/str object) or a sequence of integers in 0..255
        (e.g. Python 3 ``bytes`` or a ``bytearray``).
      as_utf8: If true, only tab, newline, carriage return, quotes and
        backslash are escaped and every other value is emitted as chr(value).
        If false, every value outside 32..126 is additionally written as a
        three-digit octal escape.

    Returns:
      Escaped string.

    Raises:
      IndexError: if an element of text has a code greater than 255; the
        lookup tables only have 256 entries.
    """
    # Elements of a text string are characters and need ord(); elements of
    # any other sequence are assumed to already be integer byte values.
    if isinstance(text, _STRING_TYPES):
        to_code = ord
    else:
        to_code = lambda value: value
    table = _cescape_utf8_to_str if as_utf8 else _cescape_byte_to_str
    return ''.join(table[to_code(c)] for c in text)
# Matches a run of backslashes, an 'x', and exactly one hex digit that is not
# followed by another hex digit (i.e. a single-digit hex escape candidate).
_CUNESCAPE_HEX = re.compile(r'(\\+)x([0-9a-fA-F])(?![0-9a-fA-F])')

# Index i maps to chr(i) below 127 and to a three-digit octal escape from 127
# upwards, so the mapped text can be encoded as ASCII.
_cescape_highbit_to_str = [
    chr(code) if code < 127 else r'\%03o' % code for code in range(256)
]


def _PadSingleDigitHex(match):
    """Return the matched text with a single-digit hex escape zero-padded.

    An even number of leading backslashes means the backslash before the 'x'
    is itself escaped, so the match is returned untouched.
    """
    slashes = match.group(1)
    if len(slashes) % 2 == 0:
        return match.group(0)
    return slashes + 'x0' + match.group(2)


def CUnescape(text):
    """Convert text containing C-style escape sequences into a byte string.

    Args:
      text: The escaped text string.

    Returns:
      On Python 2 the result of decoding with 'string_escape'; on Python 3 a
      bytes object.
    """
    # Single-digit hex escapes such as '\\xf' are padded to two digits before
    # the codec-based decoding below is applied.
    padded = _CUNESCAPE_HEX.sub(_PadSingleDigitHex, text)

    if sys.version_info[0] >= 3:
        # Rewrite characters 127..255 as octal escapes so that the ASCII
        # encode succeeds, then let 'unicode_escape' interpret all escapes.
        ascii_only = ''.join(_cescape_highbit_to_str[ord(ch)] for ch in padded)
        as_bytes = ascii_only.encode('ascii')
        decoded = as_bytes.decode('unicode_escape')
        # Encode once more so the caller receives bytes rather than text.
        return decoded.encode('raw_unicode_escape')

    return padded.decode('string_escape')