Merged revisions 79260 via svnmerge from
[python/dscho.git] / Lib / plistlib.py
blobfbba791ce57d7cae57a2c6d6056f48fcf89c2ad6
r"""plistlib.py -- a tool to generate and parse MacOSX .plist files.

The property list (.plist) file format is a simple XML pickle supporting
basic object types, like dictionaries, lists, numbers and strings.
Usually the top level object is a dictionary.

To write out a plist file, use the writePlist(rootObject, pathOrFile)
function. 'rootObject' is the top level object, 'pathOrFile' is a
filename or a (writable) file object.

To parse a plist from a file, use the readPlist(pathOrFile) function,
with a file name or a (readable) file object as the only argument. It
returns the top level object (again, usually a dictionary).

To work with plist data in bytes objects, you can use readPlistFromBytes()
and writePlistToBytes().

Values can be strings, integers, floats, booleans, tuples, lists,
dictionaries (but only with string keys), Data or datetime.datetime objects.
String values (including dictionary keys) have to be unicode strings -- they
will be written out as UTF-8.

The <data> plist type is supported through the Data class. This is a
thin wrapper around a Python bytes object. Use 'Data' if your strings
contain control characters.

Generate Plist example:

    pl = dict(
        aString = "Doodah",
        aList = ["A", "B", 12, 32.1, [1, 2, 3]],
        aFloat = 0.1,
        anInt = 728,
        aDict = dict(
            anotherString = "<hello & hi there!>",
            aUnicodeValue = "M\xe4ssig, Ma\xdf",
            aTrueValue = True,
            aFalseValue = False,
        ),
        someData = Data(b"<binary gunk>"),
        someMoreData = Data(b"<lots of binary gunk>" * 10),
        aDate = datetime.datetime.fromtimestamp(time.mktime(time.gmtime())),
    )
    writePlist(pl, fileName)

Parse Plist example:

    pl = readPlist(pathOrFile)
    print(pl["aKey"])
"""
# Public API of this module.
__all__ = [
    "readPlist", "writePlist", "readPlistFromBytes", "writePlistToBytes",
    "Plist", "Data", "Dict"
]
# Note: the Plist and Dict classes have been deprecated.

import binascii
import datetime
from io import BytesIO
import re
def readPlist(pathOrFile):
    """Read a .plist file and return the unpacked root object.

    'pathOrFile' may either be a file name or a (readable) file object.
    The root object usually is a dictionary.

    When a file name is given, the file is opened in binary mode here and
    is always closed again, even if parsing raises.  A file object passed
    in by the caller is never closed.
    """
    didOpen = False
    if isinstance(pathOrFile, str):
        pathOrFile = open(pathOrFile, 'rb')
        didOpen = True
    try:
        p = PlistParser()
        rootObject = p.parse(pathOrFile)
    finally:
        # Only close what we opened ourselves.
        if didOpen:
            pathOrFile.close()
    return rootObject
def writePlist(rootObject, pathOrFile):
    """Write 'rootObject' to a .plist file.

    'pathOrFile' may either be a file name or a (writable) file object.
    The output is the plist header followed by 'rootObject' wrapped in a
    <plist version="1.0"> element.

    When a file name is given, the file is opened in binary mode here and
    is always closed again, even if serialisation raises (for example
    because of an unsupported value type).  A file object passed in by
    the caller is never closed.
    """
    didOpen = False
    if isinstance(pathOrFile, str):
        pathOrFile = open(pathOrFile, 'wb')
        didOpen = True
    try:
        writer = PlistWriter(pathOrFile)
        writer.writeln("<plist version=\"1.0\">")
        writer.writeValue(rootObject)
        writer.writeln("</plist>")
    finally:
        # Only close what we opened ourselves.
        if didOpen:
            pathOrFile.close()
def readPlistFromBytes(data):
    """Parse the plist document held in the bytes object 'data'.

    The bytes are wrapped in an in-memory file and handed to readPlist();
    the resulting root object is returned.
    """
    stream = BytesIO(data)
    return readPlist(stream)
def writePlistToBytes(rootObject):
    """Return 'rootObject' as a plist-formatted bytes object.

    The document is produced by writePlist() writing into an in-memory
    buffer, whose contents are returned.
    """
    f = BytesIO()
    writePlist(rootObject, f)
    return f.getvalue()
class DumbXMLWriter:
    """Minimal line-oriented XML writer that emits UTF-8 bytes.

    'file' must accept bytes in its write() method.  Each call to
    writeln() produces one output line prefixed with the current
    indentation; beginElement()/endElement() track nesting on a stack.
    """

    def __init__(self, file, indentLevel=0, indent="\t"):
        self.file = file
        self.stack = []
        self.indentLevel = indentLevel
        # All output is written as bytes, so a str indent (including the
        # default) is encoded once here rather than failing on first write.
        if isinstance(indent, str):
            indent = indent.encode('utf-8')
        self.indent = indent

    def beginElement(self, element):
        """Write the opening tag for 'element' and indent what follows."""
        self.stack.append(element)
        self.writeln("<%s>" % element)
        self.indentLevel += 1

    def endElement(self, element):
        """Write the closing tag for 'element', which must be the innermost
        open element."""
        assert self.indentLevel > 0
        # Pop outside of the assert: assert statements are removed under
        # -O, and the stack must still be unwound in that case.
        opened = self.stack.pop()
        assert opened == element
        self.indentLevel -= 1
        self.writeln("</%s>" % element)

    def simpleElement(self, element, value=None):
        """Write '<element>value</element>' on one line, with 'value'
        XML-escaped, or the empty tag '<element/>' when 'value' is None."""
        if value is not None:
            value = _escape(value)
            self.writeln("<%s>%s</%s>" % (element, value, element))
        else:
            self.writeln("<%s/>" % element)

    def writeln(self, line):
        """Write 'line' (str or bytes) at the current indentation, followed
        by a newline.  An empty 'line' produces a bare newline."""
        if line:
            # plist has fixed encoding of utf-8
            if isinstance(line, str):
                line = line.encode('utf-8')
            self.file.write(self.indentLevel * self.indent)
            self.file.write(line)
        self.file.write(b'\n')
# Contents should conform to a subset of ISO 8601
# (in particular, YYYY '-' MM '-' DD 'T' HH ':' MM ':' SS 'Z'.  Smaller units may be omitted with
#  a loss of precision)
_dateParser = re.compile(r"(?P<year>\d\d\d\d)(?:-(?P<month>\d\d)(?:-(?P<day>\d\d)(?:T(?P<hour>\d\d)(?::(?P<minute>\d\d)(?::(?P<second>\d\d))?)?)?)?)?Z", re.ASCII)

def _dateFromString(s):
    """Convert a plist date string into a naive datetime.datetime.

    Units omitted from the end of the string take their smallest value:
    a missing month or day becomes 1, a missing hour, minute or second
    becomes 0.

    Raises ValueError if 's' does not start with a date in the supported
    format.
    """
    m = _dateParser.match(s)
    if m is None:
        raise ValueError("invalid plist date: %r" % (s,))
    gd = m.groupdict()
    order = ('year', 'month', 'day', 'hour', 'minute', 'second')
    lst = []
    for key in order:
        val = gd[key]
        if val is None:
            # The groups are nested, so every later unit is absent too.
            break
        lst.append(int(val))
    # datetime() requires year, month and day; fill in what was omitted.
    while len(lst) < 3:
        lst.append(1)
    return datetime.datetime(*lst)
def _dateToString(d):
    """Format the datetime 'd' as a plist date, 'YYYY-MM-DDTHH:MM:SSZ'.

    Only the year through second fields are used, exactly as stored on
    'd'; microseconds are not written.
    """
    return '%04d-%02d-%02dT%02d:%02d:%02dZ' % (
        d.year, d.month, d.day,
        d.hour, d.minute, d.second
    )
# Regex to find any control chars, except for \t \n and \r
_controlCharPat = re.compile(
    r"[\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0b\x0c\x0e\x0f"
    r"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f]")

def _escape(text):
    """Return 'text' made safe for use as XML character data.

    DOS and Mac line endings are normalised to '\\n' and the characters
    '&', '<' and '>' are replaced by their entities.

    Raises ValueError if 'text' contains a control character other than
    tab, newline or carriage return.
    """
    if _controlCharPat.search(text) is not None:
        raise ValueError("strings can't contains control characters; "
                         "use plistlib.Data instead")
    # Order matters: "\r\n" must be handled before the lone "\r", and '&'
    # before '<' / '>' so the entities added for those are not re-escaped.
    substitutions = (
        ("\r\n", "\n"),
        ("\r", "\n"),
        ("&", "&amp;"),
        ("<", "&lt;"),
        (">", "&gt;"),
    )
    for old, new in substitutions:
        text = text.replace(old, new)
    return text
# XML declaration and DOCTYPE line; PlistWriter writes this first when its
# 'writeHeader' argument is true.
PLISTHEADER = b"""\
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
"""
class PlistWriter(DumbXMLWriter):
    """Serialise Python values as plist XML elements to a bytes file."""

    def __init__(self, file, indentLevel=0, indent=b"\t", writeHeader=1):
        # The header goes out before any element, unless suppressed.
        if writeHeader:
            file.write(PLISTHEADER)
        DumbXMLWriter.__init__(self, file, indentLevel, indent)

    def writeValue(self, value):
        """Write 'value' as the matching plist element.

        Raises TypeError for a value of an unsupported type.
        """
        if isinstance(value, str):
            self.simpleElement("string", value)
        elif isinstance(value, bool):
            # must switch for bool before int, as bool is a
            # subclass of int...
            if value:
                self.simpleElement("true")
            else:
                self.simpleElement("false")
        elif isinstance(value, int):
            self.simpleElement("integer", "%d" % value)
        elif isinstance(value, float):
            self.simpleElement("real", repr(value))
        elif isinstance(value, dict):
            self.writeDict(value)
        elif isinstance(value, Data):
            self.writeData(value)
        elif isinstance(value, datetime.datetime):
            self.simpleElement("date", _dateToString(value))
        elif isinstance(value, (tuple, list)):
            self.writeArray(value)
        else:
            raise TypeError("unsupported type: %s" % type(value))

    def writeData(self, data):
        """Write a Data object as a <data> element of base64 lines."""
        self.beginElement("data")
        # The base64 lines sit at the same indentation as the <data> tag.
        self.indentLevel -= 1
        # Keep lines within 76 columns, counting each tab as 8.  The width
        # is floored at 16: without the floor it turns negative at deep
        # nesting, which made the encoder return nothing and silently
        # dropped the payload.
        maxlinelength = max(
            16,
            76 - len(self.indent.replace(b"\t", b" " * 8) *
                     self.indentLevel))
        for line in data.asBase64(maxlinelength).split(b"\n"):
            if line:
                self.writeln(line)
        self.indentLevel += 1
        self.endElement("data")

    def writeDict(self, d):
        """Write a dict as a <dict> element with its items in sorted order.

        Raises TypeError if a key is not a str.
        """
        self.beginElement("dict")
        items = sorted(d.items())
        for key, value in items:
            if not isinstance(key, str):
                raise TypeError("keys must be strings")
            self.simpleElement("key", key)
            self.writeValue(value)
        self.endElement("dict")

    def writeArray(self, array):
        """Write a list or tuple as an <array> element."""
        self.beginElement("array")
        for value in array:
            self.writeValue(value)
        self.endElement("array")
class _InternalDict(dict):
    """dict whose keys can also be read, set and deleted as attributes.

    Every successful use of the attribute notation emits a
    PendingDeprecationWarning.
    """

    # This class is needed while Dict is scheduled for deprecation:
    # we only need to warn when a *user* instantiates Dict or when
    # the "attribute notation for dict keys" is used.

    def __getattr__(self, attr):
        try:
            found = self[attr]
        except KeyError:
            raise AttributeError(attr)
        else:
            # Warn only once the lookup is known to have succeeded.
            import warnings
            warnings.warn(
                "Attribute access from plist dicts is deprecated, use d[key] "
                "notation instead", PendingDeprecationWarning, 2)
            return found

    def __setattr__(self, attr, value):
        import warnings
        warnings.warn(
            "Attribute access from plist dicts is deprecated, use d[key] "
            "notation instead", PendingDeprecationWarning, 2)
        self[attr] = value

    def __delattr__(self, attr):
        try:
            del self[attr]
        except KeyError:
            raise AttributeError(attr)
        else:
            # Warn only once the key is known to have been removed.
            import warnings
            warnings.warn(
                "Attribute access from plist dicts is deprecated, use d[key] "
                "notation instead", PendingDeprecationWarning, 2)
class Dict(_InternalDict):
    """Deprecated dict type; creating an instance emits a
    PendingDeprecationWarning."""

    def __init__(self, **kwargs):
        import warnings
        warnings.warn(
            "The plistlib.Dict class is deprecated, use builtin dict instead",
            PendingDeprecationWarning, 2)
        super().__init__(**kwargs)
class Plist(_InternalDict):

    """This class has been deprecated. Use readPlist() and writePlist()
    functions instead, together with regular dict objects.
    """

    def __init__(self, **kwargs):
        from warnings import warn
        warn("The Plist class is deprecated, use the readPlist() and "
             "writePlist() functions instead", PendingDeprecationWarning, 2)
        super().__init__(**kwargs)

    @classmethod
    def fromFile(cls, pathOrFile):
        """Deprecated. Use the readPlist() function instead."""
        rootObject = readPlist(pathOrFile)
        # Copy the parsed items into a fresh instance of this class.
        plist = cls()
        plist.update(rootObject)
        return plist

    def write(self, pathOrFile):
        """Deprecated. Use the writePlist() function instead."""
        writePlist(self, pathOrFile)
def _encodeBase64(s, maxlinelength=76):
    """Return the bytes 's' base64-encoded as newline-terminated lines.

    Each line encodes at most (maxlinelength // 4) * 3 input bytes.  The
    chunk size never drops below 3 bytes (one 4-character base64 group),
    so a 'maxlinelength' below 4 still encodes all of the input.
    """
    # copied from base64.encodebytes(), with added maxlinelength argument
    # A chunk size of zero makes range() raise, and a negative one makes
    # it yield nothing (losing the data), hence the floor of one group.
    maxbinsize = max(maxlinelength // 4, 1) * 3
    pieces = []
    for i in range(0, len(s), maxbinsize):
        chunk = s[i : i + maxbinsize]
        pieces.append(binascii.b2a_base64(chunk))
    return b''.join(pieces)
class Data:

    """Wrapper for binary data.

    The wrapped bytes object is available as the 'data' attribute.
    """

    def __init__(self, data):
        if not isinstance(data, bytes):
            raise TypeError("data must be as bytes")
        self.data = data

    @classmethod
    def fromBase64(cls, data):
        """Create an instance from base64-encoded 'data'."""
        # base64.decodebytes just calls binascii.a2b_base64;
        # it seems overkill to use both base64 and binascii.
        return cls(binascii.a2b_base64(data))

    def asBase64(self, maxlinelength=76):
        """Return the wrapped bytes base64-encoded, split into lines
        according to 'maxlinelength'."""
        return _encodeBase64(self.data, maxlinelength)

    def __eq__(self, other):
        """Compare by payload with another instance or a bytes object."""
        if isinstance(other, self.__class__):
            return self.data == other.data
        elif isinstance(other, bytes):
            # The payload is always bytes, so bytes is the only plain
            # type it can meaningfully equal.
            return self.data == other
        else:
            # Let Python try the reflected operation and then fall back
            # to its default identity comparison.
            return NotImplemented

    def __repr__(self):
        return "%s(%s)" % (self.__class__.__name__, repr(self.data))
class PlistParser:
    """Build Python objects from plist XML using the expat parser.

    Containers under construction are kept on 'stack'; 'currentKey' holds
    the most recent <key> text until the value that follows it arrives.
    Elements without a begin_*/end_* handler (such as <plist> itself) are
    ignored.
    """

    def __init__(self):
        self.stack = []
        self.currentKey = None
        self.root = None
        # Character data collected since the last element start.
        self.data = []

    def parse(self, fileobj):
        """Parse the plist read from 'fileobj' and return its root object.

        Raises ValueError when a value appears where the enclosing
        container cannot take it (see addObject).
        """
        from xml.parsers.expat import ParserCreate
        parser = ParserCreate()
        parser.StartElementHandler = self.handleBeginElement
        parser.EndElementHandler = self.handleEndElement
        parser.CharacterDataHandler = self.handleData
        parser.ParseFile(fileobj)
        return self.root

    def handleBeginElement(self, element, attrs):
        # Discard text seen before this element (e.g. indentation).
        self.data = []
        handler = getattr(self, "begin_" + element, None)
        if handler is not None:
            handler(attrs)

    def handleEndElement(self, element):
        handler = getattr(self, "end_" + element, None)
        if handler is not None:
            handler()

    def handleData(self, data):
        self.data.append(data)

    def addObject(self, value):
        """Attach 'value' to the innermost open container, or make it the
        root object when no container is open.

        Raises ValueError if a <key> precedes a value that is not directly
        inside a <dict>, or if a value inside a <dict> has no <key>.
        """
        if self.currentKey is not None:
            if not self.stack or not isinstance(self.stack[-1], dict):
                raise ValueError("plist <key> found outside of a <dict>")
            self.stack[-1][self.currentKey] = value
            self.currentKey = None
        elif not self.stack:
            # this is the root object
            self.root = value
        else:
            if not isinstance(self.stack[-1], list):
                raise ValueError("plist <dict> value is missing its <key>")
            self.stack[-1].append(value)

    def getData(self):
        """Return the collected character data and reset the buffer."""
        data = ''.join(self.data)
        self.data = []
        return data

    # element handlers

    def begin_dict(self, attrs):
        d = _InternalDict()
        self.addObject(d)
        self.stack.append(d)

    def end_dict(self):
        self.stack.pop()

    def end_key(self):
        self.currentKey = self.getData()

    def begin_array(self, attrs):
        a = []
        self.addObject(a)
        self.stack.append(a)

    def end_array(self):
        self.stack.pop()

    def end_true(self):
        self.addObject(True)

    def end_false(self):
        self.addObject(False)

    def end_integer(self):
        self.addObject(int(self.getData()))

    def end_real(self):
        self.addObject(float(self.getData()))

    def end_string(self):
        self.addObject(self.getData())

    def end_data(self):
        self.addObject(Data.fromBase64(self.getData().encode("utf-8")))

    def end_date(self):
        self.addObject(_dateFromString(self.getData()))