Initialized merge tracking via "svnmerge" with revisions "1-73579" from
[python/dscho.git] / Lib / plistlib.py
blob5460e2b5dfecf75f71c2f7519c20636f83367599
1 r"""plistlib.py -- a tool to generate and parse MacOSX .plist files.
3 The property list (.plist) file format is a simple XML pickle supporting
4 basic object types, like dictionaries, lists, numbers and strings.
5 Usually the top level object is a dictionary.
7 To write out a plist file, use the writePlist(rootObject, pathOrFile)
8 function. 'rootObject' is the top level object, 'pathOrFile' is a
9 filename or a (writable) file object.
11 To parse a plist from a file, use the readPlist(pathOrFile) function,
12 with a file name or a (readable) file object as the only argument. It
13 returns the top level object (again, usually a dictionary).
15 To work with plist data in bytes objects, you can use readPlistFromBytes()
16 and writePlistToBytes().
18 Values can be strings, integers, floats, booleans, tuples, lists,
19 dictionaries, Data or datetime.datetime objects. String values (including
20 dictionary keys) may be unicode strings -- they will be written out as
21 UTF-8.
23 The <data> plist type is supported through the Data class. This is a
24 thin wrapper around a Python bytes object.
26 Generate Plist example:
28 pl = dict(
29 aString="Doodah",
30 aList=["A", "B", 12, 32.1, [1, 2, 3]],
31 aFloat = 0.1,
32 anInt = 728,
33 aDict=dict(
34 anotherString="<hello & hi there!>",
35 aUnicodeValue=u'M\xe4ssig, Ma\xdf',
36 aTrueValue=True,
37 aFalseValue=False,
39 someData = Data(b"<binary gunk>"),
40 someMoreData = Data(b"<lots of binary gunk>" * 10),
41 aDate = datetime.datetime.fromtimestamp(time.mktime(time.gmtime())),
43 # unicode keys are possible, but a little awkward to use:
44 pl[u'\xc5benraa'] = "That was a unicode key."
45 writePlist(pl, fileName)
47 Parse Plist example:
49 pl = readPlist(pathOrFile)
50 print(pl["aKey"])
51 """
# Names exported by ``from plistlib import *``.
# Note: the Plist and Dict classes have been deprecated.
__all__ = [
    "readPlist",
    "writePlist",
    "readPlistFromBytes",
    "writePlistToBytes",
    "Plist",
    "Data",
    "Dict",
]
60 import binascii
61 import datetime
62 from io import BytesIO
63 import re
def readPlist(pathOrFile):
    """Read a .plist file and return the unpacked root object.

    'pathOrFile' may either be a file name (str) or a (readable) binary
    file object.  The root object usually is a dictionary.

    A file opened here (i.e. when a file name was given) is always closed
    again, even if parsing raises; a file object passed in by the caller
    is left open.
    """
    didOpen = False
    if isinstance(pathOrFile, str):
        pathOrFile = open(pathOrFile, 'rb')
        didOpen = True
    try:
        return PlistParser().parse(pathOrFile)
    finally:
        # Only close what we opened ourselves.
        if didOpen:
            pathOrFile.close()
def writePlist(rootObject, pathOrFile):
    """Write 'rootObject' to a .plist file.

    'pathOrFile' may either be a file name (str) or a (writable) binary
    file object.

    A file opened here (i.e. when a file name was given) is always closed
    again, even if serialising 'rootObject' raises (for example a
    TypeError for an unsupported value); a file object passed in by the
    caller is left open.
    """
    didOpen = False
    if isinstance(pathOrFile, str):
        pathOrFile = open(pathOrFile, 'wb')
        didOpen = True
    try:
        writer = PlistWriter(pathOrFile)
        writer.writeln("<plist version=\"1.0\">")
        writer.writeValue(rootObject)
        writer.writeln("</plist>")
    finally:
        # Only close what we opened ourselves.
        if didOpen:
            pathOrFile.close()
def readPlistFromBytes(data):
    """Parse plist data held in a bytes object and return the root object.

    The bytes are wrapped in an in-memory binary stream and handed to
    readPlist().
    """
    stream = BytesIO(data)
    return readPlist(stream)
def writePlistToBytes(rootObject):
    """Serialise 'rootObject' with writePlist() and return the result as bytes.

    The plist is written into an in-memory binary stream whose contents
    are returned.
    """
    buffer = BytesIO()
    writePlist(rootObject, buffer)
    return buffer.getvalue()
class DumbXMLWriter:
    """Minimal line-oriented XML writer that emits indented, UTF-8 encoded lines.

    'file' must be a binary file-like object with a write() method.
    'indentLevel' is the starting nesting depth and 'indent' is the unit
    of indentation written once per level; it may be given as str or
    bytes.
    """

    def __init__(self, file, indentLevel=0, indent="\t"):
        # Everything written to 'file' is bytes, so a textual indent is
        # encoded once here; writing a str to a binary stream would fail.
        if isinstance(indent, str):
            indent = indent.encode('utf-8')
        self.file = file
        self.stack = []          # names of the currently open elements
        self.indentLevel = indentLevel
        self.indent = indent

    def beginElement(self, element):
        """Write the opening tag for 'element' and indent what follows."""
        self.stack.append(element)
        self.writeln("<%s>" % element)
        self.indentLevel += 1

    def endElement(self, element):
        """Write the closing tag for 'element', which must be the innermost
        open element."""
        assert self.indentLevel > 0
        # Pop outside the assert so the element stack is still maintained
        # when assertions are stripped (python -O).
        opened = self.stack.pop()
        assert opened == element
        self.indentLevel -= 1
        self.writeln("</%s>" % element)

    def simpleElement(self, element, value=None):
        """Write '<element>value</element>' on one line, with 'value'
        passed through _escape(); write an empty '<element/>' tag when
        'value' is None."""
        if value is not None:
            value = _escape(value)
            self.writeln("<%s>%s</%s>" % (element, value, element))
        else:
            self.writeln("<%s/>" % element)

    def writeln(self, line):
        """Write 'line' (str or bytes) at the current indentation followed
        by a newline; an empty line produces a bare newline."""
        if line:
            # plist has fixed encoding of utf-8
            if isinstance(line, str):
                line = line.encode('utf-8')
            self.file.write(self.indentLevel * self.indent)
            self.file.write(line)
        self.file.write(b'\n')
147 # Contents should conform to a subset of ISO 8601
148 # (in particular, YYYY '-' MM '-' DD 'T' HH ':' MM ':' SS 'Z'. Smaller units may be omitted with
149 # a loss of precision)
150 _dateParser = re.compile(r"(?P<year>\d\d\d\d)(?:-(?P<month>\d\d)(?:-(?P<day>\d\d)(?:T(?P<hour>\d\d)(?::(?P<minute>\d\d)(?::(?P<second>\d\d))?)?)?)?)?Z", re.ASCII)
152 def _dateFromString(s):
153 order = ('year', 'month', 'day', 'hour', 'minute', 'second')
154 gd = _dateParser.match(s).groupdict()
155 lst = []
156 for key in order:
157 val = gd[key]
158 if val is None:
159 break
160 lst.append(int(val))
161 return datetime.datetime(*lst)
163 def _dateToString(d):
164 return '%04d-%02d-%02dT%02d:%02d:%02dZ' % (
165 d.year, d.month, d.day,
166 d.hour, d.minute, d.second
170 # Regex to find any control chars, except for \t \n and \r
171 _controlCharPat = re.compile(
172 r"[\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0b\x0c\x0e\x0f"
173 r"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f]")
175 def _escape(text):
176 m = _controlCharPat.search(text)
177 if m is not None:
178 raise ValueError("strings can't contains control characters; "
179 "use plistlib.Data instead")
180 text = text.replace("\r\n", "\n") # convert DOS line endings
181 text = text.replace("\r", "\n") # convert Mac line endings
182 text = text.replace("&", "&amp;") # escape '&'
183 text = text.replace("<", "&lt;") # escape '<'
184 text = text.replace(">", "&gt;") # escape '>'
185 return text
# XML declaration and DOCTYPE written at the top of every plist file.
PLISTHEADER = (
    b'<?xml version="1.0" encoding="UTF-8"?>\n'
    b'<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">\n'
)
class PlistWriter(DumbXMLWriter):
    """XML writer that serialises Python values as plist elements.

    'file' must be a binary file-like object.  Unless 'writeHeader' is
    false, PLISTHEADER is written to it on construction.
    """

    def __init__(self, file, indentLevel=0, indent=b"\t", writeHeader=1):
        if writeHeader:
            file.write(PLISTHEADER)
        DumbXMLWriter.__init__(self, file, indentLevel, indent)

    def writeValue(self, value):
        """Write 'value' as the plist element matching its Python type.

        Raises TypeError for a type that has no plist representation.
        """
        if isinstance(value, str):
            self.simpleElement("string", value)
        elif isinstance(value, bool):
            # must switch for bool before int, as bool is a
            # subclass of int...
            if value:
                self.simpleElement("true")
            else:
                self.simpleElement("false")
        elif isinstance(value, int):
            self.simpleElement("integer", "%d" % value)
        elif isinstance(value, float):
            self.simpleElement("real", repr(value))
        elif isinstance(value, dict):
            self.writeDict(value)
        elif isinstance(value, Data):
            self.writeData(value)
        elif isinstance(value, datetime.datetime):
            self.simpleElement("date", _dateToString(value))
        elif isinstance(value, (tuple, list)):
            self.writeArray(value)
        else:
            raise TypeError("unsuported type: %s" % type(value))

    def writeData(self, data):
        """Write a Data object as a <data> element holding base64 lines."""
        self.beginElement("data")
        # The base64 lines are written at the same depth as the <data> tag.
        self.indentLevel -= 1
        indentWidth = len(self.indent.replace(b"\t", b" " * 8) *
                          self.indentLevel)
        # Aim for 76 columns including indentation (tabs counted as 8),
        # but never go below 16: at deep nesting the remaining width
        # would otherwise reach zero or go negative, which makes the
        # base64 chunking either fail or silently produce no output.
        maxlinelength = max(16, 76 - indentWidth)
        for line in data.asBase64(maxlinelength).split(b"\n"):
            if line:
                self.writeln(line)
        self.indentLevel += 1
        self.endElement("data")

    def writeDict(self, d):
        """Write a mapping as a <dict> element, entries sorted by key.

        Raises TypeError if a key is not a str.
        """
        self.beginElement("dict")
        items = sorted(d.items())
        for key, value in items:
            if not isinstance(key, str):
                raise TypeError("keys must be strings")
            self.simpleElement("key", key)
            self.writeValue(value)
        self.endElement("dict")

    def writeArray(self, array):
        """Write a sequence as an <array> element, one child per item."""
        self.beginElement("array")
        for value in array:
            self.writeValue(value)
        self.endElement("array")
253 class _InternalDict(dict):
255 # This class is needed while Dict is scheduled for deprecation:
256 # we only need to warn when a *user* instantiates Dict or when
257 # the "attribute notation for dict keys" is used.
259 def __getattr__(self, attr):
260 try:
261 value = self[attr]
262 except KeyError:
263 raise AttributeError(attr)
264 from warnings import warn
265 warn("Attribute access from plist dicts is deprecated, use d[key] "
266 "notation instead", PendingDeprecationWarning, 2)
267 return value
269 def __setattr__(self, attr, value):
270 from warnings import warn
271 warn("Attribute access from plist dicts is deprecated, use d[key] "
272 "notation instead", PendingDeprecationWarning, 2)
273 self[attr] = value
275 def __delattr__(self, attr):
276 try:
277 del self[attr]
278 except KeyError:
279 raise AttributeError(attr)
280 from warnings import warn
281 warn("Attribute access from plist dicts is deprecated, use d[key] "
282 "notation instead", PendingDeprecationWarning, 2)
class Dict(_InternalDict):
    """Deprecated dict subclass; instantiating it issues a
    PendingDeprecationWarning.  Use the builtin dict instead.
    """

    def __init__(self, **kwargs):
        import warnings
        warnings.warn(
            "The plistlib.Dict class is deprecated, use builtin dict instead",
            PendingDeprecationWarning, 2)
        super().__init__(**kwargs)
class Plist(_InternalDict):

    """This class has been deprecated. Use readPlist() and writePlist()
    functions instead, together with regular dict objects.
    """

    def __init__(self, **kwargs):
        import warnings
        warnings.warn(
            "The Plist class is deprecated, use the readPlist() and "
            "writePlist() functions instead", PendingDeprecationWarning, 2)
        super().__init__(**kwargs)

    @classmethod
    def fromFile(cls, pathOrFile):
        """Deprecated. Use the readPlist() function instead."""
        # Read first, then instantiate: the deprecation warning from
        # __init__ is only issued once the file has been parsed.
        contents = readPlist(pathOrFile)
        instance = cls()
        instance.update(contents)
        return instance

    def write(self, pathOrFile):
        """Deprecated. Use the writePlist() function instead."""
        writePlist(self, pathOrFile)
318 def _encodeBase64(s, maxlinelength=76):
319 # copied from base64.encodebytes(), with added maxlinelength argument
320 maxbinsize = (maxlinelength//4)*3
321 pieces = []
322 for i in range(0, len(s), maxbinsize):
323 chunk = s[i : i + maxbinsize]
324 pieces.append(binascii.b2a_base64(chunk))
325 return b''.join(pieces)
class Data:

    """Wrapper for binary data.

    The wrapped bytes object is available as the 'data' attribute.
    """

    def __init__(self, data):
        if not isinstance(data, bytes):
            raise TypeError("data must be as bytes")
        self.data = data

    @classmethod
    def fromBase64(cls, data):
        """Create an instance from base64-encoded 'data'."""
        # base64.decodebytes just calls binascii.a2b_base64;
        # it seems overkill to use both base64 and binascii.
        return cls(binascii.a2b_base64(data))

    def asBase64(self, maxlinelength=76):
        """Return the wrapped bytes base64-encoded via _encodeBase64()."""
        return _encodeBase64(self.data, maxlinelength)

    def __eq__(self, other):
        """Compare equal to another instance of this class, or to a plain
        bytes object, holding the same bytes; otherwise equal only to
        itself."""
        if isinstance(other, self.__class__):
            return self.data == other.data
        elif isinstance(other, bytes):
            # The payload is always bytes (enforced in __init__), so the
            # raw-value comparison has to be against bytes; comparing
            # against str could never be true.
            return self.data == other
        else:
            return id(self) == id(other)

    def __repr__(self):
        return "%s(%s)" % (self.__class__.__name__, repr(self.data))
class PlistParser:
    """Event-driven plist reader built on the expat XML parser.

    Element events are dispatched to 'begin_<tag>' / 'end_<tag>' methods
    when such a method exists; tags without a handler are ignored.
    """

    def __init__(self):
        self.stack = []          # containers (dicts/lists) currently open
        self.currentKey = None   # pending <key> text awaiting its value
        self.root = None         # top-level object, set by addObject()

    def parse(self, fileobj):
        """Parse the plist in the binary file object 'fileobj' and return
        the root object."""
        from xml.parsers.expat import ParserCreate
        expat = ParserCreate()
        expat.StartElementHandler = self.handleBeginElement
        expat.EndElementHandler = self.handleEndElement
        expat.CharacterDataHandler = self.handleData
        expat.ParseFile(fileobj)
        return self.root

    def handleBeginElement(self, element, attrs):
        # Each opening tag starts a fresh character-data buffer.
        self.data = []
        begin = getattr(self, "begin_" + element, None)
        if begin is not None:
            begin(attrs)

    def handleEndElement(self, element):
        end = getattr(self, "end_" + element, None)
        if end is not None:
            end()

    def handleData(self, data):
        self.data.append(data)

    def addObject(self, value):
        """Store 'value' under the pending key, as the root object, or as
        the next item of the innermost open container."""
        if self.currentKey is not None:
            self.stack[-1][self.currentKey] = value
            self.currentKey = None
        elif self.stack:
            self.stack[-1].append(value)
        else:
            # this is the root object
            self.root = value

    def getData(self):
        """Return the character data collected so far and reset the
        buffer."""
        text = ''.join(self.data)
        self.data = []
        return text

    # element handlers

    def begin_dict(self, attrs):
        container = _InternalDict()
        self.addObject(container)
        self.stack.append(container)

    def end_dict(self):
        self.stack.pop()

    def end_key(self):
        self.currentKey = self.getData()

    def begin_array(self, attrs):
        container = []
        self.addObject(container)
        self.stack.append(container)

    def end_array(self):
        self.stack.pop()

    def end_true(self):
        self.addObject(True)

    def end_false(self):
        self.addObject(False)

    def end_integer(self):
        self.addObject(int(self.getData()))

    def end_real(self):
        self.addObject(float(self.getData()))

    def end_string(self):
        self.addObject(self.getData())

    def end_data(self):
        encoded = self.getData().encode("utf-8")
        self.addObject(Data.fromBase64(encoded))

    def end_date(self):
        self.addObject(_dateFromString(self.getData()))