1 # This Source Code Form is subject to the terms of the Mozilla Public
2 # License, v. 2.0. If a copy of the MPL was not distributed with this
3 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
5 # TODO: Eventually consolidate with mozpack.pkg module. This is kept separate
6 # for now because of the vast difference in API, and to avoid churn for the
7 # users of this module (docker images, macos SDK artifacts) when changes are
8 # necessary in mozpack.pkg
import bz2
import concurrent.futures
import io
import lzma
import os
import struct
import zlib
from collections import deque, namedtuple
from xml.etree.ElementTree import XML
class ZlibFile(object):
    """Read-only file-like wrapper that inflates a zlib stream on the fly.

    Compressed data is pulled from ``fileobj`` in ``io.DEFAULT_BUFFER_SIZE``
    pieces and decompressed incrementally, so the whole stream never has to
    be held in memory unless the caller asks for all of it.
    """

    def __init__(self, fileobj):
        """Wrap ``fileobj``, a binary file-like object yielding zlib data."""
        self.fileobj = fileobj
        self.decompressor = zlib.decompressobj()
        # Decompressed bytes that were produced but not yet returned to the
        # caller (a compressed read usually inflates to more than requested).
        self.buf = b""

    def read(self, length=None):
        """Return up to ``length`` decompressed bytes.

        Fewer bytes (possibly ``b""``) are returned only once the underlying
        file is exhausted. ``None`` or a negative ``length`` reads everything
        that is left.
        """
        unlimited = length is None or length < 0
        pieces = []
        size = 0
        # Start from whatever was left over by the previous call.
        data = self.buf
        self.buf = b""
        while True:
            if not unlimited and size + len(data) >= length:
                # Enough data: hand out what was asked for, keep the rest.
                cutoff = length - size
                pieces.append(data[:cutoff])
                self.buf = data[cutoff:]
                break
            pieces.append(data)
            size += len(data)
            raw = self.fileobj.read(io.DEFAULT_BUFFER_SIZE)
            if not raw:
                # End of the compressed input: return what we have.
                break
            data = self.decompressor.decompress(raw)
        return b"".join(pieces)
def unxar(fileobj):
    """Iterate over the regular files stored in a XAR archive.

    ``fileobj`` must be a seekable binary file positioned at the start of the
    archive. Yields ``(filename, content)`` pairs, where ``filename`` is the
    text of the entry's ``<name>`` element and ``content`` is a file-like
    object exposing ``read()`` over the decoded file data. ``content`` reads
    straight from ``fileobj``, so it must be consumed before the generator is
    advanced.

    Raises ``Exception`` when the header, table of contents or an entry's
    encoding is not something this reader understands.
    """
    magic = fileobj.read(4)
    if magic != b"xar!":
        raise Exception("Not a XAR?")

    header_size = fileobj.read(2)
    header_size = struct.unpack(">H", header_size)[0]
    # The fields parsed below need 28 bytes (6 already read + 22); anything
    # shorter cannot be a valid header. The upper bound is a sanity limit.
    if header_size < 28 or header_size > 64:
        raise Exception(
            f"Don't know how to handle a {header_size} bytes XAR header size"
        )
    header_size -= 6  # what we've read so far.
    header = fileobj.read(header_size)
    if len(header) != header_size:
        raise Exception("Failed to read XAR header")
    (
        version,
        compressed_toc_len,
        uncompressed_toc_len,
        _checksum_type,
    ) = struct.unpack(">HQQL", header[:22])
    if version != 1:
        raise Exception(f"XAR version {version} not supported")
    toc = fileobj.read(compressed_toc_len)
    # Data offsets in the table of contents are relative to the end of it.
    base = fileobj.tell()
    if len(toc) != compressed_toc_len:
        raise Exception("Failed to read XAR TOC")
    toc = zlib.decompress(toc)
    if len(toc) != uncompressed_toc_len:
        raise Exception("Corrupted XAR?")
    toc = XML(toc).find("toc")
    queue = deque(toc.findall("file"))
    while queue:
        f = queue.pop()
        # Directories nest their children as <file> sub-elements.
        queue.extend(f.iterfind("file"))
        if f.find("type").text != "file":
            continue
        filename = f.find("name").text
        data = f.find("data")
        length = int(data.find("length").text)
        size = int(data.find("size").text)
        offset = int(data.find("offset").text)
        encoding = data.find("encoding").get("style")
        fileobj.seek(base + offset, os.SEEK_SET)
        content = Take(fileobj, length)
        if encoding == "application/octet-stream":
            # Stored as-is: archived and extracted sizes must agree.
            if length != size:
                raise Exception(f"{length} != {size}")
        elif encoding == "application/x-bzip2":
            content = bz2.BZ2File(content)
        elif encoding == "application/x-gzip":
            # Despite the encoding saying gzip, it is in fact, a raw zlib stream.
            content = ZlibFile(content)
        else:
            raise Exception(f"XAR encoding {encoding} not supported")

        yield filename, content
def __init__(self, fileobj):
    """Start decoding the PBZX payload read from ``fileobj``.

    Raises ``Exception`` if the stream does not start with the PBZX magic.
    """
    magic = fileobj.read(4)
    if magic != b"pbzx":
        raise Exception("Not a PBZX payload?")
    # The first thing in the file looks like the size of each
    # decompressed chunk except the last one. It should match
    # decompressed_size in all cases except last, but we don't
    # check. It is still parsed so the stream advances past it.
    struct.unpack(">Q", fileobj.read(8))
    executor = concurrent.futures.ThreadPoolExecutor(max_workers=os.cpu_count())
    # Executor.map collects its input iterable immediately, so all chunks
    # are read and queued for decompression here; results come back in order.
    self.chunk_getter = executor.map(self._uncompress_chunk, self._chunker(fileobj))
    # Position of the next unread byte within the current chunk.
    self.offset = 0
    self._init_one_chunk()
@staticmethod
def _chunker(fileobj):
    """Yield ``(decompressed_size, compressed_size, chunk)`` for each chunk.

    Every chunk is preceded by a 16-byte header holding two big-endian
    64-bit sizes. Iteration stops at a clean end of file; a partial header
    or a chunk shorter than announced raises ``Exception``.
    """
    while True:
        header = fileobj.read(16)
        if not header:
            # Clean end of stream: no more chunks.
            break
        if len(header) != 16:
            raise Exception("Corrupted PBZX payload?")
        decompressed_size, compressed_size = struct.unpack(">QQ", header)
        chunk = fileobj.read(compressed_size)
        # A stored (uncompressed) chunk is never length-checked later on, so
        # catch truncation here.
        if len(chunk) != compressed_size:
            raise Exception("Corrupted PBZX payload?")
        yield decompressed_size, compressed_size, chunk
@staticmethod
def _uncompress_chunk(data):
    """Return the decompressed bytes of one chunk.

    ``data`` is a ``(decompressed_size, compressed_size, chunk)`` tuple.
    Equal sizes mean the chunk is stored as-is; otherwise it is LZMA data
    that must inflate to exactly ``decompressed_size`` bytes, else
    ``Exception`` is raised.
    """
    decompressed_size, compressed_size, chunk = data
    if compressed_size != decompressed_size:
        chunk = lzma.decompress(chunk)
        if len(chunk) != decompressed_size:
            raise Exception("Corrupted PBZX payload?")
    return chunk
def _init_one_chunk(self):
    """Make the next decompressed chunk current and rewind to its start.

    Once ``self.chunk_getter`` is exhausted the current chunk becomes
    ``b""``, which ``read`` treats as end of stream.
    """
    self.offset = 0
    # Use a bytes sentinel so the exhausted state has the same type as data.
    self.chunk = next(self.chunk_getter, b"")
def read(self, length=None):
    """Return up to ``length`` decompressed bytes, crossing chunk boundaries.

    ``length=None`` reads everything that is left. Fewer bytes than asked
    for (possibly ``b""``) are returned only at the end of the stream.
    """
    if length == 0:
        return b""
    pieces = []
    remaining = length  # None means "until the end of the stream"
    # An empty current chunk marks the end of the stream.
    while self.chunk:
        available = len(self.chunk) - self.offset
        if remaining is not None and remaining <= available:
            # The request is satisfied from within the current chunk.
            start = self.offset
            self.offset += remaining
            pieces.append(self.chunk[start : self.offset])
            break
        # Take the rest of this chunk and move on to the next one. This is
        # done in a loop rather than recursively so that reading across
        # many chunks cannot exhaust the recursion limit.
        pieces.append(self.chunk[self.offset :])
        if remaining is not None:
            remaining -= available
        self._init_one_chunk()
        # A fresh chunk is always consumed from its first byte.
        self.offset = 0
    return b"".join(pieces)
# File object wrapper that allows reading at most a certain number of bytes.
def __init__(self, fileobj, limit):
    """Wrap ``fileobj`` so that at most ``limit`` bytes can be read from it."""
    self.fileobj = fileobj
    # Number of bytes that may still be read; decremented by read().
    self.limit = limit
def read(self, length=None):
    """Read up to ``length`` bytes without going past the remaining limit.

    ``None`` or a negative ``length`` means "everything still allowed".
    Returns the bytes read, ``b""`` once the limit or the end of the
    underlying file has been reached.
    """
    if length is None or length < 0:
        # A negative size would make the underlying read() return the whole
        # file, bypassing the limit, so treat it like None.
        length = self.limit
    else:
        length = min(length, self.limit)
    result = self.fileobj.read(length)
    self.limit -= len(result)
    return result
CpioInfo = namedtuple("CpioInfo", ["mode", "nlink", "dev", "ino"])


def uncpio(fileobj):
    """Iterate over the entries of a CPIO archive in portable ASCII format.

    Yields ``(name, info, content)`` for every entry before the
    ``TRAILER!!!`` marker: ``name`` is the entry path as bytes with a
    leading ``.`` and then a leading ``/`` removed, ``info`` is a
    ``CpioInfo`` and ``content`` is a file-like object limited to the
    entry's data. Whatever the caller leaves unread in ``content`` is
    skipped when the generator is advanced.

    Raises ``Exception`` on an unknown magic, a file name that is not NUL
    terminated, or a file name with a ``..`` path component.
    """
    while True:
        magic = fileobj.read(6)
        # CPIO payloads in mac pkg files are using the portable ASCII format.
        if magic != b"070707":
            if magic.startswith(b"0707"):
                raise Exception("Unsupported CPIO format")
            raise Exception("Not a CPIO header")
        header = fileobj.read(70)
        # All header fields are fixed-width octal ASCII numbers.
        (
            dev,
            ino,
            mode,
            _uid,
            _gid,
            nlink,
            _rdev,
            _mtime,
            namesize,
            filesize,
        ) = struct.unpack(">6s6s6s6s6s6s6s11s6s11s", header)
        dev = int(dev, 8)
        ino = int(ino, 8)
        mode = int(mode, 8)
        nlink = int(nlink, 8)
        namesize = int(namesize, 8)
        filesize = int(filesize, 8)
        name = fileobj.read(namesize)
        # endswith() also covers an empty or truncated name.
        if not name.endswith(b"\0"):
            raise Exception("File name is not NUL terminated")
        name = name[:-1]
        if name == b"TRAILER!!!":
            break

        # Check whole path components: a substring test misses names such as
        # "/.." or "./..", which turn into ".." once the prefix is stripped.
        if b".." in name.split(b"/"):
            raise Exception(".. is forbidden in file name")
        # Strip the leading "." and then "/" (typically a "./" prefix).
        if name.startswith(b"."):
            name = name[1:]
        if name.startswith(b"/"):
            name = name[1:]
        content = Take(fileobj, filesize)
        yield name, CpioInfo(mode=mode, nlink=nlink, dev=dev, ino=ino), content
        # Ensure the content is totally consumed
        while content.read(4096):
            pass