1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 #ifndef nsZipArchive_h_
7 #define nsZipArchive_h_
9 #include "mozilla/Attributes.h"
11 #define ZIP_TABSIZE 256
13 (4 * 1024) /* Used as output buffer when deflating items to a file */
16 #include "zipstruct.h"
18 #include "nsISupportsImpl.h" // For mozilla::ThreadSafeAutoRefCnt
19 #include "mozilla/ArenaAllocator.h"
20 #include "mozilla/FileUtils.h"
21 #include "mozilla/FileLocation.h"
22 #include "mozilla/Mutex.h"
23 #include "mozilla/UniquePtr.h"
29 * This file defines some of the basic structures used by libjar to
30 * read Zip files. It makes use of zlib in order to do the decompression.
32 * A few notes on the classes/structs:
33 * nsZipArchive represents a single Zip file, and maintains an index
34 * of all the items in the file.
35 * nsZipItem represents a single item (file) in the Zip archive.
36 * nsZipFind represents the metadata involved in doing a search,
37 * and current state of the iteration of found objects.
38 * Thread-safe ('MT-safe') reading from the zipfile is performed through JARInputStream,
39 * which maintains its own file descriptor, allowing for multiple reads
40 * concurrently from the same zip file.
42 * nsZipArchives are accessed from multiple threads.
46 * nsZipItem -- a helper struct for nsZipArchive
48 * each nsZipItem represents one file in the archive and all the
49 * information needed to manipulate it.
51 class nsZipItem final
{
55 const char* Name() { return ((const char*)central
) + ZIPCENTRAL_SIZE
; }
57 uint32_t LocalOffset();
63 uint16_t Compression();
66 const uint8_t* GetExtraField(uint16_t aTag
, uint16_t* aBlockSize
);
70 const ZipCentral
* central
;
78 * nsZipArchive -- a class for reading the PKZIP file format.
81 class nsZipArchive final
{
82 friend class nsZipFind
;
84 /** destructing the object closes the archive */
88 static const char* sFileCorruptedReason
;
93 * @param aZipHandle The nsZipHandle used to access the zip
94 * @param aFd Optional PRFileDesc for Windows readahead optimization
97 static already_AddRefed
<nsZipArchive
> OpenArchive(nsZipHandle
* aZipHandle
,
98 PRFileDesc
* aFd
= nullptr);
103 * Convenience function that generates nsZipHandle
105 * @param aFile The file used to access the zip
106 * @return the opened nsZipArchive (presumably null on failure -- confirm)
108 static already_AddRefed
<nsZipArchive
> OpenArchive(nsIFile
* aFile
);
111 * Test the integrity of items in this archive by running
112 * a CRC check after extracting each item into a memory
113 * buffer. If an entry name is supplied only the
114 * specified item is tested. Else, if null is supplied
115 * then all the items in the archive are tested.
117 * @return status code
119 nsresult
Test(const char* aEntryName
);
123 * @param aEntryName Name of file in the archive
124 * @return pointer to nsZipItem
126 nsZipItem
* GetItem(const char* aEntryName
);
131 * @param zipEntry Item in the archive to extract
132 * @param outFD Filedescriptor to write contents to
133 * @param outFile File to write to
134 * @return status code
136 nsresult
ExtractFile(nsZipItem
* zipEntry
, nsIFile
* outFile
,
142 * Initializes a search for files in the archive. FindNext() returns
143 * the actual matches. The nsZipFind must be deleted when you're done with it.
145 * @param aPattern a string or RegExp pattern to search for
146 * (may be nullptr to find all files in archive)
147 * @param aFind a pointer to a pointer to a structure used
148 * in FindNext. In the case of an error this
149 * will be set to nullptr.
150 * @return status code
152 nsresult
FindInit(const char* aPattern
, nsZipFind
** aFind
);
155 * Gets an undependent handle to the mapped file.
157 nsZipHandle
* GetFD() const;
160 * Gets the data offset.
161 * @param aItem Pointer to nsZipItem
162 * returns 0 on failure.
164 uint32_t GetDataOffset(nsZipItem
* aItem
);
167 * Get pointer to the data of the item.
168 * @param aItem Pointer to nsZipItem
169 * returns null when zip file is corrupt.
171 const uint8_t* GetData(nsZipItem
* aItem
);
174 * Gets the amount of memory taken up by the archive's mapping.
177 int64_t SizeOfMapping();
182 NS_METHOD_(MozExternalRefCountType
) AddRef(void);
183 NS_METHOD_(MozExternalRefCountType
) Release(void);
186 nsZipArchive(nsZipHandle
* aZipHandle
, PRFileDesc
* aFd
, nsresult
& aRv
);
188 //--- private members ---
189 mozilla::ThreadSafeAutoRefCnt mRefCnt
; /* ref count */
192 // These fields are all effectively const after the constructor
194 const RefPtr
<nsZipHandle
> mFd
;
195 // file URI, for logging
197 // Is true if we use zipLog to log accesses in jar/zip archives. This helper
198 // variable avoids grabbing zipLog's lock when not necessary.
199 // Effectively const after constructor
202 mozilla::Mutex mLock
{"nsZipArchive"};
203 // all of the following members are guarded by mLock:
204 nsZipItem
* mFiles
[ZIP_TABSIZE
] MOZ_GUARDED_BY(mLock
);
205 mozilla::ArenaAllocator
<1024, sizeof(void*)> mArena
MOZ_GUARDED_BY(mLock
);
206 // Whether we synthesized the directory entries
207 bool mBuiltSynthetics
MOZ_GUARDED_BY(mLock
);
210 //--- private methods ---
211 nsZipItem
* CreateZipItem() MOZ_REQUIRES(mLock
);
212 nsresult
BuildFileList(PRFileDesc
* aFd
= nullptr);
213 nsresult
BuildSynthetics();
215 nsZipArchive
& operator=(const nsZipArchive
& rhs
) = delete;
216 nsZipArchive(const nsZipArchive
& rhs
) = delete;
222 * a helper class for nsZipArchive, representing a search
224 class nsZipFind final
{
226 nsZipFind(nsZipArchive
* aZip
, char* aPattern
, bool regExp
);
229 nsresult
FindNext(const char** aResult
, uint16_t* aNameLen
);
232 RefPtr
<nsZipArchive
> mArchive
;
238 nsZipFind
& operator=(const nsZipFind
& rhs
) = delete;
239 nsZipFind(const nsZipFind
& rhs
) = delete;
243 * nsZipCursor -- a low-level class for reading the individual items in a zip.
245 class nsZipCursor final
{
248 * Initializes the cursor
250 * @param aItem Item of interest
251 * @param aZip Archive
252 * @param aBuf Buffer used for decompression.
253 * This determines the maximum Read() size in the compressed case.
255 * @param aBufSize Buffer size
256 * @param doCRC When set to true Read() will check crc
258 nsZipCursor(nsZipItem
* aItem
, nsZipArchive
* aZip
, uint8_t* aBuf
= nullptr,
259 uint32_t aBufSize
= 0, bool doCRC
= false);
264 * Performs reads. In the compressed case it uses aBuf(passed in constructor),
265 * for stored files it returns a zero-copy buffer.
267 * @param aBytesRead Outparam for number of bytes read.
268 * @return data read or nullptr if item is corrupted.
270 uint8_t* Read(uint32_t* aBytesRead
) { return ReadOrCopy(aBytesRead
, false); }
273 * Performs a copy. It always uses aBuf(passed in constructor).
275 * @param aBytesRead Outparam for number of bytes read.
276 * @return data read or nullptr if item is corrupted.
278 uint8_t* Copy(uint32_t* aBytesRead
) { return ReadOrCopy(aBytesRead
, true); }
281 /* Actual implementation for both Read and Copy above */
282 uint8_t* ReadOrCopy(uint32_t* aBytesRead
, bool aCopy
);
293 * nsZipItemPtr - a RAII convenience class for reading the individual items in a
294 * zip. It reads whole files and does zero-copy IO for stored files. A buffer is
295 * allocated for decompression. Do not use when the file may be very large.
297 class nsZipItemPtr_base
{
300 * Initializes the reader
302 * @param aZip Archive
303 * @param aEntryName Archive membername
304 * @param doCRC When set to true Read() will check crc
306 nsZipItemPtr_base(nsZipArchive
* aZip
, const char* aEntryName
, bool doCRC
);
308 uint32_t Length() const { return mReadlen
; }
311 RefPtr
<nsZipHandle
> mZipHandle
;
312 mozilla::UniquePtr
<uint8_t[]> mAutoBuf
;
318 class nsZipItemPtr final
: public nsZipItemPtr_base
{
319 static_assert(sizeof(T
) == sizeof(char),
320 "This class cannot be used with larger T without re-examining"
321 " a number of assumptions.");
324 nsZipItemPtr(nsZipArchive
* aZip
, const char* aEntryName
, bool doCRC
= false)
325 : nsZipItemPtr_base(aZip
, aEntryName
, doCRC
) {}
327 * @return buffer containing the whole zip member or nullptr on error.
328 * The returned buffer is owned by nsZipItemReader.
330 const T
* Buffer() const { return (const T
*)mReturnBuf
; }
332 operator const T
*() const { return Buffer(); }
335 * Relinquish ownership of zip member if compressed.
336 * Copy member into a new buffer if uncompressed.
337 * @return a buffer with whole zip member. The caller takes ownership of it.
340 mozilla::UniquePtr
<T
[]> Forget() {
341 if (!mReturnBuf
) return nullptr;
342 // In the compressed case mReturnBuf is our own decompression buffer: give it up
343 if (mAutoBuf
.get() == mReturnBuf
) {
344 mReturnBuf
= nullptr;
345 return mozilla::UniquePtr
<T
[]>(reinterpret_cast<T
*>(mAutoBuf
.release()));
347 auto ret
= mozilla::MakeUnique
<T
[]>(Length());
348 memcpy(ret
.get(), mReturnBuf
, Length());
349 mReturnBuf
= nullptr;
354 class nsZipHandle final
{
355 friend class nsZipArchive
;
356 friend class nsZipFind
;
357 friend class mozilla::FileLocation
;
358 friend class nsJARInputStream
;
359 #if defined(XP_UNIX) && !defined(XP_DARWIN)
360 friend class MmapAccessScope
;
364 static nsresult
Init(nsIFile
* file
, nsZipHandle
** ret
,
365 PRFileDesc
** aFd
= nullptr);
366 static nsresult
Init(nsZipArchive
* zip
, const char* entry
, nsZipHandle
** ret
);
367 static nsresult
Init(const uint8_t* aData
, uint32_t aLen
, nsZipHandle
** aRet
);
369 NS_METHOD_(MozExternalRefCountType
) AddRef(void);
370 NS_METHOD_(MozExternalRefCountType
) Release(void);
372 int64_t SizeOfMapping();
374 nsresult
GetNSPRFileDesc(PRFileDesc
** aNSPRFileDesc
);
377 const uint8_t* mFileData
; /* pointer to zip data */
378 uint32_t mLen
; /* length of zip data */
379 mozilla::FileLocation mFile
; /* source file if any, for logging */
385 nsresult
findDataStart();
387 PRFileMap
* mMap
; /* nspr datastructure for mmap */
388 mozilla::AutoFDClose mNSPRFileDesc
;
389 mozilla::UniquePtr
<nsZipItemPtr
<uint8_t> > mBuf
;
390 mozilla::ThreadSafeAutoRefCnt mRefCnt
; /* ref count */
393 const uint8_t* mFileStart
; /* pointer to mmaped file */
394 uint32_t mTotalLen
; /* total length of the mmaped file */
396 /* Magic number for CRX type expressed in Big Endian since it is a literal */
397 static const uint32_t kCRXMagic
= 0x34327243;
400 nsresult
gZlibInit(z_stream
* zs
);
402 #endif /* nsZipArchive_h_ */