Merged revisions 75928 via svnmerge from
[python/dscho.git] / Modules / zipimport.c
blob770f18f69d9499d9682953bb968d4784e1e0eba7
1 #include "Python.h"
2 #include "structmember.h"
3 #include "osdefs.h"
4 #include "marshal.h"
5 #include <time.h>
/* Flag bits stored in st_zip_searchorder.type. */
#define IS_SOURCE   0x0
#define IS_BYTECODE 0x1
#define IS_PACKAGE  0x2

/* One candidate file-name suffix together with its IS_* flag bits. */
struct st_zip_searchorder {
    char suffix[14];
    int type;
};

/* zip_searchorder defines how we search for a module in the Zip
   archive: we first search for a package __init__, then for
   non-package .pyc, .pyo and .py entries.  PyInit_zipimport() swaps
   the .pyc and .pyo entries when Py_OptimizeFlag is set, and replaces
   the leading '/' of the package suffixes by SEP.  The table is
   terminated by an entry with an empty suffix. */
static struct st_zip_searchorder zip_searchorder[] = {
    {"/__init__.pyc", IS_PACKAGE | IS_BYTECODE},
    {"/__init__.pyo", IS_PACKAGE | IS_BYTECODE},
    {"/__init__.py",  IS_PACKAGE | IS_SOURCE},
    {".pyc",          IS_BYTECODE},
    {".pyo",          IS_BYTECODE},
    {".py",           IS_SOURCE},
    {"",              0}
};
32 /* zipimporter object definition and support */
34 typedef struct _zipimporter ZipImporter;
36 struct _zipimporter {
37 PyObject_HEAD
38 PyObject *archive; /* pathname of the Zip archive */
39 PyObject *prefix; /* file prefix: "a/sub/directory/" */
40 PyObject *files; /* dict with file info {path: toc_entry} */
43 static PyObject *ZipImportError;
44 static PyObject *zip_directory_cache = NULL;
46 /* forward decls */
47 static PyObject *read_directory(char *archive);
48 static PyObject *get_data(char *archive, PyObject *toc_entry);
49 static PyObject *get_module_code(ZipImporter *self, char *fullname,
50 int *p_ispackage, char **p_modpath);
53 #define ZipImporter_Check(op) PyObject_TypeCheck(op, &ZipImporter_Type)
56 /* zipimporter.__init__
57 Split the "subdirectory" from the Zip archive path, lookup a matching
58 entry in sys.path_importer_cache, fetch the file directory from there
59 if found, or else read it from the archive. */
60 static int
61 zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds)
63 char *path, *p, *prefix, buf[MAXPATHLEN+2];
64 size_t len;
66 if (!_PyArg_NoKeywords("zipimporter()", kwds))
67 return -1;
69 if (!PyArg_ParseTuple(args, "s:zipimporter", &path))
70 return -1;
72 len = strlen(path);
73 if (len == 0) {
74 PyErr_SetString(ZipImportError, "archive path is empty");
75 return -1;
77 if (len >= MAXPATHLEN) {
78 PyErr_SetString(ZipImportError,
79 "archive path too long");
80 return -1;
82 strcpy(buf, path);
84 #ifdef ALTSEP
85 for (p = buf; *p; p++) {
86 if (*p == ALTSEP)
87 *p = SEP;
89 #endif
91 path = NULL;
92 prefix = NULL;
93 for (;;) {
94 struct stat statbuf;
95 int rv;
97 rv = stat(buf, &statbuf);
98 if (rv == 0) {
99 /* it exists */
100 if (S_ISREG(statbuf.st_mode))
101 /* it's a file */
102 path = buf;
103 break;
105 /* back up one path element */
106 p = strrchr(buf, SEP);
107 if (prefix != NULL)
108 *prefix = SEP;
109 if (p == NULL)
110 break;
111 *p = '\0';
112 prefix = p;
114 if (path != NULL) {
115 PyObject *files;
116 files = PyDict_GetItemString(zip_directory_cache, path);
117 if (files == NULL) {
118 files = read_directory(buf);
119 if (files == NULL)
120 return -1;
121 if (PyDict_SetItemString(zip_directory_cache, path,
122 files) != 0)
123 return -1;
125 else
126 Py_INCREF(files);
127 self->files = files;
129 else {
130 PyErr_SetString(ZipImportError, "not a Zip file");
131 return -1;
134 if (prefix == NULL)
135 prefix = "";
136 else {
137 prefix++;
138 len = strlen(prefix);
139 if (prefix[len-1] != SEP) {
140 /* add trailing SEP */
141 prefix[len] = SEP;
142 prefix[len + 1] = '\0';
146 self->archive = PyUnicode_FromString(buf);
147 if (self->archive == NULL)
148 return -1;
150 self->prefix = PyUnicode_FromString(prefix);
151 if (self->prefix == NULL)
152 return -1;
154 return 0;
157 /* GC support. */
158 static int
159 zipimporter_traverse(PyObject *obj, visitproc visit, void *arg)
161 ZipImporter *self = (ZipImporter *)obj;
162 Py_VISIT(self->files);
163 return 0;
166 static void
167 zipimporter_dealloc(ZipImporter *self)
169 PyObject_GC_UnTrack(self);
170 Py_XDECREF(self->archive);
171 Py_XDECREF(self->prefix);
172 Py_XDECREF(self->files);
173 Py_TYPE(self)->tp_free((PyObject *)self);
176 static PyObject *
177 zipimporter_repr(ZipImporter *self)
179 char *archive = "???";
180 char *prefix = "";
182 if (self->archive != NULL && PyUnicode_Check(self->archive))
183 archive = _PyUnicode_AsString(self->archive);
184 if (self->prefix != NULL && PyUnicode_Check(self->prefix))
185 prefix = _PyUnicode_AsString(self->prefix);
186 if (prefix != NULL && *prefix)
187 return PyUnicode_FromFormat("<zipimporter object \"%.300s%c%.150s\">",
188 archive, SEP, prefix);
189 else
190 return PyUnicode_FromFormat("<zipimporter object \"%.300s\">",
191 archive);
/* Equivalent of fullname.split(".")[-1]: return a pointer to the part
   of 'fullname' that follows its last '.', or 'fullname' itself when
   it contains no '.'.  The result points into the caller's buffer. */
static char *
get_subname(char *fullname)
{
    char *last_dot = strrchr(fullname, '.');

    return (last_dot != NULL) ? last_dot + 1 : fullname;
}
206 /* Given a (sub)modulename, write the potential file path in the
207 archive (without extension) to the path buffer. Return the
208 length of the resulting string. */
209 static int
210 make_filename(char *prefix, char *name, char *path)
212 size_t len;
213 char *p;
215 len = strlen(prefix);
217 /* self.prefix + name [+ SEP + "__init__"] + ".py[co]" */
218 if (len + strlen(name) + 13 >= MAXPATHLEN) {
219 PyErr_SetString(ZipImportError, "path too long");
220 return -1;
223 strcpy(path, prefix);
224 strcpy(path + len, name);
225 for (p = path + len; *p; p++) {
226 if (*p == '.')
227 *p = SEP;
229 len += strlen(name);
230 assert(len < INT_MAX);
231 return (int)len;
/* Result codes of get_module_info(). */
enum zi_module_info {
    MI_ERROR,       /* lookup failed; an exception is set */
    MI_NOT_FOUND,   /* no matching entry in the archive */
    MI_MODULE,      /* found as a plain module */
    MI_PACKAGE      /* found as a package (__init__ entry) */
};
241 /* Return some information about a module. */
242 static enum zi_module_info
243 get_module_info(ZipImporter *self, char *fullname)
245 char *subname, path[MAXPATHLEN + 1];
246 int len;
247 struct st_zip_searchorder *zso;
249 subname = get_subname(fullname);
251 len = make_filename(_PyUnicode_AsString(self->prefix), subname, path);
252 if (len < 0)
253 return MI_ERROR;
255 for (zso = zip_searchorder; *zso->suffix; zso++) {
256 strcpy(path + len, zso->suffix);
257 if (PyDict_GetItemString(self->files, path) != NULL) {
258 if (zso->type & IS_PACKAGE)
259 return MI_PACKAGE;
260 else
261 return MI_MODULE;
264 return MI_NOT_FOUND;
267 /* Check whether we can satisfy the import of the module named by
268 'fullname'. Return self if we can, None if we can't. */
269 static PyObject *
270 zipimporter_find_module(PyObject *obj, PyObject *args)
272 ZipImporter *self = (ZipImporter *)obj;
273 PyObject *path = NULL;
274 char *fullname;
275 enum zi_module_info mi;
277 if (!PyArg_ParseTuple(args, "s|O:zipimporter.find_module",
278 &fullname, &path))
279 return NULL;
281 mi = get_module_info(self, fullname);
282 if (mi == MI_ERROR)
283 return NULL;
284 if (mi == MI_NOT_FOUND) {
285 Py_INCREF(Py_None);
286 return Py_None;
288 Py_INCREF(self);
289 return (PyObject *)self;
292 /* Load and return the module named by 'fullname'. */
293 static PyObject *
294 zipimporter_load_module(PyObject *obj, PyObject *args)
296 ZipImporter *self = (ZipImporter *)obj;
297 PyObject *code, *mod, *dict;
298 char *fullname, *modpath;
299 int ispackage;
301 if (!PyArg_ParseTuple(args, "s:zipimporter.load_module",
302 &fullname))
303 return NULL;
305 code = get_module_code(self, fullname, &ispackage, &modpath);
306 if (code == NULL)
307 return NULL;
309 mod = PyImport_AddModule(fullname);
310 if (mod == NULL) {
311 Py_DECREF(code);
312 return NULL;
314 dict = PyModule_GetDict(mod);
316 /* mod.__loader__ = self */
317 if (PyDict_SetItemString(dict, "__loader__", (PyObject *)self) != 0)
318 goto error;
320 if (ispackage) {
321 /* add __path__ to the module *before* the code gets
322 executed */
323 PyObject *pkgpath, *fullpath;
324 char *prefix = _PyUnicode_AsString(self->prefix);
325 char *subname = get_subname(fullname);
326 int err;
328 fullpath = PyUnicode_FromFormat("%s%c%s%s",
329 _PyUnicode_AsString(self->archive),
330 SEP,
331 prefix ? prefix : "",
332 subname);
333 if (fullpath == NULL)
334 goto error;
336 pkgpath = Py_BuildValue("[O]", fullpath);
337 Py_DECREF(fullpath);
338 if (pkgpath == NULL)
339 goto error;
340 err = PyDict_SetItemString(dict, "__path__", pkgpath);
341 Py_DECREF(pkgpath);
342 if (err != 0)
343 goto error;
345 mod = PyImport_ExecCodeModuleEx(fullname, code, modpath);
346 Py_DECREF(code);
347 if (Py_VerboseFlag)
348 PySys_WriteStderr("import %s # loaded from Zip %s\n",
349 fullname, modpath);
350 return mod;
351 error:
352 Py_DECREF(code);
353 Py_DECREF(mod);
354 return NULL;
357 /* Return a string matching __file__ for the named module */
358 static PyObject *
359 zipimporter_get_filename(PyObject *obj, PyObject *args)
361 ZipImporter *self = (ZipImporter *)obj;
362 PyObject *code;
363 char *fullname, *modpath;
364 int ispackage;
366 if (!PyArg_ParseTuple(args, "s:zipimporter.get_filename",
367 &fullname))
368 return NULL;
370 /* Deciding the filename requires working out where the code
371 would come from if the module was actually loaded */
372 code = get_module_code(self, fullname, &ispackage, &modpath);
373 if (code == NULL)
374 return NULL;
375 Py_DECREF(code); /* Only need the path info */
377 return PyUnicode_FromString(modpath);
380 /* Return a bool signifying whether the module is a package or not. */
381 static PyObject *
382 zipimporter_is_package(PyObject *obj, PyObject *args)
384 ZipImporter *self = (ZipImporter *)obj;
385 char *fullname;
386 enum zi_module_info mi;
388 if (!PyArg_ParseTuple(args, "s:zipimporter.is_package",
389 &fullname))
390 return NULL;
392 mi = get_module_info(self, fullname);
393 if (mi == MI_ERROR)
394 return NULL;
395 if (mi == MI_NOT_FOUND) {
396 PyErr_Format(ZipImportError, "can't find module '%.200s'",
397 fullname);
398 return NULL;
400 return PyBool_FromLong(mi == MI_PACKAGE);
403 static PyObject *
404 zipimporter_get_data(PyObject *obj, PyObject *args)
406 ZipImporter *self = (ZipImporter *)obj;
407 char *path;
408 #ifdef ALTSEP
409 char *p, buf[MAXPATHLEN + 1];
410 #endif
411 PyObject *toc_entry;
412 Py_ssize_t len;
413 char *archive_str;
415 if (!PyArg_ParseTuple(args, "s:zipimporter.get_data", &path))
416 return NULL;
418 #ifdef ALTSEP
419 if (strlen(path) >= MAXPATHLEN) {
420 PyErr_SetString(ZipImportError, "path too long");
421 return NULL;
423 strcpy(buf, path);
424 for (p = buf; *p; p++) {
425 if (*p == ALTSEP)
426 *p = SEP;
428 path = buf;
429 #endif
430 archive_str = _PyUnicode_AsStringAndSize(self->archive, &len);
431 if ((size_t)len < strlen(path) &&
432 strncmp(path, archive_str, len) == 0 &&
433 path[len] == SEP) {
434 path = path + len + 1;
437 toc_entry = PyDict_GetItemString(self->files, path);
438 if (toc_entry == NULL) {
439 PyErr_SetFromErrnoWithFilename(PyExc_IOError, path);
440 return NULL;
442 return get_data(archive_str, toc_entry);
445 static PyObject *
446 zipimporter_get_code(PyObject *obj, PyObject *args)
448 ZipImporter *self = (ZipImporter *)obj;
449 char *fullname;
451 if (!PyArg_ParseTuple(args, "s:zipimporter.get_code", &fullname))
452 return NULL;
454 return get_module_code(self, fullname, NULL, NULL);
457 static PyObject *
458 zipimporter_get_source(PyObject *obj, PyObject *args)
460 ZipImporter *self = (ZipImporter *)obj;
461 PyObject *toc_entry;
462 char *fullname, *subname, path[MAXPATHLEN+1];
463 int len;
464 enum zi_module_info mi;
466 if (!PyArg_ParseTuple(args, "s:zipimporter.get_source", &fullname))
467 return NULL;
469 mi = get_module_info(self, fullname);
470 if (mi == MI_ERROR)
471 return NULL;
472 if (mi == MI_NOT_FOUND) {
473 PyErr_Format(ZipImportError, "can't find module '%.200s'",
474 fullname);
475 return NULL;
477 subname = get_subname(fullname);
479 len = make_filename(_PyUnicode_AsString(self->prefix), subname, path);
480 if (len < 0)
481 return NULL;
483 if (mi == MI_PACKAGE) {
484 path[len] = SEP;
485 strcpy(path + len + 1, "__init__.py");
487 else
488 strcpy(path + len, ".py");
490 toc_entry = PyDict_GetItemString(self->files, path);
491 if (toc_entry != NULL) {
492 PyObject *bytes = get_data(_PyUnicode_AsString(self->archive), toc_entry);
493 PyObject *res = PyUnicode_FromString(PyBytes_AsString(bytes));
494 Py_XDECREF(bytes);
495 return res;
498 /* we have the module, but no source */
499 Py_INCREF(Py_None);
500 return Py_None;
503 PyDoc_STRVAR(doc_find_module,
504 "find_module(fullname, path=None) -> self or None.\n\
506 Search for a module specified by 'fullname'. 'fullname' must be the\n\
507 fully qualified (dotted) module name. It returns the zipimporter\n\
508 instance itself if the module was found, or None if it wasn't.\n\
509 The optional 'path' argument is ignored -- it's there for compatibility\n\
510 with the importer protocol.");
512 PyDoc_STRVAR(doc_load_module,
513 "load_module(fullname) -> module.\n\
515 Load the module specified by 'fullname'. 'fullname' must be the\n\
516 fully qualified (dotted) module name. It returns the imported\n\
517 module, or raises ZipImportError if it wasn't found.");
519 PyDoc_STRVAR(doc_get_data,
520 "get_data(pathname) -> string with file data.\n\
522 Return the data associated with 'pathname'. Raise IOError if\n\
523 the file wasn't found.");
525 PyDoc_STRVAR(doc_is_package,
526 "is_package(fullname) -> bool.\n\
528 Return True if the module specified by fullname is a package.\n\
529 Raise ZipImportError is the module couldn't be found.");
531 PyDoc_STRVAR(doc_get_code,
532 "get_code(fullname) -> code object.\n\
534 Return the code object for the specified module. Raise ZipImportError\n\
535 is the module couldn't be found.");
537 PyDoc_STRVAR(doc_get_source,
538 "get_source(fullname) -> source string.\n\
540 Return the source code for the specified module. Raise ZipImportError\n\
541 is the module couldn't be found, return None if the archive does\n\
542 contain the module, but has no source for it.");
545 PyDoc_STRVAR(doc_get_filename,
546 "get_filename(fullname) -> filename string.\n\
548 Return the filename for the specified module.");
550 static PyMethodDef zipimporter_methods[] = {
551 {"find_module", zipimporter_find_module, METH_VARARGS,
552 doc_find_module},
553 {"load_module", zipimporter_load_module, METH_VARARGS,
554 doc_load_module},
555 {"get_data", zipimporter_get_data, METH_VARARGS,
556 doc_get_data},
557 {"get_code", zipimporter_get_code, METH_VARARGS,
558 doc_get_code},
559 {"get_source", zipimporter_get_source, METH_VARARGS,
560 doc_get_source},
561 {"get_filename", zipimporter_get_filename, METH_VARARGS,
562 doc_get_filename},
563 {"is_package", zipimporter_is_package, METH_VARARGS,
564 doc_is_package},
565 {NULL, NULL} /* sentinel */
568 static PyMemberDef zipimporter_members[] = {
569 {"archive", T_OBJECT, offsetof(ZipImporter, archive), READONLY},
570 {"prefix", T_OBJECT, offsetof(ZipImporter, prefix), READONLY},
571 {"_files", T_OBJECT, offsetof(ZipImporter, files), READONLY},
572 {NULL}
575 PyDoc_STRVAR(zipimporter_doc,
576 "zipimporter(archivepath) -> zipimporter object\n\
578 Create a new zipimporter instance. 'archivepath' must be a path to\n\
579 a zipfile, or to a specific path inside a zipfile. For example, it can be\n\
580 '/tmp/myimport.zip', or '/tmp/myimport.zip/mydirectory', if mydirectory is a\n\
581 valid directory inside the archive.\n\
583 'ZipImportError is raised if 'archivepath' doesn't point to a valid Zip\n\
584 archive.\n\
586 The 'archive' attribute of zipimporter objects contains the name of the\n\
587 zipfile targeted.");
589 #define DEFERRED_ADDRESS(ADDR) 0
591 static PyTypeObject ZipImporter_Type = {
592 PyVarObject_HEAD_INIT(DEFERRED_ADDRESS(&PyType_Type), 0)
593 "zipimport.zipimporter",
594 sizeof(ZipImporter),
595 0, /* tp_itemsize */
596 (destructor)zipimporter_dealloc, /* tp_dealloc */
597 0, /* tp_print */
598 0, /* tp_getattr */
599 0, /* tp_setattr */
600 0, /* tp_reserved */
601 (reprfunc)zipimporter_repr, /* tp_repr */
602 0, /* tp_as_number */
603 0, /* tp_as_sequence */
604 0, /* tp_as_mapping */
605 0, /* tp_hash */
606 0, /* tp_call */
607 0, /* tp_str */
608 PyObject_GenericGetAttr, /* tp_getattro */
609 0, /* tp_setattro */
610 0, /* tp_as_buffer */
611 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
612 Py_TPFLAGS_HAVE_GC, /* tp_flags */
613 zipimporter_doc, /* tp_doc */
614 zipimporter_traverse, /* tp_traverse */
615 0, /* tp_clear */
616 0, /* tp_richcompare */
617 0, /* tp_weaklistoffset */
618 0, /* tp_iter */
619 0, /* tp_iternext */
620 zipimporter_methods, /* tp_methods */
621 zipimporter_members, /* tp_members */
622 0, /* tp_getset */
623 0, /* tp_base */
624 0, /* tp_dict */
625 0, /* tp_descr_get */
626 0, /* tp_descr_set */
627 0, /* tp_dictoffset */
628 (initproc)zipimporter_init, /* tp_init */
629 PyType_GenericAlloc, /* tp_alloc */
630 PyType_GenericNew, /* tp_new */
631 PyObject_GC_Del, /* tp_free */
/* implementation */

/* Given a buffer, return the long that is represented by the first
   4 bytes, encoded as little endian and interpreted as a signed
   32-bit two's complement value.  This partially reimplements
   marshal.c:r_long().

   The bytes are assembled in unsigned arithmetic so that no signed
   shift reaches the sign bit, and the sign is applied arithmetically
   so the result does not depend on the width of long. */
static long
get_long(unsigned char *buf)
{
    unsigned long bits;

    bits = (unsigned long)buf[0]
         | ((unsigned long)buf[1] << 8)
         | ((unsigned long)buf[2] << 16)
         | ((unsigned long)buf[3] << 24);

    if (bits & 0x80000000UL) {
        /* negative: value is bits - 2**32, computed without overflow
           as -(0xFFFFFFFF - bits) - 1 */
        return -(long)(0xFFFFFFFFUL - bits) - 1;
    }
    return (long)bits;
}
655 read_directory(archive) -> files dict (new reference)
657 Given a path to a Zip archive, build a dict, mapping file names
658 (local to the archive, using SEP as a separator) to toc entries.
660 A toc_entry is a tuple:
662 (__file__, # value to use for __file__, available for all files
663 compress, # compression kind; 0 for uncompressed
664 data_size, # size of compressed data on disk
665 file_size, # size of decompressed data
666 file_offset, # offset of file header from start of archive
667 time, # mod time of file (in dos format)
668 date, # mod data of file (in dos format)
669 crc, # crc checksum of the data
672 Directories can be recognized by the trailing SEP in the name,
673 data_size and file_offset are 0.
675 static PyObject *
676 read_directory(char *archive)
678 PyObject *files = NULL;
679 FILE *fp;
680 long compress, crc, data_size, file_size, file_offset, date, time;
681 long header_offset, name_size, header_size, header_position;
682 long i, l, count;
683 size_t length;
684 char path[MAXPATHLEN + 5];
685 char name[MAXPATHLEN + 5];
686 char *p, endof_central_dir[22];
687 long arc_offset; /* offset from beginning of file to start of zip-archive */
689 if (strlen(archive) > MAXPATHLEN) {
690 PyErr_SetString(PyExc_OverflowError,
691 "Zip path name is too long");
692 return NULL;
694 strcpy(path, archive);
696 fp = fopen(archive, "rb");
697 if (fp == NULL) {
698 PyErr_Format(ZipImportError, "can't open Zip file: "
699 "'%.200s'", archive);
700 return NULL;
702 fseek(fp, -22, SEEK_END);
703 header_position = ftell(fp);
704 if (fread(endof_central_dir, 1, 22, fp) != 22) {
705 fclose(fp);
706 PyErr_Format(ZipImportError, "can't read Zip file: "
707 "'%.200s'", archive);
708 return NULL;
710 if (get_long((unsigned char *)endof_central_dir) != 0x06054B50) {
711 /* Bad: End of Central Dir signature */
712 fclose(fp);
713 PyErr_Format(ZipImportError, "not a Zip file: "
714 "'%.200s'", archive);
715 return NULL;
718 header_size = get_long((unsigned char *)endof_central_dir + 12);
719 header_offset = get_long((unsigned char *)endof_central_dir + 16);
720 arc_offset = header_position - header_offset - header_size;
721 header_offset += arc_offset;
723 files = PyDict_New();
724 if (files == NULL)
725 goto error;
727 length = (long)strlen(path);
728 path[length] = SEP;
730 /* Start of Central Directory */
731 count = 0;
732 for (;;) {
733 PyObject *t;
734 int err;
736 fseek(fp, header_offset, 0); /* Start of file header */
737 l = PyMarshal_ReadLongFromFile(fp);
738 if (l != 0x02014B50)
739 break; /* Bad: Central Dir File Header */
740 fseek(fp, header_offset + 10, 0);
741 compress = PyMarshal_ReadShortFromFile(fp);
742 time = PyMarshal_ReadShortFromFile(fp);
743 date = PyMarshal_ReadShortFromFile(fp);
744 crc = PyMarshal_ReadLongFromFile(fp);
745 data_size = PyMarshal_ReadLongFromFile(fp);
746 file_size = PyMarshal_ReadLongFromFile(fp);
747 name_size = PyMarshal_ReadShortFromFile(fp);
748 header_size = 46 + name_size +
749 PyMarshal_ReadShortFromFile(fp) +
750 PyMarshal_ReadShortFromFile(fp);
751 fseek(fp, header_offset + 42, 0);
752 file_offset = PyMarshal_ReadLongFromFile(fp) + arc_offset;
753 if (name_size > MAXPATHLEN)
754 name_size = MAXPATHLEN;
756 p = name;
757 for (i = 0; i < name_size; i++) {
758 *p = (char)getc(fp);
759 if (*p == '/')
760 *p = SEP;
761 p++;
763 *p = 0; /* Add terminating null byte */
764 header_offset += header_size;
766 strncpy(path + length + 1, name, MAXPATHLEN - length - 1);
768 t = Py_BuildValue("siiiiiii", path, compress, data_size,
769 file_size, file_offset, time, date, crc);
770 if (t == NULL)
771 goto error;
772 err = PyDict_SetItemString(files, name, t);
773 Py_DECREF(t);
774 if (err != 0)
775 goto error;
776 count++;
778 fclose(fp);
779 if (Py_VerboseFlag)
780 PySys_WriteStderr("# zipimport: found %ld names in %s\n",
781 count, archive);
782 return files;
783 error:
784 fclose(fp);
785 Py_XDECREF(files);
786 return NULL;
789 /* Return the zlib.decompress function object, or NULL if zlib couldn't
790 be imported. The function is cached when found, so subsequent calls
791 don't import zlib again. Returns a *borrowed* reference.
792 XXX This makes zlib.decompress immortal. */
793 static PyObject *
794 get_decompress_func(void)
796 static PyObject *decompress = NULL;
798 if (decompress == NULL) {
799 PyObject *zlib;
800 static int importing_zlib = 0;
802 if (importing_zlib != 0)
803 /* Someone has a zlib.py[co] in their Zip file;
804 let's avoid a stack overflow. */
805 return NULL;
806 importing_zlib = 1;
807 zlib = PyImport_ImportModuleNoBlock("zlib");
808 importing_zlib = 0;
809 if (zlib != NULL) {
810 decompress = PyObject_GetAttrString(zlib,
811 "decompress");
812 Py_DECREF(zlib);
814 else
815 PyErr_Clear();
816 if (Py_VerboseFlag)
817 PySys_WriteStderr("# zipimport: zlib %s\n",
818 zlib != NULL ? "available": "UNAVAILABLE");
820 return decompress;
823 /* Given a path to a Zip file and a toc_entry, return the (uncompressed)
824 data as a new reference. */
825 static PyObject *
826 get_data(char *archive, PyObject *toc_entry)
828 PyObject *raw_data, *data = NULL, *decompress;
829 char *buf;
830 FILE *fp;
831 int err;
832 Py_ssize_t bytes_read = 0;
833 long l;
834 char *datapath;
835 long compress, data_size, file_size, file_offset, bytes_size;
836 long time, date, crc;
838 if (!PyArg_ParseTuple(toc_entry, "slllllll", &datapath, &compress,
839 &data_size, &file_size, &file_offset, &time,
840 &date, &crc)) {
841 return NULL;
844 fp = fopen(archive, "rb");
845 if (!fp) {
846 PyErr_Format(PyExc_IOError,
847 "zipimport: can not open file %s", archive);
848 return NULL;
851 /* Check to make sure the local file header is correct */
852 fseek(fp, file_offset, 0);
853 l = PyMarshal_ReadLongFromFile(fp);
854 if (l != 0x04034B50) {
855 /* Bad: Local File Header */
856 PyErr_Format(ZipImportError,
857 "bad local file header in %s",
858 archive);
859 fclose(fp);
860 return NULL;
862 fseek(fp, file_offset + 26, 0);
863 l = 30 + PyMarshal_ReadShortFromFile(fp) +
864 PyMarshal_ReadShortFromFile(fp); /* local header size */
865 file_offset += l; /* Start of file data */
867 bytes_size = compress == 0 ? data_size : data_size + 1;
868 if (bytes_size == 0)
869 bytes_size++;
870 raw_data = PyBytes_FromStringAndSize((char *)NULL, bytes_size);
872 if (raw_data == NULL) {
873 fclose(fp);
874 return NULL;
876 buf = PyBytes_AsString(raw_data);
878 err = fseek(fp, file_offset, 0);
879 if (err == 0)
880 bytes_read = fread(buf, 1, data_size, fp);
881 fclose(fp);
882 if (err || bytes_read != data_size) {
883 PyErr_SetString(PyExc_IOError,
884 "zipimport: can't read data");
885 Py_DECREF(raw_data);
886 return NULL;
889 if (compress != 0) {
890 buf[data_size] = 'Z'; /* saw this in zipfile.py */
891 data_size++;
893 buf[data_size] = '\0';
895 if (compress == 0) { /* data is not compressed */
896 data = PyBytes_FromStringAndSize(buf, data_size);
897 Py_DECREF(raw_data);
898 return data;
901 /* Decompress with zlib */
902 decompress = get_decompress_func();
903 if (decompress == NULL) {
904 PyErr_SetString(ZipImportError,
905 "can't decompress data; "
906 "zlib not available");
907 goto error;
909 data = PyObject_CallFunction(decompress, "Oi", raw_data, -15);
910 error:
911 Py_DECREF(raw_data);
912 return data;
/* Lenient date/time comparison function. The precision of the mtime
   in the archive is lower than the mtime stored in a .pyc: we
   must allow a difference of at most one second.
   Returns 1 if the two time stamps are that close, 0 otherwise. */
static int
eq_mtime(time_t t1, time_t t2)
{
    time_t delta = t1 - t2;

    /* dostime only stores even seconds, so be lenient */
    return ((delta < 0) ? -delta : delta) <= 1;
}
928 /* Given the contents of a .py[co] file in a buffer, unmarshal the data
929 and return the code object. Return None if it the magic word doesn't
930 match (we do this instead of raising an exception as we fall back
931 to .py if available and we don't want to mask other errors).
932 Returns a new reference. */
933 static PyObject *
934 unmarshal_code(char *pathname, PyObject *data, time_t mtime)
936 PyObject *code;
937 char *buf = PyBytes_AsString(data);
938 Py_ssize_t size = PyBytes_Size(data);
940 if (size <= 9) {
941 PyErr_SetString(ZipImportError,
942 "bad pyc data");
943 return NULL;
946 if (get_long((unsigned char *)buf) != PyImport_GetMagicNumber()) {
947 if (Py_VerboseFlag)
948 PySys_WriteStderr("# %s has bad magic\n",
949 pathname);
950 Py_INCREF(Py_None);
951 return Py_None; /* signal caller to try alternative */
954 if (mtime != 0 && !eq_mtime(get_long((unsigned char *)buf + 4),
955 mtime)) {
956 if (Py_VerboseFlag)
957 PySys_WriteStderr("# %s has bad mtime\n",
958 pathname);
959 Py_INCREF(Py_None);
960 return Py_None; /* signal caller to try alternative */
963 code = PyMarshal_ReadObjectFromString(buf + 8, size - 8);
964 if (code == NULL)
965 return NULL;
966 if (!PyCode_Check(code)) {
967 Py_DECREF(code);
968 PyErr_Format(PyExc_TypeError,
969 "compiled module %.200s is not a code object",
970 pathname);
971 return NULL;
973 return code;
976 /* Replace any occurances of "\r\n?" in the input string with "\n".
977 This converts DOS and Mac line endings to Unix line endings.
978 Also append a trailing "\n" to be compatible with
979 PyParser_SimpleParseFile(). Returns a new reference. */
980 static PyObject *
981 normalize_line_endings(PyObject *source)
983 char *buf, *q, *p = PyBytes_AsString(source);
984 PyObject *fixed_source;
985 int len = 0;
987 if (!p) {
988 return PyBytes_FromStringAndSize("\n\0", 2);
991 /* one char extra for trailing \n and one for terminating \0 */
992 buf = (char *)PyMem_Malloc(PyBytes_Size(source) + 2);
993 if (buf == NULL) {
994 PyErr_SetString(PyExc_MemoryError,
995 "zipimport: no memory to allocate "
996 "source buffer");
997 return NULL;
999 /* replace "\r\n?" by "\n" */
1000 for (q = buf; *p != '\0'; p++) {
1001 if (*p == '\r') {
1002 *q++ = '\n';
1003 if (*(p + 1) == '\n')
1004 p++;
1006 else
1007 *q++ = *p;
1008 len++;
1010 *q++ = '\n'; /* add trailing \n */
1011 *q = '\0';
1012 fixed_source = PyBytes_FromStringAndSize(buf, len + 2);
1013 PyMem_Free(buf);
1014 return fixed_source;
1017 /* Given a string buffer containing Python source code, compile it
1018 return and return a code object as a new reference. */
1019 static PyObject *
1020 compile_source(char *pathname, PyObject *source)
1022 PyObject *code, *fixed_source;
1024 fixed_source = normalize_line_endings(source);
1025 if (fixed_source == NULL)
1026 return NULL;
1028 code = Py_CompileString(PyBytes_AsString(fixed_source), pathname,
1029 Py_file_input);
1030 Py_DECREF(fixed_source);
1031 return code;
/* Convert the date/time values found in the Zip archive to a value
   that's compatible with the time stamp stored in .pyc files.

   dostime: bits 0-4 seconds/2, bits 5-10 minutes, bits 11-15 hours.
   dosdate: bits 0-4 day, bits 5-8 month (1-based), bits 9-15 years
   since 1980.  The broken-down time is converted with mktime(). */
static time_t
parse_dostime(int dostime, int dosdate)
{
    struct tm stm;

    memset((void *) &stm, '\0', sizeof(stm));

    stm.tm_year = ((dosdate >> 9) & 0x7f) + 80;
    stm.tm_mon  = ((dosdate >> 5) & 0x0f) - 1;
    stm.tm_mday = dosdate & 0x1f;
    stm.tm_hour = (dostime >> 11) & 0x1f;
    stm.tm_min  = (dostime >> 5) & 0x3f;
    stm.tm_sec  = (dostime & 0x1f) * 2;
    stm.tm_isdst = -1; /* wday/yday is ignored */

    return mktime(&stm);
}
1054 /* Given a path to a .pyc or .pyo file in the archive, return the
1055 modifictaion time of the matching .py file, or 0 if no source
1056 is available. */
1057 static time_t
1058 get_mtime_of_source(ZipImporter *self, char *path)
1060 PyObject *toc_entry;
1061 time_t mtime = 0;
1062 Py_ssize_t lastchar = strlen(path) - 1;
1063 char savechar = path[lastchar];
1064 path[lastchar] = '\0'; /* strip 'c' or 'o' from *.py[co] */
1065 toc_entry = PyDict_GetItemString(self->files, path);
1066 if (toc_entry != NULL && PyTuple_Check(toc_entry) &&
1067 PyTuple_Size(toc_entry) == 8) {
1068 /* fetch the time stamp of the .py file for comparison
1069 with an embedded pyc time stamp */
1070 int time, date;
1071 time = PyLong_AsLong(PyTuple_GetItem(toc_entry, 5));
1072 date = PyLong_AsLong(PyTuple_GetItem(toc_entry, 6));
1073 mtime = parse_dostime(time, date);
1075 path[lastchar] = savechar;
1076 return mtime;
1079 /* Return the code object for the module named by 'fullname' from the
1080 Zip archive as a new reference. */
1081 static PyObject *
1082 get_code_from_data(ZipImporter *self, int ispackage, int isbytecode,
1083 time_t mtime, PyObject *toc_entry)
1085 PyObject *data, *code;
1086 char *modpath;
1087 char *archive = _PyUnicode_AsString(self->archive);
1089 if (archive == NULL)
1090 return NULL;
1092 data = get_data(archive, toc_entry);
1093 if (data == NULL)
1094 return NULL;
1096 modpath = _PyUnicode_AsString(PyTuple_GetItem(toc_entry, 0));
1098 if (isbytecode) {
1099 code = unmarshal_code(modpath, data, mtime);
1101 else {
1102 code = compile_source(modpath, data);
1104 Py_DECREF(data);
1105 return code;
1108 /* Get the code object assoiciated with the module specified by
1109 'fullname'. */
1110 static PyObject *
1111 get_module_code(ZipImporter *self, char *fullname,
1112 int *p_ispackage, char **p_modpath)
1114 PyObject *toc_entry;
1115 char *subname, path[MAXPATHLEN + 1];
1116 int len;
1117 struct st_zip_searchorder *zso;
1119 subname = get_subname(fullname);
1121 len = make_filename(_PyUnicode_AsString(self->prefix), subname, path);
1122 if (len < 0)
1123 return NULL;
1125 for (zso = zip_searchorder; *zso->suffix; zso++) {
1126 PyObject *code = NULL;
1128 strcpy(path + len, zso->suffix);
1129 if (Py_VerboseFlag > 1)
1130 PySys_WriteStderr("# trying %s%c%s\n",
1131 _PyUnicode_AsString(self->archive),
1132 (int)SEP, path);
1133 toc_entry = PyDict_GetItemString(self->files, path);
1134 if (toc_entry != NULL) {
1135 time_t mtime = 0;
1136 int ispackage = zso->type & IS_PACKAGE;
1137 int isbytecode = zso->type & IS_BYTECODE;
1139 if (isbytecode)
1140 mtime = get_mtime_of_source(self, path);
1141 if (p_ispackage != NULL)
1142 *p_ispackage = ispackage;
1143 code = get_code_from_data(self, ispackage,
1144 isbytecode, mtime,
1145 toc_entry);
1146 if (code == Py_None) {
1147 /* bad magic number or non-matching mtime
1148 in byte code, try next */
1149 Py_DECREF(code);
1150 continue;
1152 if (code != NULL && p_modpath != NULL)
1153 *p_modpath = _PyUnicode_AsString(
1154 PyTuple_GetItem(toc_entry, 0));
1155 return code;
1158 PyErr_Format(ZipImportError, "can't find module '%.200s'", fullname);
1159 return NULL;
1163 /* Module init */
1165 PyDoc_STRVAR(zipimport_doc,
1166 "zipimport provides support for importing Python modules from Zip archives.\n\
1168 This module exports three objects:\n\
1169 - zipimporter: a class; its constructor takes a path to a Zip archive.\n\
1170 - ZipImportError: exception raised by zipimporter objects. It's a\n\
1171 subclass of ImportError, so it can be caught as ImportError, too.\n\
1172 - _zip_directory_cache: a dict, mapping archive paths to zip directory\n\
1173 info dicts, as used in zipimporter._files.\n\
1175 It is usually not needed to use the zipimport module explicitly; it is\n\
1176 used by the builtin import mechanism for sys.path items that are paths\n\
1177 to Zip archives.");
1179 static struct PyModuleDef zipimportmodule = {
1180 PyModuleDef_HEAD_INIT,
1181 "zipimport",
1182 zipimport_doc,
1184 NULL,
1185 NULL,
1186 NULL,
1187 NULL,
1188 NULL
1191 PyMODINIT_FUNC
1192 PyInit_zipimport(void)
1194 PyObject *mod;
1196 if (PyType_Ready(&ZipImporter_Type) < 0)
1197 return NULL;
1199 /* Correct directory separator */
1200 zip_searchorder[0].suffix[0] = SEP;
1201 zip_searchorder[1].suffix[0] = SEP;
1202 zip_searchorder[2].suffix[0] = SEP;
1203 if (Py_OptimizeFlag) {
1204 /* Reverse *.pyc and *.pyo */
1205 struct st_zip_searchorder tmp;
1206 tmp = zip_searchorder[0];
1207 zip_searchorder[0] = zip_searchorder[1];
1208 zip_searchorder[1] = tmp;
1209 tmp = zip_searchorder[3];
1210 zip_searchorder[3] = zip_searchorder[4];
1211 zip_searchorder[4] = tmp;
1214 mod = PyModule_Create(&zipimportmodule);
1215 if (mod == NULL)
1216 return NULL;
1218 ZipImportError = PyErr_NewException("zipimport.ZipImportError",
1219 PyExc_ImportError, NULL);
1220 if (ZipImportError == NULL)
1221 return NULL;
1223 Py_INCREF(ZipImportError);
1224 if (PyModule_AddObject(mod, "ZipImportError",
1225 ZipImportError) < 0)
1226 return NULL;
1228 Py_INCREF(&ZipImporter_Type);
1229 if (PyModule_AddObject(mod, "zipimporter",
1230 (PyObject *)&ZipImporter_Type) < 0)
1231 return NULL;
1233 zip_directory_cache = PyDict_New();
1234 if (zip_directory_cache == NULL)
1235 return NULL;
1236 Py_INCREF(zip_directory_cache);
1237 if (PyModule_AddObject(mod, "_zip_directory_cache",
1238 zip_directory_cache) < 0)
1239 return NULL;
1240 return mod;