Manual py3k backport: [svn r74158] Issue #6218: Make io.BytesIO and io.StringIO pickl...
[python.git] / Modules / zipimport.c
blobb05de24b97850918151c2cf874a2865bed44b405
1 #include "Python.h"
2 #include "structmember.h"
3 #include "osdefs.h"
4 #include "marshal.h"
5 #include <time.h>
/* Bit flags describing one entry of the search-order table. */
#define IS_SOURCE   0x0
#define IS_BYTECODE 0x1
#define IS_PACKAGE  0x2

/* A candidate file-name suffix together with the IS_* flags that
   describe what kind of archive member it denotes. */
struct st_zip_searchorder {
    char suffix[14];    /* longest suffix, "/__init__.pyc", plus NUL */
    int type;           /* bitwise OR of IS_* flags */
};
17 /* zip_searchorder defines how we search for a module in the Zip
18 archive: we first search for a package __init__, then for
19 non-package .pyc, .pyo and .py entries. The .pyc and .pyo entries
20 are swapped by initzipimport() if we run in optimized mode. Also,
21 '/' is replaced by SEP there. */
22 static struct st_zip_searchorder zip_searchorder[] = {
23 {"/__init__.pyc", IS_PACKAGE | IS_BYTECODE},
24 {"/__init__.pyo", IS_PACKAGE | IS_BYTECODE},
25 {"/__init__.py", IS_PACKAGE | IS_SOURCE},
26 {".pyc", IS_BYTECODE},
27 {".pyo", IS_BYTECODE},
28 {".py", IS_SOURCE},
29 {"", 0}
32 /* zipimporter object definition and support */
34 typedef struct _zipimporter ZipImporter;
36 struct _zipimporter {
37 PyObject_HEAD
38 PyObject *archive; /* pathname of the Zip archive */
39 PyObject *prefix; /* file prefix: "a/sub/directory/" */
40 PyObject *files; /* dict with file info {path: toc_entry} */
43 static PyObject *ZipImportError;
44 static PyObject *zip_directory_cache = NULL;
46 /* forward decls */
47 static PyObject *read_directory(char *archive);
48 static PyObject *get_data(char *archive, PyObject *toc_entry);
49 static PyObject *get_module_code(ZipImporter *self, char *fullname,
50 int *p_ispackage, char **p_modpath);
53 #define ZipImporter_Check(op) PyObject_TypeCheck(op, &ZipImporter_Type)
56 /* zipimporter.__init__
57 Split the "subdirectory" from the Zip archive path, lookup a matching
58 entry in sys.path_importer_cache, fetch the file directory from there
59 if found, or else read it from the archive. */
60 static int
61 zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds)
63 char *path, *p, *prefix, buf[MAXPATHLEN+2];
64 size_t len;
66 if (!_PyArg_NoKeywords("zipimporter()", kwds))
67 return -1;
69 if (!PyArg_ParseTuple(args, "s:zipimporter",
70 &path))
71 return -1;
73 len = strlen(path);
74 if (len == 0) {
75 PyErr_SetString(ZipImportError, "archive path is empty");
76 return -1;
78 if (len >= MAXPATHLEN) {
79 PyErr_SetString(ZipImportError,
80 "archive path too long");
81 return -1;
83 strcpy(buf, path);
85 #ifdef ALTSEP
86 for (p = buf; *p; p++) {
87 if (*p == ALTSEP)
88 *p = SEP;
90 #endif
92 path = NULL;
93 prefix = NULL;
94 for (;;) {
95 #ifndef RISCOS
96 struct stat statbuf;
97 int rv;
99 rv = stat(buf, &statbuf);
100 if (rv == 0) {
101 /* it exists */
102 if (S_ISREG(statbuf.st_mode))
103 /* it's a file */
104 path = buf;
105 break;
107 #else
108 if (object_exists(buf)) {
109 /* it exists */
110 if (isfile(buf))
111 /* it's a file */
112 path = buf;
113 break;
115 #endif
116 /* back up one path element */
117 p = strrchr(buf, SEP);
118 if (prefix != NULL)
119 *prefix = SEP;
120 if (p == NULL)
121 break;
122 *p = '\0';
123 prefix = p;
125 if (path != NULL) {
126 PyObject *files;
127 files = PyDict_GetItemString(zip_directory_cache, path);
128 if (files == NULL) {
129 files = read_directory(buf);
130 if (files == NULL)
131 return -1;
132 if (PyDict_SetItemString(zip_directory_cache, path,
133 files) != 0)
134 return -1;
136 else
137 Py_INCREF(files);
138 self->files = files;
140 else {
141 PyErr_SetString(ZipImportError, "not a Zip file");
142 return -1;
145 if (prefix == NULL)
146 prefix = "";
147 else {
148 prefix++;
149 len = strlen(prefix);
150 if (prefix[len-1] != SEP) {
151 /* add trailing SEP */
152 prefix[len] = SEP;
153 prefix[len + 1] = '\0';
157 self->archive = PyString_FromString(buf);
158 if (self->archive == NULL)
159 return -1;
161 self->prefix = PyString_FromString(prefix);
162 if (self->prefix == NULL)
163 return -1;
165 return 0;
168 /* GC support. */
169 static int
170 zipimporter_traverse(PyObject *obj, visitproc visit, void *arg)
172 ZipImporter *self = (ZipImporter *)obj;
173 Py_VISIT(self->files);
174 return 0;
177 static void
178 zipimporter_dealloc(ZipImporter *self)
180 PyObject_GC_UnTrack(self);
181 Py_XDECREF(self->archive);
182 Py_XDECREF(self->prefix);
183 Py_XDECREF(self->files);
184 Py_TYPE(self)->tp_free((PyObject *)self);
187 static PyObject *
188 zipimporter_repr(ZipImporter *self)
190 char buf[500];
191 char *archive = "???";
192 char *prefix = "";
194 if (self->archive != NULL && PyString_Check(self->archive))
195 archive = PyString_AsString(self->archive);
196 if (self->prefix != NULL && PyString_Check(self->prefix))
197 prefix = PyString_AsString(self->prefix);
198 if (prefix != NULL && *prefix)
199 PyOS_snprintf(buf, sizeof(buf),
200 "<zipimporter object \"%.300s%c%.150s\">",
201 archive, SEP, prefix);
202 else
203 PyOS_snprintf(buf, sizeof(buf),
204 "<zipimporter object \"%.300s\">",
205 archive);
206 return PyString_FromString(buf);
/* Return the last dotted component of 'fullname', i.e. the equivalent
   of fullname.split(".")[-1].  The result points into 'fullname'. */
static char *
get_subname(char *fullname)
{
    char *last_dot = strrchr(fullname, '.');

    return (last_dot != NULL) ? last_dot + 1 : fullname;
}
221 /* Given a (sub)modulename, write the potential file path in the
222 archive (without extension) to the path buffer. Return the
223 length of the resulting string. */
224 static int
225 make_filename(char *prefix, char *name, char *path)
227 size_t len;
228 char *p;
230 len = strlen(prefix);
232 /* self.prefix + name [+ SEP + "__init__"] + ".py[co]" */
233 if (len + strlen(name) + 13 >= MAXPATHLEN) {
234 PyErr_SetString(ZipImportError, "path too long");
235 return -1;
238 strcpy(path, prefix);
239 strcpy(path + len, name);
240 for (p = path + len; *p; p++) {
241 if (*p == '.')
242 *p = SEP;
244 len += strlen(name);
245 assert(len < INT_MAX);
246 return (int)len;
/* Result of looking a module up in an archive's file table. */
enum zi_module_info {
    MI_ERROR,       /* lookup failed, exception set */
    MI_NOT_FOUND,   /* no matching archive member */
    MI_MODULE,      /* found as a plain module */
    MI_PACKAGE      /* found as a package (__init__ member) */
};
256 /* Return some information about a module. */
257 static enum zi_module_info
258 get_module_info(ZipImporter *self, char *fullname)
260 char *subname, path[MAXPATHLEN + 1];
261 int len;
262 struct st_zip_searchorder *zso;
264 subname = get_subname(fullname);
266 len = make_filename(PyString_AsString(self->prefix), subname, path);
267 if (len < 0)
268 return MI_ERROR;
270 for (zso = zip_searchorder; *zso->suffix; zso++) {
271 strcpy(path + len, zso->suffix);
272 if (PyDict_GetItemString(self->files, path) != NULL) {
273 if (zso->type & IS_PACKAGE)
274 return MI_PACKAGE;
275 else
276 return MI_MODULE;
279 return MI_NOT_FOUND;
282 /* Check whether we can satisfy the import of the module named by
283 'fullname'. Return self if we can, None if we can't. */
284 static PyObject *
285 zipimporter_find_module(PyObject *obj, PyObject *args)
287 ZipImporter *self = (ZipImporter *)obj;
288 PyObject *path = NULL;
289 char *fullname;
290 enum zi_module_info mi;
292 if (!PyArg_ParseTuple(args, "s|O:zipimporter.find_module",
293 &fullname, &path))
294 return NULL;
296 mi = get_module_info(self, fullname);
297 if (mi == MI_ERROR)
298 return NULL;
299 if (mi == MI_NOT_FOUND) {
300 Py_INCREF(Py_None);
301 return Py_None;
303 Py_INCREF(self);
304 return (PyObject *)self;
307 /* Load and return the module named by 'fullname'. */
308 static PyObject *
309 zipimporter_load_module(PyObject *obj, PyObject *args)
311 ZipImporter *self = (ZipImporter *)obj;
312 PyObject *code, *mod, *dict;
313 char *fullname, *modpath;
314 int ispackage;
316 if (!PyArg_ParseTuple(args, "s:zipimporter.load_module",
317 &fullname))
318 return NULL;
320 code = get_module_code(self, fullname, &ispackage, &modpath);
321 if (code == NULL)
322 return NULL;
324 mod = PyImport_AddModule(fullname);
325 if (mod == NULL) {
326 Py_DECREF(code);
327 return NULL;
329 dict = PyModule_GetDict(mod);
331 /* mod.__loader__ = self */
332 if (PyDict_SetItemString(dict, "__loader__", (PyObject *)self) != 0)
333 goto error;
335 if (ispackage) {
336 /* add __path__ to the module *before* the code gets
337 executed */
338 PyObject *pkgpath, *fullpath;
339 char *prefix = PyString_AsString(self->prefix);
340 char *subname = get_subname(fullname);
341 int err;
343 fullpath = PyString_FromFormat("%s%c%s%s",
344 PyString_AsString(self->archive),
345 SEP,
346 *prefix ? prefix : "",
347 subname);
348 if (fullpath == NULL)
349 goto error;
351 pkgpath = Py_BuildValue("[O]", fullpath);
352 Py_DECREF(fullpath);
353 if (pkgpath == NULL)
354 goto error;
355 err = PyDict_SetItemString(dict, "__path__", pkgpath);
356 Py_DECREF(pkgpath);
357 if (err != 0)
358 goto error;
360 mod = PyImport_ExecCodeModuleEx(fullname, code, modpath);
361 Py_DECREF(code);
362 if (Py_VerboseFlag)
363 PySys_WriteStderr("import %s # loaded from Zip %s\n",
364 fullname, modpath);
365 return mod;
366 error:
367 Py_DECREF(code);
368 Py_DECREF(mod);
369 return NULL;
372 /* Return a string matching __file__ for the named module */
373 static PyObject *
374 zipimporter_get_filename(PyObject *obj, PyObject *args)
376 ZipImporter *self = (ZipImporter *)obj;
377 PyObject *code;
378 char *fullname, *modpath;
379 int ispackage;
381 if (!PyArg_ParseTuple(args, "s:zipimporter.get_filename",
382 &fullname))
383 return NULL;
385 /* Deciding the filename requires working out where the code
386 would come from if the module was actually loaded */
387 code = get_module_code(self, fullname, &ispackage, &modpath);
388 if (code == NULL)
389 return NULL;
390 Py_DECREF(code); /* Only need the path info */
392 return PyString_FromString(modpath);
395 /* Return a bool signifying whether the module is a package or not. */
396 static PyObject *
397 zipimporter_is_package(PyObject *obj, PyObject *args)
399 ZipImporter *self = (ZipImporter *)obj;
400 char *fullname;
401 enum zi_module_info mi;
403 if (!PyArg_ParseTuple(args, "s:zipimporter.is_package",
404 &fullname))
405 return NULL;
407 mi = get_module_info(self, fullname);
408 if (mi == MI_ERROR)
409 return NULL;
410 if (mi == MI_NOT_FOUND) {
411 PyErr_Format(ZipImportError, "can't find module '%.200s'",
412 fullname);
413 return NULL;
415 return PyBool_FromLong(mi == MI_PACKAGE);
418 static PyObject *
419 zipimporter_get_data(PyObject *obj, PyObject *args)
421 ZipImporter *self = (ZipImporter *)obj;
422 char *path;
423 #ifdef ALTSEP
424 char *p, buf[MAXPATHLEN + 1];
425 #endif
426 PyObject *toc_entry;
427 Py_ssize_t len;
429 if (!PyArg_ParseTuple(args, "s:zipimporter.get_data", &path))
430 return NULL;
432 #ifdef ALTSEP
433 if (strlen(path) >= MAXPATHLEN) {
434 PyErr_SetString(ZipImportError, "path too long");
435 return NULL;
437 strcpy(buf, path);
438 for (p = buf; *p; p++) {
439 if (*p == ALTSEP)
440 *p = SEP;
442 path = buf;
443 #endif
444 len = PyString_Size(self->archive);
445 if ((size_t)len < strlen(path) &&
446 strncmp(path, PyString_AsString(self->archive), len) == 0 &&
447 path[len] == SEP) {
448 path = path + len + 1;
451 toc_entry = PyDict_GetItemString(self->files, path);
452 if (toc_entry == NULL) {
453 PyErr_SetFromErrnoWithFilename(PyExc_IOError, path);
454 return NULL;
456 return get_data(PyString_AsString(self->archive), toc_entry);
459 static PyObject *
460 zipimporter_get_code(PyObject *obj, PyObject *args)
462 ZipImporter *self = (ZipImporter *)obj;
463 char *fullname;
465 if (!PyArg_ParseTuple(args, "s:zipimporter.get_code", &fullname))
466 return NULL;
468 return get_module_code(self, fullname, NULL, NULL);
471 static PyObject *
472 zipimporter_get_source(PyObject *obj, PyObject *args)
474 ZipImporter *self = (ZipImporter *)obj;
475 PyObject *toc_entry;
476 char *fullname, *subname, path[MAXPATHLEN+1];
477 int len;
478 enum zi_module_info mi;
480 if (!PyArg_ParseTuple(args, "s:zipimporter.get_source", &fullname))
481 return NULL;
483 mi = get_module_info(self, fullname);
484 if (mi == MI_ERROR)
485 return NULL;
486 if (mi == MI_NOT_FOUND) {
487 PyErr_Format(ZipImportError, "can't find module '%.200s'",
488 fullname);
489 return NULL;
491 subname = get_subname(fullname);
493 len = make_filename(PyString_AsString(self->prefix), subname, path);
494 if (len < 0)
495 return NULL;
497 if (mi == MI_PACKAGE) {
498 path[len] = SEP;
499 strcpy(path + len + 1, "__init__.py");
501 else
502 strcpy(path + len, ".py");
504 toc_entry = PyDict_GetItemString(self->files, path);
505 if (toc_entry != NULL)
506 return get_data(PyString_AsString(self->archive), toc_entry);
508 /* we have the module, but no source */
509 Py_INCREF(Py_None);
510 return Py_None;
513 PyDoc_STRVAR(doc_find_module,
514 "find_module(fullname, path=None) -> self or None.\n\
516 Search for a module specified by 'fullname'. 'fullname' must be the\n\
517 fully qualified (dotted) module name. It returns the zipimporter\n\
518 instance itself if the module was found, or None if it wasn't.\n\
519 The optional 'path' argument is ignored -- it's there for compatibility\n\
520 with the importer protocol.");
522 PyDoc_STRVAR(doc_load_module,
523 "load_module(fullname) -> module.\n\
525 Load the module specified by 'fullname'. 'fullname' must be the\n\
526 fully qualified (dotted) module name. It returns the imported\n\
527 module, or raises ZipImportError if it wasn't found.");
529 PyDoc_STRVAR(doc_get_data,
530 "get_data(pathname) -> string with file data.\n\
532 Return the data associated with 'pathname'. Raise IOError if\n\
533 the file wasn't found.");
535 PyDoc_STRVAR(doc_is_package,
536 "is_package(fullname) -> bool.\n\
538 Return True if the module specified by fullname is a package.\n\
539 Raise ZipImportError is the module couldn't be found.");
541 PyDoc_STRVAR(doc_get_code,
542 "get_code(fullname) -> code object.\n\
544 Return the code object for the specified module. Raise ZipImportError\n\
545 is the module couldn't be found.");
547 PyDoc_STRVAR(doc_get_source,
548 "get_source(fullname) -> source string.\n\
550 Return the source code for the specified module. Raise ZipImportError\n\
551 is the module couldn't be found, return None if the archive does\n\
552 contain the module, but has no source for it.");
555 PyDoc_STRVAR(doc_get_filename,
556 "get_filename(fullname) -> filename string.\n\
558 Return the filename for the specified module.");
560 static PyMethodDef zipimporter_methods[] = {
561 {"find_module", zipimporter_find_module, METH_VARARGS,
562 doc_find_module},
563 {"load_module", zipimporter_load_module, METH_VARARGS,
564 doc_load_module},
565 {"get_data", zipimporter_get_data, METH_VARARGS,
566 doc_get_data},
567 {"get_code", zipimporter_get_code, METH_VARARGS,
568 doc_get_code},
569 {"get_source", zipimporter_get_source, METH_VARARGS,
570 doc_get_source},
571 {"get_filename", zipimporter_get_filename, METH_VARARGS,
572 doc_get_filename},
573 {"is_package", zipimporter_is_package, METH_VARARGS,
574 doc_is_package},
575 {NULL, NULL} /* sentinel */
578 static PyMemberDef zipimporter_members[] = {
579 {"archive", T_OBJECT, offsetof(ZipImporter, archive), READONLY},
580 {"prefix", T_OBJECT, offsetof(ZipImporter, prefix), READONLY},
581 {"_files", T_OBJECT, offsetof(ZipImporter, files), READONLY},
582 {NULL}
585 PyDoc_STRVAR(zipimporter_doc,
586 "zipimporter(archivepath) -> zipimporter object\n\
588 Create a new zipimporter instance. 'archivepath' must be a path to\n\
589 a zipfile, or to a specific path inside a zipfile. For example, it can be\n\
590 '/tmp/myimport.zip', or '/tmp/myimport.zip/mydirectory', if mydirectory is a\n\
591 valid directory inside the archive.\n\
593 'ZipImportError is raised if 'archivepath' doesn't point to a valid Zip\n\
594 archive.\n\
596 The 'archive' attribute of zipimporter objects contains the name of the\n\
597 zipfile targeted.");
599 #define DEFERRED_ADDRESS(ADDR) 0
601 static PyTypeObject ZipImporter_Type = {
602 PyVarObject_HEAD_INIT(DEFERRED_ADDRESS(&PyType_Type), 0)
603 "zipimport.zipimporter",
604 sizeof(ZipImporter),
605 0, /* tp_itemsize */
606 (destructor)zipimporter_dealloc, /* tp_dealloc */
607 0, /* tp_print */
608 0, /* tp_getattr */
609 0, /* tp_setattr */
610 0, /* tp_compare */
611 (reprfunc)zipimporter_repr, /* tp_repr */
612 0, /* tp_as_number */
613 0, /* tp_as_sequence */
614 0, /* tp_as_mapping */
615 0, /* tp_hash */
616 0, /* tp_call */
617 0, /* tp_str */
618 PyObject_GenericGetAttr, /* tp_getattro */
619 0, /* tp_setattro */
620 0, /* tp_as_buffer */
621 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
622 Py_TPFLAGS_HAVE_GC, /* tp_flags */
623 zipimporter_doc, /* tp_doc */
624 zipimporter_traverse, /* tp_traverse */
625 0, /* tp_clear */
626 0, /* tp_richcompare */
627 0, /* tp_weaklistoffset */
628 0, /* tp_iter */
629 0, /* tp_iternext */
630 zipimporter_methods, /* tp_methods */
631 zipimporter_members, /* tp_members */
632 0, /* tp_getset */
633 0, /* tp_base */
634 0, /* tp_dict */
635 0, /* tp_descr_get */
636 0, /* tp_descr_set */
637 0, /* tp_dictoffset */
638 (initproc)zipimporter_init, /* tp_init */
639 PyType_GenericAlloc, /* tp_alloc */
640 PyType_GenericNew, /* tp_new */
641 PyObject_GC_Del, /* tp_free */
/* implementation */

/* Given a buffer, return the long that is represented by the first
   4 bytes, encoded as little endian and interpreted as a signed 32-bit
   value.  This partially reimplements marshal.c:r_long().

   The bytes are assembled in an unsigned type so that no shift ever
   reaches a sign bit, and the sign extension is done arithmetically so
   the result is the same whatever the width of 'long'. */
static long
get_long(unsigned char *buf)
{
    unsigned long bits;

    bits = (unsigned long)buf[0]
         | ((unsigned long)buf[1] << 8)
         | ((unsigned long)buf[2] << 16)
         | ((unsigned long)buf[3] << 24);

    if (bits & 0x80000000UL) {
        /* negative value: -(2**32 - bits), computed without overflow */
        return -(long)(0xFFFFFFFFUL - bits) - 1;
    }
    return (long)bits;
}
665 read_directory(archive) -> files dict (new reference)
667 Given a path to a Zip archive, build a dict, mapping file names
668 (local to the archive, using SEP as a separator) to toc entries.
670 A toc_entry is a tuple:
672 (__file__, # value to use for __file__, available for all files
673 compress, # compression kind; 0 for uncompressed
674 data_size, # size of compressed data on disk
675 file_size, # size of decompressed data
676 file_offset, # offset of file header from start of archive
677 time, # mod time of file (in dos format)
678 date, # mod data of file (in dos format)
679 crc, # crc checksum of the data
682 Directories can be recognized by the trailing SEP in the name,
683 data_size and file_offset are 0.
685 static PyObject *
686 read_directory(char *archive)
688 PyObject *files = NULL;
689 FILE *fp;
690 long compress, crc, data_size, file_size, file_offset, date, time;
691 long header_offset, name_size, header_size, header_position;
692 long i, l, count;
693 size_t length;
694 char path[MAXPATHLEN + 5];
695 char name[MAXPATHLEN + 5];
696 char *p, endof_central_dir[22];
697 long arc_offset; /* offset from beginning of file to start of zip-archive */
699 if (strlen(archive) > MAXPATHLEN) {
700 PyErr_SetString(PyExc_OverflowError,
701 "Zip path name is too long");
702 return NULL;
704 strcpy(path, archive);
706 fp = fopen(archive, "rb");
707 if (fp == NULL) {
708 PyErr_Format(ZipImportError, "can't open Zip file: "
709 "'%.200s'", archive);
710 return NULL;
712 fseek(fp, -22, SEEK_END);
713 header_position = ftell(fp);
714 if (fread(endof_central_dir, 1, 22, fp) != 22) {
715 fclose(fp);
716 PyErr_Format(ZipImportError, "can't read Zip file: "
717 "'%.200s'", archive);
718 return NULL;
720 if (get_long((unsigned char *)endof_central_dir) != 0x06054B50) {
721 /* Bad: End of Central Dir signature */
722 fclose(fp);
723 PyErr_Format(ZipImportError, "not a Zip file: "
724 "'%.200s'", archive);
725 return NULL;
728 header_size = get_long((unsigned char *)endof_central_dir + 12);
729 header_offset = get_long((unsigned char *)endof_central_dir + 16);
730 arc_offset = header_position - header_offset - header_size;
731 header_offset += arc_offset;
733 files = PyDict_New();
734 if (files == NULL)
735 goto error;
737 length = (long)strlen(path);
738 path[length] = SEP;
740 /* Start of Central Directory */
741 count = 0;
742 for (;;) {
743 PyObject *t;
744 int err;
746 fseek(fp, header_offset, 0); /* Start of file header */
747 l = PyMarshal_ReadLongFromFile(fp);
748 if (l != 0x02014B50)
749 break; /* Bad: Central Dir File Header */
750 fseek(fp, header_offset + 10, 0);
751 compress = PyMarshal_ReadShortFromFile(fp);
752 time = PyMarshal_ReadShortFromFile(fp);
753 date = PyMarshal_ReadShortFromFile(fp);
754 crc = PyMarshal_ReadLongFromFile(fp);
755 data_size = PyMarshal_ReadLongFromFile(fp);
756 file_size = PyMarshal_ReadLongFromFile(fp);
757 name_size = PyMarshal_ReadShortFromFile(fp);
758 header_size = 46 + name_size +
759 PyMarshal_ReadShortFromFile(fp) +
760 PyMarshal_ReadShortFromFile(fp);
761 fseek(fp, header_offset + 42, 0);
762 file_offset = PyMarshal_ReadLongFromFile(fp) + arc_offset;
763 if (name_size > MAXPATHLEN)
764 name_size = MAXPATHLEN;
766 p = name;
767 for (i = 0; i < name_size; i++) {
768 *p = (char)getc(fp);
769 if (*p == '/')
770 *p = SEP;
771 p++;
773 *p = 0; /* Add terminating null byte */
774 header_offset += header_size;
776 strncpy(path + length + 1, name, MAXPATHLEN - length - 1);
778 t = Py_BuildValue("siiiiiii", path, compress, data_size,
779 file_size, file_offset, time, date, crc);
780 if (t == NULL)
781 goto error;
782 err = PyDict_SetItemString(files, name, t);
783 Py_DECREF(t);
784 if (err != 0)
785 goto error;
786 count++;
788 fclose(fp);
789 if (Py_VerboseFlag)
790 PySys_WriteStderr("# zipimport: found %ld names in %s\n",
791 count, archive);
792 return files;
793 error:
794 fclose(fp);
795 Py_XDECREF(files);
796 return NULL;
799 /* Return the zlib.decompress function object, or NULL if zlib couldn't
800 be imported. The function is cached when found, so subsequent calls
801 don't import zlib again. Returns a *borrowed* reference.
802 XXX This makes zlib.decompress immortal. */
803 static PyObject *
804 get_decompress_func(void)
806 static PyObject *decompress = NULL;
808 if (decompress == NULL) {
809 PyObject *zlib;
810 static int importing_zlib = 0;
812 if (importing_zlib != 0)
813 /* Someone has a zlib.py[co] in their Zip file;
814 let's avoid a stack overflow. */
815 return NULL;
816 importing_zlib = 1;
817 zlib = PyImport_ImportModuleNoBlock("zlib");
818 importing_zlib = 0;
819 if (zlib != NULL) {
820 decompress = PyObject_GetAttrString(zlib,
821 "decompress");
822 Py_DECREF(zlib);
824 else
825 PyErr_Clear();
826 if (Py_VerboseFlag)
827 PySys_WriteStderr("# zipimport: zlib %s\n",
828 zlib != NULL ? "available": "UNAVAILABLE");
830 return decompress;
833 /* Given a path to a Zip file and a toc_entry, return the (uncompressed)
834 data as a new reference. */
835 static PyObject *
836 get_data(char *archive, PyObject *toc_entry)
838 PyObject *raw_data, *data = NULL, *decompress;
839 char *buf;
840 FILE *fp;
841 int err;
842 Py_ssize_t bytes_read = 0;
843 long l;
844 char *datapath;
845 long compress, data_size, file_size, file_offset;
846 long time, date, crc;
848 if (!PyArg_ParseTuple(toc_entry, "slllllll", &datapath, &compress,
849 &data_size, &file_size, &file_offset, &time,
850 &date, &crc)) {
851 return NULL;
854 fp = fopen(archive, "rb");
855 if (!fp) {
856 PyErr_Format(PyExc_IOError,
857 "zipimport: can not open file %s", archive);
858 return NULL;
861 /* Check to make sure the local file header is correct */
862 fseek(fp, file_offset, 0);
863 l = PyMarshal_ReadLongFromFile(fp);
864 if (l != 0x04034B50) {
865 /* Bad: Local File Header */
866 PyErr_Format(ZipImportError,
867 "bad local file header in %s",
868 archive);
869 fclose(fp);
870 return NULL;
872 fseek(fp, file_offset + 26, 0);
873 l = 30 + PyMarshal_ReadShortFromFile(fp) +
874 PyMarshal_ReadShortFromFile(fp); /* local header size */
875 file_offset += l; /* Start of file data */
877 raw_data = PyString_FromStringAndSize((char *)NULL, compress == 0 ?
878 data_size : data_size + 1);
879 if (raw_data == NULL) {
880 fclose(fp);
881 return NULL;
883 buf = PyString_AsString(raw_data);
885 err = fseek(fp, file_offset, 0);
886 if (err == 0)
887 bytes_read = fread(buf, 1, data_size, fp);
888 fclose(fp);
889 if (err || bytes_read != data_size) {
890 PyErr_SetString(PyExc_IOError,
891 "zipimport: can't read data");
892 Py_DECREF(raw_data);
893 return NULL;
896 if (compress != 0) {
897 buf[data_size] = 'Z'; /* saw this in zipfile.py */
898 data_size++;
900 buf[data_size] = '\0';
902 if (compress == 0) /* data is not compressed */
903 return raw_data;
905 /* Decompress with zlib */
906 decompress = get_decompress_func();
907 if (decompress == NULL) {
908 PyErr_SetString(ZipImportError,
909 "can't decompress data; "
910 "zlib not available");
911 goto error;
913 data = PyObject_CallFunction(decompress, "Oi", raw_data, -15);
914 error:
915 Py_DECREF(raw_data);
916 return data;
/* Lenient date/time comparison function. The precision of the mtime
   in the archive is lower than the mtime stored in a .pyc: we
   must allow a difference of at most one second.

   Returns 1 if the two time stamps differ by at most one second, else 0.
   The larger value is always the minuend, so the result is also right
   when time_t is an unsigned type. */
static int
eq_mtime(time_t t1, time_t t2)
{
    time_t d = (t1 > t2) ? t1 - t2 : t2 - t1;

    /* dostime only stores even seconds, so be lenient */
    return d <= 1;
}
932 /* Given the contents of a .py[co] file in a buffer, unmarshal the data
933 and return the code object. Return None if it the magic word doesn't
934 match (we do this instead of raising an exception as we fall back
935 to .py if available and we don't want to mask other errors).
936 Returns a new reference. */
937 static PyObject *
938 unmarshal_code(char *pathname, PyObject *data, time_t mtime)
940 PyObject *code;
941 char *buf = PyString_AsString(data);
942 Py_ssize_t size = PyString_Size(data);
944 if (size <= 9) {
945 PyErr_SetString(ZipImportError,
946 "bad pyc data");
947 return NULL;
950 if (get_long((unsigned char *)buf) != PyImport_GetMagicNumber()) {
951 if (Py_VerboseFlag)
952 PySys_WriteStderr("# %s has bad magic\n",
953 pathname);
954 Py_INCREF(Py_None);
955 return Py_None; /* signal caller to try alternative */
958 if (mtime != 0 && !eq_mtime(get_long((unsigned char *)buf + 4),
959 mtime)) {
960 if (Py_VerboseFlag)
961 PySys_WriteStderr("# %s has bad mtime\n",
962 pathname);
963 Py_INCREF(Py_None);
964 return Py_None; /* signal caller to try alternative */
967 code = PyMarshal_ReadObjectFromString(buf + 8, size - 8);
968 if (code == NULL)
969 return NULL;
970 if (!PyCode_Check(code)) {
971 Py_DECREF(code);
972 PyErr_Format(PyExc_TypeError,
973 "compiled module %.200s is not a code object",
974 pathname);
975 return NULL;
977 return code;
980 /* Replace any occurances of "\r\n?" in the input string with "\n".
981 This converts DOS and Mac line endings to Unix line endings.
982 Also append a trailing "\n" to be compatible with
983 PyParser_SimpleParseFile(). Returns a new reference. */
984 static PyObject *
985 normalize_line_endings(PyObject *source)
987 char *buf, *q, *p = PyString_AsString(source);
988 PyObject *fixed_source;
990 if (!p)
991 return NULL;
993 /* one char extra for trailing \n and one for terminating \0 */
994 buf = (char *)PyMem_Malloc(PyString_Size(source) + 2);
995 if (buf == NULL) {
996 PyErr_SetString(PyExc_MemoryError,
997 "zipimport: no memory to allocate "
998 "source buffer");
999 return NULL;
1001 /* replace "\r\n?" by "\n" */
1002 for (q = buf; *p != '\0'; p++) {
1003 if (*p == '\r') {
1004 *q++ = '\n';
1005 if (*(p + 1) == '\n')
1006 p++;
1008 else
1009 *q++ = *p;
1011 *q++ = '\n'; /* add trailing \n */
1012 *q = '\0';
1013 fixed_source = PyString_FromString(buf);
1014 PyMem_Free(buf);
1015 return fixed_source;
1018 /* Given a string buffer containing Python source code, compile it
1019 return and return a code object as a new reference. */
1020 static PyObject *
1021 compile_source(char *pathname, PyObject *source)
1023 PyObject *code, *fixed_source;
1025 fixed_source = normalize_line_endings(source);
1026 if (fixed_source == NULL)
1027 return NULL;
1029 code = Py_CompileString(PyString_AsString(fixed_source), pathname,
1030 Py_file_input);
1031 Py_DECREF(fixed_source);
1032 return code;
/* Convert the date/time values found in the Zip archive to a value
   that's compatible with the time stamp stored in .pyc files.

   dostime packs seconds/2 (bits 0-4), minutes (bits 5-10) and hours
   (bits 11-15); dosdate packs day (bits 0-4), month (bits 5-8) and
   years since 1980 (bits 9-15).  The result comes from mktime(), which
   is asked to determine DST itself. */
static time_t
parse_dostime(int dostime, int dosdate)
{
    struct tm broken_down;
    int half_seconds = dostime & 0x1f;
    int minutes = (dostime >> 5) & 0x3f;
    int hours = (dostime >> 11) & 0x1f;
    int day = dosdate & 0x1f;
    int month = (dosdate >> 5) & 0x0f;
    int years_since_1980 = (dosdate >> 9) & 0x7f;

    memset((void *) &broken_down, '\0', sizeof(broken_down));

    broken_down.tm_sec = half_seconds * 2;
    broken_down.tm_min = minutes;
    broken_down.tm_hour = hours;
    broken_down.tm_mday = day;
    broken_down.tm_mon = month - 1;                 /* tm_mon is 0-based */
    broken_down.tm_year = years_since_1980 + 80;    /* tm_year counts from 1900 */
    broken_down.tm_isdst = -1;  /* wday/yday is ignored */

    return mktime(&broken_down);
}
1055 /* Given a path to a .pyc or .pyo file in the archive, return the
1056 modifictaion time of the matching .py file, or 0 if no source
1057 is available. */
1058 static time_t
1059 get_mtime_of_source(ZipImporter *self, char *path)
1061 PyObject *toc_entry;
1062 time_t mtime = 0;
1063 Py_ssize_t lastchar = strlen(path) - 1;
1064 char savechar = path[lastchar];
1065 path[lastchar] = '\0'; /* strip 'c' or 'o' from *.py[co] */
1066 toc_entry = PyDict_GetItemString(self->files, path);
1067 if (toc_entry != NULL && PyTuple_Check(toc_entry) &&
1068 PyTuple_Size(toc_entry) == 8) {
1069 /* fetch the time stamp of the .py file for comparison
1070 with an embedded pyc time stamp */
1071 int time, date;
1072 time = PyInt_AsLong(PyTuple_GetItem(toc_entry, 5));
1073 date = PyInt_AsLong(PyTuple_GetItem(toc_entry, 6));
1074 mtime = parse_dostime(time, date);
1076 path[lastchar] = savechar;
1077 return mtime;
1080 /* Return the code object for the module named by 'fullname' from the
1081 Zip archive as a new reference. */
1082 static PyObject *
1083 get_code_from_data(ZipImporter *self, int ispackage, int isbytecode,
1084 time_t mtime, PyObject *toc_entry)
1086 PyObject *data, *code;
1087 char *modpath;
1088 char *archive = PyString_AsString(self->archive);
1090 if (archive == NULL)
1091 return NULL;
1093 data = get_data(archive, toc_entry);
1094 if (data == NULL)
1095 return NULL;
1097 modpath = PyString_AsString(PyTuple_GetItem(toc_entry, 0));
1099 if (isbytecode) {
1100 code = unmarshal_code(modpath, data, mtime);
1102 else {
1103 code = compile_source(modpath, data);
1105 Py_DECREF(data);
1106 return code;
1109 /* Get the code object assoiciated with the module specified by
1110 'fullname'. */
1111 static PyObject *
1112 get_module_code(ZipImporter *self, char *fullname,
1113 int *p_ispackage, char **p_modpath)
1115 PyObject *toc_entry;
1116 char *subname, path[MAXPATHLEN + 1];
1117 int len;
1118 struct st_zip_searchorder *zso;
1120 subname = get_subname(fullname);
1122 len = make_filename(PyString_AsString(self->prefix), subname, path);
1123 if (len < 0)
1124 return NULL;
1126 for (zso = zip_searchorder; *zso->suffix; zso++) {
1127 PyObject *code = NULL;
1129 strcpy(path + len, zso->suffix);
1130 if (Py_VerboseFlag > 1)
1131 PySys_WriteStderr("# trying %s%c%s\n",
1132 PyString_AsString(self->archive),
1133 SEP, path);
1134 toc_entry = PyDict_GetItemString(self->files, path);
1135 if (toc_entry != NULL) {
1136 time_t mtime = 0;
1137 int ispackage = zso->type & IS_PACKAGE;
1138 int isbytecode = zso->type & IS_BYTECODE;
1140 if (isbytecode)
1141 mtime = get_mtime_of_source(self, path);
1142 if (p_ispackage != NULL)
1143 *p_ispackage = ispackage;
1144 code = get_code_from_data(self, ispackage,
1145 isbytecode, mtime,
1146 toc_entry);
1147 if (code == Py_None) {
1148 /* bad magic number or non-matching mtime
1149 in byte code, try next */
1150 Py_DECREF(code);
1151 continue;
1153 if (code != NULL && p_modpath != NULL)
1154 *p_modpath = PyString_AsString(
1155 PyTuple_GetItem(toc_entry, 0));
1156 return code;
1159 PyErr_Format(ZipImportError, "can't find module '%.200s'", fullname);
1160 return NULL;
1164 /* Module init */
1166 PyDoc_STRVAR(zipimport_doc,
1167 "zipimport provides support for importing Python modules from Zip archives.\n\
1169 This module exports three objects:\n\
1170 - zipimporter: a class; its constructor takes a path to a Zip archive.\n\
1171 - ZipImportError: exception raised by zipimporter objects. It's a\n\
1172 subclass of ImportError, so it can be caught as ImportError, too.\n\
1173 - _zip_directory_cache: a dict, mapping archive paths to zip directory\n\
1174 info dicts, as used in zipimporter._files.\n\
1176 It is usually not needed to use the zipimport module explicitly; it is\n\
1177 used by the builtin import mechanism for sys.path items that are paths\n\
1178 to Zip archives.");
1180 PyMODINIT_FUNC
1181 initzipimport(void)
1183 PyObject *mod;
1185 if (PyType_Ready(&ZipImporter_Type) < 0)
1186 return;
1188 /* Correct directory separator */
1189 zip_searchorder[0].suffix[0] = SEP;
1190 zip_searchorder[1].suffix[0] = SEP;
1191 zip_searchorder[2].suffix[0] = SEP;
1192 if (Py_OptimizeFlag) {
1193 /* Reverse *.pyc and *.pyo */
1194 struct st_zip_searchorder tmp;
1195 tmp = zip_searchorder[0];
1196 zip_searchorder[0] = zip_searchorder[1];
1197 zip_searchorder[1] = tmp;
1198 tmp = zip_searchorder[3];
1199 zip_searchorder[3] = zip_searchorder[4];
1200 zip_searchorder[4] = tmp;
1203 mod = Py_InitModule4("zipimport", NULL, zipimport_doc,
1204 NULL, PYTHON_API_VERSION);
1205 if (mod == NULL)
1206 return;
1208 ZipImportError = PyErr_NewException("zipimport.ZipImportError",
1209 PyExc_ImportError, NULL);
1210 if (ZipImportError == NULL)
1211 return;
1213 Py_INCREF(ZipImportError);
1214 if (PyModule_AddObject(mod, "ZipImportError",
1215 ZipImportError) < 0)
1216 return;
1218 Py_INCREF(&ZipImporter_Type);
1219 if (PyModule_AddObject(mod, "zipimporter",
1220 (PyObject *)&ZipImporter_Type) < 0)
1221 return;
1223 zip_directory_cache = PyDict_New();
1224 if (zip_directory_cache == NULL)
1225 return;
1226 Py_INCREF(zip_directory_cache);
1227 if (PyModule_AddObject(mod, "_zip_directory_cache",
1228 zip_directory_cache) < 0)
1229 return;