Issue #5262: Improved fix.
[python.git] / PC / getpathp.c
blob78f446599a26a9ff5df95b9ba73ff51ff197e622
2 /* Return the initial module search path. */
3 /* Used by DOS, OS/2, Windows 3.1, Windows 95/98, Windows NT. */
5 /* ----------------------------------------------------------------
6 PATH RULES FOR WINDOWS:
7 This describes how sys.path is formed on Windows. It describes the
8 functionality, not the implementation (ie, the order in which these
9 are actually fetched is different)
11 * Python always adds an empty entry at the start, which corresponds
12 to the current directory.
14 * If the PYTHONPATH env. var. exists, its entries are added next.
16 * We look in the registry for "application paths" - that is, sub-keys
17 under the main PythonPath registry key. These are added next (the
18 order of sub-key processing is undefined).
19 HKEY_CURRENT_USER is searched and added first.
20 HKEY_LOCAL_MACHINE is searched and added next.
21 (Note that all known installers only use HKLM, so HKCU is typically
22 empty)
24 * We attempt to locate the "Python Home" - if the PYTHONHOME env var
25 is set, we believe it. Otherwise, we use the path of our host .EXE
26 to try to locate our "landmark" (lib\\os.py) and deduce our home.
27 - If we DO have a Python Home: The relevant sub-directories (Lib,
28 plat-win, lib-tk, etc) are based on the Python Home
29 - If we DO NOT have a Python Home, the core Python Path is
30 loaded from the registry. This is the main PythonPath key,
31 and both HKLM and HKCU are combined to form the path)
33 * Iff - we can not locate the Python Home, have not had a PYTHONPATH
34 specified, and can't locate any Registry entries (ie, we have _nothing_
35 we can assume is a good path), a default path with relative entries is
36 used (eg. .\Lib;.\plat-win, etc)
39 The end result of all this is:
40 * When running python.exe, or any other .exe in the main Python directory
41 (either an installed version, or directly from the PCbuild directory),
42 the core path is deduced, and the core paths in the registry are
43 ignored. Other "application paths" in the registry are always read.
45 * When Python is hosted in another exe (different directory, embedded via
46 COM, etc), the Python Home will not be deduced, so the core path from
47 the registry is used. Other "application paths" in the registry are
48 always read.
50 * If Python can't find its home and there is no registry (eg, frozen
51 exe, some very strange installation setup) you get a path with
52 some default, but relative, paths.
54 ---------------------------------------------------------------- */
57 #include "Python.h"
58 #include "osdefs.h"
60 #ifdef MS_WINDOWS
61 #include <windows.h>
62 #include <tchar.h>
63 #endif
65 #ifdef HAVE_SYS_TYPES_H
66 #include <sys/types.h>
67 #endif /* HAVE_SYS_TYPES_H */
69 #ifdef HAVE_SYS_STAT_H
70 #include <sys/stat.h>
71 #endif /* HAVE_SYS_STAT_H */
73 #include <string.h>
75 /* Search in some common locations for the associated Python libraries.
77 * Py_GetPath() tries to return a sensible Python module search path.
79 * The approach is an adaptation for Windows of the strategy used in
80 * ../Modules/getpath.c; it uses the Windows Registry as one of its
81 * information sources.
84 #ifndef LANDMARK
85 #define LANDMARK "lib\\os.py"
86 #endif
88 static char prefix[MAXPATHLEN+1];
89 static char progpath[MAXPATHLEN+1];
90 static char dllpath[MAXPATHLEN+1];
91 static char *module_search_path = NULL;
94 static int
95 is_sep(char ch) /* determine if "ch" is a separator character */
97 #ifdef ALTSEP
98 return ch == SEP || ch == ALTSEP;
99 #else
100 return ch == SEP;
101 #endif
/* Chop the last path component off 'dir', in place.

   'dir' must be NUL-terminated within its bounds.  The string is cut at
   its right-most separator; when there is no separator it becomes empty.
   Nothing is ever written beyond the existing terminator.
*/
static void
reduce(char *dir)
{
    char *cut = dir + strlen(dir);

    while (cut > dir && !is_sep(*cut))
        cut--;
    *cut = '\0';
}
/* Return 1 if stat() succeeds on 'filename', 0 otherwise. */
static int
exists(char *filename)
{
    struct stat info;

    if (stat(filename, &info) != 0)
        return 0;
    return 1;
}
124 /* Assumes 'filename' MAXPATHLEN+1 bytes long -
125 may extend 'filename' by one character.
127 static int
128 ismodule(char *filename) /* Is module -- check for .pyc/.pyo too */
130 if (exists(filename))
131 return 1;
133 /* Check for the compiled version of prefix. */
134 if (strlen(filename) < MAXPATHLEN) {
135 strcat(filename, Py_OptimizeFlag ? "o" : "c");
136 if (exists(filename))
137 return 1;
139 return 0;
142 /* Add a path component, by appending stuff to buffer.
143 buffer must have at least MAXPATHLEN + 1 bytes allocated, and contain a
144 NUL-terminated string with no more than MAXPATHLEN characters (not counting
145 the trailing NUL). It's a fatal error if it contains a string longer than
146 that (callers must be careful!). If these requirements are met, it's
147 guaranteed that buffer will still be a NUL-terminated string with no more
148 than MAXPATHLEN characters at exit. If stuff is too long, only as much of
149 stuff as fits will be appended.
151 static void
152 join(char *buffer, char *stuff)
154 size_t n, k;
155 if (is_sep(stuff[0]))
156 n = 0;
157 else {
158 n = strlen(buffer);
159 if (n > 0 && !is_sep(buffer[n-1]) && n < MAXPATHLEN)
160 buffer[n++] = SEP;
162 if (n > MAXPATHLEN)
163 Py_FatalError("buffer overflow in getpathp.c's joinpath()");
164 k = strlen(stuff);
165 if (n + k > MAXPATHLEN)
166 k = MAXPATHLEN - n;
167 strncpy(buffer+n, stuff, k);
168 buffer[n+k] = '\0';
171 /* gotlandmark only called by search_for_prefix, which ensures
172 'prefix' is null terminated in bounds. join() ensures
173 'landmark' can not overflow prefix if too long.
175 static int
176 gotlandmark(char *landmark)
178 int ok;
179 Py_ssize_t n;
181 n = strlen(prefix);
182 join(prefix, landmark);
183 ok = ismodule(prefix);
184 prefix[n] = '\0';
185 return ok;
188 /* assumes argv0_path is MAXPATHLEN+1 bytes long, already \0 term'd.
189 assumption provided by only caller, calculate_path() */
190 static int
191 search_for_prefix(char *argv0_path, char *landmark)
193 /* Search from argv0_path, until landmark is found */
194 strcpy(prefix, argv0_path);
195 do {
196 if (gotlandmark(landmark))
197 return 1;
198 reduce(prefix);
199 } while (prefix[0]);
200 return 0;
203 #ifdef MS_WINDOWS
204 #ifdef Py_ENABLE_SHARED
206 /* a string loaded from the DLL at startup.*/
207 extern const char *PyWin_DLLVersionString;
210 /* Load a PYTHONPATH value from the registry.
211 Load from either HKEY_LOCAL_MACHINE or HKEY_CURRENT_USER.
213 Works in both Unicode and 8bit environments. Only uses the
214 Ex family of functions so it also works with Windows CE.
216 Returns NULL, or a pointer that should be freed.
218 XXX - this code is pretty strange, as it used to also
219 work on Win16, where the buffer sizes werent available
220 in advance. It could be simplied now Win16/Win32s is dead!
223 static char *
224 getpythonregpath(HKEY keyBase, int skipcore)
226 HKEY newKey = 0;
227 DWORD dataSize = 0;
228 DWORD numKeys = 0;
229 LONG rc;
230 char *retval = NULL;
231 TCHAR *dataBuf = NULL;
232 static const TCHAR keyPrefix[] = _T("Software\\Python\\PythonCore\\");
233 static const TCHAR keySuffix[] = _T("\\PythonPath");
234 size_t versionLen;
235 DWORD index;
236 TCHAR *keyBuf = NULL;
237 TCHAR *keyBufPtr;
238 TCHAR **ppPaths = NULL;
240 /* Tried to use sysget("winver") but here is too early :-( */
241 versionLen = _tcslen(PyWin_DLLVersionString);
242 /* Space for all the chars, plus one \0 */
243 keyBuf = keyBufPtr = malloc(sizeof(keyPrefix) +
244 sizeof(TCHAR)*(versionLen-1) +
245 sizeof(keySuffix));
246 if (keyBuf==NULL) goto done;
248 memcpy(keyBufPtr, keyPrefix, sizeof(keyPrefix)-sizeof(TCHAR));
249 keyBufPtr += sizeof(keyPrefix)/sizeof(TCHAR) - 1;
250 memcpy(keyBufPtr, PyWin_DLLVersionString, versionLen * sizeof(TCHAR));
251 keyBufPtr += versionLen;
252 /* NULL comes with this one! */
253 memcpy(keyBufPtr, keySuffix, sizeof(keySuffix));
254 /* Open the root Python key */
255 rc=RegOpenKeyEx(keyBase,
256 keyBuf, /* subkey */
257 0, /* reserved */
258 KEY_READ,
259 &newKey);
260 if (rc!=ERROR_SUCCESS) goto done;
261 /* Find out how big our core buffer is, and how many subkeys we have */
262 rc = RegQueryInfoKey(newKey, NULL, NULL, NULL, &numKeys, NULL, NULL,
263 NULL, NULL, &dataSize, NULL, NULL);
264 if (rc!=ERROR_SUCCESS) goto done;
265 if (skipcore) dataSize = 0; /* Only count core ones if we want them! */
266 /* Allocate a temp array of char buffers, so we only need to loop
267 reading the registry once
269 ppPaths = malloc( sizeof(TCHAR *) * numKeys );
270 if (ppPaths==NULL) goto done;
271 memset(ppPaths, 0, sizeof(TCHAR *) * numKeys);
272 /* Loop over all subkeys, allocating a temp sub-buffer. */
273 for(index=0;index<numKeys;index++) {
274 TCHAR keyBuf[MAX_PATH+1];
275 HKEY subKey = 0;
276 DWORD reqdSize = MAX_PATH+1;
277 /* Get the sub-key name */
278 DWORD rc = RegEnumKeyEx(newKey, index, keyBuf, &reqdSize,
279 NULL, NULL, NULL, NULL );
280 if (rc!=ERROR_SUCCESS) goto done;
281 /* Open the sub-key */
282 rc=RegOpenKeyEx(newKey,
283 keyBuf, /* subkey */
284 0, /* reserved */
285 KEY_READ,
286 &subKey);
287 if (rc!=ERROR_SUCCESS) goto done;
288 /* Find the value of the buffer size, malloc, then read it */
289 RegQueryValueEx(subKey, NULL, 0, NULL, NULL, &reqdSize);
290 if (reqdSize) {
291 ppPaths[index] = malloc(reqdSize);
292 if (ppPaths[index]) {
293 RegQueryValueEx(subKey, NULL, 0, NULL,
294 (LPBYTE)ppPaths[index],
295 &reqdSize);
296 dataSize += reqdSize + 1; /* 1 for the ";" */
299 RegCloseKey(subKey);
302 /* return null if no path to return */
303 if (dataSize == 0) goto done;
305 /* original datasize from RegQueryInfo doesn't include the \0 */
306 dataBuf = malloc((dataSize+1) * sizeof(TCHAR));
307 if (dataBuf) {
308 TCHAR *szCur = dataBuf;
309 DWORD reqdSize = dataSize;
310 /* Copy our collected strings */
311 for (index=0;index<numKeys;index++) {
312 if (index > 0) {
313 *(szCur++) = _T(';');
314 dataSize--;
316 if (ppPaths[index]) {
317 Py_ssize_t len = _tcslen(ppPaths[index]);
318 _tcsncpy(szCur, ppPaths[index], len);
319 szCur += len;
320 assert(dataSize > (DWORD)len);
321 dataSize -= (DWORD)len;
324 if (skipcore)
325 *szCur = '\0';
326 else {
327 /* If we have no values, we dont need a ';' */
328 if (numKeys) {
329 *(szCur++) = _T(';');
330 dataSize--;
332 /* Now append the core path entries -
333 this will include the NULL
335 rc = RegQueryValueEx(newKey, NULL, 0, NULL,
336 (LPBYTE)szCur, &dataSize);
338 /* And set the result - caller must free
339 If MBCS, it is fine as is. If Unicode, allocate new
340 buffer and convert.
342 #ifdef UNICODE
343 retval = (char *)malloc(reqdSize+1);
344 if (retval)
345 WideCharToMultiByte(CP_ACP, 0,
346 dataBuf, -1, /* source */
347 retval, reqdSize+1, /* dest */
348 NULL, NULL);
349 free(dataBuf);
350 #else
351 retval = dataBuf;
352 #endif
354 done:
355 /* Loop freeing my temp buffers */
356 if (ppPaths) {
357 for(index=0;index<numKeys;index++)
358 if (ppPaths[index]) free(ppPaths[index]);
359 free(ppPaths);
361 if (newKey)
362 RegCloseKey(newKey);
363 if (keyBuf)
364 free(keyBuf);
365 return retval;
367 #endif /* Py_ENABLE_SHARED */
368 #endif /* MS_WINDOWS */
370 static void
371 get_progpath(void)
373 extern char *Py_GetProgramName(void);
374 char *path = getenv("PATH");
375 char *prog = Py_GetProgramName();
377 #ifdef MS_WINDOWS
378 extern HANDLE PyWin_DLLhModule;
379 #ifdef UNICODE
380 WCHAR wprogpath[MAXPATHLEN+1];
381 /* Windows documents that GetModuleFileName() will "truncate",
382 but makes no mention of the null terminator. Play it safe.
383 PLUS Windows itself defines MAX_PATH as the same, but anyway...
385 #ifdef Py_ENABLE_SHARED
386 wprogpath[MAXPATHLEN]=_T('\0');
387 if (PyWin_DLLhModule &&
388 GetModuleFileName(PyWin_DLLhModule, wprogpath, MAXPATHLEN)) {
389 WideCharToMultiByte(CP_ACP, 0,
390 wprogpath, -1,
391 dllpath, MAXPATHLEN+1,
392 NULL, NULL);
394 #else
395 dllpath[0] = 0;
396 #endif
397 wprogpath[MAXPATHLEN]=_T('\0');
398 if (GetModuleFileName(NULL, wprogpath, MAXPATHLEN)) {
399 WideCharToMultiByte(CP_ACP, 0,
400 wprogpath, -1,
401 progpath, MAXPATHLEN+1,
402 NULL, NULL);
403 return;
405 #else
406 /* static init of progpath ensures final char remains \0 */
407 #ifdef Py_ENABLE_SHARED
408 if (PyWin_DLLhModule)
409 if (!GetModuleFileName(PyWin_DLLhModule, dllpath, MAXPATHLEN))
410 dllpath[0] = 0;
411 #else
412 dllpath[0] = 0;
413 #endif
414 if (GetModuleFileName(NULL, progpath, MAXPATHLEN))
415 return;
416 #endif
417 #endif
418 if (prog == NULL || *prog == '\0')
419 prog = "python";
421 /* If there is no slash in the argv0 path, then we have to
422 * assume python is on the user's $PATH, since there's no
423 * other way to find a directory to start the search from. If
424 * $PATH isn't exported, you lose.
426 #ifdef ALTSEP
427 if (strchr(prog, SEP) || strchr(prog, ALTSEP))
428 #else
429 if (strchr(prog, SEP))
430 #endif
431 strncpy(progpath, prog, MAXPATHLEN);
432 else if (path) {
433 while (1) {
434 char *delim = strchr(path, DELIM);
436 if (delim) {
437 size_t len = delim - path;
438 /* ensure we can't overwrite buffer */
439 len = min(MAXPATHLEN,len);
440 strncpy(progpath, path, len);
441 *(progpath + len) = '\0';
443 else
444 strncpy(progpath, path, MAXPATHLEN);
446 /* join() is safe for MAXPATHLEN+1 size buffer */
447 join(progpath, prog);
448 if (exists(progpath))
449 break;
451 if (!delim) {
452 progpath[0] = '\0';
453 break;
455 path = delim + 1;
458 else
459 progpath[0] = '\0';
462 static void
463 calculate_path(void)
465 char argv0_path[MAXPATHLEN+1];
466 char *buf;
467 size_t bufsz;
468 char *pythonhome = Py_GetPythonHome();
469 char *envpath = Py_GETENV("PYTHONPATH");
471 #ifdef MS_WINDOWS
472 int skiphome, skipdefault;
473 char *machinepath = NULL;
474 char *userpath = NULL;
475 char zip_path[MAXPATHLEN+1];
476 size_t len;
477 #endif
479 get_progpath();
480 /* progpath guaranteed \0 terminated in MAXPATH+1 bytes. */
481 strcpy(argv0_path, progpath);
482 reduce(argv0_path);
483 if (pythonhome == NULL || *pythonhome == '\0') {
484 if (search_for_prefix(argv0_path, LANDMARK))
485 pythonhome = prefix;
486 else
487 pythonhome = NULL;
489 else
490 strncpy(prefix, pythonhome, MAXPATHLEN);
492 if (envpath && *envpath == '\0')
493 envpath = NULL;
496 #ifdef MS_WINDOWS
497 /* Calculate zip archive path */
498 if (dllpath[0]) /* use name of python DLL */
499 strncpy(zip_path, dllpath, MAXPATHLEN);
500 else /* use name of executable program */
501 strncpy(zip_path, progpath, MAXPATHLEN);
502 zip_path[MAXPATHLEN] = '\0';
503 len = strlen(zip_path);
504 if (len > 4) {
505 zip_path[len-3] = 'z'; /* change ending to "zip" */
506 zip_path[len-2] = 'i';
507 zip_path[len-1] = 'p';
509 else {
510 zip_path[0] = 0;
513 skiphome = pythonhome==NULL ? 0 : 1;
514 #ifdef Py_ENABLE_SHARED
515 machinepath = getpythonregpath(HKEY_LOCAL_MACHINE, skiphome);
516 userpath = getpythonregpath(HKEY_CURRENT_USER, skiphome);
517 #endif
518 /* We only use the default relative PYTHONPATH if we havent
519 anything better to use! */
520 skipdefault = envpath!=NULL || pythonhome!=NULL || \
521 machinepath!=NULL || userpath!=NULL;
522 #endif
524 /* We need to construct a path from the following parts.
525 (1) the PYTHONPATH environment variable, if set;
526 (2) for Win32, the zip archive file path;
527 (3) for Win32, the machinepath and userpath, if set;
528 (4) the PYTHONPATH config macro, with the leading "."
529 of each component replaced with pythonhome, if set;
530 (5) the directory containing the executable (argv0_path).
531 The length calculation calculates #4 first.
532 Extra rules:
533 - If PYTHONHOME is set (in any way) item (3) is ignored.
534 - If registry values are used, (4) and (5) are ignored.
537 /* Calculate size of return buffer */
538 if (pythonhome != NULL) {
539 char *p;
540 bufsz = 1;
541 for (p = PYTHONPATH; *p; p++) {
542 if (*p == DELIM)
543 bufsz++; /* number of DELIM plus one */
545 bufsz *= strlen(pythonhome);
547 else
548 bufsz = 0;
549 bufsz += strlen(PYTHONPATH) + 1;
550 bufsz += strlen(argv0_path) + 1;
551 #ifdef MS_WINDOWS
552 if (userpath)
553 bufsz += strlen(userpath) + 1;
554 if (machinepath)
555 bufsz += strlen(machinepath) + 1;
556 bufsz += strlen(zip_path) + 1;
557 #endif
558 if (envpath != NULL)
559 bufsz += strlen(envpath) + 1;
561 module_search_path = buf = malloc(bufsz);
562 if (buf == NULL) {
563 /* We can't exit, so print a warning and limp along */
564 fprintf(stderr, "Can't malloc dynamic PYTHONPATH.\n");
565 if (envpath) {
566 fprintf(stderr, "Using environment $PYTHONPATH.\n");
567 module_search_path = envpath;
569 else {
570 fprintf(stderr, "Using default static path.\n");
571 module_search_path = PYTHONPATH;
573 #ifdef MS_WINDOWS
574 if (machinepath)
575 free(machinepath);
576 if (userpath)
577 free(userpath);
578 #endif /* MS_WINDOWS */
579 return;
582 if (envpath) {
583 strcpy(buf, envpath);
584 buf = strchr(buf, '\0');
585 *buf++ = DELIM;
587 #ifdef MS_WINDOWS
588 if (zip_path[0]) {
589 strcpy(buf, zip_path);
590 buf = strchr(buf, '\0');
591 *buf++ = DELIM;
593 if (userpath) {
594 strcpy(buf, userpath);
595 buf = strchr(buf, '\0');
596 *buf++ = DELIM;
597 free(userpath);
599 if (machinepath) {
600 strcpy(buf, machinepath);
601 buf = strchr(buf, '\0');
602 *buf++ = DELIM;
603 free(machinepath);
605 if (pythonhome == NULL) {
606 if (!skipdefault) {
607 strcpy(buf, PYTHONPATH);
608 buf = strchr(buf, '\0');
611 #else
612 if (pythonhome == NULL) {
613 strcpy(buf, PYTHONPATH);
614 buf = strchr(buf, '\0');
616 #endif /* MS_WINDOWS */
617 else {
618 char *p = PYTHONPATH;
619 char *q;
620 size_t n;
621 for (;;) {
622 q = strchr(p, DELIM);
623 if (q == NULL)
624 n = strlen(p);
625 else
626 n = q-p;
627 if (p[0] == '.' && is_sep(p[1])) {
628 strcpy(buf, pythonhome);
629 buf = strchr(buf, '\0');
630 p++;
631 n--;
633 strncpy(buf, p, n);
634 buf += n;
635 if (q == NULL)
636 break;
637 *buf++ = DELIM;
638 p = q+1;
641 if (argv0_path) {
642 *buf++ = DELIM;
643 strcpy(buf, argv0_path);
644 buf = strchr(buf, '\0');
646 *buf = '\0';
647 /* Now to pull one last hack/trick. If sys.prefix is
648 empty, then try and find it somewhere on the paths
649 we calculated. We scan backwards, as our general policy
650 is that Python core directories are at the *end* of
651 sys.path. We assume that our "lib" directory is
652 on the path, and that our 'prefix' directory is
653 the parent of that.
655 if (*prefix=='\0') {
656 char lookBuf[MAXPATHLEN+1];
657 char *look = buf - 1; /* 'buf' is at the end of the buffer */
658 while (1) {
659 Py_ssize_t nchars;
660 char *lookEnd = look;
661 /* 'look' will end up one character before the
662 start of the path in question - even if this
663 is one character before the start of the buffer
665 while (look >= module_search_path && *look != DELIM)
666 look--;
667 nchars = lookEnd-look;
668 strncpy(lookBuf, look+1, nchars);
669 lookBuf[nchars] = '\0';
670 /* Up one level to the parent */
671 reduce(lookBuf);
672 if (search_for_prefix(lookBuf, LANDMARK)) {
673 break;
675 /* If we are out of paths to search - give up */
676 if (look < module_search_path)
677 break;
678 look--;
684 /* External interface */
686 char *
687 Py_GetPath(void)
689 if (!module_search_path)
690 calculate_path();
691 return module_search_path;
694 char *
695 Py_GetPrefix(void)
697 if (!module_search_path)
698 calculate_path();
699 return prefix;
/* The exec-prefix is the same as the prefix in this implementation. */
char *
Py_GetExecPrefix(void)
{
    char *exec_prefix = Py_GetPrefix();

    return exec_prefix;
}
708 char *
709 Py_GetProgramFullPath(void)
711 if (!module_search_path)
712 calculate_path();
713 return progpath;