3 * kProfiler Mark 2 - The Ring-3 Implementation.
7 * Copyright (c) 2006-2007 Knut St. Osmundsen <bird-kStuff-spamix@anduin.net>
9 * Permission is hereby granted, free of charge, to any person
10 * obtaining a copy of this software and associated documentation
11 * files (the "Software"), to deal in the Software without
12 * restriction, including without limitation the rights to use,
13 * copy, modify, merge, publish, distribute, sublicense, and/or sell
14 * copies of the Software, and to permit persons to whom the
15 * Software is furnished to do so, subject to the following
18 * The above copyright notice and this permission notice shall be
19 * included in all copies or substantial portions of the Software.
21 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28 * OTHER DEALINGS IN THE SOFTWARE.
31 /*******************************************************************************
33 *******************************************************************************/
35 #if K_OS == K_OS_WINDOWS
44 #elif K_OS == K_OS_LINUX || K_OS == K_OS_FREEBSD
45 # define KPRF_USE_PTHREAD
48 # define KPRF_USE_MMAN
49 # include <sys/mman.h>
50 # include <sys/fcntl.h>
57 #elif K_OS == K_OS_OS2
61 # include <sys/fmutex.h>
64 # error "not ported to this OS..."
71 * Instantiate the header.
73 #define KPRF_NAME(Suffix) KPrf##Suffix
74 #define KPRF_TYPE(Prefix,Suffix) Prefix##KPRF##Suffix
75 #if K_OS == K_OS_WINDOWS || K_OS == K_OS_OS2
76 # define KPRF_DECL_FUNC(type, name) extern "C" __declspec(dllexport) type __cdecl KPRF_NAME(name)
78 # define KPRF_DECL_FUNC(type, name) extern "C" type KPRF_NAME(name)
82 # define KPRF_ASSERT(expr) do { if (!(expr)) { __asm__ __volatile__("int3\n\tnop\n\t");} } while (0)
84 # define KPRF_ASSERT(expr) do { if (!(expr)) { __debugbreak(); } } while (0)
87 # define KPRF_ASSERT(expr) do { } while (0)
90 #include "prfcore.h.h"
94 /*******************************************************************************
95 * Structures and Typedefs *
96 *******************************************************************************/
97 /** Mutex lock type. */
98 #if defined(KPRF_USE_PTHREAD)
99 typedef pthread_mutex_t
KPRF_TYPE(,MUTEX
);
100 #elif K_OS == K_OS_WINDOWS
101 typedef CRITICAL_SECTION
KPRF_TYPE(,MUTEX
);
102 #elif K_OS == K_OS_OS2
103 typedef struct _fmutex
KPRF_TYPE(,MUTEX
);
105 /** Pointer to a mutex lock. */
106 typedef KPRF_TYPE(,MUTEX
) *KPRF_TYPE(P
,MUTEX
);
109 #if defined(KPRF_USE_PTHREAD)
110 /** Read/Write lock type. */
111 typedef pthread_rwlock_t
KPRF_TYPE(,RWLOCK
);
112 #elif K_OS == K_OS_WINDOWS || K_OS == K_OS_OS2
113 /** Read/Write lock state. */
114 typedef enum KPRF_TYPE(,RWLOCKSTATE
)
116 RWLOCK_STATE_UNINITIALIZED
= 0,
118 RWLOCK_STATE_LOCKING
,
119 RWLOCK_STATE_EXCLUSIVE
,
120 RWLOCK_STATE_32BIT_HACK
= 0x7fffffff
121 } KPRF_TYPE(,RWLOCKSTATE
);
122 /** Update the state. */
123 #define KPRF_RWLOCK_SETSTATE(pRWLock, enmNewState) \
124 kPrfAtomicSet32((volatile KU32 *)&(pRWLock)->enmState, (KU32)(enmNewState))
126 /** Read/Write lock type. */
127 typedef struct KPRF_TYPE(,RWLOCK
)
129 /** This mutex serialize the access and updating of the members
130 * of this structure. */
131 KPRF_TYPE(,MUTEX
) Mutex
;
132 /** The current number of readers. */
134 /** The number of readers waiting. */
135 KU32 cReadersWaiting
;
136 /** The current number of waiting writers. */
137 KU32 cWritersWaiting
;
138 # if K_OS == K_OS_WINDOWS
139 /** The handle of the event object on which the waiting readers block. (manual reset). */
141 /** The handle of the event object on which the waiting writers block. (manual reset). */
143 # elif K_OS == K_OS_OS2
144 /** The handle of the event semaphore on which the waiting readers block. */
146 /** The handle of the event semaphore on which the waiting writers block. */
149 /** The current state of the read-write lock. */
150 KPRF_TYPE(,RWLOCKSTATE
) enmState
;
151 } KPRF_TYPE(,RWLOCK
);
153 /** Pointer to a Read/Write lock. */
154 typedef KPRF_TYPE(,RWLOCK
) *KPRF_TYPE(P
,RWLOCK
);
158 /*******************************************************************************
160 *******************************************************************************/
161 /** The TLS index / key. */
162 #if K_OS == K_OS_WINDOWS
163 static DWORD g_dwThreadTLS
= TLS_OUT_OF_INDEXES
;
165 #elif defined(KPRF_USE_PTHREAD)
166 static pthread_key_t g_ThreadKey
= (pthread_key_t
)-1;
168 #elif K_OS == K_OS_OS2
169 static KPRF_TYPE(P
,THREAD
) *g_ppThread
= NULL
;
172 # error "Not ported to your OS - or you're missing the OS define(s)."
175 /** Pointer to the profiler header. */
176 static KPRF_TYPE(P
,HDR
) g_pHdr
= NULL
;
177 #define KPRF_GET_HDR() g_pHdr
179 /** Whether the profiler is enabled or not. */
180 static bool g_fEnabled
= false;
181 #define KPRF_IS_ACTIVE() g_fEnabled
184 /** The mutex protecting the threads in g_pHdr. */
185 static KPRF_TYPE(,MUTEX
) g_ThreadsMutex
;
187 /** The mutex protecting the module segments in g_pHdr. */
188 static KPRF_TYPE(,MUTEX
) g_ModSegsMutex
;
190 /** The read-write lock protecting the functions in g_pHdr. */
191 static KPRF_TYPE(,RWLOCK
) g_FunctionsRWLock
;
195 /*******************************************************************************
196 * Internal Functions *
197 *******************************************************************************/
198 static KPRF_TYPE(P
,THREAD
) kPrfGetThreadAutoReg(void);
199 #ifdef KPRF_USE_PTHREAD
200 static void kPrfPThreadKeyDtor(void *pvThread
);
205 * Gets the pointer to the profiler data for the current thread.
207 * This implementation automatically adds unknown threads.
209 * @returns Pointer to the profiler thread data.
210 * @returns NULL if we're out of thread space.
212 static inline KPRF_TYPE(P
,THREAD
) kPrfGetThread(void)
214 KPRF_TYPE(P
,THREAD
) pThread
;
217 #if K_OS == K_OS_WINDOWS
218 pThread
= (KPRF_TYPE(P
,THREAD
))TlsGetValue(g_dwThreadTLS
);
221 #elif defined(KPRF_USE_PTHREAD)
222 pThread
= (KPRF_TYPE(P
,THREAD
))pthread_getspecific(g_ThreadKey
);
224 #elif K_OS == K_OS_OS2
225 pThread
= *g_ppThread
;
228 # error not implemented
231 pThread
= kPrfGetThreadAutoReg();
234 #define KPRF_GET_THREAD() kPrfGetThread()
238 * Gets the ID of the current thread.
240 * @returns The thread id.
242 static inline KUPTR
kPrfGetThreadId(void)
245 #if K_OS == K_OS_WINDOWS
246 KUPTR ThreadId
= (KUPTR
)GetCurrentThreadId();
249 #elif defined(KPRF_USE_PTHREAD)
250 KUPTR ThreadId
= (KUPTR
)pthread_self();
252 #elif K_OS == K_OS_OS2
255 DosGetInfoBlocks(&pTib
, &pPib
);
256 ThreadId
= pTib
->tib_ptib2
->tib2_ultid
;
259 # error not implemented
264 #define KPRF_GET_THREADID() kPrfGetThreadId()
268 * Gets the ID of the current process.
270 * @returns The process id.
272 static inline KUPTR
kPrfGetProcessId(void)
275 #if K_OS == K_OS_WINDOWS
276 KUPTR ThreadId
= (KUPTR
)GetProcessId(GetCurrentProcess());
278 #elif K_OS == K_OS_OS2
281 DosGetInfoBlocks(&pTib
, &pPib
);
282 ThreadId
= pPib
->pib_pid
;
285 KUPTR ThreadId
= (KUPTR
)getpid();
290 #define KPRF_GET_PROCESSID() kPrfGetProcessId()
294 * Sets the pointer to the profiler data for the current thread.
296 * We require fast access to the profiler thread data, so we store
297 * it in a TLS (thread local storage) item/key where the implementation
300 * @param pThread The pointer to the profiler thread data for the current thread.
302 static inline void kPrfSetThread(KPRF_TYPE(P
,THREAD
) pThread
)
305 #if K_OS == K_OS_WINDOWS
306 BOOL fRc
= TlsSetValue(g_dwThreadTLS
, pThread
);
309 #elif defined(KPRF_USE_PTHREAD)
310 int rc
= pthread_setspecific(g_ThreadKey
, pThread
);
312 #elif K_OS == K_OS_OS2
313 *g_ppThread
= pThread
;
316 # error not implemented
319 #define KPRF_SET_THREAD(pThread) kPrfSetThread(pThread)
323 * Get the now timestamp.
324 * This must correspond to what the assembly code is doing.
326 static inline KU64
kPrfNow(void)
328 #if defined(HAVE_INTRIN)
340 # if defined(__GNUC__)
341 __asm__
__volatile__ ("rdtsc\n\t" : "=a" (u
.s
.u32Lo
), "=d" (u
.s
.u32Hi
));
354 #define KPRF_NOW() kPrfNow()
358 * Atomically set a 32-bit value.
360 static inline void kPrfAtomicSet32(volatile KU32
*pu32
, const KU32 u32
)
362 #if defined(HAVE_INTRIN)
363 _InterlockedExchange((long volatile *)pu32
, (const long)u32
);
365 #elif defined(__GNUC__)
366 __asm__
__volatile__("xchgl %0, %1\n\t"
382 #define KPRF_ATOMIC_SET32(a,b) kPrfAtomicSet32(a, b)
387 * Atomically set a 64-bit value.
389 static inline void kPrfAtomicSet64(volatile KU64
*pu64
, KU64 u64
)
391 #if defined(HAVE_INTRIN) && KPRF_BITS == 64
392 _InterlockedExchange64((KI64
*)pu64
, (const KI64
)u64
);
394 #elif defined(__GNUC__) && KPRF_BITS == 64
395 __asm__
__volatile__("xchgq %0, %1\n\t"
399 #elif defined(__GNUC__) && KPRF_BITS == 32
400 __asm__
__volatile__("1:\n\t"
401 "lock; cmpxchg8b %1\n\t"
407 "c" ( (KU32
)(u64
>> 32) ));
412 mov ebx
, dword ptr
[u64
]
413 mov ecx
, dword ptr
[u64
+ 4]
415 mov eax
, dword ptr
[esi
]
416 mov edx
, dword ptr
[esi
+ 4]
425 #define KPRF_ATOMIC_SET64(a,b) kPrfAtomicSet64(a, b)
429 * Atomically add a 32-bit integer to another.
431 static inline void kPrfAtomicAdd32(volatile KU32
*pu32
, const KU32 u32
)
433 #if defined(HAVE_INTRIN)
434 _InterlockedExchangeAdd((volatile long *)pu32
, (const long)u32
);
436 #elif defined(__GNUC__)
437 __asm__
__volatile__("lock; addl %0, %1\n\t"
445 mov eax
, dword ptr
[u32
]
453 #define KPRF_ATOMIC_ADD32(a,b) kPrfAtomicAdd32(a, b)
454 #define KPRF_ATOMIC_INC32(a) kPrfAtomicAdd32(a, 1);
455 #define KPRF_ATOMIC_DEC32(a) kPrfAtomicAdd32(a, (KU32)-1);
459 * Atomically add a 64-bit integer to another.
460 * Atomically isn't quite required, just a non-corruptive manner, assuming all updates are adds.
462 static inline void kPrfAtomicAdd64(volatile KU64
*pu64
, const KU64 u64
)
464 #if defined(HAVE_INTRIN) && KPRF_BITS == 64
465 _InterlockedExchangeAdd64((volatile KI64
*)pu64
, (const KI64
)u64
);
467 #elif defined(__GNUC__) && KPRF_BITS == 64
468 __asm__
__volatile__("lock; addq %0, %1\n\t"
472 #elif defined(__GNUC__) && KPRF_BITS == 32
473 __asm__
__volatile__("lock; addl %0, %2\n\t"
474 "lock; adcl %1, %3\n\t"
475 : "=m" (*(volatile KU32
*)pu64
),
476 "=m" (*((volatile KU32
*)pu64
+ 1))
478 "r" ((KU32
)(u64
>> 32)));
484 mov eax
, dword ptr
[u64
]
485 mov ecx
, dword ptr
[u64
+ 4]
487 lock adc
[edx
+ 4], ecx
494 #define KPRF_ATOMIC_ADD64(a,b) kPrfAtomicAdd64(a, b)
495 #define KPRF_ATOMIC_INC64(a) kPrfAtomicAdd64(a, 1);
499 * Initializes a mutex.
501 * @returns 0 on success.
502 * @returns -1 on failure.
503 * @param pMutex The mutex to init.
505 static int kPrfMutexInit(KPRF_TYPE(P
,MUTEX
) pMutex
)
507 #if defined(KPRF_USE_PTHREAD)
508 if (!pthread_mutex_init(pMutex
, NULL
));
512 #elif K_OS == K_OS_WINDOWS
513 InitializeCriticalSection(pMutex
);
516 #elif K_OS == K_OS_OS2
517 if (!_fmutex_create(pMutex
, 0))
526 * @param pMutex The mutex to delete.
528 static void kPrfMutexDelete(KPRF_TYPE(P
,MUTEX
) pMutex
)
530 #if defined(KPRF_USE_PTHREAD)
531 pthread_mutex_destroy(pMutex
);
533 #elif K_OS == K_OS_WINDOWS
534 DeleteCriticalSection(pMutex
);
536 #elif K_OS == K_OS_OS2
537 _fmutex_close(pMutex
);
543 * @param pMutex The mutex lock.
545 static inline void kPrfMutexAcquire(KPRF_TYPE(P
,MUTEX
) pMutex
)
547 #if K_OS == K_OS_WINDOWS
548 EnterCriticalSection(pMutex
);
550 #elif defined(KPRF_USE_PTHREAD)
551 pthread_mutex_lock(pMutex
);
553 #elif K_OS == K_OS_OS2
554 fmutex_request(pMutex
);
561 * @param pMutex The mutex lock.
563 static inline void kPrfMutexRelease(KPRF_TYPE(P
,MUTEX
) pMutex
)
565 #if K_OS == K_OS_WINDOWS
566 LeaveCriticalSection(pMutex
);
568 #elif defined(KPRF_USE_PTHREAD)
569 pthread_mutex_lock(pMutex
);
571 #elif K_OS == K_OS_OS2
572 fmutex_request(pMutex
);
577 #define KPRF_THREADS_LOCK() kPrfMutexAcquire(&g_ThreadsMutex)
578 #define KPRF_THREADS_UNLOCK() kPrfMutexRelease(&g_ThreadsMutex)
580 #define KPRF_MODSEGS_LOCK() kPrfMutexAcquire(&g_ModSegsMutex)
581 #define KPRF_MODSEGS_UNLOCK() kPrfMutexRelease(&g_ModSegsMutex)
585 * Initializes a read-write lock.
587 * @returns 0 on success.
588 * @returns -1 on failure.
589 * @param pRWLock The read-write lock to initialize.
591 static inline int kPrfRWLockInit(KPRF_TYPE(P
,RWLOCK
) pRWLock
)
593 #if defined(KPRF_USE_PTHREAD)
594 if (!pthread_rwlock_init(pRWLock
, NULL
))
598 #elif K_OS == K_OS_WINDOWS || K_OS == K_OS_OS2
599 if (kPrfMutexInit(&pRWLock
->Mutex
))
601 pRWLock
->cReaders
= 0;
602 pRWLock
->cReadersWaiting
= 0;
603 pRWLock
->cWritersWaiting
= 0;
604 pRWLock
->enmState
= RWLOCK_STATE_SHARED
;
605 # if K_OS == K_OS_WINDOWS
606 pRWLock
->hevReaders
= CreateEvent(NULL
, TRUE
, TRUE
, NULL
);
607 pRWLock
->hevWriters
= CreateEvent(NULL
, FALSE
, FALSE
, NULL
);
608 if ( pRWLock
->hevReaders
!= INVALID_HANDLE_VALUE
609 && pRWLock
->hevWriters
!= INVALID_HANDLE_VALUE
)
611 CloseHandle(pRWLock
->hevReaders
);
612 CloseHandle(pRWLock
->hevWriters
);
614 # elif K_OS == K_OS_OS2
615 APIRET rc
= DosCreateEventSem(NULL
, &pRWLock
->hevReaders
, 0, TRUE
);
618 rc
= DosCreateEventSem(NULL
, &pRWLock
->hevWriters
, 0, TRUE
);
621 pRWLock
->hevWriters
= NULLHANDLE
;
622 DosCloseEventSem(pRWLock
->hevReaders
);
624 pRWLock
->hevReaders
= NULLHANDLE
;
627 pRWLock
->enmState
= RWLOCK_STATE_UNINITIALIZED
;
628 kPrfMutexDelete(&pRWLock
->Mutex
);
635 * Deletes a read-write lock.
637 * @param pRWLock The read-write lock to delete.
639 static inline void kPrfRWLockDelete(KPRF_TYPE(P
,RWLOCK
) pRWLock
)
641 #if defined(KPRF_USE_PTHREAD)
642 pthread_rwlock_destroy(pRWLock
);
644 #elif K_OS == K_OS_WINDOWS || K_OS == K_OS_OS2
645 if (pRWLock
->enmState
== RWLOCK_STATE_UNINITIALIZED
)
648 pRWLock
->enmState
= RWLOCK_STATE_UNINITIALIZED
;
649 kPrfMutexDelete(&pRWLock
->Mutex
);
650 pRWLock
->cReaders
= 0;
651 pRWLock
->cReadersWaiting
= 0;
652 pRWLock
->cWritersWaiting
= 0;
653 # if K_OS == K_OS_WINDOWS
654 CloseHandle(pRWLock
->hevReaders
);
655 pRWLock
->hevReaders
= INVALID_HANDLE_VALUE
;
656 CloseHandle(pRWLock
->hevWriters
);
657 pRWLock
->hevWriters
= INVALID_HANDLE_VALUE
;
659 # elif K_OS == K_OS_OS2
660 DosCloseEventSem(pRWLock
->hevReaders
);
661 pRWLock
->hevReaders
= NULLHANDLE
;
662 DosCloseEventSem(pRWLock
->hevWriters
);
663 pRWLock
->hevWriters
= NULLHANDLE
;
670 * Acquires read access to the read-write lock.
671 * @param pRWLock The read-write lock.
673 static inline void kPrfRWLockAcquireRead(KPRF_TYPE(P
,RWLOCK
) pRWLock
)
675 #if defined(KPRF_USE_PTHREAD)
676 pthread_rwlock_rdlock(pRWLock
);
678 #elif K_OS == K_OS_WINDOWS || K_OS == K_OS_OS2
679 if (pRWLock
->enmState
== RWLOCK_STATE_UNINITIALIZED
)
682 kPrfMutexAcquire(&pRWLock
->Mutex
);
683 if (pRWLock
->enmState
== RWLOCK_STATE_SHARED
)
685 KPRF_ATOMIC_INC32(&pRWLock
->cReaders
);
686 kPrfMutexRelease(&pRWLock
->Mutex
);
693 KPRF_ATOMIC_INC32(&pRWLock
->cReadersWaiting
);
694 # if K_OS == K_OS_WINDOWS
695 HANDLE hev
= pRWLock
->hevReaders
;
698 # elif K_OS == K_OS_OS2
699 HEV hev
= pRWLock
->hevReaders
;
701 DosResetEventSem(hev
, &cIgnored
);
704 kPrfMutexRelease(&pRWLock
->Mutex
);
706 # if K_OS == K_OS_WINDOWS
707 switch (WaitForSingleObject(hev
, INFINITE
))
709 case WAIT_IO_COMPLETION
:
718 # elif K_OS == K_OS_OS2
719 switch (DosWaitEventSem(hev
, SEM_INDEFINITE_WAIT
))
722 case ERROR_SEM_TIMEOUT
:
724 case ERROR_INTERRUPT
:
731 kPrfMutexAcquire(&pRWLock
->Mutex
);
732 if (pRWLock
->enmState
== RWLOCK_STATE_SHARED
)
734 KPRF_ATOMIC_INC32(&pRWLock
->cReaders
);
735 KPRF_ATOMIC_DEC32(&pRWLock
->cReadersWaiting
);
736 kPrfMutexRelease(&pRWLock
->Mutex
);
745 * Releases read access to the read-write lock.
746 * @param pRWLock The read-write lock.
748 static inline void kPrfRWLockReleaseRead(KPRF_TYPE(P
,RWLOCK
) pRWLock
)
750 #if defined(KPRF_USE_PTHREAD)
751 pthread_rwlock_unlock(pRWLock
);
753 #elif K_OS == K_OS_WINDOWS || K_OS == K_OS_OS2
754 if (pRWLock
->enmState
== RWLOCK_STATE_UNINITIALIZED
)
758 * If we're still in the shared state, or if there
759 * are more readers out there, or if there are no
760 * waiting writers, all we have to do is decrement an leave.
762 * That's the most frequent, thing and should be fast.
764 kPrfMutexAcquire(&pRWLock
->Mutex
);
765 KPRF_ATOMIC_DEC32(&pRWLock
->cReaders
);
766 if ( pRWLock
->enmState
== RWLOCK_STATE_SHARED
768 || !pRWLock
->cWritersWaiting
)
770 kPrfMutexRelease(&pRWLock
->Mutex
);
775 * Wake up one (or more on OS/2) waiting writers.
777 # if K_OS == K_OS_WINDOWS
778 SetEvent(pRWLock
->hevWriters
);
779 # elif K_OS == K_OS_OS2
780 DosPostEvent(pRWLock
->hevwriters
);
782 kPrfMutexRelease(&pRWLock
->Mutex
);
789 * Acquires write access to the read-write lock.
790 * @param pRWLock The read-write lock.
792 static inline void kPrfRWLockAcquireWrite(KPRF_TYPE(P
,RWLOCK
) pRWLock
)
794 #if defined(KPRF_USE_PTHREAD)
795 pthread_rwlock_wrlock(pRWLock
);
797 #elif K_OS == K_OS_WINDOWS || K_OS == K_OS_OS2
798 if (pRWLock
->enmState
== RWLOCK_STATE_UNINITIALIZED
)
801 kPrfMutexAcquire(&pRWLock
->Mutex
);
802 if ( !pRWLock
->cReaders
803 && ( pRWLock
->enmState
== RWLOCK_STATE_SHARED
804 || pRWLock
->enmState
== RWLOCK_STATE_LOCKING
)
807 KPRF_RWLOCK_SETSTATE(pRWLock
, RWLOCK_STATE_EXCLUSIVE
);
808 kPrfMutexRelease(&pRWLock
->Mutex
);
813 * We'll have to wait.
815 if (pRWLock
->enmState
== RWLOCK_STATE_SHARED
)
816 KPRF_RWLOCK_SETSTATE(pRWLock
, RWLOCK_STATE_LOCKING
);
817 KPRF_ATOMIC_INC32(&pRWLock
->cWritersWaiting
);
820 # if K_OS == K_OS_WINDOWS
821 HANDLE hev
= pRWLock
->hevWriters
;
822 # elif K_OS == K_OS_OS2
823 HEV hev
= pRWLock
->hevWriters
;
825 kPrfMutexRelease(&pRWLock
->Mutex
);
826 # if K_OS == K_OS_WINDOWS
827 switch (WaitForSingleObject(hev
, INFINITE
))
829 case WAIT_IO_COMPLETION
:
835 KPRF_ATOMIC_DEC32(&pRWLock
->cWritersWaiting
);
839 # elif K_OS == K_OS_OS2
840 switch (DosWaitEventSem(hev
, SEM_INDEFINITE_WAIT
))
843 case ERROR_SEM_TIMEOUT
:
845 case ERROR_INTERRUPT
:
848 KPRF_ATOMIC_DEC32(&pRWLock
->cWritersWaiting
);
852 DosResetEventSem(hev
, &cIgnored
);
856 * Try acquire the lock.
858 kPrfMutexAcquire(&pRWLock
->Mutex
);
859 if ( !pRWLock
->cReaders
860 && ( pRWLock
->enmState
== RWLOCK_STATE_SHARED
861 || pRWLock
->enmState
== RWLOCK_STATE_LOCKING
)
864 KPRF_RWLOCK_SETSTATE(pRWLock
, RWLOCK_STATE_EXCLUSIVE
);
865 KPRF_ATOMIC_DEC32(&pRWLock
->cWritersWaiting
);
866 kPrfMutexRelease(&pRWLock
->Mutex
);
875 * Releases write access to the read-write lock.
876 * @param pRWLock The read-write lock.
878 static inline void kPrfRWLockReleaseWrite(KPRF_TYPE(P
,RWLOCK
) pRWLock
)
880 #if defined(KPRF_USE_PTHREAD)
881 pthread_rwlock_unlock(pRWLock
);
883 #elif K_OS == K_OS_WINDOWS || K_OS == K_OS_OS2
884 if (pRWLock
->enmState
== RWLOCK_STATE_UNINITIALIZED
)
888 * The common thing is that there are noone waiting.
889 * But, before that usual paranoia.
891 kPrfMutexAcquire(&pRWLock
->Mutex
);
892 if (pRWLock
->enmState
!= RWLOCK_STATE_EXCLUSIVE
)
894 kPrfMutexRelease(&pRWLock
->Mutex
);
897 if ( !pRWLock
->cReadersWaiting
898 && !pRWLock
->cWritersWaiting
)
900 KPRF_RWLOCK_SETSTATE(pRWLock
, RWLOCK_STATE_SHARED
);
901 kPrfMutexRelease(&pRWLock
->Mutex
);
906 * Someone is waiting, wake them up as we change the state.
908 # if K_OS == K_OS_WINDOWS
909 HANDLE hev
= INVALID_HANDLE_VALUE
;
910 # elif K_OS == K_OS_OS2
911 HEV hev
= NULLHANDLE
;
914 if (pRWLock
->cWritersWaiting
)
916 KPRF_RWLOCK_SETSTATE(pRWLock
, RWLOCK_STATE_LOCKING
);
917 hev
= pRWLock
->hevWriters
;
921 KPRF_RWLOCK_SETSTATE(pRWLock
, RWLOCK_STATE_SHARED
);
922 hev
= pRWLock
->hevReaders
;
924 # if K_OS == K_OS_WINDOWS
926 # elif K_OS == K_OS_OS2
927 DosPostEvent(pRWLock
->hevwriters
);
929 kPrfMutexRelease(&pRWLock
->Mutex
);
934 #define KPRF_FUNCS_WRITE_LOCK() kPrfRWLockAcquireWrite(&g_FunctionsRWLock)
935 #define KPRF_FUNCS_WRITE_UNLOCK() kPrfRWLockReleaseWrite(&g_FunctionsRWLock)
936 #define KPRF_FUNCS_READ_LOCK() kPrfRWLockAcquireRead(&g_FunctionsRWLock)
937 #define KPRF_FUNCS_READ_UNLOCK() kPrfRWLockReleaseRead(&g_FunctionsRWLock)
943 * Finds the module segment which the address belongs to.
946 static int kPrfGetModSeg(KPRF_TYPE(,UPTR
) uAddress
, char *pszPath
, KU32 cchPath
, KU32
*piSegment
,
947 KPRF_TYPE(P
,UPTR
) puBasePtr
, KPRF_TYPE(P
,UPTR
) pcbSegmentMinusOne
)
949 #if K_OS == K_OS_WINDOWS
951 * Enumerate the module handles.
953 HANDLE hProcess
= GetCurrentProcess();
956 if ( !EnumProcessModules(hProcess
, &hModIgnored
, sizeof(hModIgnored
), &cbNeeded
)
957 && GetLastError() != ERROR_BUFFER_OVERFLOW
) /** figure out what this actually returns */
958 cbNeeded
= 256 * sizeof(HMODULE
);
960 cbNeeded
+= sizeof(HMODULE
) * 32;
961 HMODULE
*pahModules
= (HMODULE
*)alloca(cbNeeded
);
962 if (EnumProcessModules(hProcess
, pahModules
, cbNeeded
, &cbNeeded
))
964 const unsigned cModules
= cbNeeded
/ sizeof(HMODULE
);
965 for (unsigned i
= 0; i
< cModules
; i
++)
969 const KUPTR uImageBase
= (KUPTR
)pahModules
[i
];
973 PIMAGE_DOS_HEADER pDos
;
974 PIMAGE_NT_HEADERS pNt
;
975 PIMAGE_NT_HEADERS32 pNt32
;
976 PIMAGE_NT_HEADERS64 pNt64
;
981 /* reject modules higher than the address. */
985 /* Skip past the MZ header */
986 if (u
.pDos
->e_magic
== IMAGE_DOS_SIGNATURE
)
987 u
.pu8
+= u
.pDos
->e_lfanew
;
989 /* Ignore anything which isn't an NT header. */
990 if (u
.pNt
->Signature
!= IMAGE_NT_SIGNATURE
)
993 /* Extract necessary info from the optional header (comes in 32-bit and 64-bit variations, we simplify a bit). */
995 PIMAGE_SECTION_HEADER paSHs
;
996 if (u
.pNt
->FileHeader
.SizeOfOptionalHeader
== sizeof(IMAGE_OPTIONAL_HEADER32
))
998 paSHs
= (PIMAGE_SECTION_HEADER
)(u
.pNt32
+ 1);
999 cbImage
= u
.pNt32
->OptionalHeader
.SizeOfImage
;
1001 else if (u
.pNt
->FileHeader
.SizeOfOptionalHeader
== sizeof(IMAGE_OPTIONAL_HEADER64
))
1003 paSHs
= (PIMAGE_SECTION_HEADER
)(u
.pNt64
+ 1);
1004 cbImage
= u
.pNt64
->OptionalHeader
.SizeOfImage
;
1009 /* Is our address within the image size */
1010 KUPTR uRVA
= uAddress
- (KUPTR
)pahModules
[i
];
1011 if (uRVA
>= cbImage
)
1015 * Iterate the section headers and figure which section we're in.
1016 * (segment == section + 1)
1018 const KU32 cSHs
= u
.pNt
->FileHeader
.NumberOfSections
;
1019 if (uRVA
< paSHs
[0].VirtualAddress
)
1021 /* the implicit header section */
1022 *puBasePtr
= uImageBase
;
1023 *pcbSegmentMinusOne
= paSHs
[0].VirtualAddress
- 1;
1033 /* this shouldn't happen, but in case it does simply deal with it. */
1034 *puBasePtr
= paSHs
[iSH
- 1].VirtualAddress
+ paSHs
[iSH
- 1].Misc
.VirtualSize
+ uImageBase
;
1035 *pcbSegmentMinusOne
= cbImage
- *puBasePtr
;
1036 *piSegment
= iSH
+ 1;
1039 if (uRVA
- paSHs
[iSH
].VirtualAddress
< paSHs
[iSH
].Misc
.VirtualSize
)
1041 *puBasePtr
= paSHs
[iSH
].VirtualAddress
+ uImageBase
;
1042 *pcbSegmentMinusOne
= paSHs
[iSH
].Misc
.VirtualSize
;
1043 *piSegment
= iSH
+ 1;
1051 * Finally, get the module name.
1052 * There are multiple ways, try them all before giving up.
1054 if ( !GetModuleFileNameEx(hProcess
, pahModules
[i
], pszPath
, cchPath
)
1055 && !GetModuleFileName(pahModules
[i
], pszPath
, cchPath
)
1056 && !GetMappedFileName(hProcess
, (PVOID
)uAddress
, pszPath
, cchPath
)
1057 && !GetModuleBaseName(hProcess
, pahModules
[i
], pszPath
, cchPath
))
1061 __except (EXCEPTION_EXECUTE_HANDLER
)
1067 #elif K_OS == K_OS_OS2
1069 * Just ask the loader.
1073 HMODULE hmod
= NULLHANDLE
;
1074 APIRET rc
= DosQueryModFromEIP(&hmod
, &iObj
, cchPath
, pszPath
, &offObj
, uAddress
);
1078 *puBasePtr
= uAddress
- offObj
;
1079 *pcbSegmentMinusOne
= KPRF_ALIGN(offObj
, 0x1000) - 1; /* minimum size */
1082 * Query the page attributes starting at the current page. The query will not enter
1083 * into the next object since PAG_BASE is requested.
1086 ULONG fFlags
= ~0UL;
1087 uAddress
&= ~(KUPTR
)0xfff;
1088 rc
= DosQueryMem((PVOID
)(uAddress
, &cb
, &fFlags
);
1091 *pcbSegmentMinusOne
= (offObj
& ~(KUPTR
)0xfff) + KPRF_ALIGN(cb
, 0x1000) - 1;
1092 if ((fFlags
& PAG_BASE
) && cb
<= 0x1000) /* don't quite remember if PAG_BASE returns one page or not */
1096 rc
= DosQueryMem((PVOID
)(uAddress
+ 0x1000), &cb
, &fFlags
);
1097 if (!rc
& !(fFlags
& (PAG_BASE
| PAG_FREE
)))
1098 *pcbSegmentMinusOne
+= KPRF_ALIGN(cb
, 0x1000);
1105 /* The common fallback */
1109 *pcbSegmentMinusOne
= ~(KPRF_TYPE(,UPTR
))0;
1112 #define KPRF_GET_MODSEG(uAddress, pszPath, cchPath, piSegment, puBasePtr, pcbSegmentMinusOne) \
1113 kPrfGetModSeg(uAddress, pszPath, cchPath, piSegment, puBasePtr, pcbSegmentMinusOne)
1119 * Instantiate the implementation
1121 #include "prfcorepre.cpp.h"
1123 #include "prfcoremodseg.cpp.h"
1124 #include "prfcorefunction.cpp.h"
1125 #include "prfcore.cpp.h"
1126 #include "prfcoreinit.cpp.h"
1127 #include "prfcoreterm.cpp.h"
1129 #include "prfcorepost.cpp.h"
1136 * Registers an unknown thread.
1138 * @returns Pointer to the registered thread.
1140 static KPRF_TYPE(P
,THREAD
) kPrfGetThreadAutoReg(void)
1142 KUPTR uStackBasePtr
;
1145 /** @todo I'm sure Win32 has a way of obtaining the top and bottom of the stack, OS/2 did...
1146 * Some limit stuff in posix / ansi also comes to mind... */
1148 #elif K_OS == K_OS_OS2
1151 DosGetInfoBlocks(&pTib
, &pPib
); /* never fails except if you give it bad input, thus 'Get' not 'Query'. */
1152 /* I never recall which of these is the right one... */
1153 uStackBasePtr
= (KUPTR
)pTib
->tib_pstack
< (KUPTR
)pTib
->tib_pstack_limit
1154 ? (KUPTR
)pTib
->tib_pstack
1155 : (KUPTR
)pTib
->tib_pstack_limit
;
1158 /* the default is top of the current stack page (assuming a page to be 4KB) */
1159 uStackBasePtr
= (KUPTR
)&uStackBasePtr
;
1160 uStackBasePtr
= (uStackBasePtr
+ 0xfff) & ~(KUPTR
)0xfff;
1163 return KPRF_NAME(RegisterThread
)(uStackBasePtr
, "");
1168 * Gets an env.var. as a string.
1170 * @returns pszValue.
1171 * @param pszVar The variable name.
1172 * @param pszValue Where to store the value.
1173 * @param cchValue The size of the value buffer.
1174 * @param pszDefault The default value.
1176 static char *kPrfGetEnvString(const char *pszVar
, char *pszValue
, KU32 cchValue
, const char *pszDefault
)
1178 #if K_OS == K_OS_WINDOWS
1179 if (GetEnvironmentVariable(pszVar
, pszValue
, cchValue
))
1182 #elif K_OS == K_OS_OS2
1184 if ( !DosScanEnv((PCSZ
)pszVar
, &pszValue
)
1186 pszDefault
= pszValue
;
1189 const char *pszTmp
= getenv(pszVar
);
1191 pszDefault
= pszTmp
;
1196 * Copy the result into the buffer.
1198 char *psz
= pszValue
;
1199 while (*pszDefault
&& cchValue
-- > 1)
1200 *psz
++ = *pszDefault
++;
1208 * Gets the value of an env.var.
1210 * @returns The value of the env.var.
1211 * @returns The default if the value was not found.
1212 * @param pszVar The variable name.
1213 * @param uDefault The default value.
1215 static KU32
kPrfGetEnvValue(const char *pszVar
, KU32 uDefault
)
1217 #if K_OS == K_OS_WINDOWS
1219 const char *pszValue
= szBuf
;
1220 if (!GetEnvironmentVariable(pszVar
, szBuf
, sizeof(szBuf
)))
1223 #elif K_OS == K_OS_OS2
1225 if (DosScanEnv((PCSZ
)pszVar
, &pszValue
))
1229 const char *pszValue
= getenv(pszVar
);
1234 * Discard the obvious stuff.
1238 while (*pszValue
== ' ' || *pszValue
== '\t')
1244 * Interpret the value.
1246 unsigned uBase
= 10;
1248 const char *psz
= pszValue
;
1250 /* prefix - only hex */
1251 if (*psz
== '0' && (psz
[1] == 'x' || psz
[1] == 'X'))
1257 /* read the value */
1260 unsigned char ch
= (unsigned char)*psz
;
1261 if (ch
>= '0' && ch
<= '9')
1263 else if ( uBase
> 10
1264 && ch
>= 'a' && ch
<= 'f')
1266 else if ( uBase
> 10
1267 && ch
>= 'a' && ch
<= 'F')
1281 uValue
*= 1024*1024;
1291 * If the value is still 0, we return the default.
1293 return uValue
? uValue
: uDefault
;
1300 * @returns Pointer to the allocated memory.
1301 * @returns NULL on failure.
1302 * @param cb The amount of memory (in bytes) to allocate.
1304 static void *kPrfAllocMem(KU32 cb
)
1306 #if K_OS == K_OS_WINDOWS
1307 void *pv
= VirtualAlloc(NULL
, cb
, MEM_COMMIT
, PAGE_EXECUTE_READWRITE
);
1309 #elif defined(KPRF_USE_MMAN)
1310 void *pv
= mmap(NULL
, cb
, PROT_READ
| PROT_WRITE
| PROT_EXEC
, MAP_PRIVATE
| MAP_ANONYMOUS
, -1, 0);
1312 #elif K_OS == K_OS_OS2
1314 # ifdef INCL_DOSEXAPIS
1315 if (DosAllocMemEx(&pv
, cb
, PAG_READ
| PAG_WRITE
| PAG_EXECUTE
| PAG_COMMIT
| OBJ_FORK
))s
1317 if (DosAllocMem(&pv
, cb
, PAG_READ
| PAG_WRITE
| PAG_EXECUTE
| PAG_COMMIT
))
1322 # error not implemented
1331 * @param pv The memory to free.
1333 static void kPrfFreeMem(void *pv
)
1335 #if K_OS == K_OS_WINDOWS
1336 VirtualFree(pv
, 0, MEM_RELEASE
);
1338 #elif defined(KPRF_USE_MMAN)
1339 munmap(pv
, 0); /** @todo check if 0 is allowed here.. */
1341 #elif K_OS == K_OS_OS2
1342 # ifdef INCL_DOSEXAPIS
1349 # error not implemented
1355 * Writes a data buffer to a new file.
1357 * Any existing file will be overwritten.
1360 * @returns 0 on success.
1361 * @returns -1 on failure.
1363 * @param pszName The name of the file.
1364 * @param pvData The data to write.
1365 * @param cbData The amount of data to write.
1367 static int kPrfWriteFile(const char *pszName
, const void *pvData
, KU32 cbData
)
1369 #if K_OS == K_OS_WINDOWS
1371 HANDLE hFile
= CreateFile(pszName
,GENERIC_WRITE
, FILE_SHARE_READ
, NULL
,
1372 CREATE_ALWAYS
, FILE_ATTRIBUTE_NORMAL
, INVALID_HANDLE_VALUE
);
1373 if (hFile
!= INVALID_HANDLE_VALUE
)
1376 if ( WriteFile(hFile
, pvData
, cbData
, &dwWritten
, NULL
)
1377 && dwWritten
== cbData
)
1383 #elif K_OS == K_OS_OS2
1386 APIRET rc
= DosOpen(pszName
, &hFile
, &ulAction
, cbData
, FILE_NORMAL
,
1387 OPEN_ACTION_REPLACE_IF_EXISTS
| OPEN_ACTION_CREATE_IF_NEW
,
1388 OPEN_ACCESS_WRITEONLY
| OPEN_SHARE_DENYWRITE
| OPEN_FLAGS_NOINHERIT
| OPEN_FLAGS_SEQUENTIAL
,
1393 rc
= DosWrite(hFile
, pvData
, cbData
, &cbWritten
);
1394 if (!rc
&& cbWritten
!= cbData
)
1402 int fd
= open(pszName
, O_WRONLY
| O_CREAT
| O_BINARY
| O_TRUNC
, 0666);
1405 if (write(fd
, pvData
, cbData
) == cbData
)
1417 * Initializes and start the profiling.
1419 * This should typically be called from some kind of module init
1420 * function, so we can start profiling upon/before entering main().
1422 * @returns 0 on success
1423 * @returns -1 on failure.
1426 int kPrfInitialize(void)
1429 * Only initialize once.
1435 * Initial suggestions.
1437 KU32 cbModSegs
= kPrfGetEnvValue("KPRF2_CBMODSEGS", 128*1024);
1438 KU32 cFunctions
= kPrfGetEnvValue("KPRF2_CFUNCTIONS", 8192);
1439 KU32 cThreads
= kPrfGetEnvValue("KPRF2_CTHREADS", 256);
1440 KU32 cStacks
= kPrfGetEnvValue("KPRF2_CSTACKS", 48);
1441 KU32 cFrames
= kPrfGetEnvValue("KPRF2_CFRAMES", 448);
1442 KU32 fAffinity
= kPrfGetEnvValue("KPRF2_AFFINITY", 0);
1444 KU32 cb
= KPRF_NAME(CalcSize
)(cFunctions
, cbModSegs
, cThreads
, cStacks
, cFrames
);
1447 * Allocate and initialize the data set.
1449 void *pvBuf
= kPrfAllocMem(cb
);
1453 KPRF_TYPE(P
,HDR
) pHdr
= KPRF_NAME(Init
)(pvBuf
, cb
, cFunctions
, cbModSegs
, cThreads
, cStacks
, cFrames
);
1457 * Initialize semaphores.
1459 if (!kPrfMutexInit(&g_ThreadsMutex
))
1461 if (!kPrfMutexInit(&g_ModSegsMutex
))
1463 if (!kPrfRWLockInit(&g_FunctionsRWLock
))
1466 * Allocate the TLS entry.
1468 #if K_OS == K_OS_WINDOWS
1469 g_dwThreadTLS
= TlsAlloc();
1470 if (g_dwThreadTLS
!= TLS_OUT_OF_INDEXES
)
1472 #elif defined(KPRF_USE_PTHREAD)
1473 int rc
= pthread_key_create(&g_ThreadKey
, kPrfPThreadKeyDtor
);
1476 #elif K_OS == K_OS_OS2
1477 int rc
= DosAllocThreadLocalMemory(sizeof(void *), (PULONG
*)&g_ppThread
); /** @todo check if this is a count or a size. */
1483 * Apply the affinity mask, if specified.
1487 #if K_OS == K_OS_WINDOWS
1488 SetProcessAffinityMask(GetCurrentProcess(), fAffinity
);
1496 kPrfRWLockDelete(&g_FunctionsRWLock
);
1498 kPrfMutexDelete(&g_ModSegsMutex
);
1500 kPrfMutexDelete(&g_ThreadsMutex
);
1509 * Stops, dumps, and terminates the profiling.
1511 * This should typically be called from some kind of module destruction
1512 * function, so we can profile parts of the termination sequence too.
1514 * @returns 0 on success
1515 * @returns -1 on failure.
1518 int kPrfTerminate(void)
1521 * Stop the profiling.
1522 * As a safety precaution, sleep a little bit to allow threads
1523 * still at large inside profiler code some time to get out.
1526 KPRF_TYPE(P
,HDR
) pHdr
= g_pHdr
;
1531 #if K_OS == K_OS_WINDOWS
1533 #elif K_OS == K_OS_OS2
1540 * Unwind all active threads and so forth.
1542 KPRF_NAME(TerminateAll
)(pHdr
);
1545 * Use the stack space to fill in process details.
1547 #if K_OS == K_OS_WINDOWS
1548 /* all is one single string */
1549 const char *pszCommandLine
= GetCommandLine();
1551 KPRF_NAME(SetCommandLine
)(pHdr
, 1, &pszCommandLine
);
1553 #elif K_OS == K_OS_OS2 || K_OS == K_OS_OS2
1556 DosGetInfoBlocks(&pTib
, &pPib
);
1557 if (pPib
->pib_pchcmd
)
1559 /* Tradition say that the commandline is made up of two zero terminate strings
1560 * - first the executable name, then the arguments. Similar to what unix does,
1561 * only completely mocked up because of the CMD.EXE tradition.
1563 const char *apszArgs
[2];
1564 apszArgs
[0] = pPib
->pib_pchcmd
;
1565 apszArgs
[1] = pPib
->pib_pchcmd
;
1566 while (apszArgs
[1][0])
1569 KPRF_NAME(SetCommandLine
)(pHdr
, 2, apszArgs
);
1573 /* linux can read /proc/self/something I guess. Don't know about the rest... */
1578 * Write the file to disk.
1580 char szName
[260 + 16];
1581 kPrfGetEnvString("KPRF2_FILE", szName
, sizeof(szName
) - 16, "kPrf2-");
1583 /* append the process id */
1584 KUPTR pid
= kPrfGetProcessId();
1589 static char s_szDigits
[0x11] = "0123456789abcdef";
1590 KU32 uShift
= KPRF_BITS
- 4;
1592 && !(pid
& (0xf << uShift
)))
1594 *psz
++ = s_szDigits
[(pid
>> uShift
) & 0xf];
1598 *psz
++ = s_szDigits
[(pid
>> uShift
) & 0xf];
1610 /* write the file. */
1611 int rc
= kPrfWriteFile(szName
, pHdr
, pHdr
->cb
);
1617 #if K_OS == K_OS_WINDOWS
1618 TlsFree(g_dwThreadTLS
);
1619 g_dwThreadTLS
= TLS_OUT_OF_INDEXES
;
1621 #elif defined(KPRF_USE_PTHREAD)
1622 pthread_key_delete(g_ThreadKey
);
1623 g_ThreadKey
= (pthread_key_t
)-1;
1625 #elif K_OS == K_OS_OS2
1626 DosFreeThreadLocalMemory((PULONG
)g_ppThread
);
1633 kPrfMutexDelete(&g_ThreadsMutex
);
1634 kPrfMutexDelete(&g_ModSegsMutex
);
1635 kPrfRWLockDelete(&g_FunctionsRWLock
);
1642 * Terminate the current thread.
1644 void kPrfTerminateThread(void)
1646 KPRF_NAME(DeregisterThread
)();
1650 #ifdef KPRF_USE_PTHREAD
1654 static void kPrfPThreadKeyDtor(void *pvThread
)
1656 KPRF_TYPE(P
,HDR
) pHdr
= KPRF_GET_HDR();
1659 KPRF_TYPE(P
,THREAD
) pThread
= (KPRF_TYPE(P
,THREAD
))pvThread
;
1660 pthread_setspecific(g_ThreadKey
, pvThread
);
1661 KPRF_NAME(TerminateThread
)(pHdr
, pThread
, KPRF_NOW());
1662 pthread_setspecific(g_ThreadKey
, NULL
);