3 ; kProfiler Mark 2 - Microsoft C/C++ Compiler Interaction, x86.
7 ; Copyright (c) 2006-2007 Knut St. Osmundsen <bird-kStuff-spamix@anduin.net>
9 ; Permission is hereby granted, free of charge, to any person
10 ; obtaining a copy of this software and associated documentation
11 ; files (the "Software"), to deal in the Software without
12 ; restriction, including without limitation the rights to use,
13 ; copy, modify, merge, publish, distribute, sublicense, and/or sell
14 ; copies of the Software, and to permit persons to whom the
15 ; Software is furnished to do so, subject to the following
18 ; The above copyright notice and this permission notice shall be
19 ; included in all copies or substantial portions of the Software.
21 ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 ; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23 ; OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24 ; NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25 ; HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26 ; WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27 ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28 ; OTHER DEALINGS IN THE SOFTWARE.
; The labels below are declared with NASM's `global` directive, which makes
; them visible outside this object file.
; NOTE(review): presumably exported so they show up as symbols in debuggers
; and symbol listings - confirm.
47 global common_return_path
48 global common_overhead
49 global common_no_overhead
51 global calib_inner_update_minimum
52 global calib_inner_next
53 global calib_outer_dec
54 global calib_outer_inc
61 ; On x86 the call to this function has been observed to be put before
62 ; creating the stack frame, as the very first instruction in the function.
64 ; Thus the stack layout is as follows:
65 ; 24 return address of the calling function.
66 ; 20 our return address - the address of the calling function + 5.
71 ; c tsc high - param 3
73 ; 4 frame pointer - param 2
74 ; 0 function ptr - param 1
79 ; save volatile register and get the time stamp.
86 ; setting up the enter call frame (cdecl).
; All offsets below are hexadecimal and relative to esp; they refer to the
; slots listed in the stack layout above.
88 mov [esp + 0ch], edx ; Param 3 - the timestamp
; edx = address of the slot at esp+24h (the caller's return address slot).
90 lea edx, [esp + 24h] ; Param 2 - frame pointer (pointer to the return address of the function calling us)
; eax = our own return address; subtracting 5 steps back over the call
; instruction, which per the layout above yields the hooked function's address.
92 mov eax, [esp + 20h] ; Param 1 - The function address
93 sub eax, 5 ; call instruction
; Continue in the return path shared with the exit hook.
97 jmp common_return_path
101 ; On x86 the call to this function has been observed to be put right before
102 ; return instruction. This fact matters since we have to calc the same
103 ; stack address as in _penter.
105 ; Thus the stack layout is as follows:
106 ; 24 return address of the calling function.
107 ; 20 our return address - the address of the calling function + 5.
112 ; c tsc high - param 3
114 ; 4 frame pointer - param 2
115 ; 0 function ptr - param 1
120 ; save volatile register and get the time stamp.
127 ; setting up the leave call frame (cdecl).
; All offsets below are hexadecimal and relative to esp; they refer to the
; slots listed in the stack layout above.
129 mov [esp + 0ch], edx ; Param 3 - the timestamp
; edx = address of the slot at esp+24h, the same slot the enter hook uses.
131 lea edx, [esp + 24h] ; Param 2 - frame pointer (pointer to the return address of the function calling us)
; eax = our own return address minus 5, i.e. the address of the call
; instruction that invoked this hook (somewhere inside the hooked function).
133 mov eax, [esp + 20h] ; Param 1 - Some address in the function.
134 sub eax, 5 ; call instruction
; Continue in the return path shared with the enter hook.
138 jmp common_return_path
142 ; This is the common return path for both the enter and exit hooks.
143 ; It's kept common because we can then use the same overhead adjustment
144 ; and save some calibration efforts. It also saves space :-)
; With ZF set, control goes straight to common_no_overhead.
149 jz common_no_overhead
; Compare the g_fCalibrated byte against zero (the calibration code writes
; this flag with an xchg).
150 cmp byte [g_fCalibrated
], 0
; eax is moved to ecx first because eax is reloaded with g_OverheadAdj next.
154 mov ecx, eax ; ecx <- pointer to overhead counter.
155 mov eax, [g_OverheadAdj
] ; apply the adjustment before reading tsc
; Subtract only the borrow (CF) from the dword at esp+0ch.
; NOTE(review): presumably the high half of the 64-bit timestamp whose low
; dword was adjusted just before - confirm.
157 sbb dword [esp + 0ch], 0
167 ; restore volatile registers.
175 ; Data esi points to while we're calibrating.
191 ; Do necessary calibrations.
; Reserve CALIBDATA_size bytes on the stack as scratch space; the matching
; add esp is at the end of this routine.
199 sub esp, CALIBDATA_size
200 mov esi, esp ; esi points to the CALIBDATA
203 ; Indicate that we have finished calibrating.
206 xchg dword [g_fCalibrated
], eax
209 ; The outer loop - find the right adjustment.
211 mov ebx, 200h ; loop counter.
215 ; The inner loop - calls the function a number of times to establish a
; minimum value.
; Start the minimum at 7fffffff:ffffffff, the largest positive signed 64-bit
; value.
219 mov dword [esi + CALIBDATA.MinLo
], 0ffffffffh
220 mov dword [esi + CALIBDATA.MinHi
], 07fffffffh
223 ; zero the overhead and profiled times.
; eax is stored into all four fields (zero, per the comment above).
225 mov [esi + CALIBDATA.OverheadLo
], eax
226 mov [esi + CALIBDATA.OverheadHi
], eax
227 mov [esi + CALIBDATA.ProfiledLo
], eax
228 mov [esi + CALIBDATA.ProfiledHi
], eax
231 ; subtract the overhead
; edx:eax = Profiled - Overhead, as a 64-bit subtract with borrow.
232 mov eax, [esi + CALIBDATA.ProfiledLo
]
233 mov edx, [esi + CALIBDATA.ProfiledHi
]
234 sub eax, [esi + CALIBDATA.OverheadLo
]
235 sbb edx, [esi + CALIBDATA.OverheadHi
]
237 ; update the minimum value.
239 jnz near calib_outer_dec
; if negative, just simplify and shortcut
; Compare the high dwords first (signed); a smaller high dword is enough to
; replace the stored minimum.
240 cmp edx, [esi + CALIBDATA.MinHi
]
242 jl calib_inner_update_minimum
243 cmp eax, [esi + CALIBDATA.MinLo
]
245 calib_inner_update_minimum:
; Record edx:eax as the new minimum.
246 mov [esi + CALIBDATA.MinLo
], eax
247 mov [esi + CALIBDATA.MinHi
], edx
; `loop` decrements ecx and branches back while it is non-zero.
249 loop calib_inner_loop
251 ; Is the minimum value acceptable?
; Bit 31 of MinHi is the sign bit of the 64-bit minimum.
252 test dword [esi + CALIBDATA.MinHi
], 80000000h
253 jnz calib_outer_dec
; simplify if negative.
254 cmp dword [esi + CALIBDATA.MinHi
], 0
255 jnz calib_outer_inc
; this shouldn't be possible
; Accept the current adjustment when 1fh < MinLo <= 30h (unsigned compares).
256 cmp dword [esi + CALIBDATA.MinLo
], 1fh
257 jbe calib_outer_dec
; too low - 2 ticks per pair is the minimum!
258 cmp dword [esi + CALIBDATA.MinLo
], 30h
259 jbe calib_done
; this is fine!
; Minimum still above 30h: increase the global overhead adjustment.
261 inc dword [g_OverheadAdj
]
; NOTE(review): this compare against 1 presumably keeps the decrement below
; from taking g_OverheadAdj under its lower bound - confirm against the
; branch that follows the compare.
264 cmp dword [g_OverheadAdj
], 1
266 dec dword [g_OverheadAdj
]
; Release the CALIBDATA scratch space reserved at the top.
273 add esp, CALIBDATA_size
283 ; The calibration __penter - this must be identical to the real thing except for the KPRF call.
286 ; This part must be identical
; Store edx:eax into the CALIBDATA enter timestamp; the calibration exit
; hook subtracts this value again.
294 mov [esi + CALIBDATA.EnterTSLo
], eax
295 mov [esi + CALIBDATA.EnterTSHi
], edx
297 ; create the call frame
; eax = address of the CALIBDATA overhead counter (the common return path
; treats eax as the pointer to the overhead counter).
303 lea eax, [esi + CALIBDATA.OverheadLo
]
308 ; The calibration __pexit - this must be identical to the real thing except for the KPRF call.
311 ; This part must be identical
; edx:eax -= EnterTS (64-bit subtract with borrow), giving the time elapsed
; since the value stored by the calibration enter hook.
321 sub eax, [esi + CALIBDATA.EnterTSLo
]
322 sbb edx, [esi + CALIBDATA.EnterTSHi
]
; Profiled += edx:eax (64-bit add with carry).
323 add [esi + CALIBDATA.ProfiledLo
], eax
324 adc [esi + CALIBDATA.ProfiledHi
], edx
328 ; create the call frame
; eax = address of EnterTSLo.
; NOTE(review): the calibration enter hook passes OverheadLo here instead;
; presumably EnterTS serves as a throw-away overhead target on the exit side
; so that exit overhead is not added to Overhead - confirm this is deliberate.
334 lea eax, [esi + CALIBDATA.EnterTSLo
]
339 ; The 'function' we're profiling.
340 ; The general idea is that each pair should take something like 2-10 ticks.
342 ; (Btw. If we don't use multiple pairs here, we end up with the wrong result.)