kDefs.h: Added risc-v to the K_ARCH_XXX defines.
[kstuff-mirror.git] / kProfiler2 / prfx86msc.asm
blobe5e9b4c3022b2ca03c80643a6f507dfa14f33b2d
1 ; $Id$
2 ;; @file
3 ; kProfiler Mark 2 - Microsoft C/C++ Compiler Interaction, x86.
7 ; Copyright (c) 2006-2007 Knut St. Osmundsen <bird-kStuff-spamix@anduin.net>
9 ; Permission is hereby granted, free of charge, to any person
10 ; obtaining a copy of this software and associated documentation
11 ; files (the "Software"), to deal in the Software without
12 ; restriction, including without limitation the rights to use,
13 ; copy, modify, merge, publish, distribute, sublicense, and/or sell
14 ; copies of the Software, and to permit persons to whom the
15 ; Software is furnished to do so, subject to the following
16 ; conditions:
18 ; The above copyright notice and this permission notice shall be
19 ; included in all copies or substantial portions of the Software.
21 ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 ; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23 ; OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24 ; NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25 ; HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26 ; WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27 ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28 ; OTHER DEALINGS IN THE SOFTWARE.
[section .data]

;; Calibration flag. calibrate stores 1 here as soon as it is entered and
;  common_return_path only calls calibrate while the low byte is still zero.
g_fCalibrated:  dd 0

;; Number of ticks common_overhead subtracts from the saved timestamp before
;  it computes the overhead. The value is tuned by calibrate.
g_OverheadAdj:  dd 0
[section .text]

; Profiler core entry points; they are defined outside this file.
extern  KPRF_ENTER
extern  KPRF_LEAVE

; The two hooks this file provides.
global  __penter
global  __pexit

; Internal labels. The conditional around them is commented out, so they are
; always exported (presumably to get readable symbols when debugging - confirm).
;ifdef UNDEFINED
global  common_return_path
global  common_overhead
global  common_no_overhead
global  calibrate
global  calib_inner_update_minimum
global  calib_inner_next
global  calib_outer_dec
global  calib_outer_inc
global  calib_done
global  calib_nullproc
;endif
;;
; __penter - function entry hook.
;
; On x86 the call to this hook has been observed to be the very first
; instruction of the instrumented function, i.e. it is placed before the
; stack frame is created.
;
; Stack layout when KPRF_ENTER is called (hex offsets from esp):
;       24  return address of the instrumented function.
;       20  our return address - address of the instrumented function + 5.
;       1c  saved eax
;       18  saved edx
;       14  saved eflags
;       10  saved ecx
;       0c  tsc high      - param 3
;       08  tsc low
;       04  frame pointer - param 2
;       00  function ptr  - param 1
;
; All four saved registers are restored by common_return_path.
;
        align   16
__penter:
        ; Save the volatile registers and sample the time stamp counter.
        push    eax
        push    edx
        rdtsc                                   ; edx:eax = time stamp
        pushfd
        push    ecx

        ; Build the cdecl argument area for KPRF_ENTER: dword, dword, qword.
        sub     esp, 16
        mov     [esp + 08h], eax                ; param 3 - time stamp, low dword
        mov     [esp + 0ch], edx                ;           time stamp, high dword
        lea     edx, [esp + 24h]                ; param 2 - address of the slot holding the
        mov     [esp + 04h], edx                ;           instrumented function's return address
        mov     eax, [esp + 20h]                ; param 1 - our return address ...
        sub     eax, 5                          ;           ... minus the 5 byte call = function address
        mov     [esp], eax

        call    KPRF_ENTER
        jmp     common_return_path              ; eax (the return value) is examined there
;;
; __pexit - function exit hook.
;
; On x86 the call to this hook has been observed to be placed right before
; the return instruction. That matters because the same stack address as in
; __penter has to be calculated for the frame pointer parameter.
;
; Stack layout when KPRF_LEAVE is called (hex offsets from esp):
;       24  return address of the instrumented function.
;       20  our return address - an address inside the instrumented function + 5.
;       1c  saved eax
;       18  saved edx
;       14  saved eflags
;       10  saved ecx
;       0c  tsc high      - param 3
;       08  tsc low
;       04  frame pointer - param 2
;       00  function ptr  - param 1
;
; All four saved registers are restored by common_return_path.
;
        align   16
__pexit:
        ; Save the volatile registers and sample the time stamp counter.
        push    eax
        push    edx
        rdtsc                                   ; edx:eax = time stamp
        pushfd
        push    ecx

        ; Build the cdecl argument area for KPRF_LEAVE: dword, dword, qword.
        sub     esp, 16
        mov     [esp + 08h], eax                ; param 3 - time stamp, low dword
        mov     [esp + 0ch], edx                ;           time stamp, high dword
        lea     edx, [esp + 24h]                ; param 2 - address of the slot holding the
        mov     [esp + 04h], edx                ;           instrumented function's return address
        mov     eax, [esp + 20h]                ; param 1 - our return address ...
        sub     eax, 5                          ;           ... minus the 5 byte call = some address in the function
        mov     [esp], eax

        call    KPRF_LEAVE
        jmp     common_return_path              ; eax (the return value) is examined there
;;
; This is the common return path for both the enter and exit hooks.
; It's kept common because we can then use the same overhead adjustment
; and save some calibration efforts. It also saves space :-)
;
; In:   eax             - value returned by KPRF_ENTER / KPRF_LEAVE. Zero means
;                         no overhead accounting, anything else is used as a
;                         pointer to a 64-bit overhead counter.
;       [esp + 08h]     - 64-bit time stamp sampled by the hook.
;       [esp + 10h] ... - ecx, eflags, edx and eax as pushed by the hook,
;                         followed by the hook's return address.
; Out:  Returns to the caller of the hook with ecx, eflags, edx and eax
;       restored and the 16 byte argument area removed.
;
; common_overhead is also entered directly (with eax = counter pointer and an
; equivalent stack) by calib_penter and calib_pexit.
;
        align   16
common_return_path:
        ; Update overhead
        test    eax, eax
        jz      common_no_overhead
        cmp     byte [g_fCalibrated], 0
        jnz     common_overhead
        call    calibrate                       ; saves/restores all registers (pushad/popad), so eax survives
common_overhead:
        mov     ecx, eax                        ; ecx <- pointer to overhead counter.
        mov     eax, [g_OverheadAdj]            ; apply the adjustment before reading tsc
        sub     [esp + 08h], eax
        sbb     dword [esp + 0ch], 0

        rdtsc
        sub     eax, [esp + 08h]                ; edx:eax = now - (hook time stamp - adjustment)
        sbb     edx, [esp + 0ch]
        add     [ecx], eax                      ; accumulate into the 64-bit counter
        adc     [ecx + 4], edx
common_no_overhead:
        add     esp, 4 + 4 + 8                  ; drop the argument area (dword + dword + qword)

        ; restore volatile registers.
        pop     ecx
        popfd
        pop     edx
        pop     eax
        ret                                     ; back into the instrumented function; without this
                                                ; execution would fall through into the code below
;;
; Scratch data esi points to while we're calibrating.
; Every Lo/Hi pair forms one 64-bit tick count.
;
struc CALIBDATA
    .OverheadLo:    resd 1      ; overhead accumulated via common_overhead
    .OverheadHi:    resd 1
    .ProfiledLo:    resd 1      ; time between the calib_penter and calib_pexit time stamps, accumulated
    .ProfiledHi:    resd 1
    .EnterTSLo:     resd 1      ; time stamp stored by the most recent calib_penter
    .EnterTSHi:     resd 1
    .MinLo:         resd 1      ; smallest (Profiled - Overhead) seen by the current inner loop
    .MinHi:         resd 1
endstruc
189 align 16
191 ; Do necessary calibrations.
193 calibrate:
194 ; prolog
195 push ebp
196 mov ebp, esp
197 pushfd
198 pushad
199 sub esp, CALIBDATA_size
200 mov esi, esp ; esi points to the CALIBDATA
203 ; Indicate that we have finished calibrating.
205 mov eax, 1
206 xchg dword [g_fCalibrated], eax
209 ; The outer loop - find the right adjustment.
211 mov ebx, 200h ; loop counter.
212 calib_outer_loop:
215 ; The inner loop - calls the function number of times to establish a
216 ; good minimum value
218 mov ecx, 200h
219 mov dword [esi + CALIBDATA.MinLo], 0ffffffffh
220 mov dword [esi + CALIBDATA.MinHi], 07fffffffh
221 calib_inner_loop:
223 ; zero the overhead and profiled times.
224 xor eax, eax
225 mov [esi + CALIBDATA.OverheadLo], eax
226 mov [esi + CALIBDATA.OverheadHi], eax
227 mov [esi + CALIBDATA.ProfiledLo], eax
228 mov [esi + CALIBDATA.ProfiledHi], eax
229 call calib_nullproc
231 ; subtract the overhead
232 mov eax, [esi + CALIBDATA.ProfiledLo]
233 mov edx, [esi + CALIBDATA.ProfiledHi]
234 sub eax, [esi + CALIBDATA.OverheadLo]
235 sbb edx, [esi + CALIBDATA.OverheadHi]
237 ; update the minimum value.
238 test edx, 080000000h
239 jnz near calib_outer_dec ; if negative, just simplify and shortcut
240 cmp edx, [esi + CALIBDATA.MinHi]
241 jg calib_inner_next
242 jl calib_inner_update_minimum
243 cmp eax, [esi + CALIBDATA.MinLo]
244 jge calib_inner_next
245 calib_inner_update_minimum:
246 mov [esi + CALIBDATA.MinLo], eax
247 mov [esi + CALIBDATA.MinHi], edx
248 calib_inner_next:
249 loop calib_inner_loop
251 ; Is the minimum value acceptable?
252 test dword [esi + CALIBDATA.MinHi], 80000000h
253 jnz calib_outer_dec ; simplify if negative.
254 cmp dword [esi + CALIBDATA.MinHi], 0
255 jnz calib_outer_inc ; this shouldn't be possible
256 cmp dword [esi + CALIBDATA.MinLo], 1fh
257 jbe calib_outer_dec ; too low - 2 ticks per pair is the minimum!
258 cmp dword [esi + CALIBDATA.MinLo], 30h
259 jbe calib_done ; this is fine!
260 calib_outer_inc:
261 inc dword [g_OverheadAdj]
262 jmp calib_outer_next
263 calib_outer_dec:
264 cmp dword [g_OverheadAdj], 1
265 je calib_done
266 dec dword [g_OverheadAdj]
267 calib_outer_next:
268 dec ebx
269 jnz calib_outer_loop
270 calib_done:
272 ; epilog
273 add esp, CALIBDATA_size
274 popad
275 popfd
276 leave
;;
; The calibration __penter - this must be identical to the real thing except
; for the KPRF call.
;
; In:   esi - pointer to the CALIBDATA.
; Leaves via common_overhead, which restores the registers saved here and
; returns to our caller.
;
        align   16
calib_penter:
        ; This part must be identical to __penter.
        push    eax
        push    edx
        rdtsc                                   ; edx:eax = time stamp
        pushfd
        push    ecx

        ; Remember when we entered; calib_pexit needs it.
        mov     [esi + CALIBDATA.EnterTSHi], edx
        mov     [esi + CALIBDATA.EnterTSLo], eax

        ; Create the same 16 byte frame the real hook has at common_overhead.
        push    edx                             ; [esp + 0ch] time stamp, high dword
        push    eax                             ; [esp + 08h] time stamp, low dword
        push    0                               ; [esp + 04h] stands in for the frame pointer parameter
        push    0                               ; [esp + 00h] stands in for the function parameter

        ; eax = counter pointer, the role the KPRF_ENTER return value plays in the real hook.
        lea     eax, [esi + CALIBDATA.OverheadLo]
        jmp     common_overhead
;;
; The calibration __pexit - this must be identical to the real thing except
; for the KPRF call.
;
; In:   esi - pointer to the CALIBDATA.
; Leaves via common_overhead, which restores the registers saved here and
; returns to our caller.
;
        align   16
calib_pexit:
        ; This part must be identical to __pexit.
        push    eax
        push    edx
        rdtsc                                   ; edx:eax = time stamp
        pushfd
        push    ecx

        ; Profiled += now - EnterTS. The time stamp is parked on the stack
        ; while eax/edx are used for the subtraction.
        push    eax
        push    edx
        sub     eax, [esi + CALIBDATA.EnterTSLo]
        sbb     edx, [esi + CALIBDATA.EnterTSHi]
        add     [esi + CALIBDATA.ProfiledLo], eax
        adc     [esi + CALIBDATA.ProfiledHi], edx
        pop     edx
        pop     eax

        ; Create the same 16 byte frame the real hook has at common_overhead.
        push    edx                             ; [esp + 0ch] time stamp, high dword
        push    eax                             ; [esp + 08h] time stamp, low dword
        push    0                               ; [esp + 04h] stands in for the frame pointer parameter
        push    0                               ; [esp + 00h] stands in for the function parameter

        ; The counter pointer is EnterTS here, not Overhead, so common_overhead
        ; adds this overhead into the EnterTS field.
        ; NOTE(review): presumably a deliberate throw-away target, since
        ; calib_penter overwrites EnterTS on its next call - confirm.
        lea     eax, [esi + CALIBDATA.EnterTSLo]
        jmp     common_overhead
;;
; The 'function' we're profiling.
; The general idea is that each pair should take something like 2-10 ticks.
;
; (Btw. If we don't use multiple pairs here, we end up with the wrong result.)
;
; In:   esi - pointer to the CALIBDATA (used by calib_penter / calib_pexit).
; The body is 16 back-to-back enter/exit pairs. The thresholds calibrate
; compares the result against (1fh and 30h ticks) are for all pairs together,
; so they need revisiting if the pair count is changed.
;
        align   16
calib_nullproc:
%rep 16
        call    calib_penter
        call    calib_pexit
%endrep
        ret                                     ; return to calibrate instead of running off the end