dnl  IA-64 mpn_addmul_2 -- Multiply an n-limb number with a 2-limb number and
dnl  add the result to an (n+1)-limb number.
4 dnl Contributed to the GNU project by Torbjorn Granlund.
6 dnl Copyright
2004, 2005, 2011 Free Software Foundation
, Inc.
8 dnl
This file is part of the GNU MP Library.
10 dnl The GNU MP Library is free software
; you can redistribute it and/or modify
11 dnl it under the terms of
either:
13 dnl
* the GNU Lesser General
Public License as published by the Free
14 dnl Software Foundation
; either version 3 of the License, or (at your
15 dnl option
) any later version.
19 dnl
* the GNU General
Public License as published by the Free Software
20 dnl Foundation
; either version 2 of the License, or (at your option) any
23 dnl
or both
in parallel
, as here.
25 dnl The GNU MP Library is distributed
in the hope that it will be useful
, but
26 dnl WITHOUT ANY WARRANTY
; without even the implied warranty of MERCHANTABILITY
27 dnl
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
Public License
30 dnl You should have received copies of the GNU General
Public License
and the
31 dnl GNU Lesser General
Public License along with the GNU MP Library. If
not,
32 dnl see
https://www.gnu.
org/licenses
/.
34 include(`..
/config.m4
')
41 C * Clean up variable names, and try to decrease the number of distinct
43 C * Clean up feed-in code to not require zeroing several registers.
44 C * Make sure we don't depend on uninitialised predicate registers.
45 C
* Could perhaps save a few cycles by using
1 c
/l carry propagation
in
C   * Ultimately rewrite.  The problem with this code is that it first uses a
C     loaded u value in one xma pair, then leaves it live over several unrelated
C     xma pairs, before it uses it again.  It should actually be quite possible
C     to just swap some aligned xma pairs around.  But we should then schedule
C     u loads further from the first use.
C Symbolic register names.  Every name comes in four copies, selected by the
C trailing _0 .. _3 digit.
C
C General registers r16-r27.  In the code below these are written by getfsig:
C   pr0_N   receives the significand of fp0b_N
C   pr1_N   receives the significand of fp1b_N
C   acc1_N  receives the significand of fp2a_N
define(`pr0_0',`r16')
define(`pr0_1',`r17')
define(`pr0_2',`r18')
define(`pr0_3',`r19')

define(`pr1_0',`r20')
define(`pr1_1',`r21')
define(`pr1_2',`r22')
define(`pr1_3',`r23')

define(`acc1_0',`r24')
define(`acc1_1',`r25')
define(`acc1_2',`r26')
define(`acc1_3',`r27')

C Floating-point registers f8-f15 and f32-f47.  In the main loop:
C   fp0b_N / fp1a_N  are the xma.l / xma.hu results of u_N * v0 + r_N
C   fp1b_N / fp2a_N  are the xma.l / xma.hu results of u_N * v1 + fp1a_N
C   r_N              is loaded with ldf8 through srp
C   u_N              is loaded with ldf8 through up
define(`fp0b_0',`f8')
define(`fp0b_1',`f9')
define(`fp0b_2',`f10')
define(`fp0b_3',`f11')

define(`fp1a_0',`f12')
define(`fp1a_1',`f13')
define(`fp1a_2',`f14')
define(`fp1a_3',`f15')

define(`fp1b_0',`f32')
define(`fp1b_1',`f33')
define(`fp1b_2',`f34')
define(`fp1b_3',`f35')

define(`fp2a_0',`f36')
define(`fp2a_1',`f37')
define(`fp2a_2',`f38')
define(`fp2a_3',`f39')

define(`r_0',`f40')
define(`r_1',`f41')
define(`r_2',`f42')
define(`r_3',`f43')

define(`u_0',`f44')
define(`u_1',`f45')
define(`u_2',`f46')
define(`u_3',`f47')
C mpn_addmul_2s -- alternate entry point.  The visible feed-in below matches
C the feed-in of mpn_addmul_2 instruction for instruction, except that it
C dispatches to L(x00)..L(x11) instead of L(b00)..L(b11).  Each x-label clears
C the predicate that guards the initial ux * v1 xma pair in the corresponding
C b-path (the pairs commented as suppressed for addmul_2s).
C NOTE(review): this excerpt lacks several original lines (bundle closers, the
C instruction that writes r14, whatever follows each x-label, and EPILOGUE);
C presumably each x-label continues into the matching b-label -- confirm
C against the complete file.
105 PROLOGUE
(mpn_addmul_2s
)
C When HAVE_ABI_32 is defined, the three pointer arguments rp, up and vp are
C each passed through addp4 before being used as addresses.
110 ifdef
(`HAVE_ABI_32
',`
111 {.mmi; addp4 rp = 0, rp C M I
112 addp4 up = 0, up C M I
113 addp4 vp = 0, vp C M I
C Load the first operands: ux and uy from [up] (up advances by 16 in total),
C v0 from [vp] (vp advances by 8), rx and ry from [rp].  The +8 / -8
C post-increments on rp cancel, so rp ends up back at its value before the
C loads.
120 {.mmi; ldf8 ux = [up], 8 C M
121 ldf8 v0 = [vp], 8 C M
123 }{.mmi; ldf8 rx = [rp], 8 C M
127 }{.mmi; ldf8 uy = [up], 8 C M
130 }{.mmi; ldf8 ry = [rp], -8 C M
C p14, p11 and p15 become true when r14 equals 1, 2 and 3 respectively.
C NOTE(review): r14 is written on a line not present here -- presumably the
C low two bits of n; confirm.
131 cmp.eq p14, p0 = 1, r14 C M I
132 cmp.eq p11, p0 = 2, r14 C M I
C srp = rp + 16; the code further down uses srp as the address for the
C r_N loads.
134 }{.mmi; add srp = 16, rp C M I
135 cmp.eq p15, p0 = 3, r14 C M I
C Three-way dispatch on the predicates just computed.  If none is set,
C execution continues at the next visible label, L(x00).
137 }{.bbb; (p14) br.dptk L(x01) C B
138 (p11) br.dptk L(x10) C B
139 (p15) br.dptk L(x11) C B
C cmp.ne of r0 against itself can never be true, so each compare below simply
C clears its first target predicate (p6, p14, p11, p15 in turn); the second
C target is p0, so nothing else is written.
142 L
(x00
): cmp.ne p6
, p0
= r0
, r0 C suppress initial xma pair
145 L
(x01
): cmp.ne p14
, p0
= r0
, r0 C suppress initial xma pair
148 L
(x10
): cmp.ne p11
, p0
= r0
, r0 C suppress initial xma pair
151 L
(x11
): cmp.ne p15
, p0
= r0
, r0 C suppress initial xma pair
157 PROLOGUE
(mpn_addmul_2
)
162 ifdef
(`HAVE_ABI_32
',`
163 {.mmi; addp4 rp = 0, rp C M I
164 addp4 up = 0, up C M I
165 addp4 vp = 0, vp C M I
172 {.mmi; ldf8 ux = [up], 8 C M
173 ldf8 v0 = [vp], 8 C M
175 }{.mmi; ldf8 rx = [rp], 8 C M
179 }{.mmi; ldf8 uy = [up], 8 C M
182 }{.mmi; ldf8 ry = [rp], -8 C M
183 cmp.eq p14, p0 = 1, r14 C M I
184 cmp.eq p11, p0 = 2, r14 C M I
186 }{.mmi; add srp = 16, rp C M I
187 cmp.eq p15, p6 = 3, r14 C M I
189 }{.bbb; (p14) br.dptk L(b01) C B
190 (p11) br.dptk L(b10) C B
191 (p15) br.dptk L(b11) C B
196 {.mmi; ldf8 r_1 = [srp], 8
199 }{.mmi; mov pr1_2 = 0
201 cmp.ne p8, p9 = r0, r0
203 }{.mfi; ldf8 r_2 = [srp], 8
204 xma.l fp0b_3 = ux, v0, rx
205 cmp.ne p12, p13 = r0, r0
206 }{.mfb; ldf8 u_2 = [up], 8
207 xma.hu fp1b_3 = ux, v0, rx
210 xma.l fp0b_0
= uy
, v0
, ry
211 xma.hu fp1a_0
= uy
, v0
, ry
213 getfsig acc0
= fp0b_3
214 (p6
) xma.hu fp2a_3
= ux
, v1
, fp1b_3 C suppressed for addmul_2s
215 (p6
) xma.l fp1b_3
= ux
, v1
, fp1b_3 C suppressed for addmul_2s
217 xma.l fp0b_1
= u_1
, v0
, r_1
218 xma.hu fp1a_1
= u_1
, v0
, r_1
220 getfsig pr0_0
= fp0b_0
221 xma.l fp1b_0
= uy
, v1
, fp1a_0
222 xma.hu fp2a_0
= uy
, v1
, fp1a_0
224 getfsig pr1_3
= fp1b_3
225 getfsig acc1_3
= fp2a_3
226 xma.l fp0b_2
= u_2
, v0
, r_2
227 xma.hu fp1a_2
= u_2
, v0
, r_2
230 L
(gt4
): xma.l fp0b_0
= uy
, v0
, ry
231 xma.hu fp1a_0
= uy
, v0
, ry
234 getfsig acc0
= fp0b_3
235 (p6
) xma.hu fp2a_3
= ux
, v1
, fp1b_3 C suppressed for addmul_2s
237 (p6
) xma.l fp1b_3
= ux
, v1
, fp1b_3 C suppressed for addmul_2s
239 xma.l fp0b_1
= u_1
, v0
, r_1
240 xma.hu fp1a_1
= u_1
, v0
, r_1
243 getfsig pr0_0
= fp0b_0
244 xma.l fp1b_0
= uy
, v1
, fp1a_0
245 xma.hu fp2a_0
= uy
, v1
, fp1a_0
248 getfsig pr1_3
= fp1b_3
249 xma.l fp0b_2
= u_2
, v0
, r_2
251 getfsig acc1_3
= fp2a_3
252 xma.hu fp1a_2
= u_2
, v0
, r_2
258 {.mmi; ldf8 r_0 = [srp], 8 C M
259 ldf8 u_0 = [up], 8 C M
261 }{.mmi; mov pr1_1 = 0 C M I
263 cmp.ne p6, p7 = r0, r0 C M I
265 }{.mfi; ldf8 r_1 = [srp], 8 C M
266 xma.l fp0b_2 = ux, v0, rx C F
267 cmp.ne p10, p11 = r0, r0 C M I
268 }{.mfi; ldf8 u_1 = [up], 8 C M
269 xma.hu fp1b_2 = ux, v0, rx C F
272 } xma.l fp0b_3
= uy
, v0
, ry C F
273 xma.hu fp1a_3
= uy
, v0
, ry C F
275 {.mmf; getfsig acc0 = fp0b_2 C M
276 ldf8 r_2 = [srp], 8 C M
277 (p14) xma.hu fp2a_2 = ux, v1,fp1b_2 C F suppressed for addmul_2s
278 }{.mfb; ldf8 u_2 = [up], 8 C M
279 (p14) xma.l fp1b_2 = ux, v1,fp1b_2 C F suppressed for addmul_2s
282 xma.l fp0b_0
= u_0
, v0
, r_0 C F
283 xma.hu fp1a_0
= u_0
, v0
, r_0 C F
285 getfsig pr0_3
= fp0b_3 C M
286 xma.l fp1b_3
= uy
, v1
,fp1a_3 C F
287 xma.hu fp2a_3
= uy
, v1
,fp1a_3 C F
289 getfsig pr1_2
= fp1b_2 C M
290 getfsig acc1_2
= fp2a_2 C M
291 xma.l fp0b_1
= u_1
, v0
, r_1 C F
292 xma.hu fp1a_1
= u_1
, v0
, r_1 C F
295 L
(gt5
): xma.l fp0b_0
= u_0
, v0
, r_0
296 xma.hu fp1a_0
= u_0
, v0
, r_0
298 getfsig pr0_3
= fp0b_3
300 xma.l fp1b_3
= uy
, v1
, fp1a_3
301 xma.hu fp2a_3
= uy
, v1
, fp1a_3
304 getfsig pr1_2
= fp1b_2
305 xma.l fp0b_1
= u_1
, v0
, r_1
307 getfsig acc1_2
= fp2a_2
308 xma.hu fp1a_1
= u_1
, v0
, r_1
313 L
(b10
): br.cloop.dptk L
(gt2
)
314 xma.l fp0b_1
= ux
, v0
, rx
315 xma.hu fp1b_1
= ux
, v0
, rx
317 xma.l fp0b_2
= uy
, v0
, ry
318 xma.hu fp1a_2
= uy
, v0
, ry
320 stf8
[rp
] = fp0b_1
, 8
321 (p11
) xma.hu fp2a_1
= ux
, v1
, fp1b_1 C suppressed for addmul_2s
322 (p11
) xma.l fp1b_1
= ux
, v1
, fp1b_1 C suppressed for addmul_2s
324 getfsig acc0
= fp0b_2
325 xma.l fp1b_2
= uy
, v1
, fp1a_2
326 xma.hu fp2a_2
= uy
, v1
, fp1a_2
328 getfsig pr1_1
= fp1b_1
329 getfsig acc1_1
= fp2a_1
331 getfsig pr1_2
= fp1b_2
337 cmp.ltu p8
, p9
= s0
, pr1_1
340 .pred.rel
"mutex", p8
, p9
341 (p8
) add acc0
= pr1_2
, acc1_1
, 1
342 (p9
) add acc0
= pr1_2
, acc1_1
343 (p8
) cmp.leu p10
, p0
= r31
, pr1_2
344 (p9
) cmp.ltu p10
, p0
= r31
, pr1_2
352 {.mmi; ldf8 r_3 = [srp], 8
356 }{.mfi; ldf8 r_0 = [srp], 8
357 xma.l fp0b_1 = ux, v0, rx
359 }{.mfi; ldf8 u_0 = [up], 8
360 xma.hu fp1b_1 = ux, v0, rx
363 } xma.l fp0b_2
= uy
, v0
, ry
364 xma.hu fp1a_2
= uy
, v0
, ry
366 getfsig acc0
= fp0b_1
368 (p11
) xma.hu fp2a_1
= ux
, v1
, fp1b_1 C suppressed for addmul_2s
369 (p11
) xma.l fp1b_1
= ux
, v1
, fp1b_1 C suppressed for addmul_2s
372 xma.l fp0b_3
= u_3
, v0
, r_3
373 xma.hu fp1a_3
= u_3
, v0
, r_3
375 getfsig pr0_2
= fp0b_2
377 xma.l fp1b_2
= uy
, v1
, fp1a_2
378 xma.hu fp2a_2
= uy
, v1
, fp1a_2
381 getfsig pr1_1
= fp1b_1
383 {.mfi; getfsig acc1_1 = fp2a_1
384 xma.l fp0b_0 = u_0, v0, r_0
385 cmp.ne p8, p9 = r0, r0
386 }{.mfb; cmp.ne p12, p13 = r0, r0
387 xma.hu fp1a_0 = u_0, v0, r_0
388 br.cloop.sptk.clr L(top)
394 L
(b11
): ldf8 r_2
= [srp
], 8
402 cmp.ne p6
, p7
= r0
, r0
403 xma.l fp0b_0
= ux
, v0
, rx
404 xma.hu fp1b_0
= ux
, v0
, rx
406 cmp.ne p10
, p11
= r0
, r0
407 xma.l fp0b_1
= uy
, v0
, ry
408 xma.hu fp1a_1
= uy
, v0
, ry
410 getfsig acc0
= fp0b_0
411 (p15
) xma.hu fp2a_0
= ux
, v1
, fp1b_0 C suppressed for addmul_2s
412 (p15
) xma.l fp1b_0
= ux
, v1
, fp1b_0 C suppressed for addmul_2s
414 xma.l fp0b_2
= uy
, v1
, r_2
415 xma.hu fp1a_2
= uy
, v1
, r_2
417 getfsig pr0_1
= fp0b_1
418 xma.l fp1b_1
= u_2
, v0
, fp1a_1
419 xma.hu fp2a_1
= u_2
, v0
, fp1a_1
421 getfsig pr1_0
= fp1b_0
422 getfsig acc1_0
= fp2a_0
425 L
(gt3
): ldf8 r_3
= [srp
], 8
426 xma.l fp0b_0
= ux
, v0
, rx
427 cmp.ne p10
, p11
= r0
, r0
429 xma.hu fp1b_0
= ux
, v0
, rx
430 cmp.ne p6
, p7
= r0
, r0
432 xma.l fp0b_1
= uy
, v0
, ry
433 xma.hu fp1a_1
= uy
, v0
, ry
435 getfsig acc0
= fp0b_0
437 (p15
) xma.hu fp2a_0
= ux
, v1
, fp1b_0 C suppressed for addmul_2s
439 (p15
) xma.l fp1b_0
= ux
, v1
, fp1b_0 C suppressed for addmul_2s
441 xma.l fp0b_2
= u_2
, v0
, r_2
442 xma.hu fp1a_2
= u_2
, v0
, r_2
444 getfsig pr0_1
= fp0b_1
446 xma.l fp1b_1
= uy
, v1
, fp1a_1
447 xma.hu fp2a_1
= uy
, v1
, fp1a_1
450 getfsig pr1_0
= fp1b_0
452 getfsig acc1_0
= fp2a_0
453 xma.l fp0b_3
= u_3
, v0
, r_3
454 xma.hu fp1a_3
= u_3
, v0
, r_3
458 C
*** MAIN
LOOP START
***
461 .pred.rel
"mutex", p12
, p13
462 getfsig pr0_3
= fp0b_3
464 xma.l fp1b_3
= u_3
, v1
, fp1a_3
465 (p12
) add s0
= pr1_0
, acc0
, 1
466 (p13
) add s0
= pr1_0
, acc0
467 xma.hu fp2a_3
= u_3
, v1
, fp1a_3
469 .pred.rel
"mutex", p8
, p9
470 .pred.rel
"mutex", p12
, p13
472 getfsig pr1_2
= fp1b_2
473 (p8
) cmp.leu p6
, p7
= acc0
, pr0_1
474 (p9
) cmp.ltu p6
, p7
= acc0
, pr0_1
475 (p12
) cmp.leu p10
, p11
= s0
, pr1_0
476 (p13
) cmp.ltu p10
, p11
= s0
, pr1_0
478 .pred.rel
"mutex", p6
, p7
479 getfsig acc1_2
= fp2a_2
481 xma.l fp0b_1
= u_1
, v0
, r_1
482 (p6
) add acc0
= pr0_2
, acc1_0
, 1
483 (p7
) add acc0
= pr0_2
, acc1_0
484 xma.hu fp1a_1
= u_1
, v0
, r_1
487 .pred.rel
"mutex", p10
, p11
488 getfsig pr0_0
= fp0b_0
490 xma.l fp1b_0
= u_0
, v1
, fp1a_0
491 (p10
) add s0
= pr1_1
, acc0
, 1
492 (p11
) add s0
= pr1_1
, acc0
493 xma.hu fp2a_0
= u_0
, v1
, fp1a_0
495 .pred.rel
"mutex", p6
, p7
496 .pred.rel
"mutex", p10
, p11
498 getfsig pr1_3
= fp1b_3
499 (p6
) cmp.leu p8
, p9
= acc0
, pr0_2
500 (p7
) cmp.ltu p8
, p9
= acc0
, pr0_2
501 (p10
) cmp.leu p12
, p13
= s0
, pr1_1
502 (p11
) cmp.ltu p12
, p13
= s0
, pr1_1
504 .pred.rel
"mutex", p8
, p9
505 getfsig acc1_3
= fp2a_3
507 xma.l fp0b_2
= u_2
, v0
, r_2
508 (p8
) add acc0
= pr0_3
, acc1_1
, 1
509 (p9
) add acc0
= pr0_3
, acc1_1
510 xma.hu fp1a_2
= u_2
, v0
, r_2
513 .pred.rel
"mutex", p12
, p13
514 getfsig pr0_1
= fp0b_1
516 xma.l fp1b_1
= u_1
, v1
, fp1a_1
517 (p12
) add s0
= pr1_2
, acc0
, 1
518 (p13
) add s0
= pr1_2
, acc0
519 xma.hu fp2a_1
= u_1
, v1
, fp1a_1
521 .pred.rel
"mutex", p8
, p9
522 .pred.rel
"mutex", p12
, p13
524 getfsig pr1_0
= fp1b_0
525 (p8
) cmp.leu p6
, p7
= acc0
, pr0_3
526 (p9
) cmp.ltu p6
, p7
= acc0
, pr0_3
527 (p12
) cmp.leu p10
, p11
= s0
, pr1_2
528 (p13
) cmp.ltu p10
, p11
= s0
, pr1_2
530 .pred.rel
"mutex", p6
, p7
531 getfsig acc1_0
= fp2a_0
533 xma.l fp0b_3
= u_3
, v0
, r_3
534 (p6
) add acc0
= pr0_0
, acc1_2
, 1
535 (p7
) add acc0
= pr0_0
, acc1_2
536 xma.hu fp1a_3
= u_3
, v0
, r_3
539 .pred.rel
"mutex", p10
, p11
540 getfsig pr0_2
= fp0b_2
542 xma.l fp1b_2
= u_2
, v1
, fp1a_2
543 (p10
) add s0
= pr1_3
, acc0
, 1
544 (p11
) add s0
= pr1_3
, acc0
545 xma.hu fp2a_2
= u_2
, v1
, fp1a_2
547 .pred.rel
"mutex", p6
, p7
548 .pred.rel
"mutex", p10
, p11
550 getfsig pr1_1
= fp1b_1
551 (p6
) cmp.leu p8
, p9
= acc0
, pr0_0
552 (p7
) cmp.ltu p8
, p9
= acc0
, pr0_0
553 (p10
) cmp.leu p12
, p13
= s0
, pr1_3
554 (p11
) cmp.ltu p12
, p13
= s0
, pr1_3
556 .pred.rel
"mutex", p8
, p9
557 getfsig acc1_1
= fp2a_1
559 xma.l fp0b_0
= u_0
, v0
, r_0
560 (p8
) add acc0
= pr0_1
, acc1_3
, 1
561 (p9
) add acc0
= pr0_1
, acc1_3
562 xma.hu fp1a_0
= u_0
, v0
, r_0
563 L
(10): br.cloop.sptk.clr L
(top
) C
12
565 C
*** MAIN
LOOP END ***
567 .pred.rel
"mutex", p12
, p13
568 {.mfi; getfsig pr0_3 = fp0b_3
569 xma.l fp1b_3 = u_3, v1, fp1a_3
570 (p12) add s0 = pr1_0, acc0, 1
571 }{.mfi; (p13) add s0 = pr1_0, acc0
572 xma.hu fp2a_3 = u_3, v1, fp1a_3
575 } .pred.rel
"mutex", p8
, p9
576 .pred.rel
"mutex", p12
, p13
577 {.mmi; getfsig pr1_2 = fp1b_2
579 (p8) cmp.leu p6, p7 = acc0, pr0_1
580 }{.mmi; (p9) cmp.ltu p6, p7 = acc0, pr0_1
581 (p12) cmp.leu p10, p11 = s0, pr1_0
582 (p13) cmp.ltu p10, p11 = s0, pr1_0
584 } .pred.rel
"mutex", p6
, p7
585 {.mfi; getfsig acc1_2 = fp2a_2
586 xma.l fp0b_1 = u_1, v0, r_1
588 }{.mmf; (p6) add acc0 = pr0_2, acc1_0, 1
589 (p7) add acc0 = pr0_2, acc1_0
590 xma.hu fp1a_1 = u_1, v0, r_1
594 .pred.rel
"mutex", p10
, p11
595 {.mfi; getfsig pr0_0 = fp0b_0
596 xma.l fp1b_0 = u_0, v1, fp1a_0
597 (p10) add s0 = pr1_1, acc0, 1
598 }{.mfi; (p11) add s0 = pr1_1, acc0
599 xma.hu fp2a_0 = u_0, v1, fp1a_0
602 } .pred.rel
"mutex", p6
, p7
603 .pred.rel
"mutex", p10
, p11
604 {.mmi; getfsig pr1_3 = fp1b_3
606 (p6) cmp.leu p8, p9 = acc0, pr0_2
607 }{.mmi; (p7) cmp.ltu p8, p9 = acc0, pr0_2
608 (p10) cmp.leu p12, p13 = s0, pr1_1
609 (p11) cmp.ltu p12, p13 = s0, pr1_1
611 } .pred.rel
"mutex", p8
, p9
612 {.mfi; getfsig acc1_3 = fp2a_3
613 xma.l fp0b_2 = u_2, v0, r_2
615 }{.mmf; (p8) add acc0 = pr0_3, acc1_1, 1
616 (p9) add acc0 = pr0_3, acc1_1
617 xma.hu fp1a_2 = u_2, v0, r_2
621 .pred.rel
"mutex", p12
, p13
622 {.mfi; getfsig pr0_1 = fp0b_1
623 xma.l fp1b_1 = u_1, v1, fp1a_1
624 (p12) add s0 = pr1_2, acc0, 1
625 }{.mfi; (p13) add s0 = pr1_2, acc0
626 xma.hu fp2a_1 = u_1, v1, fp1a_1
629 } .pred.rel
"mutex", p8
, p9
630 .pred.rel
"mutex", p12
, p13
631 {.mmi; getfsig pr1_0 = fp1b_0
633 (p8) cmp.leu p6, p7 = acc0, pr0_3
634 }{.mmi; (p9) cmp.ltu p6, p7 = acc0, pr0_3
635 (p12) cmp.leu p10, p11 = s0, pr1_2
636 (p13) cmp.ltu p10, p11 = s0, pr1_2
638 } .pred.rel
"mutex", p6
, p7
639 {.mmi; getfsig acc1_0 = fp2a_0
640 (p6) add acc0 = pr0_0, acc1_2, 1
641 (p7) add acc0 = pr0_0, acc1_2
645 .pred.rel
"mutex", p10
, p11
646 {.mfi; getfsig pr0_2 = fp0b_2
647 xma.l fp1b_2 = u_2, v1, fp1a_2
648 (p10) add s0 = pr1_3, acc0, 1
649 }{.mfi; (p11) add s0 = pr1_3, acc0
650 xma.hu fp2a_2 = u_2, v1, fp1a_2
653 } .pred.rel
"mutex", p6
, p7
654 .pred.rel
"mutex", p10
, p11
655 {.mmi; getfsig pr1_1 = fp1b_1
657 (p6) cmp.leu p8, p9 = acc0, pr0_0
658 }{.mmi; (p7) cmp.ltu p8, p9 = acc0, pr0_0
659 (p10) cmp.leu p12, p13 = s0, pr1_3
660 (p11) cmp.ltu p12, p13 = s0, pr1_3
662 } .pred.rel
"mutex", p8
, p9
663 {.mmi; getfsig acc1_1 = fp2a_1
664 (p8) add acc0 = pr0_1, acc1_3, 1
665 (p9) add acc0 = pr0_1, acc1_3
667 } .pred.rel
"mutex", p12
, p13
668 {.mmi; (p12) add s0 = pr1_0, acc0, 1
669 (p13) add s0 = pr1_0, acc0
672 } .pred.rel
"mutex", p8
, p9
673 .pred.rel
"mutex", p12
, p13
674 {.mmi; getfsig pr1_2 = fp1b_2
676 (p8) cmp.leu p6, p7 = acc0, pr0_1
677 }{.mmi; (p9) cmp.ltu p6, p7 = acc0, pr0_1
678 (p12) cmp.leu p10, p11 = s0, pr1_0
679 (p13) cmp.ltu p10, p11 = s0, pr1_0
681 } .pred.rel
"mutex", p6
, p7
682 {.mmi; getfsig r8 = fp2a_2
683 (p6) add acc0 = pr0_2, acc1_0, 1
684 (p7) add acc0 = pr0_2, acc1_0
686 } .pred.rel
"mutex", p10
, p11
687 {.mmi; (p10) add s0 = pr1_1, acc0, 1
688 (p11) add s0 = pr1_1, acc0
689 (p6) cmp.leu p8, p9 = acc0, pr0_2
691 } .pred.rel
"mutex", p10
, p11
692 {.mmi; (p7) cmp.ltu p8, p9 = acc0, pr0_2
693 (p10) cmp.leu p12, p13 = s0, pr1_1
694 (p11) cmp.ltu p12, p13 = s0, pr1_1
696 } .pred.rel
"mutex", p8
, p9
697 {.mmi; st8 [rp] = s0, 8
698 (p8) add acc0 = pr1_2, acc1_1, 1
699 (p9) add acc0 = pr1_2, acc1_1
701 } .pred.rel
"mutex", p8
, p9
702 {.mmi; (p8) cmp.leu p10, p11 = acc0, pr1_2
703 (p9) cmp.ltu p10, p11 = acc0, pr1_2
704 (p12) add acc0 = 1, acc0
706 }{.mmi; st8 [rp] = acc0, 8
707 (p12) cmpeqor p10, p0 = 0, acc0
710 }{.mib; (p10) add r8 = 1, r8