beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpn / x86 / k6 / cross.pl
blobfc921a56b711d01b9927f877290d6dc7b7c63933
1 #! /usr/bin/perl
3 # Copyright 2000, 2001 Free Software Foundation, Inc.
5 # This file is part of the GNU MP Library.
7 # The GNU MP Library is free software; you can redistribute it and/or modify
8 # it under the terms of either:
10 # * the GNU Lesser General Public License as published by the Free
11 # Software Foundation; either version 3 of the License, or (at your
12 # option) any later version.
14 # or
16 # * the GNU General Public License as published by the Free Software
17 # Foundation; either version 2 of the License, or (at your option) any
18 # later version.
20 # or both in parallel, as here.
22 # The GNU MP Library is distributed in the hope that it will be useful, but
23 # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24 # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
25 # for more details.
27 # You should have received copies of the GNU General Public License and the
28 # GNU Lesser General Public License along with the GNU MP Library. If not,
29 # see https://www.gnu.org/licenses/.
32 # Usage: cross.pl [filename.o]...
34 # Produce an annotated disassembly of the given object files, indicating
35 # certain code alignment and addressing mode problems afflicting K6 chips.
36 # "ZZ" is used on all annotations, so this can be searched for.
38 # With no arguments, all .o files corresponding to .asm files are processed.
39 # This is good in the mpn object directory of a k6*-*-* build.
41 # Code alignments of 8 bytes or more are handled. When 32 is used, cache
42 # line boundaries will fall in at offsets 0x20,0x40,etc and problems are
43 # flagged at those locations. When 16 is used, the line boundaries can also
44 # fall at offsets 0x10,0x30,0x50,etc, depending where the file is loaded, so
45 # problems are identified there too. Likewise when 8 byte alignment is used
46 # problems are flagged additionally at 0x08,0x18,0x28,etc.
48 # Usually 32 byte alignment is used for k6 routines, but less is certainly
49 # possible if through good luck, or a little tweaking, cache line crossing
50 # problems can be avoided at the extra locations.
52 # Bugs:
54 # Instructions without mod/rm bytes or which are already vector decoded are
55 # unaffected by cache line boundary crossing, but not all of these have yet
56 # been put in as exceptions. All that occur in practice in GMP are present
57 # though.
59 # There are no messages for using the vector decoded addressing mode (%esi),
60 # but that's easy to avoid when coding.
62 # Future:
64 # Warn about jump targets that are poorly aligned (less than 2 instructions
65 # before a cache line boundary).
67 use strict;
69 sub disassemble {
70 my ($file) = @_;
71 my ($addr,$b1,$b2,$b3, $prefix,$opcode,$modrm);
72 my $align;
74 open (IN, "objdump -Srfh $file |")
75 || die "Cannot open pipe from objdump\n";
76 while (<IN>) {
77 print;
79 if (/^[ \t]*[0-9]+[ \t]+\.text[ \t]/ && /2\*\*([0-9]+)$/) {
80 $align = 1 << $1;
81 if ($align < 8) {
82 print "ZZ cross.pl cannot handle alignment < 2**3\n";
83 $align = 8
87 if (/^[ \t]*([0-9a-f]*):[ \t]*([0-9a-f]+)[ \t]+([0-9a-f]+)[ \t]+([0-9a-f]+)/) {
88 ($addr,$b1,$b2,$b3) = ($1,$2,$3,$4);
90 } elsif (/^[ \t]*([0-9a-f]*):[ \t]*([0-9a-f]+)[ \t]+([0-9a-f]+)/) {
91 ($addr,$b1,$b2,$b3) = ($1,$2,$3,'');
93 } elsif (/^[ \t]*([0-9a-f]*):[ \t]*([0-9a-f]+)/) {
94 ($addr,$b1,$b2,$b3) = ($1,$2,'','');
96 } else {
97 next;
100 if ($b1 =~ /0f/) {
101 $prefix = $b1;
102 $opcode = $b2;
103 $modrm = $b3;
104 } else {
105 $prefix = '';
106 $opcode = $b1;
107 $modrm = $b2;
110 # modrm of the form 00-xxx-100 with an 0F prefix is the problem case
111 # for K6 and pre-CXT K6-2
112 if ($prefix =~ /0f/
113 && $opcode !~ /^8/ # jcond disp32
114 && $modrm =~ /^[0-3][4c]/) {
115 print "ZZ ($file) >3 bytes to determine instruction length [K6]\n";
118 # with just an opcode, starting 1f mod 20h
119 if (($align==32 && $addr =~ /[13579bdf]f$/
120 || $align==16 && $addr =~ /f$/
121 || $align==8 && $addr =~ /[7f]$/)
122 && $prefix !~ /0f/
123 && $opcode !~ /1[012345]/ # adc
124 && $opcode !~ /1[89abcd]/ # sbb
125 && $opcode !~ /^4/ # inc/dec reg
126 && $opcode !~ /^5/ # push/pop reg
127 && $opcode !~ /68/ # push $imm32
128 && $opcode !~ /^7/ # jcond disp8
129 && $opcode !~ /a[89]/ # test+imm
130 && $opcode !~ /a[a-f]/ # stos/lods/scas
131 && $opcode !~ /b8/ # movl $imm32,%eax
132 && $opcode !~ /d[0123]/ # rcl
133 && $opcode !~ /e[0123]/ # loop/loopz/loopnz/jcxz
134 && $opcode !~ /e8/ # call disp32
135 && $opcode !~ /e[9b]/ # jmp disp32/disp8
136 && $opcode !~ /f[89abcd]/ # clc,stc,cli,sti,cld,std
137 && !($opcode =~ /f[67]/ # grp 1
138 && $modrm =~ /^[2367abef]/) # mul, imul, div, idiv
139 && $modrm !~ /^$/) {
140 print "ZZ ($file) opcode/modrm cross 32-byte boundary\n";
143 # with an 0F prefix, anything starting at 1f mod 20h
144 if (($align==32 && $addr =~ /[13579bdf][f]$/
145 || $align==16 && $addr =~ /f$/
146 || $align==8 && $addr =~ /[7f]$/)
147 && $prefix =~ /0f/
148 && $opcode !~ /af/ # imul
149 && $opcode !~ /a[45]/ # shldl
150 && $opcode !~ /a[cd]/ # shrdl
152 print "ZZ ($file) prefix/opcode cross 32-byte boundary\n";
155 # with an 0F prefix, anything with mod/rm starting at 1e mod 20h
156 if (($align==32 && $addr =~ /[13579bdf][e]$/
157 || $align==16 && $addr =~ /[e]$/
158 || $align==8 && $addr =~ /[6e]$/)
159 && $prefix =~ /0f/
160 && $opcode !~ /^8/ # jcond disp32
161 && $opcode !~ /af/ # imull reg,reg
162 && $opcode !~ /a[45]/ # shldl
163 && $opcode !~ /a[cd]/ # shrdl
164 && $modrm !~ /^$/) {
165 print "ZZ ($file) prefix/opcode/modrm cross 32-byte boundary\n";
168 close IN || die "Error from objdump (or objdump not available)\n";
# Main program: decide which object files to examine, then annotate each.
my @files;
if (@ARGV) {
  @files = @ARGV;
} else {
  # No arguments: take every .asm file in the current directory and look at
  # the .o of the same name.  The suffix pattern is escaped and anchored so
  # that only the trailing ".asm" is rewritten (an unanchored /.asm/ would
  # turn "chasm.asm" into "c.o.asm").
  @files = glob "*.asm";
  s/\.asm$/.o/ foreach @files;
}

foreach my $file (@files) {
  disassemble($file);
}