From a04019c7f4e95d862bce855301e28ba1e7c27dba Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Sun, 3 May 2009 21:42:34 -0700
Subject: [PATCH] Infrastructure support for AMD's new XOP prefix

Handle AMD's XOP prefixes; they use basically the same encoding as VEX
prefixes, so treat them simply as a variant of VEX.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 assemble.c | 21 +++++++++++----------
 disasm.c   | 25 ++++++++++++++++++++++++-
 insns.h    |  2 +-
 insns.pl   | 61 ++++++++++++++++++++++++++++++++++++++-----------------------
 nasm.h     |  4 ++--
 5 files changed, 76 insertions(+), 37 deletions(-)

diff --git a/assemble.c b/assemble.c
index 604cc7c8..b72e5a60 100644
--- a/assemble.c
+++ b/assemble.c
@@ -56,19 +56,20 @@
  *                 is not equal to the truncated and sign-extended 32-bit
  *                 operand; used for 32-bit immediates in 64-bit mode.
  * \254..\257    - a signed 32-bit operand to be extended to 64 bits.
- * \260..\263    - this instruction uses VEX rather than REX, with the
+ * \260..\263    - this instruction uses VEX/XOP rather than REX, with the
  *		   V field taken from operand 0..3.
- * \270		 - this instruction uses VEX rather than REX, with the
+ * \270		 - this instruction uses VEX/XOP rather than REX, with the
  *		   V field set to 1111b.
  *
- * VEX prefixes are followed by the sequence:
- * \mm\wlp         where mm is the M field; and wlp is:
+ * VEX/XOP prefixes are followed by the sequence:
+ * \tmm\wlp        where mm is the M field; and wlp is:
  *                 00 0ww lpp
  *                 [w0] ww = 0 for W = 0
  *                 [w1] ww = 1 for W = 1
  *                 [wx] ww = 2 for W don't care (always assembled as 0)
  *                 [ww] ww = 3 for W used as REX.W
  *
+ * In \tmm, t selects the prefix class: t = 0 for VEX (C4/C5), t = 1 for XOP (8F).
  *
  * \274..\277    - a signed byte immediate operand, from operand 0..3,
  *                 which is to be extended to the operand size.
@@ -936,14 +937,14 @@ static int64_t calcsize(int32_t segment, int64_t offset, int bits,
 	case4(0260):
 	    ins->rex |= REX_V;
 	    ins->drexdst = regval(opx);
-	    ins->vex_m = *codes++;
+	    ins->vex_cm = *codes++;
 	    ins->vex_wlp = *codes++;
 	    break;
 
 	case 0270:
 	    ins->rex |= REX_V;
 	    ins->drexdst = 0;
-	    ins->vex_m = *codes++;
+	    ins->vex_cm = *codes++;
 	    ins->vex_wlp = *codes++;
 	    break;
 
@@ -1141,7 +1142,7 @@ static int64_t calcsize(int32_t segment, int64_t offset, int bits,
 	    errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
 	    return -1;
 	}
-	if (ins->vex_m != 1 || (ins->rex & (REX_W|REX_R|REX_B)))
+	if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_R|REX_B)))
 	    length += 3;
 	else
 	    length += 2;
@@ -1536,9 +1537,9 @@ static void gencode(int32_t segment, int64_t offset, int bits,
 	case4(0260):
 	case 0270:
 	    codes += 2;
-	    if (ins->vex_m != 1 || (ins->rex & (REX_W|REX_X|REX_B))) {
-		bytes[0] = 0xc4;
-		bytes[1] = ins->vex_m | ((~ins->rex & 7) << 5);
+	    if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_X|REX_B))) {
+		bytes[0] = (ins->vex_cm >> 6) ? 0x8f : 0xc4;
+		bytes[1] = (ins->vex_cm & 31) | ((~ins->rex & 7) << 5);
 		bytes[2] = ((ins->rex & REX_W) << (7-3)) |
 		    ((~ins->drexdst & 15)<< 3) | (ins->vex_wlp & 07);
 		out(offset, segment, &bytes, OUT_RAWDATA, 3, NO_SEG, NO_SEG);
diff --git a/disasm.c b/disasm.c
index 349a3ff6..4ce6efd8 100644
--- a/disasm.c
+++ b/disasm.c
@@ -49,6 +49,7 @@ struct prefix_info {
     uint8_t wait;		/* WAIT "prefix" present */
     uint8_t lock;		/* Lock prefix present */
     uint8_t vex[3];		/* VEX prefix present */
+    uint8_t vex_c;		/* VEX "class" (VEX, XOP, ...) */
     uint8_t vex_m;		/* VEX.M field */
     uint8_t vex_v;
     uint8_t vex_lp;		/* VEX.LP fields */
@@ -1049,6 +1050,7 @@ int32_t disasm(uint8_t *data, char *output, int outbufsize, int segsize,
 		prefix.vex[1] = *data++;
 
 		prefix.rex = REX_V;
+		prefix.vex_c = 0;
 
 		if (prefix.vex[0] == 0xc4) {
 		    prefix.vex[2] = *data++;
@@ -1064,7 +1066,28 @@ int32_t disasm(uint8_t *data, char *output, int outbufsize, int segsize,
 		    prefix.vex_lp = prefix.vex[1] & 7;
 		}
 
-		ix = itable_VEX[prefix.vex_m][prefix.vex_lp];
+		ix = itable_VEX[0][prefix.vex_m][prefix.vex_lp];
+	    }
+	    end_prefix = true;
+	    break;
+
+	case 0x8F:
+	    if ((data[1] & 030) != 0 &&
+		(segsize == 64 || (data[1] & 0xc0) == 0xc0)) {
+		prefix.vex[0] = *data++;
+		prefix.vex[1] = *data++;
+		prefix.vex[2] = *data++;
+
+		prefix.rex = REX_V;
+		prefix.vex_c = 1;
+
+		prefix.rex |= (~prefix.vex[1] >> 5) & 7; /* REX_RXB */
+		prefix.rex |= (prefix.vex[2] >> (7-3)) & REX_W;
+		prefix.vex_m = prefix.vex[1] & 0x1f;
+		prefix.vex_v = (~prefix.vex[2] >> 3) & 15;
+		prefix.vex_lp = prefix.vex[2] & 7;
+
+		ix = itable_VEX[1][prefix.vex_m][prefix.vex_lp];
 	    }
 	    end_prefix = true;
 	    break;
diff --git a/insns.h b/insns.h
index 6905e71e..38b5f100 100644
--- a/insns.h
+++ b/insns.h
@@ -32,7 +32,7 @@ struct disasm_index {
 /* Tables for the assembler and disassembler, respectively */
 extern const struct itemplate * const nasm_instructions[];
 extern const struct disasm_index itable[256];
-extern const struct disasm_index * const itable_VEX[32][8];
+extern const struct disasm_index * const itable_VEX[2][32][8];
 
 /* Common table for the byte codes */
 extern const uint8_t nasm_bytecodes[];
diff --git a/insns.pl b/insns.pl
index 70942799..66238644 100755
--- a/insns.pl
+++ b/insns.pl
@@ -14,11 +14,17 @@
 # This should match MAX_OPERANDS from nasm.h
 $MAX_OPERANDS = 5;
 
-# Add VEX prefixes
+# Add VEX/XOP prefixes
+@vex_class = ( 'VEX', 'XOP' );
+$vex_classes = scalar(@vex_class);
 @vexlist = ();
-for ($m = 0; $m < 32; $m++) {
-    for ($lp = 0; $lp < 8; $lp++) {
-	push(@vexlist, sprintf("VEX%02X%01X", $m, $lp));
+%vexmap = ();
+for ($c = 0; $c < $vex_classes; $c++) {
+    $vexmap{"\L$vex_class[$c]"} = $c;
+    for ($m = 0; $m < 32; $m++) {
+	for ($lp = 0; $lp < 8; $lp++) {
+	    push(@vexlist, sprintf("%s%02X%01X", $vex_class[$c], $m, $lp));
+	}
     }
 }
 @disasm_prefixes = (@vexlist, @disasm_prefixes);
@@ -243,20 +249,26 @@ if ( !defined($output) || $output eq 'd' ) {
 	print D "};\n";
     }
 
-    print D "\nconst struct disasm_index * const itable_VEX[32][8] = {\n   ";
-    for ($m = 0; $m < 32; $m++) {
-	print D " {\n";
-	for ($lp = 0; $lp < 8; $lp++) {
-	    $vp = sprintf("VEX%02X%01X", $m, $lp);
-	    if ($is_prefix{$vp}) {
-		printf D "        itable_%s,\n", $vp;
-	    } else {
-		print  D "        NULL,\n";
+    printf D "\nconst struct disasm_index * const itable_VEX[%d][32][8] =\n",
+        $vex_classes;
+    print D "{\n";
+    for ($c = 0; $c < $vex_classes; $c++) {
+	print D "    {\n";
+	for ($m = 0; $m < 32; $m++) {
+	    print D "        {\n";
+	    for ($lp = 0; $lp < 8; $lp++) {
+		$vp = sprintf("%s%02X%01X", $vex_class[$c], $m, $lp);
+		if ($is_prefix{$vp}) {
+		    printf D "            itable_%s,\n", $vp;
+		} else {
+		    print  D "            NULL,\n";
+		}
 	    }
+	    print D "        },\n";
 	}
-	print D "    },";
+	print D "    },\n";
     }
-    print D "\n};\n";
+    print D "};\n";
 
     close D;
 }
@@ -521,10 +533,12 @@ sub startseq($) {
       } elsif ($c0 == 0347) {
 	  return addprefix($prefix, 0xA1, 0xA9);
       } elsif (($c0 & ~3) == 0260 || $c0 == 0270) {
-	  my $m,$wlp,$vxp;
+	  my $c,$m,$wlp;
 	  $m   = shift(@codes);
 	  $wlp = shift(@codes);
-	  $prefix .= sprintf('VEX%02X%01X', $m, $wlp & 7);
+	  $c = ($m >> 6);
+	  $m = $m & 31;
+	  $prefix .= sprintf('%s%02X%01X', $vex_class[$c], $m, $wlp & 7);
       } elsif ($c0 >= 0172 && $c0 <= 174) {
 	  shift(@codes);	# Skip is4 control byte
       } else {
@@ -644,13 +658,14 @@ sub byte_code_compile($) {
 	    push(@codes, 06) if ($oppos{'m'} & 4);
 	    push(@codes, 0200 + (($oppos{'m'} & 3) << 3) + $1);
 	    $prefix_ok = 0;
-	} elsif ($op =~ /^vex(|\..*)$/) {
+	} elsif ($op =~ /^(vex|xop)(|\..*)$/) {
+	    my $c = $vexmap{$1};
 	    my ($m,$w,$l,$p) = (undef,2,undef,0);
 	    my $has_nds = 0;
-	    foreach $oq (split(/\./, $op)) {
-		if ($oq eq 'vex') {
-		    # prefix
-		} elsif ($oq eq '128' || $oq eq 'l0') {
+	    my @subops = split(/\./, $op);
+	    shift @subops;	# Drop prefix
+	    foreach $oq (@subops) {
+		if ($oq eq '128' || $oq eq 'l0') {
 		    $l = 0;
 		} elsif ($oq eq '256' || $oq eq 'l1') {
 		    $l = 1;
@@ -692,7 +707,7 @@ sub byte_code_compile($) {
 		die "$fname: $line: 'v' operand without vex.nds or vex.ndd\n";
 	    }
 	    push(@codes, defined($oppos{'v'}) ? 0260+($oppos{'v'} & 3) : 0270,
-		 $m, ($w << 3)+($l << 2)+$p);
+		 ($c << 6)+$m, ($w << 3)+($l << 2)+$p);
 	    $prefix_ok = 0;
 	} elsif ($op =~ /^\/drex([01])$/) {
 	    my $oc0 = $1;
diff --git a/nasm.h b/nasm.h
index 7add093a..587ace8a 100644
--- a/nasm.h
+++ b/nasm.h
@@ -599,7 +599,7 @@ enum ccode {			/* condition code names */
 #define REX_H		0x80	/* High register present, REX forbidden */
 #define REX_D		0x0100	/* Instruction uses DREX instead of REX */
 #define REX_OC		0x0200	/* DREX suffix has the OC0 bit set */
-#define REX_V		0x0400	/* Instruction uses VEX instead of REX */
+#define REX_V		0x0400	/* Instruction uses VEX/XOP instead of REX */
 
 /*
  * Note that because segment registers may be used as instruction
@@ -702,7 +702,7 @@ typedef struct insn {		/* an instruction itself */
     bool forw_ref;              /* is there a forward reference? */
     int rex;			/* Special REX Prefix */
     int drexdst;		/* Destination register for DREX/VEX suffix */
-    int vex_m;			/* M register for VEX prefix */
+    int vex_cm;			/* Class and M field for VEX/XOP prefix */
     int vex_wlp;		/* W, P and L information for VEX prefix */
 } insn;
 
-- 
2.11.4.GIT