Ignore key order for APC bespokes
[hiphop-php.git] / hphp / util / memset-x64-avx2.S
blob2cbf543b46534239539b68e1fc1777df25017822
1 #include "hphp/util/etch-helpers.h"
2 #include "hphp/util/hphp-config.h"
#if defined(__x86_64__) && defined(ENABLE_AVX2) && !defined(__APPLE__)
/*
 * memset -- AVX2 fill routine.
 *
 * Syntax: AT&T (GAS), run through the C preprocessor.
 * ABI:    register usage below follows System V AMD64 argument order.
 * In:     rdi = destination, esi = fill value (only the low byte is used),
 *         rdx = byte count n
 * Out:    rax = original destination pointer
 * Clobb:  rcx, rdx, rsi, rdi, r8, xmm0/ymm0, xmm1, flags
 * Stack:  none used.
 *
 * The buffer is filled front to back in four stages:
 *   1. n % 8 single bytes            (jump table less_than_7bytes)
 *   2. (n / 8) % 8 quadwords         (jump table less_than_63bytes)
 *   3. one 64-byte chunk if bit 6 of the remaining count is set
 *   4. 128-byte chunks until the count reaches zero
 * Stages 1 and 2 are skipped entirely when n is a multiple of 64.
 * All vector stores are unaligned (vmovdqu / vmovq), so no alignment of the
 * destination is assumed.
 *
 * NOTE(review): both jump tables are indexed with an absolute displacement
 * (no base register), which is not position independent. Confirm this object
 * is never linked into a PIE or shared object.
 */
        .file   "memset-x64-avx2.S"
        ETCH_SECTION(memset)
        .globl  ETCH_NAME(memset)
        .type   ETCH_NAME(memset), @function
        .p2align 4,,10
ETCH_NAME(memset):
ETCH_LABEL(ENTRY_MEMSET):
        CFI(startproc)
        movq    %rdi, %r8               /* r8 = dst, kept for the return value */
        vpxor   %xmm0, %xmm0, %xmm0     /* all-zero shuffle control */
        vmovd   %esi, %xmm1
        vpshufb %xmm0, %xmm1, %xmm0     /* xmm0 = fill byte in all 16 lanes */
        vmovq   %xmm0, %rsi             /* rsi = fill byte replicated 8 times */
        testb   $63, %dl
        movq    %rdi, %rcx              /* rcx = write cursor (mov keeps flags) */
        je      ETCH_LABEL(more_than_63bytes)   /* n % 64 == 0: no small stages */

        /* Stage 1: write n % 8 leading bytes. */
        movl    %edx, %edi
        andl    $7, %edi                /* rdi = n % 8, upper bits zeroed */
        leal    -1(%rdi), %eax          /* table index; wraps to 0xffffffff for 0 */
        cmpl    $6, %eax
        ja      ETCH_LABEL(more_than_7bytes)    /* no leading bytes to write */
        jmp     *ETCH_LABEL(less_than_7bytes)(,%rax,8)

        .section        .rodata
        /* Alignment macros, presumably from etch-helpers.h. */
ETCH_ALIGN8
ETCH_ALIGN4
ETCH_LABEL(less_than_7bytes):
        .quad   ETCH_LABEL(1byte_move)
        .quad   ETCH_LABEL(2byte_move)
        .quad   ETCH_LABEL(3byte_move)
        .quad   ETCH_LABEL(4byte_move)
        .quad   ETCH_LABEL(5byte_move)
        .quad   ETCH_LABEL(6byte_move)
        .quad   ETCH_LABEL(7byte_move)

        ETCH_SECTION(memset)
        /* Fall-through ladder: entering at k stores bytes k-1 .. 0. */
ETCH_LABEL(7byte_move):
        movb    %sil, 6(%rcx)
ETCH_LABEL(6byte_move):
        movb    %sil, 5(%rcx)
ETCH_LABEL(5byte_move):
        movb    %sil, 4(%rcx)
ETCH_LABEL(4byte_move):
        movb    %sil, 3(%rcx)
ETCH_LABEL(3byte_move):
        movb    %sil, 2(%rcx)
ETCH_LABEL(2byte_move):
        movb    %sil, 1(%rcx)
ETCH_LABEL(1byte_move):
        movb    %sil, (%rcx)

        /* Stage 2: write (remaining / 8) % 8 quadwords. */
ETCH_LABEL(more_than_7bytes):
        subq    %rdi, %rdx              /* remaining -= leading bytes */
        leaq    (%rcx,%rdi), %rcx       /* cursor += leading bytes */
        movq    %rdx, %rax
        andl    $63, %eax               /* rax = remaining % 64, a multiple of 8 */
        movl    %eax, %edi
        shrl    $3, %edi                /* edi = quadword count, 0..7 */
        decl    %edi                    /* table index; wraps for count 0 */
        cmpl    $6, %edi
        ja      ETCH_LABEL(more_than_63bytes)   /* no quadwords to write */
        jmp     *ETCH_LABEL(less_than_63bytes)(,%rdi,8)

        .section        .rodata
ETCH_ALIGN8
ETCH_ALIGN4
ETCH_LABEL(less_than_63bytes):
        .quad   ETCH_LABEL(8byte_move)
        .quad   ETCH_LABEL(16byte_move)
        .quad   ETCH_LABEL(24byte_move)
        .quad   ETCH_LABEL(32byte_move)
        .quad   ETCH_LABEL(40byte_move)
        .quad   ETCH_LABEL(48byte_move)
        .quad   ETCH_LABEL(56byte_move)

        ETCH_SECTION(memset)
        /*
         * Fall-through ladder of 8-byte stores. VEX-encoded vmovq is used so
         * the whole routine stays free of legacy-SSE encodings.
         */
ETCH_LABEL(56byte_move):
        vmovq   %xmm0, 48(%rcx)
ETCH_LABEL(48byte_move):
        vmovq   %xmm0, 40(%rcx)
ETCH_LABEL(40byte_move):
        vmovq   %xmm0, 32(%rcx)
ETCH_LABEL(32byte_move):
        vmovq   %xmm0, 24(%rcx)
ETCH_LABEL(24byte_move):
        vmovq   %xmm0, 16(%rcx)
ETCH_LABEL(16byte_move):
        vmovq   %xmm0, 8(%rcx)
ETCH_LABEL(8byte_move):
        vmovq   %xmm0, (%rcx)
        subq    %rax, %rdx              /* remaining -= quadword bytes */
        leaq    (%rcx,%rax), %rcx       /* cursor += quadword bytes */

        /* Stage 3: remaining is now a multiple of 64. */
ETCH_LABEL(more_than_63bytes):
        testq   %rdx, %rdx
        je      ETCH_LABEL(EXIT_MEMSET) /* done; only 128-bit VEX ops ran so far */
        movq    %rdx, %rax
        vinserti128 $1, %xmm0, %ymm0, %ymm0     /* ymm0 = fill byte in 32 lanes */
        andq    $64, %rax               /* rax = 64 if an odd 64-byte chunk exists */
        je      ETCH_LABEL(128byte_loop_data_guzzler)
        vmovdqu %ymm0, (%rcx)
        vmovdqu %ymm0, 0x20(%rcx)
        addq    %rax, %rcx              /* rax == 64 on this path */
        subq    %rax, %rdx
        /*
         * The upper half of ymm0 has been written, so leave through the
         * vzeroupper exit rather than skipping it.
         */
        je      ETCH_LABEL(EXIT_MEMSET_AVX)

        /* Stage 4: remaining is a non-zero multiple of 128 here. */
ETCH_ALIGN8
ETCH_ALIGN4
ETCH_LABEL(128byte_loop_data_guzzler):
        vmovdqu %ymm0, (%rcx)
        vmovdqu %ymm0, 0x20(%rcx)
        vmovdqu %ymm0, 0x40(%rcx)
        vmovdqu %ymm0, 0x60(%rcx)
        addq    $128, %rcx
        subq    $128, %rdx
        ja      ETCH_LABEL(128byte_loop_data_guzzler)   /* loop while remaining > 0 */

        /* Exit for every path that wrote a 256-bit register. */
ETCH_LABEL(EXIT_MEMSET_AVX):
        vzeroupper
ETCH_LABEL(EXIT_MEMSET):
        movq    %r8, %rax               /* return the original destination */
        retq
        CFI(endproc)
        .size   ETCH_NAME(memset), .-ETCH_NAME(memset)
#endif