2 Copyright (C) 2010 Free Software Foundation, Inc.
3 Contributed by Intel Corporation.
4 This file is part of the GNU C Library.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
24 #include "asm-syntax.h"
/* CFI_PUSH (REG): unwind annotation to place right after a 4-byte push of
   REG.  It adjusts the recorded CFA offset by +4 and then records REG via
   cfi_rel_offset at offset 0 (presumably relative to the new top of stack;
   the cfi_* helpers are defined outside this file).  */
26 #define CFI_PUSH(REG) \
27 cfi_adjust_cfa_offset (4); \
28 cfi_rel_offset (REG, 0)
30 #define CFI_POP(REG) \
31 cfi_adjust_cfa_offset (-4); \
/* PUSH (REG) / POP (REG): emit pushl/popl of REG immediately followed by
   the corresponding CFI_PUSH/CFI_POP annotation, so every stack adjustment
   made through these macros carries its unwind annotation with it.  */
34 #define PUSH(REG) pushl REG; CFI_PUSH (REG)
35 #define POP(REG) popl REG; CFI_POP (REG)
37 #ifndef USE_AS_STRNCMP
39 # define STRCMP __strcmp_ssse3
43 # define RETURN ret; .p2align 4
44 # define UPDATE_STRNCMP_COUNTER
47 # define STRCMP __strncmp_ssse3
52 # define RETURN POP (%ebp); ret; .p2align 4; CFI_PUSH (%ebp)
53 # define UPDATE_STRNCMP_COUNTER \
54 /* calculate left number to compare */ \
58 jbe L(more8byteseq); \
62 .section .text.ssse3,"ax",@progbits
72 jb L(less16bytes_sncmp)
136 movhpd 8(%eax), %xmm1
137 movhpd 8(%edx), %xmm2
144 #ifdef USE_AS_STRNCMP
157 #ifdef USE_AS_STRNCMP
211 * The following cases will be handled by ashr_0
212 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
213 * n(0~15) n(0~15) 15(15+ n-n) ashr_0
221 pcmpeqb (%edx), %xmm1
229 UPDATE_STRNCMP_COUNTER
235 movdqa (%eax, %ecx), %xmm1
236 movdqa (%edx, %ecx), %xmm2
244 #ifdef USE_AS_STRNCMP
253 * The following cases will be handled by ashr_1
254 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
255 * n(15) n -15 0(15 +(n-15) - n) ashr_1
274 UPDATE_STRNCMP_COUNTER
290 movdqa (%eax, %ecx), %xmm1
291 movdqa (%edx, %ecx), %xmm2
294 palignr $1, %xmm3, %xmm2
302 #ifdef USE_AS_STRNCMP
314 movdqa (%eax, %ecx), %xmm1
315 movdqa (%edx, %ecx), %xmm2
318 palignr $1, %xmm3, %xmm2
327 #ifdef USE_AS_STRNCMP
341 jnz L(ashr_1_exittail)
343 #ifdef USE_AS_STRNCMP
345 jbe L(ashr_1_exittail)
353 movdqa (%eax, %ecx), %xmm1
359 * The following cases will be handled by ashr_2
360 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
361 * n(14~15) n -14 1(15 +(n-14) - n) ashr_2
380 UPDATE_STRNCMP_COUNTER
396 movdqa (%eax, %ecx), %xmm1
397 movdqa (%edx, %ecx), %xmm2
400 palignr $2, %xmm3, %xmm2
409 #ifdef USE_AS_STRNCMP
420 movdqa (%eax, %ecx), %xmm1
421 movdqa (%edx, %ecx), %xmm2
424 palignr $2, %xmm3, %xmm2
433 #ifdef USE_AS_STRNCMP
447 jnz L(ashr_2_exittail)
449 #ifdef USE_AS_STRNCMP
451 jbe L(ashr_2_exittail)
460 movdqa (%eax, %ecx), %xmm1
466 * The following cases will be handled by ashr_3
467 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
468 * n(13~15) n -13 2(15 +(n-13) - n) ashr_3
487 UPDATE_STRNCMP_COUNTER
503 movdqa (%eax, %ecx), %xmm1
504 movdqa (%edx, %ecx), %xmm2
507 palignr $3, %xmm3, %xmm2
516 #ifdef USE_AS_STRNCMP
527 movdqa (%eax, %ecx), %xmm1
528 movdqa (%edx, %ecx), %xmm2
531 palignr $3, %xmm3, %xmm2
540 #ifdef USE_AS_STRNCMP
554 jnz L(ashr_3_exittail)
556 #ifdef USE_AS_STRNCMP
558 jbe L(ashr_3_exittail)
566 movdqa (%eax, %ecx), %xmm1
572 * The following cases will be handled by ashr_4
573 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
574 * n(12~15) n -12 3(15 +(n-12) - n) ashr_4
593 UPDATE_STRNCMP_COUNTER
609 movdqa (%eax, %ecx), %xmm1
610 movdqa (%edx, %ecx), %xmm2
613 palignr $4, %xmm3, %xmm2
622 #ifdef USE_AS_STRNCMP
634 movdqa (%eax, %ecx), %xmm1
635 movdqa (%edx, %ecx), %xmm2
638 palignr $4, %xmm3, %xmm2
647 #ifdef USE_AS_STRNCMP
662 jnz L(ashr_4_exittail)
664 #ifdef USE_AS_STRNCMP
666 jbe L(ashr_4_exittail)
675 movdqa (%eax, %ecx), %xmm1
681 * The following cases will be handled by ashr_5
682 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
683 * n(11~15) n -11 4(15 +(n-11) - n) ashr_5
702 UPDATE_STRNCMP_COUNTER
718 movdqa (%eax, %ecx), %xmm1
719 movdqa (%edx, %ecx), %xmm2
722 palignr $5, %xmm3, %xmm2
731 #ifdef USE_AS_STRNCMP
742 movdqa (%eax, %ecx), %xmm1
743 movdqa (%edx, %ecx), %xmm2
746 palignr $5, %xmm3, %xmm2
755 #ifdef USE_AS_STRNCMP
769 jnz L(ashr_5_exittail)
771 #ifdef USE_AS_STRNCMP
773 jbe L(ashr_5_exittail)
781 movdqa (%eax, %ecx), %xmm1
787 * The following cases will be handled by ashr_6
788 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
789 * n(10~15) n -10 5(15 +(n-10) - n) ashr_6
809 UPDATE_STRNCMP_COUNTER
825 movdqa (%eax, %ecx), %xmm1
826 movdqa (%edx, %ecx), %xmm2
829 palignr $6, %xmm3, %xmm2
838 #ifdef USE_AS_STRNCMP
850 movdqa (%eax, %ecx), %xmm1
851 movdqa (%edx, %ecx), %xmm2
854 palignr $6, %xmm3, %xmm2
862 #ifdef USE_AS_STRNCMP
877 jnz L(ashr_6_exittail)
879 #ifdef USE_AS_STRNCMP
881 jbe L(ashr_6_exittail)
889 movdqa (%eax, %ecx), %xmm1
895 * The following cases will be handled by ashr_7
896 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
897 * n(9~15) n - 9 6(15 +(n-9) - n) ashr_7
917 UPDATE_STRNCMP_COUNTER
933 movdqa (%eax, %ecx), %xmm1
934 movdqa (%edx, %ecx), %xmm2
937 palignr $7, %xmm3, %xmm2
946 #ifdef USE_AS_STRNCMP
958 movdqa (%eax, %ecx), %xmm1
959 movdqa (%edx, %ecx), %xmm2
962 palignr $7, %xmm3, %xmm2
971 #ifdef USE_AS_STRNCMP
986 jnz L(ashr_7_exittail)
988 #ifdef USE_AS_STRNCMP
990 jbe L(ashr_7_exittail)
999 movdqa (%eax, %ecx), %xmm1
1005 * The following cases will be handled by ashr_8
1006 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
1007 * n(8~15) n - 8 7(15 +(n-8) - n) ashr_8
1013 movdqa (%edx), %xmm2
1014 movdqa (%eax), %xmm1
1015 pcmpeqb %xmm1, %xmm0
1017 pcmpeqb %xmm1, %xmm2
1019 pmovmskb %xmm2, %edi
1026 UPDATE_STRNCMP_COUNTER
1028 movdqa (%edx), %xmm3
1042 movdqa (%eax, %ecx), %xmm1
1043 movdqa (%edx, %ecx), %xmm2
1046 palignr $8, %xmm3, %xmm2
1048 pcmpeqb %xmm1, %xmm0
1049 pcmpeqb %xmm2, %xmm1
1051 pmovmskb %xmm1, %esi
1055 #ifdef USE_AS_STRNCMP
1066 movdqa (%eax, %ecx), %xmm1
1067 movdqa (%edx, %ecx), %xmm2
1070 palignr $8, %xmm3, %xmm2
1072 pcmpeqb %xmm1, %xmm0
1073 pcmpeqb %xmm2, %xmm1
1075 pmovmskb %xmm1, %esi
1079 #ifdef USE_AS_STRNCMP
1090 pcmpeqb %xmm3, %xmm0
1091 pmovmskb %xmm0, %esi
1093 jnz L(ashr_8_exittail)
1095 #ifdef USE_AS_STRNCMP
1097 jbe L(ashr_8_exittail)
1102 jmp L(gobble_ashr_8)
1106 movdqa (%eax, %ecx), %xmm1
1112 * The following cases will be handled by ashr_9
1113 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
1114 * n(7~15) n - 7 8(15 +(n-7) - n) ashr_9
1120 movdqa (%edx), %xmm2
1121 movdqa (%eax), %xmm1
1122 pcmpeqb %xmm1, %xmm0
1124 pcmpeqb %xmm1, %xmm2
1126 pmovmskb %xmm2, %edi
1133 UPDATE_STRNCMP_COUNTER
1135 movdqa (%edx), %xmm3
1149 movdqa (%eax, %ecx), %xmm1
1150 movdqa (%edx, %ecx), %xmm2
1153 palignr $9, %xmm3, %xmm2
1155 pcmpeqb %xmm1, %xmm0
1156 pcmpeqb %xmm2, %xmm1
1158 pmovmskb %xmm1, %esi
1162 #ifdef USE_AS_STRNCMP
1173 movdqa (%eax, %ecx), %xmm1
1174 movdqa (%edx, %ecx), %xmm2
1177 palignr $9, %xmm3, %xmm2
1179 pcmpeqb %xmm1, %xmm0
1180 pcmpeqb %xmm2, %xmm1
1182 pmovmskb %xmm1, %esi
1186 #ifdef USE_AS_STRNCMP
1197 pcmpeqb %xmm3, %xmm0
1198 pmovmskb %xmm0, %esi
1200 jnz L(ashr_9_exittail)
1202 #ifdef USE_AS_STRNCMP
1204 jbe L(ashr_9_exittail)
1208 jmp L(gobble_ashr_9)
1212 movdqa (%eax, %ecx), %xmm1
1218 * The following cases will be handled by ashr_10
1219 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
1220 * n(6~15) n - 6 9(15 +(n-6) - n) ashr_10
1226 movdqa (%edx), %xmm2
1227 movdqa (%eax), %xmm1
1228 pcmpeqb %xmm1, %xmm0
1230 pcmpeqb %xmm1, %xmm2
1232 pmovmskb %xmm2, %edi
1239 UPDATE_STRNCMP_COUNTER
1241 movdqa (%edx), %xmm3
1252 jg L(nibble_ashr_10)
1255 movdqa (%eax, %ecx), %xmm1
1256 movdqa (%edx, %ecx), %xmm2
1259 palignr $10, %xmm3, %xmm2
1261 pcmpeqb %xmm1, %xmm0
1262 pcmpeqb %xmm2, %xmm1
1264 pmovmskb %xmm1, %esi
1268 #ifdef USE_AS_STRNCMP
1277 jg L(nibble_ashr_10)
1279 movdqa (%eax, %ecx), %xmm1
1280 movdqa (%edx, %ecx), %xmm2
1283 palignr $10, %xmm3, %xmm2
1285 pcmpeqb %xmm1, %xmm0
1286 pcmpeqb %xmm2, %xmm1
1288 pmovmskb %xmm1, %esi
1292 #ifdef USE_AS_STRNCMP
1303 pcmpeqb %xmm3, %xmm0
1304 pmovmskb %xmm0, %esi
1306 jnz L(ashr_10_exittail)
1308 #ifdef USE_AS_STRNCMP
1310 jbe L(ashr_10_exittail)
1314 jmp L(gobble_ashr_10)
1317 L(ashr_10_exittail):
1318 movdqa (%eax, %ecx), %xmm1
1324 * The following cases will be handled by ashr_11
1325 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
1326 * n(5~15) n - 5 10(15 +(n-5) - n) ashr_11
1332 movdqa (%edx), %xmm2
1333 movdqa (%eax), %xmm1
1334 pcmpeqb %xmm1, %xmm0
1336 pcmpeqb %xmm1, %xmm2
1338 pmovmskb %xmm2, %edi
1345 UPDATE_STRNCMP_COUNTER
1347 movdqa (%edx), %xmm3
1358 jg L(nibble_ashr_11)
1361 movdqa (%eax, %ecx), %xmm1
1362 movdqa (%edx, %ecx), %xmm2
1365 palignr $11, %xmm3, %xmm2
1367 pcmpeqb %xmm1, %xmm0
1368 pcmpeqb %xmm2, %xmm1
1370 pmovmskb %xmm1, %esi
1374 #ifdef USE_AS_STRNCMP
1383 jg L(nibble_ashr_11)
1385 movdqa (%eax, %ecx), %xmm1
1386 movdqa (%edx, %ecx), %xmm2
1389 palignr $11, %xmm3, %xmm2
1391 pcmpeqb %xmm1, %xmm0
1392 pcmpeqb %xmm2, %xmm1
1394 pmovmskb %xmm1, %esi
1398 #ifdef USE_AS_STRNCMP
1409 pcmpeqb %xmm3, %xmm0
1410 pmovmskb %xmm0, %esi
1412 jnz L(ashr_11_exittail)
1414 #ifdef USE_AS_STRNCMP
1416 jbe L(ashr_11_exittail)
1420 jmp L(gobble_ashr_11)
1423 L(ashr_11_exittail):
1424 movdqa (%eax, %ecx), %xmm1
1430 * The following cases will be handled by ashr_12
1431 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
1432 * n(4~15) n - 4 11(15 +(n-4) - n) ashr_12
1438 movdqa (%edx), %xmm2
1439 movdqa (%eax), %xmm1
1440 pcmpeqb %xmm1, %xmm0
1442 pcmpeqb %xmm1, %xmm2
1444 pmovmskb %xmm2, %edi
1451 UPDATE_STRNCMP_COUNTER
1453 movdqa (%edx), %xmm3
1464 jg L(nibble_ashr_12)
1467 movdqa (%eax, %ecx), %xmm1
1468 movdqa (%edx, %ecx), %xmm2
1471 palignr $12, %xmm3, %xmm2
1473 pcmpeqb %xmm1, %xmm0
1474 pcmpeqb %xmm2, %xmm1
1476 pmovmskb %xmm1, %esi
1480 #ifdef USE_AS_STRNCMP
1490 jg L(nibble_ashr_12)
1492 movdqa (%eax, %ecx), %xmm1
1493 movdqa (%edx, %ecx), %xmm2
1496 palignr $12, %xmm3, %xmm2
1498 pcmpeqb %xmm1, %xmm0
1499 pcmpeqb %xmm2, %xmm1
1501 pmovmskb %xmm1, %esi
1505 #ifdef USE_AS_STRNCMP
1516 pcmpeqb %xmm3, %xmm0
1517 pmovmskb %xmm0, %esi
1519 jnz L(ashr_12_exittail)
1521 #ifdef USE_AS_STRNCMP
1523 jbe L(ashr_12_exittail)
1527 jmp L(gobble_ashr_12)
1530 L(ashr_12_exittail):
1531 movdqa (%eax, %ecx), %xmm1
1537 * The following cases will be handled by ashr_13
1538 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
1539 * n(3~15) n - 3 12(15 +(n-3) - n) ashr_13
1545 movdqa (%edx), %xmm2
1546 movdqa (%eax), %xmm1
1547 pcmpeqb %xmm1, %xmm0
1549 pcmpeqb %xmm1, %xmm2
1551 pmovmskb %xmm2, %edi
1558 UPDATE_STRNCMP_COUNTER
1560 movdqa (%edx), %xmm3
1571 jg L(nibble_ashr_13)
1574 movdqa (%eax, %ecx), %xmm1
1575 movdqa (%edx, %ecx), %xmm2
1578 palignr $13, %xmm3, %xmm2
1580 pcmpeqb %xmm1, %xmm0
1581 pcmpeqb %xmm2, %xmm1
1583 pmovmskb %xmm1, %esi
1587 #ifdef USE_AS_STRNCMP
1596 jg L(nibble_ashr_13)
1598 movdqa (%eax, %ecx), %xmm1
1599 movdqa (%edx, %ecx), %xmm2
1602 palignr $13, %xmm3, %xmm2
1604 pcmpeqb %xmm1, %xmm0
1605 pcmpeqb %xmm2, %xmm1
1607 pmovmskb %xmm1, %esi
1611 #ifdef USE_AS_STRNCMP
1622 pcmpeqb %xmm3, %xmm0
1623 pmovmskb %xmm0, %esi
1625 jnz L(ashr_13_exittail)
1627 #ifdef USE_AS_STRNCMP
1629 jbe L(ashr_13_exittail)
1633 jmp L(gobble_ashr_13)
1636 L(ashr_13_exittail):
1637 movdqa (%eax, %ecx), %xmm1
1643 * The following cases will be handled by ashr_14
1644 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
1645 * n(2~15) n - 2 13(15 +(n-2) - n) ashr_14
1651 movdqa (%edx), %xmm2
1652 movdqa (%eax), %xmm1
1653 pcmpeqb %xmm1, %xmm0
1655 pcmpeqb %xmm1, %xmm2
1657 pmovmskb %xmm2, %edi
1664 UPDATE_STRNCMP_COUNTER
1666 movdqa (%edx), %xmm3
1677 jg L(nibble_ashr_14)
1680 movdqa (%eax, %ecx), %xmm1
1681 movdqa (%edx, %ecx), %xmm2
1684 palignr $14, %xmm3, %xmm2
1686 pcmpeqb %xmm1, %xmm0
1687 pcmpeqb %xmm2, %xmm1
1689 pmovmskb %xmm1, %esi
1693 #ifdef USE_AS_STRNCMP
1702 jg L(nibble_ashr_14)
1704 movdqa (%eax, %ecx), %xmm1
1705 movdqa (%edx, %ecx), %xmm2
1708 palignr $14, %xmm3, %xmm2
1710 pcmpeqb %xmm1, %xmm0
1711 pcmpeqb %xmm2, %xmm1
1713 pmovmskb %xmm1, %esi
1717 #ifdef USE_AS_STRNCMP
1728 pcmpeqb %xmm3, %xmm0
1729 pmovmskb %xmm0, %esi
1731 jnz L(ashr_14_exittail)
1733 #ifdef USE_AS_STRNCMP
1735 jbe L(ashr_14_exittail)
1739 jmp L(gobble_ashr_14)
1742 L(ashr_14_exittail):
1743 movdqa (%eax, %ecx), %xmm1
1749 * The following cases will be handled by ashr_15
1750 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
1751 * n(1~15) n - 1 14(15 +(n-1) - n) ashr_15
1758 movdqa (%edx), %xmm2
1759 movdqa (%eax), %xmm1
1760 pcmpeqb %xmm1, %xmm0
1762 pcmpeqb %xmm1, %xmm2
1764 pmovmskb %xmm2, %edi
1771 UPDATE_STRNCMP_COUNTER
1773 movdqa (%edx), %xmm3
1784 jg L(nibble_ashr_15)
1787 movdqa (%eax, %ecx), %xmm1
1788 movdqa (%edx, %ecx), %xmm2
1791 palignr $15, %xmm3, %xmm2
1793 pcmpeqb %xmm1, %xmm0
1794 pcmpeqb %xmm2, %xmm1
1796 pmovmskb %xmm1, %esi
1800 #ifdef USE_AS_STRNCMP
1809 jg L(nibble_ashr_15)
1811 movdqa (%eax, %ecx), %xmm1
1812 movdqa (%edx, %ecx), %xmm2
1815 palignr $15, %xmm3, %xmm2
1817 pcmpeqb %xmm1, %xmm0
1818 pcmpeqb %xmm2, %xmm1
1820 pmovmskb %xmm1, %esi
1824 #ifdef USE_AS_STRNCMP
1835 pcmpeqb %xmm3, %xmm0
1836 pmovmskb %xmm0, %esi
1838 jnz L(ashr_15_exittail)
1840 #ifdef USE_AS_STRNCMP
1842 jbe L(ashr_15_exittail)
1846 jmp L(gobble_ashr_15)
1849 L(ashr_15_exittail):
1850 movdqa (%eax, %ecx), %xmm1
1857 pcmpeqb %xmm3, %xmm1
1859 pmovmskb %xmm1, %esi
1864 lea -16(%edi, %ecx), %edi
1902 #ifdef USE_AS_STRNCMP
1914 #ifdef USE_AS_STRNCMP
1925 #ifdef USE_AS_STRNCMP
1936 #ifdef USE_AS_STRNCMP
1947 #ifdef USE_AS_STRNCMP
1958 #ifdef USE_AS_STRNCMP
1969 #ifdef USE_AS_STRNCMP
1980 #ifdef USE_AS_STRNCMP
1993 #ifdef USE_AS_STRNCMP
2020 #ifdef USE_AS_STRNCMP
2035 #ifdef USE_AS_STRNCMP
2040 #ifdef USE_AS_STRNCMP
2051 #ifdef USE_AS_STRNCMP
2057 #ifdef USE_AS_STRNCMP
2060 L(less16bytes_sncmp):
2073 movzbl 1(%eax), %ecx
2082 movzbl 2(%eax), %ecx
2091 movzbl 3(%eax), %ecx
2100 movzbl 4(%eax), %ecx
2109 movzbl 5(%eax), %ecx
2118 movzbl 6(%eax), %ecx
2127 movzbl 7(%eax), %ecx
2137 movzbl 8(%eax), %ecx
2146 movzbl 9(%eax), %ecx
2155 movzbl 10(%eax), %ecx
2164 movzbl 11(%eax), %ecx
2174 movzbl 12(%eax), %ecx
2183 movzbl 13(%eax), %ecx
2192 movzbl 14(%eax), %ecx
2201 movzbl 15(%eax), %ecx