2 Copyright (C) 2010 Free Software Foundation, Inc.
3 Contributed by Intel Corporation.
4 This file is part of the GNU C Library.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
24 #include "asm-syntax.h"
/* Unwind annotation that accompanies a 4-byte push of REG: bump the CFA
   offset by 4, then record REG via cfi_rel_offset at relative offset 0.  */
26 #define CFI_PUSH(REG) \
27 cfi_adjust_cfa_offset (4); \
28 cfi_rel_offset (REG, 0)
30 #define CFI_POP(REG) \
31 cfi_adjust_cfa_offset (-4); \
/* Push REG with pushl and emit the matching CFI_PUSH unwind annotation.  */
34 #define PUSH(REG) pushl REG; CFI_PUSH (REG)
/* Pop REG with popl and emit the matching CFI_POP unwind annotation.  */
35 #define POP(REG) popl REG; CFI_POP (REG)
37 #ifndef USE_AS_STRNCMP
39 # define STRCMP __strcmp_ssse3
43 # define UPDATE_STRNCMP_COUNTER
46 # define STRCMP __strncmp_ssse3
52 # define UPDATE_STRNCMP_COUNTER \
53 /* calculate left number to compare */ \
57 jbe L(more8byteseq); \
61 .section .text.ssse3,"ax",@progbits
71 jb L(less16bytes_sncmp)
125 #ifdef USE_AS_STRNCMP
142 movhpd 8(%eax), %xmm1
143 movhpd 8(%edx), %xmm2
150 #ifdef USE_AS_STRNCMP
214 * The following cases will be handled by ashr_0
215 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
216 * n(0~15) n(0~15) 15(15+ n-n) ashr_0
224 pcmpeqb (%edx), %xmm1
232 UPDATE_STRNCMP_COUNTER
238 movdqa (%eax, %ecx), %xmm1
239 movdqa (%edx, %ecx), %xmm2
247 #ifdef USE_AS_STRNCMP
256 * The following cases will be handled by ashr_1
257 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
258 * n(15) n -15 0(15 +(n-15) - n) ashr_1
277 UPDATE_STRNCMP_COUNTER
293 movdqa (%eax, %ecx), %xmm1
294 movdqa (%edx, %ecx), %xmm2
297 palignr $1, %xmm3, %xmm2
305 #ifdef USE_AS_STRNCMP
317 movdqa (%eax, %ecx), %xmm1
318 movdqa (%edx, %ecx), %xmm2
321 palignr $1, %xmm3, %xmm2
330 #ifdef USE_AS_STRNCMP
344 jnz L(ashr_1_exittail)
346 #ifdef USE_AS_STRNCMP
348 jbe L(ashr_1_exittail)
356 movdqa (%eax, %ecx), %xmm1
362 * The following cases will be handled by ashr_2
363 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
364 * n(14~15) n -14 1(15 +(n-14) - n) ashr_2
383 UPDATE_STRNCMP_COUNTER
399 movdqa (%eax, %ecx), %xmm1
400 movdqa (%edx, %ecx), %xmm2
403 palignr $2, %xmm3, %xmm2
412 #ifdef USE_AS_STRNCMP
423 movdqa (%eax, %ecx), %xmm1
424 movdqa (%edx, %ecx), %xmm2
427 palignr $2, %xmm3, %xmm2
436 #ifdef USE_AS_STRNCMP
450 jnz L(ashr_2_exittail)
452 #ifdef USE_AS_STRNCMP
454 jbe L(ashr_2_exittail)
463 movdqa (%eax, %ecx), %xmm1
469 * The following cases will be handled by ashr_3
470 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
471 * n(13~15) n -13 2(15 +(n-13) - n) ashr_3
490 UPDATE_STRNCMP_COUNTER
506 movdqa (%eax, %ecx), %xmm1
507 movdqa (%edx, %ecx), %xmm2
510 palignr $3, %xmm3, %xmm2
519 #ifdef USE_AS_STRNCMP
530 movdqa (%eax, %ecx), %xmm1
531 movdqa (%edx, %ecx), %xmm2
534 palignr $3, %xmm3, %xmm2
543 #ifdef USE_AS_STRNCMP
557 jnz L(ashr_3_exittail)
559 #ifdef USE_AS_STRNCMP
561 jbe L(ashr_3_exittail)
569 movdqa (%eax, %ecx), %xmm1
575 * The following cases will be handled by ashr_4
576 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
577 * n(12~15) n -12 3(15 +(n-12) - n) ashr_4
596 UPDATE_STRNCMP_COUNTER
612 movdqa (%eax, %ecx), %xmm1
613 movdqa (%edx, %ecx), %xmm2
616 palignr $4, %xmm3, %xmm2
625 #ifdef USE_AS_STRNCMP
637 movdqa (%eax, %ecx), %xmm1
638 movdqa (%edx, %ecx), %xmm2
641 palignr $4, %xmm3, %xmm2
650 #ifdef USE_AS_STRNCMP
665 jnz L(ashr_4_exittail)
667 #ifdef USE_AS_STRNCMP
669 jbe L(ashr_4_exittail)
678 movdqa (%eax, %ecx), %xmm1
684 * The following cases will be handled by ashr_5
685 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
686 * n(11~15) n -11 4(15 +(n-11) - n) ashr_5
705 UPDATE_STRNCMP_COUNTER
721 movdqa (%eax, %ecx), %xmm1
722 movdqa (%edx, %ecx), %xmm2
725 palignr $5, %xmm3, %xmm2
734 #ifdef USE_AS_STRNCMP
745 movdqa (%eax, %ecx), %xmm1
746 movdqa (%edx, %ecx), %xmm2
749 palignr $5, %xmm3, %xmm2
758 #ifdef USE_AS_STRNCMP
772 jnz L(ashr_5_exittail)
774 #ifdef USE_AS_STRNCMP
776 jbe L(ashr_5_exittail)
784 movdqa (%eax, %ecx), %xmm1
790 * The following cases will be handled by ashr_6
791 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
792 * n(10~15) n -10 5(15 +(n-10) - n) ashr_6
812 UPDATE_STRNCMP_COUNTER
828 movdqa (%eax, %ecx), %xmm1
829 movdqa (%edx, %ecx), %xmm2
832 palignr $6, %xmm3, %xmm2
841 #ifdef USE_AS_STRNCMP
853 movdqa (%eax, %ecx), %xmm1
854 movdqa (%edx, %ecx), %xmm2
857 palignr $6, %xmm3, %xmm2
865 #ifdef USE_AS_STRNCMP
880 jnz L(ashr_6_exittail)
882 #ifdef USE_AS_STRNCMP
884 jbe L(ashr_6_exittail)
892 movdqa (%eax, %ecx), %xmm1
898 * The following cases will be handled by ashr_7
899 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
900 * n(9~15) n - 9 6(15 +(n-9) - n) ashr_7
920 UPDATE_STRNCMP_COUNTER
936 movdqa (%eax, %ecx), %xmm1
937 movdqa (%edx, %ecx), %xmm2
940 palignr $7, %xmm3, %xmm2
949 #ifdef USE_AS_STRNCMP
961 movdqa (%eax, %ecx), %xmm1
962 movdqa (%edx, %ecx), %xmm2
965 palignr $7, %xmm3, %xmm2
974 #ifdef USE_AS_STRNCMP
989 jnz L(ashr_7_exittail)
991 #ifdef USE_AS_STRNCMP
993 jbe L(ashr_7_exittail)
1002 movdqa (%eax, %ecx), %xmm1
1008 * The following cases will be handled by ashr_8
1009 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
1010 * n(8~15) n - 8 7(15 +(n-8) - n) ashr_8
1016 movdqa (%edx), %xmm2
1017 movdqa (%eax), %xmm1
1018 pcmpeqb %xmm1, %xmm0
1020 pcmpeqb %xmm1, %xmm2
1022 pmovmskb %xmm2, %edi
1029 UPDATE_STRNCMP_COUNTER
1031 movdqa (%edx), %xmm3
1045 movdqa (%eax, %ecx), %xmm1
1046 movdqa (%edx, %ecx), %xmm2
1049 palignr $8, %xmm3, %xmm2
1051 pcmpeqb %xmm1, %xmm0
1052 pcmpeqb %xmm2, %xmm1
1054 pmovmskb %xmm1, %esi
1058 #ifdef USE_AS_STRNCMP
1069 movdqa (%eax, %ecx), %xmm1
1070 movdqa (%edx, %ecx), %xmm2
1073 palignr $8, %xmm3, %xmm2
1075 pcmpeqb %xmm1, %xmm0
1076 pcmpeqb %xmm2, %xmm1
1078 pmovmskb %xmm1, %esi
1082 #ifdef USE_AS_STRNCMP
1093 pcmpeqb %xmm3, %xmm0
1094 pmovmskb %xmm0, %esi
1096 jnz L(ashr_8_exittail)
1098 #ifdef USE_AS_STRNCMP
1100 jbe L(ashr_8_exittail)
1105 jmp L(gobble_ashr_8)
1109 movdqa (%eax, %ecx), %xmm1
1115 * The following cases will be handled by ashr_9
1116 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
1117 * n(7~15) n - 7 8(15 +(n-7) - n) ashr_9
1123 movdqa (%edx), %xmm2
1124 movdqa (%eax), %xmm1
1125 pcmpeqb %xmm1, %xmm0
1127 pcmpeqb %xmm1, %xmm2
1129 pmovmskb %xmm2, %edi
1136 UPDATE_STRNCMP_COUNTER
1138 movdqa (%edx), %xmm3
1152 movdqa (%eax, %ecx), %xmm1
1153 movdqa (%edx, %ecx), %xmm2
1156 palignr $9, %xmm3, %xmm2
1158 pcmpeqb %xmm1, %xmm0
1159 pcmpeqb %xmm2, %xmm1
1161 pmovmskb %xmm1, %esi
1165 #ifdef USE_AS_STRNCMP
1176 movdqa (%eax, %ecx), %xmm1
1177 movdqa (%edx, %ecx), %xmm2
1180 palignr $9, %xmm3, %xmm2
1182 pcmpeqb %xmm1, %xmm0
1183 pcmpeqb %xmm2, %xmm1
1185 pmovmskb %xmm1, %esi
1189 #ifdef USE_AS_STRNCMP
1200 pcmpeqb %xmm3, %xmm0
1201 pmovmskb %xmm0, %esi
1203 jnz L(ashr_9_exittail)
1205 #ifdef USE_AS_STRNCMP
1207 jbe L(ashr_9_exittail)
1211 jmp L(gobble_ashr_9)
1215 movdqa (%eax, %ecx), %xmm1
1221 * The following cases will be handled by ashr_10
1222 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
1223 * n(6~15) n - 6 9(15 +(n-6) - n) ashr_10
1229 movdqa (%edx), %xmm2
1230 movdqa (%eax), %xmm1
1231 pcmpeqb %xmm1, %xmm0
1233 pcmpeqb %xmm1, %xmm2
1235 pmovmskb %xmm2, %edi
1242 UPDATE_STRNCMP_COUNTER
1244 movdqa (%edx), %xmm3
1255 jg L(nibble_ashr_10)
1258 movdqa (%eax, %ecx), %xmm1
1259 movdqa (%edx, %ecx), %xmm2
1262 palignr $10, %xmm3, %xmm2
1264 pcmpeqb %xmm1, %xmm0
1265 pcmpeqb %xmm2, %xmm1
1267 pmovmskb %xmm1, %esi
1271 #ifdef USE_AS_STRNCMP
1280 jg L(nibble_ashr_10)
1282 movdqa (%eax, %ecx), %xmm1
1283 movdqa (%edx, %ecx), %xmm2
1286 palignr $10, %xmm3, %xmm2
1288 pcmpeqb %xmm1, %xmm0
1289 pcmpeqb %xmm2, %xmm1
1291 pmovmskb %xmm1, %esi
1295 #ifdef USE_AS_STRNCMP
1306 pcmpeqb %xmm3, %xmm0
1307 pmovmskb %xmm0, %esi
1309 jnz L(ashr_10_exittail)
1311 #ifdef USE_AS_STRNCMP
1313 jbe L(ashr_10_exittail)
1317 jmp L(gobble_ashr_10)
1320 L(ashr_10_exittail):
1321 movdqa (%eax, %ecx), %xmm1
1327 * The following cases will be handled by ashr_11
1328 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
1329 * n(5~15) n - 5 10(15 +(n-5) - n) ashr_11
1335 movdqa (%edx), %xmm2
1336 movdqa (%eax), %xmm1
1337 pcmpeqb %xmm1, %xmm0
1339 pcmpeqb %xmm1, %xmm2
1341 pmovmskb %xmm2, %edi
1348 UPDATE_STRNCMP_COUNTER
1350 movdqa (%edx), %xmm3
1361 jg L(nibble_ashr_11)
1364 movdqa (%eax, %ecx), %xmm1
1365 movdqa (%edx, %ecx), %xmm2
1368 palignr $11, %xmm3, %xmm2
1370 pcmpeqb %xmm1, %xmm0
1371 pcmpeqb %xmm2, %xmm1
1373 pmovmskb %xmm1, %esi
1377 #ifdef USE_AS_STRNCMP
1386 jg L(nibble_ashr_11)
1388 movdqa (%eax, %ecx), %xmm1
1389 movdqa (%edx, %ecx), %xmm2
1392 palignr $11, %xmm3, %xmm2
1394 pcmpeqb %xmm1, %xmm0
1395 pcmpeqb %xmm2, %xmm1
1397 pmovmskb %xmm1, %esi
1401 #ifdef USE_AS_STRNCMP
1412 pcmpeqb %xmm3, %xmm0
1413 pmovmskb %xmm0, %esi
1415 jnz L(ashr_11_exittail)
1417 #ifdef USE_AS_STRNCMP
1419 jbe L(ashr_11_exittail)
1423 jmp L(gobble_ashr_11)
1426 L(ashr_11_exittail):
1427 movdqa (%eax, %ecx), %xmm1
1433 * The following cases will be handled by ashr_12
1434 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
1435 * n(4~15) n - 4 11(15 +(n-4) - n) ashr_12
1441 movdqa (%edx), %xmm2
1442 movdqa (%eax), %xmm1
1443 pcmpeqb %xmm1, %xmm0
1445 pcmpeqb %xmm1, %xmm2
1447 pmovmskb %xmm2, %edi
1454 UPDATE_STRNCMP_COUNTER
1456 movdqa (%edx), %xmm3
1467 jg L(nibble_ashr_12)
1470 movdqa (%eax, %ecx), %xmm1
1471 movdqa (%edx, %ecx), %xmm2
1474 palignr $12, %xmm3, %xmm2
1476 pcmpeqb %xmm1, %xmm0
1477 pcmpeqb %xmm2, %xmm1
1479 pmovmskb %xmm1, %esi
1487 jg L(nibble_ashr_12)
1489 #ifdef USE_AS_STRNCMP
1494 movdqa (%eax, %ecx), %xmm1
1495 movdqa (%edx, %ecx), %xmm2
1498 palignr $12, %xmm3, %xmm2
1500 pcmpeqb %xmm1, %xmm0
1501 pcmpeqb %xmm2, %xmm1
1503 pmovmskb %xmm1, %esi
1507 #ifdef USE_AS_STRNCMP
1518 pcmpeqb %xmm3, %xmm0
1519 pmovmskb %xmm0, %esi
1521 jnz L(ashr_12_exittail)
1523 #ifdef USE_AS_STRNCMP
1525 jbe L(ashr_12_exittail)
1529 jmp L(gobble_ashr_12)
1532 L(ashr_12_exittail):
1533 movdqa (%eax, %ecx), %xmm1
1539 * The following cases will be handled by ashr_13
1540 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
1541 * n(3~15) n - 3 12(15 +(n-3) - n) ashr_13
1547 movdqa (%edx), %xmm2
1548 movdqa (%eax), %xmm1
1549 pcmpeqb %xmm1, %xmm0
1551 pcmpeqb %xmm1, %xmm2
1553 pmovmskb %xmm2, %edi
1560 UPDATE_STRNCMP_COUNTER
1562 movdqa (%edx), %xmm3
1573 jg L(nibble_ashr_13)
1576 movdqa (%eax, %ecx), %xmm1
1577 movdqa (%edx, %ecx), %xmm2
1580 palignr $13, %xmm3, %xmm2
1582 pcmpeqb %xmm1, %xmm0
1583 pcmpeqb %xmm2, %xmm1
1585 pmovmskb %xmm1, %esi
1589 #ifdef USE_AS_STRNCMP
1598 jg L(nibble_ashr_13)
1600 movdqa (%eax, %ecx), %xmm1
1601 movdqa (%edx, %ecx), %xmm2
1604 palignr $13, %xmm3, %xmm2
1606 pcmpeqb %xmm1, %xmm0
1607 pcmpeqb %xmm2, %xmm1
1609 pmovmskb %xmm1, %esi
1613 #ifdef USE_AS_STRNCMP
1624 pcmpeqb %xmm3, %xmm0
1625 pmovmskb %xmm0, %esi
1627 jnz L(ashr_13_exittail)
1629 #ifdef USE_AS_STRNCMP
1631 jbe L(ashr_13_exittail)
1635 jmp L(gobble_ashr_13)
1638 L(ashr_13_exittail):
1639 movdqa (%eax, %ecx), %xmm1
1645 * The following cases will be handled by ashr_14
1646 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
1647 * n(2~15) n - 2 13(15 +(n-2) - n) ashr_14
1653 movdqa (%edx), %xmm2
1654 movdqa (%eax), %xmm1
1655 pcmpeqb %xmm1, %xmm0
1657 pcmpeqb %xmm1, %xmm2
1659 pmovmskb %xmm2, %edi
1666 UPDATE_STRNCMP_COUNTER
1668 movdqa (%edx), %xmm3
1679 jg L(nibble_ashr_14)
1682 movdqa (%eax, %ecx), %xmm1
1683 movdqa (%edx, %ecx), %xmm2
1686 palignr $14, %xmm3, %xmm2
1688 pcmpeqb %xmm1, %xmm0
1689 pcmpeqb %xmm2, %xmm1
1691 pmovmskb %xmm1, %esi
1695 #ifdef USE_AS_STRNCMP
1704 jg L(nibble_ashr_14)
1706 movdqa (%eax, %ecx), %xmm1
1707 movdqa (%edx, %ecx), %xmm2
1710 palignr $14, %xmm3, %xmm2
1712 pcmpeqb %xmm1, %xmm0
1713 pcmpeqb %xmm2, %xmm1
1715 pmovmskb %xmm1, %esi
1719 #ifdef USE_AS_STRNCMP
1730 pcmpeqb %xmm3, %xmm0
1731 pmovmskb %xmm0, %esi
1733 jnz L(ashr_14_exittail)
1735 #ifdef USE_AS_STRNCMP
1737 jbe L(ashr_14_exittail)
1741 jmp L(gobble_ashr_14)
1744 L(ashr_14_exittail):
1745 movdqa (%eax, %ecx), %xmm1
1751 * The following cases will be handled by ashr_15
1752 * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
1753 * n(1~15) n - 1 14(15 +(n-1) - n) ashr_15
1760 movdqa (%edx), %xmm2
1761 movdqa (%eax), %xmm1
1762 pcmpeqb %xmm1, %xmm0
1764 pcmpeqb %xmm1, %xmm2
1766 pmovmskb %xmm2, %edi
1773 UPDATE_STRNCMP_COUNTER
1775 movdqa (%edx), %xmm3
1786 jg L(nibble_ashr_15)
1789 movdqa (%eax, %ecx), %xmm1
1790 movdqa (%edx, %ecx), %xmm2
1793 palignr $15, %xmm3, %xmm2
1795 pcmpeqb %xmm1, %xmm0
1796 pcmpeqb %xmm2, %xmm1
1798 pmovmskb %xmm1, %esi
1802 #ifdef USE_AS_STRNCMP
1811 jg L(nibble_ashr_15)
1813 movdqa (%eax, %ecx), %xmm1
1814 movdqa (%edx, %ecx), %xmm2
1817 palignr $15, %xmm3, %xmm2
1819 pcmpeqb %xmm1, %xmm0
1820 pcmpeqb %xmm2, %xmm1
1822 pmovmskb %xmm1, %esi
1826 #ifdef USE_AS_STRNCMP
1837 pcmpeqb %xmm3, %xmm0
1838 pmovmskb %xmm0, %esi
1840 jnz L(ashr_15_exittail)
1842 #ifdef USE_AS_STRNCMP
1844 jbe L(ashr_15_exittail)
1848 jmp L(gobble_ashr_15)
1851 L(ashr_15_exittail):
1852 movdqa (%eax, %ecx), %xmm1
1859 pcmpeqb %xmm3, %xmm1
1861 pmovmskb %xmm1, %esi
1866 lea -16(%edi, %ecx), %edi
1904 #ifdef USE_AS_STRNCMP
1913 #ifdef USE_AS_STRNCMP
1920 #ifdef USE_AS_STRNCMP
1928 #ifdef USE_AS_STRNCMP
1935 #ifdef USE_AS_STRNCMP
1943 #ifdef USE_AS_STRNCMP
1950 #ifdef USE_AS_STRNCMP
1958 #ifdef USE_AS_STRNCMP
1965 #ifdef USE_AS_STRNCMP
1973 #ifdef USE_AS_STRNCMP
1980 #ifdef USE_AS_STRNCMP
1988 #ifdef USE_AS_STRNCMP
1994 #ifdef USE_AS_STRNCMP
2002 #ifdef USE_AS_STRNCMP
2009 #ifdef USE_AS_STRNCMP
2017 #ifdef USE_AS_STRNCMP
2026 #ifdef USE_AS_STRNCMP
2053 #ifdef USE_AS_STRNCMP
2061 #ifdef USE_AS_STRNCMP
2071 #ifdef USE_AS_STRNCMP
2076 #ifdef USE_AS_STRNCMP
2085 #ifdef USE_AS_STRNCMP
2090 #ifdef USE_AS_STRNCMP
2091 L(less16bytes_sncmp):
2104 movzbl 1(%eax), %ecx
2113 movzbl 2(%eax), %ecx
2122 movzbl 3(%eax), %ecx
2131 movzbl 4(%eax), %ecx
2140 movzbl 5(%eax), %ecx
2149 movzbl 6(%eax), %ecx
2158 movzbl 7(%eax), %ecx
2168 movzbl 8(%eax), %ecx
2177 movzbl 9(%eax), %ecx
2186 movzbl 10(%eax), %ecx
2195 movzbl 11(%eax), %ecx
2205 movzbl 12(%eax), %ecx
2214 movzbl 13(%eax), %ecx
2223 movzbl 14(%eax), %ecx
2232 movzbl 15(%eax), %ecx