patches/musl/branch-updates.diff

   1 diff --git a/arch/aarch64/pthread_arch.h b/arch/aarch64/pthread_arch.h
   2 index e64b126d..3909616c 100644
   3 --- a/arch/aarch64/pthread_arch.h
   4 +++ b/arch/aarch64/pthread_arch.h
   5 @@ -1,12 +1,11 @@
   6 -static inline struct pthread *__pthread_self()
   7 +static inline uintptr_t __get_tp()
   8  {
   9 -       char *self;
  10 -       __asm__ ("mrs %0,tpidr_el0" : "=r"(self));
  11 -       return (void*)(self - sizeof(struct pthread));
  12 +       uintptr_t tp;
  13 +       __asm__ ("mrs %0,tpidr_el0" : "=r"(tp));
  14 +       return tp;
  15  }
  16
  17  #define TLS_ABOVE_TP
  18  #define GAP_ABOVE_TP 16
  19 -#define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread))
  20
  21  #define MC_PC pc
  22 diff --git a/arch/arm/pthread_arch.h b/arch/arm/pthread_arch.h
  23 index e689ea21..157e2eae 100644
  24 --- a/arch/arm/pthread_arch.h
  25 +++ b/arch/arm/pthread_arch.h
  26 @@ -1,11 +1,11 @@
  27  #if ((__ARM_ARCH_6K__ || __ARM_ARCH_6KZ__ || __ARM_ARCH_6ZK__) && !__thumb__) \
  28   || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH >= 7
  29
  30 -static inline pthread_t __pthread_self()
  31 +static inline uintptr_t __get_tp()
  32  {
  33 -       char *p;
  34 -       __asm__ ( "mrc p15,0,%0,c13,c0,3" : "=r"(p) );
  35 -       return (void *)(p-sizeof(struct pthread));
  36 +       uintptr_t tp;
  37 +       __asm__ ( "mrc p15,0,%0,c13,c0,3" : "=r"(tp) );
  38 +       return tp;
  39  }
  40
  41  #else
  42 @@ -16,18 +16,17 @@ static inline pthread_t __pthread_self()
  43  #define BLX "blx"
  44  #endif
  45
  46 -static inline pthread_t __pthread_self()
  47 +static inline uintptr_t __get_tp()
  48  {
  49         extern hidden uintptr_t __a_gettp_ptr;
  50 -       register uintptr_t p __asm__("r0");
  51 -       __asm__ ( BLX " %1" : "=r"(p) : "r"(__a_gettp_ptr) : "cc", "lr" );
  52 -       return (void *)(p-sizeof(struct pthread));
  53 +       register uintptr_t tp __asm__("r0");
  54 +       __asm__ ( BLX " %1" : "=r"(tp) : "r"(__a_gettp_ptr) : "cc", "lr" );
  55 +       return tp;
  56  }
  57
  58  #endif
  59
  60  #define TLS_ABOVE_TP
  61  #define GAP_ABOVE_TP 8
  62 -#define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread))
  63
  64  #define MC_PC arm_pc
  65 diff --git a/arch/generic/bits/fcntl.h b/arch/generic/bits/fcntl.h
  66 index ae233cc0..730a98cf 100644
  67 --- a/arch/generic/bits/fcntl.h
  68 +++ b/arch/generic/bits/fcntl.h
  69 @@ -30,9 +30,15 @@
  70  #define F_SETSIG 10
  71  #define F_GETSIG 11
  72
  73 +#if __LONG_MAX == 0x7fffffffL
  74  #define F_GETLK 12
  75  #define F_SETLK 13
  76  #define F_SETLKW 14
  77 +#else
  78 +#define F_GETLK 5
  79 +#define F_SETLK 6
  80 +#define F_SETLKW 7
  81 +#endif
  82
  83  #define F_SETOWN_EX 15
  84  #define F_GETOWN_EX 16
  85 diff --git a/arch/i386/pthread_arch.h b/arch/i386/pthread_arch.h
  86 index 6f600b9e..a639c382 100644
  87 --- a/arch/i386/pthread_arch.h
  88 +++ b/arch/i386/pthread_arch.h
  89 @@ -1,10 +1,8 @@
  90 -static inline struct pthread *__pthread_self()
  91 +static inline uintptr_t __get_tp()
  92  {
  93 -       struct pthread *self;
  94 -       __asm__ ("movl %%gs:0,%0" : "=r" (self) );
  95 -       return self;
  96 +       uintptr_t tp;
  97 +       __asm__ ("movl %%gs:0,%0" : "=r" (tp) );
  98 +       return tp;
  99  }
 100
 101 -#define TP_ADJ(p) (p)
 102 -
 103  #define MC_PC gregs[REG_EIP]
 104 diff --git a/arch/i386/syscall_arch.h b/arch/i386/syscall_arch.h
 105 index 69642e57..f92b7aa9 100644
 106 --- a/arch/i386/syscall_arch.h
 107 +++ b/arch/i386/syscall_arch.h
 108 @@ -87,5 +87,3 @@ static inline long __syscall6(long n, long a1, long a2, long a3, long a4, long a
 109  #define VDSO_CGT32_VER "LINUX_2.6"
 110  #define VDSO_CGT_SYM "__vdso_clock_gettime64"
 111  #define VDSO_CGT_VER "LINUX_2.6"
 112 -
 113 -#define SYSCALL_USE_SOCKETCALL
 114 diff --git a/arch/m68k/pthread_arch.h b/arch/m68k/pthread_arch.h
 115 index 02d5b8a0..5bea4e1a 100644
 116 --- a/arch/m68k/pthread_arch.h
 117 +++ b/arch/m68k/pthread_arch.h
 118 @@ -1,13 +1,12 @@
 119 -static inline struct pthread *__pthread_self()
 120 +static inline uintptr_t __get_tp()
 121  {
 122 -       uintptr_t tp = __syscall(SYS_get_thread_area);
 123 -       return (pthread_t)(tp - 0x7000 - sizeof(struct pthread));
 124 +       return __syscall(SYS_get_thread_area);
 125  }
 126
 127  #define TLS_ABOVE_TP
 128  #define GAP_ABOVE_TP 0
 129 -#define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread) + 0x7000)
 130
 131 +#define TP_OFFSET 0x7000
 132  #define DTP_OFFSET 0x8000
 133
 134  #define MC_PC gregs[R_PC]
 135 diff --git a/arch/m68k/syscall_arch.h b/arch/m68k/syscall_arch.h
 136 index af79c306..6a9d0ae8 100644
 137 --- a/arch/m68k/syscall_arch.h
 138 +++ b/arch/m68k/syscall_arch.h
 139 @@ -87,5 +87,4 @@ static inline long __syscall6(long n, long a, long b, long c, long d, long e, lo
 140         return d0;
 141  }
 142
 143 -#define SYSCALL_USE_SOCKETCALL
 144  #define SYSCALL_IPC_BROKEN_MODE
 145 diff --git a/arch/microblaze/pthread_arch.h b/arch/microblaze/pthread_arch.h
 146 index f6ba8de9..ff26624e 100644
 147 --- a/arch/microblaze/pthread_arch.h
 148 +++ b/arch/microblaze/pthread_arch.h
 149 @@ -1,10 +1,8 @@
 150 -static inline struct pthread *__pthread_self()
 151 +static inline uintptr_t __get_tp()
 152  {
 153 -       struct pthread *self;
 154 -       __asm__ ("ori %0, r21, 0" : "=r" (self) );
 155 -       return self;
 156 +       uintptr_t tp;
 157 +       __asm__ ("ori %0, r21, 0" : "=r" (tp) );
 158 +       return tp;
 159  }
 160
 161 -#define TP_ADJ(p) (p)
 162 -
 163  #define MC_PC regs.pc
 164 diff --git a/arch/microblaze/syscall_arch.h b/arch/microblaze/syscall_arch.h
 165 index 169013f8..61d8248e 100644
 166 --- a/arch/microblaze/syscall_arch.h
 167 +++ b/arch/microblaze/syscall_arch.h
 168 @@ -95,3 +95,5 @@ static inline long __syscall6(long n, long a, long b, long c, long d, long e, lo
 169  }
 170
 171  #define SYSCALL_IPC_BROKEN_MODE
 172 +
 173 +#undef SYS_socketcall
 174 diff --git a/arch/mips/pthread_arch.h b/arch/mips/pthread_arch.h
 175 index 1e7839ea..c45347ab 100644
 176 --- a/arch/mips/pthread_arch.h
 177 +++ b/arch/mips/pthread_arch.h
 178 @@ -1,19 +1,19 @@
 179 -static inline struct pthread *__pthread_self()
 180 +static inline uintptr_t __get_tp()
 181  {
 182  #if __mips_isa_rev < 2
 183 -       register char *tp __asm__("$3");
 184 +       register uintptr_t tp __asm__("$3");
 185         __asm__ (".word 0x7c03e83b" : "=r" (tp) );
 186  #else
 187 -       char *tp;
 188 +       uintptr_t tp;
 189         __asm__ ("rdhwr %0, $29" : "=r" (tp) );
 190  #endif
 191 -       return (pthread_t)(tp - 0x7000 - sizeof(struct pthread));
 192 +       return tp;
 193  }
 194
 195  #define TLS_ABOVE_TP
 196  #define GAP_ABOVE_TP 0
 197 -#define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread) + 0x7000)
 198
 199 +#define TP_OFFSET 0x7000
 200  #define DTP_OFFSET 0x8000
 201
 202  #define MC_PC pc
 203 diff --git a/arch/mips/syscall_arch.h b/arch/mips/syscall_arch.h
 204 index 380a94b3..5b7c38de 100644
 205 --- a/arch/mips/syscall_arch.h
 206 +++ b/arch/mips/syscall_arch.h
 207 @@ -149,3 +149,5 @@ static inline long __syscall7(long n, long a, long b, long c, long d, long e, lo
 208
 209  #define SO_SNDTIMEO_OLD 0x1005
 210  #define SO_RCVTIMEO_OLD 0x1006
 211 +
 212 +#undef SYS_socketcall
 213 diff --git a/arch/mips64/bits/fcntl.h b/arch/mips64/bits/fcntl.h
 214 index 3bcec15e..5da1eef8 100644
 215 --- a/arch/mips64/bits/fcntl.h
 216 +++ b/arch/mips64/bits/fcntl.h
 217 @@ -13,7 +13,7 @@
 218
 219  #define O_ASYNC      010000
 220  #define O_DIRECT    0100000
 221 -#define O_LARGEFILE       0
 222 +#define O_LARGEFILE  020000
 223  #define O_NOATIME  01000000
 224  #define O_PATH    010000000
 225  #define O_TMPFILE 020200000
 226 diff --git a/arch/mips64/pthread_arch.h b/arch/mips64/pthread_arch.h
 227 index 1e7839ea..c45347ab 100644
 228 --- a/arch/mips64/pthread_arch.h
 229 +++ b/arch/mips64/pthread_arch.h
 230 @@ -1,19 +1,19 @@
 231 -static inline struct pthread *__pthread_self()
 232 +static inline uintptr_t __get_tp()
 233  {
 234  #if __mips_isa_rev < 2
 235 -       register char *tp __asm__("$3");
 236 +       register uintptr_t tp __asm__("$3");
 237         __asm__ (".word 0x7c03e83b" : "=r" (tp) );
 238  #else
 239 -       char *tp;
 240 +       uintptr_t tp;
 241         __asm__ ("rdhwr %0, $29" : "=r" (tp) );
 242  #endif
 243 -       return (pthread_t)(tp - 0x7000 - sizeof(struct pthread));
 244 +       return tp;
 245  }
 246
 247  #define TLS_ABOVE_TP
 248  #define GAP_ABOVE_TP 0
 249 -#define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread) + 0x7000)
 250
 251 +#define TP_OFFSET 0x7000
 252  #define DTP_OFFSET 0x8000
 253
 254  #define MC_PC pc
 255 diff --git a/arch/mipsn32/pthread_arch.h b/arch/mipsn32/pthread_arch.h
 256 index 1e7839ea..c45347ab 100644
 257 --- a/arch/mipsn32/pthread_arch.h
 258 +++ b/arch/mipsn32/pthread_arch.h
 259 @@ -1,19 +1,19 @@
 260 -static inline struct pthread *__pthread_self()
 261 +static inline uintptr_t __get_tp()
 262  {
 263  #if __mips_isa_rev < 2
 264 -       register char *tp __asm__("$3");
 265 +       register uintptr_t tp __asm__("$3");
 266         __asm__ (".word 0x7c03e83b" : "=r" (tp) );
 267  #else
 268 -       char *tp;
 269 +       uintptr_t tp;
 270         __asm__ ("rdhwr %0, $29" : "=r" (tp) );
 271  #endif
 272 -       return (pthread_t)(tp - 0x7000 - sizeof(struct pthread));
 273 +       return tp;
 274  }
 275
 276  #define TLS_ABOVE_TP
 277  #define GAP_ABOVE_TP 0
 278 -#define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread) + 0x7000)
 279
 280 +#define TP_OFFSET 0x7000
 281  #define DTP_OFFSET 0x8000
 282
 283  #define MC_PC pc
 284 diff --git a/arch/or1k/pthread_arch.h b/arch/or1k/pthread_arch.h
 285 index 1b806f89..f75ea7e4 100644
 286 --- a/arch/or1k/pthread_arch.h
 287 +++ b/arch/or1k/pthread_arch.h
 288 @@ -1,18 +1,16 @@
 289 -/* or1k use variant I, but with the twist that tp points to the end of TCB */
 290 -static inline struct pthread *__pthread_self()
 291 +static inline uintptr_t __get_tp()
 292  {
 293  #ifdef __clang__
 294 -       char *tp;
 295 +       uintptr_t tp;
 296         __asm__ ("l.ori %0, r10, 0" : "=r" (tp) );
 297  #else
 298 -       register char *tp __asm__("r10");
 299 +       register uintptr_t tp __asm__("r10");
 300         __asm__ ("" : "=r" (tp) );
 301  #endif
 302 -       return (struct pthread *) (tp - sizeof(struct pthread));
 303 +       return tp;
 304  }
 305
 306  #define TLS_ABOVE_TP
 307  #define GAP_ABOVE_TP 0
 308 -#define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread))
 309
 310  #define MC_PC regs.pc
 311 diff --git a/arch/powerpc/pthread_arch.h b/arch/powerpc/pthread_arch.h
 312 index ae0f28d6..42e88b07 100644
 313 --- a/arch/powerpc/pthread_arch.h
 314 +++ b/arch/powerpc/pthread_arch.h
 315 @@ -1,18 +1,16 @@
 316 -static inline struct pthread *__pthread_self()
 317 +static inline uintptr_t __get_tp()
 318  {
 319 -       register char *tp __asm__("r2");
 320 +       register uintptr_t tp __asm__("r2");
 321         __asm__ ("" : "=r" (tp) );
 322 -       return (pthread_t)(tp - 0x7000 - sizeof(struct pthread));
 323 +       return tp;
 324  }
 325
 326  #define TLS_ABOVE_TP
 327  #define GAP_ABOVE_TP 0
 328 -#define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread) + 0x7000)
 329
 330 +#define TP_OFFSET 0x7000
 331  #define DTP_OFFSET 0x8000
 332
 333  // the kernel calls the ip "nip", it's the first saved value after the 32
 334  // GPRs.
 335  #define MC_PC gregs[32]
 336 -
 337 -#define CANARY canary_at_end
 338 diff --git a/arch/powerpc64/pthread_arch.h b/arch/powerpc64/pthread_arch.h
 339 index 79c3ecd8..1b7b9079 100644
 340 --- a/arch/powerpc64/pthread_arch.h
 341 +++ b/arch/powerpc64/pthread_arch.h
 342 @@ -1,18 +1,16 @@
 343 -static inline struct pthread *__pthread_self()
 344 +static inline uintptr_t __get_tp()
 345  {
 346 -       register char *tp __asm__("r13");
 347 +       register uintptr_t tp __asm__("r13");
 348         __asm__ ("" : "=r" (tp) );
 349 -       return (pthread_t)(tp - 0x7000 - sizeof(struct pthread));
 350 +       return tp;
 351  }
 352
 353  #define TLS_ABOVE_TP
 354  #define GAP_ABOVE_TP 0
 355 -#define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread) + 0x7000)
 356
 357 +#define TP_OFFSET 0x7000
 358  #define DTP_OFFSET 0x8000
 359
 360  // the kernel calls the ip "nip", it's the first saved value after the 32
 361  // GPRs.
 362  #define MC_PC gp_regs[32]
 363 -
 364 -#define CANARY canary_at_end
 365 diff --git a/arch/riscv64/bits/fcntl.h b/arch/riscv64/bits/fcntl.h
 366 deleted file mode 100644
 367 index ecb4d18f..00000000
 368 --- a/arch/riscv64/bits/fcntl.h
 369 +++ /dev/null
 370 @@ -1,38 +0,0 @@
 371 -#define O_CREAT        0100
 372 -#define O_EXCL         0200
 373 -#define O_NOCTTY       0400
 374 -#define O_TRUNC       01000
 375 -#define O_APPEND      02000
 376 -#define O_NONBLOCK    04000
 377 -#define O_DSYNC      010000
 378 -#define O_SYNC     04010000
 379 -#define O_RSYNC    04010000
 380 -#define O_DIRECTORY 0200000
 381 -#define O_NOFOLLOW  0400000
 382 -#define O_CLOEXEC  02000000
 383 -
 384 -#define O_ASYNC      020000
 385 -#define O_DIRECT     040000
 386 -#define O_LARGEFILE 0100000
 387 -#define O_NOATIME  01000000
 388 -#define O_PATH    010000000
 389 -#define O_TMPFILE 020200000
 390 -#define O_NDELAY O_NONBLOCK
 391 -
 392 -#define F_DUPFD  0
 393 -#define F_GETFD  1
 394 -#define F_SETFD  2
 395 -#define F_GETFL  3
 396 -#define F_SETFL  4
 397 -#define F_GETLK  5
 398 -#define F_SETLK  6
 399 -#define F_SETLKW 7
 400 -#define F_SETOWN 8
 401 -#define F_GETOWN 9
 402 -#define F_SETSIG 10
 403 -#define F_GETSIG 11
 404 -
 405 -#define F_SETOWN_EX 15
 406 -#define F_GETOWN_EX 16
 407 -
 408 -#define F_GETOWNER_UIDS 17
 409 diff --git a/arch/riscv64/pthread_arch.h b/arch/riscv64/pthread_arch.h
 410 index db414b17..a20d7fba 100644
 411 --- a/arch/riscv64/pthread_arch.h
 412 +++ b/arch/riscv64/pthread_arch.h
 413 @@ -1,13 +1,12 @@
 414 -static inline struct pthread *__pthread_self()
 415 +static inline uintptr_t __get_tp()
 416  {
 417 -       char *tp;
 418 +       uintptr_t tp;
 419         __asm__ __volatile__("mv %0, tp" : "=r"(tp));
 420 -       return (void *)(tp - sizeof(struct pthread));
 421 +       return tp;
 422  }
 423
 424  #define TLS_ABOVE_TP
 425  #define GAP_ABOVE_TP 0
 426 -#define TP_ADJ(p) ((char *)p + sizeof(struct pthread))
 427
 428  #define DTP_OFFSET 0x800
 429
 430 diff --git a/arch/s390x/pthread_arch.h b/arch/s390x/pthread_arch.h
 431 index e2251f1f..e54fec3f 100644
 432 --- a/arch/s390x/pthread_arch.h
 433 +++ b/arch/s390x/pthread_arch.h
 434 @@ -1,14 +1,12 @@
 435 -static inline struct pthread *__pthread_self()
 436 +static inline uintptr_t __get_tp()
 437  {
 438 -       struct pthread *self;
 439 +       uintptr_t tp;
 440         __asm__ (
 441                 "ear  %0, %%a0\n"
 442                 "sllg %0, %0, 32\n"
 443                 "ear  %0, %%a1\n"
 444 -               : "=r"(self));
 445 -       return self;
 446 +               : "=r"(tp));
 447 +       return tp;
 448  }
 449
 450 -#define TP_ADJ(p) (p)
 451 -
 452  #define MC_PC psw.addr
 453 diff --git a/arch/s390x/syscall_arch.h b/arch/s390x/syscall_arch.h
 454 index afb99852..83cc9a27 100644
 455 --- a/arch/s390x/syscall_arch.h
 456 +++ b/arch/s390x/syscall_arch.h
 457 @@ -72,5 +72,3 @@ static inline long __syscall6(long n, long a, long b, long c, long d, long e, lo
 458         register long r7 __asm__("r7") = f;
 459         __asm_syscall("+r"(r2), "r"(r1), "r"(r3), "r"(r4), "r"(r5), "r"(r6), "r"(r7));
 460  }
 461 -
 462 -#define SYSCALL_USE_SOCKETCALL
 463 diff --git a/arch/sh/pthread_arch.h b/arch/sh/pthread_arch.h
 464 index 3ee9c1a9..0fcf70d2 100644
 465 --- a/arch/sh/pthread_arch.h
 466 +++ b/arch/sh/pthread_arch.h
 467 @@ -1,13 +1,12 @@
 468 -static inline struct pthread *__pthread_self()
 469 +static inline uintptr_t __get_tp()
 470  {
 471 -       char *self;
 472 -       __asm__ ("stc gbr,%0" : "=r" (self) );
 473 -       return (struct pthread *) (self - sizeof(struct pthread));
 474 +       uintptr_t tp;
 475 +       __asm__ ("stc gbr,%0" : "=r" (tp) );
 476 +       return tp;
 477  }
 478
 479  #define TLS_ABOVE_TP
 480  #define GAP_ABOVE_TP 8
 481 -#define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread))
 482
 483  #define MC_PC sc_pc
 484
 485 diff --git a/arch/x32/bits/fcntl.h b/arch/x32/bits/fcntl.h
 486 index 1b88ad39..08627f81 100644
 487 --- a/arch/x32/bits/fcntl.h
 488 +++ b/arch/x32/bits/fcntl.h
 489 @@ -13,7 +13,7 @@
 490
 491  #define O_ASYNC      020000
 492  #define O_DIRECT     040000
 493 -#define O_LARGEFILE       0
 494 +#define O_LARGEFILE 0100000
 495  #define O_NOATIME  01000000
 496  #define O_PATH    010000000
 497  #define O_TMPFILE 020200000
 498 diff --git a/arch/x32/pthread_arch.h b/arch/x32/pthread_arch.h
 499 index f640a1a1..c1e7716d 100644
 500 --- a/arch/x32/pthread_arch.h
 501 +++ b/arch/x32/pthread_arch.h
 502 @@ -1,14 +1,12 @@
 503 -static inline struct pthread *__pthread_self()
 504 +static inline uintptr_t __get_tp()
 505  {
 506 -       struct pthread *self;
 507 -       __asm__ ("mov %%fs:0,%0" : "=r" (self) );
 508 -       return self;
 509 +       uintptr_t tp;
 510 +       __asm__ ("mov %%fs:0,%0" : "=r" (tp) );
 511 +       return tp;
 512  }
 513
 514 -#define TP_ADJ(p) (p)
 515 -
 516  #define MC_PC gregs[REG_RIP]
 517
 518 -#define CANARY canary2
 519 +#define CANARY_PAD
 520
 521  #define tls_mod_off_t unsigned long long
 522 diff --git a/arch/x86_64/bits/fcntl.h b/arch/x86_64/bits/fcntl.h
 523 deleted file mode 100644
 524 index 1b88ad39..00000000
 525 --- a/arch/x86_64/bits/fcntl.h
 526 +++ /dev/null
 527 @@ -1,40 +0,0 @@
 528 -#define O_CREAT        0100
 529 -#define O_EXCL         0200
 530 -#define O_NOCTTY       0400
 531 -#define O_TRUNC       01000
 532 -#define O_APPEND      02000
 533 -#define O_NONBLOCK    04000
 534 -#define O_DSYNC      010000
 535 -#define O_SYNC     04010000
 536 -#define O_RSYNC    04010000
 537 -#define O_DIRECTORY 0200000
 538 -#define O_NOFOLLOW  0400000
 539 -#define O_CLOEXEC  02000000
 540 -
 541 -#define O_ASYNC      020000
 542 -#define O_DIRECT     040000
 543 -#define O_LARGEFILE       0
 544 -#define O_NOATIME  01000000
 545 -#define O_PATH    010000000
 546 -#define O_TMPFILE 020200000
 547 -#define O_NDELAY O_NONBLOCK
 548 -
 549 -#define F_DUPFD  0
 550 -#define F_GETFD  1
 551 -#define F_SETFD  2
 552 -#define F_GETFL  3
 553 -#define F_SETFL  4
 554 -
 555 -#define F_SETOWN 8
 556 -#define F_GETOWN 9
 557 -#define F_SETSIG 10
 558 -#define F_GETSIG 11
 559 -
 560 -#define F_GETLK 5
 561 -#define F_SETLK 6
 562 -#define F_SETLKW 7
 563 -
 564 -#define F_SETOWN_EX 15
 565 -#define F_GETOWN_EX 16
 566 -
 567 -#define F_GETOWNER_UIDS 17
 568 diff --git a/arch/x86_64/pthread_arch.h b/arch/x86_64/pthread_arch.h
 569 index 65e880c6..c8c63f2e 100644
 570 --- a/arch/x86_64/pthread_arch.h
 571 +++ b/arch/x86_64/pthread_arch.h
 572 @@ -1,10 +1,8 @@
 573 -static inline struct pthread *__pthread_self()
 574 +static inline uintptr_t __get_tp()
 575  {
 576 -       struct pthread *self;
 577 -       __asm__ ("mov %%fs:0,%0" : "=r" (self) );
 578 -       return self;
 579 +       uintptr_t tp;
 580 +       __asm__ ("mov %%fs:0,%0" : "=r" (tp) );
 581 +       return tp;
 582  }
 583
 584 -#define TP_ADJ(p) (p)
 585 -
 586  #define MC_PC gregs[REG_RIP]
 587 diff --git a/configure b/configure
 588 index 18fda9af..947adf41 100755
 589 --- a/configure
 590 +++ b/configure
 591 @@ -30,7 +30,7 @@ System types:
 592  Optional features:
 593    --enable-optimize=...   optimize listed components for speed over size [auto]
 594    --enable-debug          build with debugging information [disabled]
 595 -  --enable-warnings       build with recommended warnings flags [disabled]
 596 +  --disable-warnings      build with recommended warnings flags [enabled]
 597    --enable-wrapper=...    build given musl toolchain wrapper [auto]
 598    --disable-shared        inhibit building shared library [enabled]
 599    --disable-static        inhibit building static library [enabled]
 600 @@ -136,7 +136,7 @@ build=
 601  target=
 602  optimize=auto
 603  debug=no
 604 -warnings=no
 605 +warnings=yes
 606  shared=auto
 607  static=yes
 608  wrapper=auto
 609 @@ -508,10 +508,13 @@ fi
 610  #
 611  # GCC defines -w as overriding any -W options, regardless of order, but
 612  # clang has a bunch of annoying warnings enabled by default and needs -w
 613 -# to start from a clean slate. So use -w if building with clang.
 614 +# to start from a clean slate. So use -w if building with clang. Also
 615 +# turn off a common on-by-default cast warning regardless of compiler.
 616  #
 617  test "$cc_family" = clang && tryflag CFLAGS_AUTO -w
 618
 619 +tryflag CFLAGS_AUTO -Wno-pointer-to-int-cast
 620 +
 621  #
 622  # Even with -std=c99, gcc accepts some constructs which are constraint
 623  # violations. We want to treat these as errors regardless of whether
 624 @@ -522,6 +525,10 @@ tryflag CFLAGS_AUTO -Werror=implicit-function-declaration
 625  tryflag CFLAGS_AUTO -Werror=implicit-int
 626  tryflag CFLAGS_AUTO -Werror=pointer-sign
 627  tryflag CFLAGS_AUTO -Werror=pointer-arith
 628 +tryflag CFLAGS_AUTO -Werror=int-conversion
 629 +tryflag CFLAGS_AUTO -Werror=incompatible-pointer-types
 630 +tryflag CFLAGS_AUTO -Werror=discarded-qualifiers
 631 +tryflag CFLAGS_AUTO -Werror=discarded-array-qualifiers
 632
 633  #
 634  # GCC ignores unused arguements by default, but Clang needs this extra
 635 @@ -531,14 +538,17 @@ tryflag CFLAGS_AUTO -Werror=pointer-arith
 636  test "$cc_family" = clang && tryflag CFLAGS_AUTO -Qunused-arguments
 637
 638  if test "x$warnings" = xyes ; then
 639 -tryflag CFLAGS_AUTO -Wall
 640 -tryflag CFLAGS_AUTO -Wno-parentheses
 641 -tryflag CFLAGS_AUTO -Wno-uninitialized
 642 -tryflag CFLAGS_AUTO -Wno-missing-braces
 643 -tryflag CFLAGS_AUTO -Wno-unused-value
 644 -tryflag CFLAGS_AUTO -Wno-unused-but-set-variable
 645 -tryflag CFLAGS_AUTO -Wno-unknown-pragmas
 646 -tryflag CFLAGS_AUTO -Wno-pointer-to-int-cast
 647 +tryflag CFLAGS_AUTO -Waddress
 648 +tryflag CFLAGS_AUTO -Warray-bounds
 649 +tryflag CFLAGS_AUTO -Wchar-subscripts
 650 +tryflag CFLAGS_AUTO -Wduplicate-decl-specifier
 651 +tryflag CFLAGS_AUTO -Winit-self
 652 +tryflag CFLAGS_AUTO -Wreturn-type
 653 +tryflag CFLAGS_AUTO -Wsequence-point
 654 +tryflag CFLAGS_AUTO -Wstrict-aliasing
 655 +tryflag CFLAGS_AUTO -Wunused-function
 656 +tryflag CFLAGS_AUTO -Wunused-label
 657 +tryflag CFLAGS_AUTO -Wunused-variable
 658  fi
 659
 660  # Determine if the compiler produces position-independent code (PIC)
 661 diff --git a/include/alltypes.h.in b/include/alltypes.h.in
 662 index d9ff462e..d47aeea9 100644
 663 --- a/include/alltypes.h.in
 664 +++ b/include/alltypes.h.in
 665 @@ -77,6 +77,8 @@ TYPEDEF struct __sigset_t { unsigned long __bits[128/sizeof(long)]; } sigset_t;
 666
 667  STRUCT iovec { void *iov_base; size_t iov_len; };
 668
 669 +STRUCT winsize { unsigned short ws_row, ws_col, ws_xpixel, ws_ypixel; };
 670 +
 671  TYPEDEF unsigned socklen_t;
 672  TYPEDEF unsigned short sa_family_t;
 673
 674 diff --git a/include/sys/ioctl.h b/include/sys/ioctl.h
 675 index c2ce3b48..a9a2346e 100644
 676 --- a/include/sys/ioctl.h
 677 +++ b/include/sys/ioctl.h
 678 @@ -4,6 +4,8 @@
 679  extern "C" {
 680  #endif
 681
 682 +#define __NEED_struct_winsize
 683 +
 684  #include <bits/alltypes.h>
 685  #include <bits/ioctl.h>
 686
 687 @@ -47,13 +49,6 @@ extern "C" {
 688
 689  #define TIOCSER_TEMT 1
 690
 691 -struct winsize {
 692 -       unsigned short ws_row;
 693 -       unsigned short ws_col;
 694 -       unsigned short ws_xpixel;
 695 -       unsigned short ws_ypixel;
 696 -};
 697 -
 698  #define SIOCADDRT          0x890B
 699  #define SIOCDELRT          0x890C
 700  #define SIOCRTMSG          0x890D
 701 diff --git a/include/termios.h b/include/termios.h
 702 index d73c780d..cbb53301 100644
 703 --- a/include/termios.h
 704 +++ b/include/termios.h
 705 @@ -8,6 +8,7 @@ extern "C" {
 706  #include <features.h>
 707
 708  #define __NEED_pid_t
 709 +#define __NEED_struct_winsize
 710
 711  #include <bits/alltypes.h>
 712
 713 @@ -27,6 +28,9 @@ int cfsetispeed (struct termios *, speed_t);
 714  int tcgetattr (int, struct termios *);
 715  int tcsetattr (int, int, const struct termios *);
 716
 717 +int tcgetwinsize (int, struct winsize *);
 718 +int tcsetwinsize (int, const struct winsize *);
 719 +
 720  int tcsendbreak (int, int);
 721  int tcdrain (int);
 722  int tcflush (int, int);
 723 diff --git a/include/unistd.h b/include/unistd.h
 724 index 7bcbff94..07584a23 100644
 725 --- a/include/unistd.h
 726 +++ b/include/unistd.h
 727 @@ -190,6 +190,7 @@ int syncfs(int);
 728  int euidaccess(const char *, int);
 729  int eaccess(const char *, int);
 730  ssize_t copy_file_range(int, off_t *, int, off_t *, size_t, unsigned);
 731 +pid_t gettid(void);
 732  #endif
 733
 734  #if defined(_LARGEFILE64_SOURCE) || defined(_GNU_SOURCE)
 735 diff --git a/ldso/dynlink.c b/ldso/dynlink.c
 736 index d3d4ddd2..f7474743 100644
 737 --- a/ldso/dynlink.c
 738 +++ b/ldso/dynlink.c
 739 @@ -1579,7 +1579,7 @@ static void install_new_tls(void)
 740
 741         /* Install new dtv for each thread. */
 742         for (j=0, td=self; !j || td!=self; j++, td=td->next) {
 743 -               td->dtv = td->dtv_copy = newdtv[j];
 744 +               td->dtv = newdtv[j];
 745         }
 746
 747         __tl_unlock();
 748 diff --git a/src/env/__init_tls.c b/src/env/__init_tls.c
 749 index 772baba3..a93141ed 100644
 750 --- a/src/env/__init_tls.c
 751 +++ b/src/env/__init_tls.c
 752 @@ -67,7 +67,7 @@ void *__copy_tls(unsigned char *mem)
 753         }
 754  #endif
 755         dtv[0] = libc.tls_cnt;
 756 -       td->dtv = td->dtv_copy = dtv;
 757 +       td->dtv = dtv;
 758         return td;
 759  }
 760
 761 diff --git a/src/env/__stack_chk_fail.c b/src/env/__stack_chk_fail.c
 762 index e32596d1..bf5a280a 100644
 763 --- a/src/env/__stack_chk_fail.c
 764 +++ b/src/env/__stack_chk_fail.c
 765 @@ -9,7 +9,7 @@ void __init_ssp(void *entropy)
 766         if (entropy) memcpy(&__stack_chk_guard, entropy, sizeof(uintptr_t));
 767         else __stack_chk_guard = (uintptr_t)&__stack_chk_guard * 1103515245;
 768
 769 -       __pthread_self()->CANARY = __stack_chk_guard;
 770 +       __pthread_self()->canary = __stack_chk_guard;
 771  }
 772
 773  void __stack_chk_fail(void)
 774 diff --git a/src/internal/libm.h b/src/internal/libm.h
 775 index 7533f6ba..72ad17d8 100644
 776 --- a/src/internal/libm.h
 777 +++ b/src/internal/libm.h
 778 @@ -267,5 +267,8 @@ hidden double __math_uflow(uint32_t);
 779  hidden double __math_oflow(uint32_t);
 780  hidden double __math_divzero(uint32_t);
 781  hidden double __math_invalid(double);
 782 +#if LDBL_MANT_DIG != DBL_MANT_DIG
 783 +hidden long double __math_invalidl(long double);
 784 +#endif
 785
 786  #endif
 787 diff --git a/src/internal/pthread_impl.h b/src/internal/pthread_impl.h
 788 index 5742dfc5..4d709bbc 100644
 789 --- a/src/internal/pthread_impl.h
 790 +++ b/src/internal/pthread_impl.h
 791 @@ -11,16 +11,25 @@
 792  #include "atomic.h"
 793  #include "futex.h"
 794
 795 +#include "pthread_arch.h"
 796 +
 797  #define pthread __pthread
 798
 799  struct pthread {
 800         /* Part 1 -- these fields may be external or
 801          * internal (accessed via asm) ABI. Do not change. */
 802         struct pthread *self;
 803 +#ifndef TLS_ABOVE_TP
 804         uintptr_t *dtv;
 805 +#endif
 806         struct pthread *prev, *next; /* non-ABI */
 807         uintptr_t sysinfo;
 808 -       uintptr_t canary, canary2;
 809 +#ifndef TLS_ABOVE_TP
 810 +#ifdef CANARY_PAD
 811 +       uintptr_t canary_pad;
 812 +#endif
 813 +       uintptr_t canary;
 814 +#endif
 815
 816         /* Part 2 -- implementation details, non-ABI. */
 817         int tid;
 818 @@ -43,6 +52,7 @@ struct pthread {
 819                 long off;
 820                 volatile void *volatile pending;
 821         } robust_list;
 822 +       int h_errno_val;
 823         volatile int timer_id;
 824         locale_t locale;
 825         volatile int killlock[1];
 826 @@ -51,8 +61,10 @@ struct pthread {
 827
 828         /* Part 3 -- the positions of these fields relative to
 829          * the end of the structure is external and internal ABI. */
 830 -       uintptr_t canary_at_end;
 831 -       uintptr_t *dtv_copy;
 832 +#ifdef TLS_ABOVE_TP
 833 +       uintptr_t canary;
 834 +       uintptr_t *dtv;
 835 +#endif
 836  };
 837
 838  enum {
 839 @@ -98,16 +110,22 @@ struct __timer {
 840  #define _b_waiters2 __u.__vi[4]
 841  #define _b_inst __u.__p[3]
 842
 843 -#include "pthread_arch.h"
 844 -
 845 -#ifndef CANARY
 846 -#define CANARY canary
 847 +#ifndef TP_OFFSET
 848 +#define TP_OFFSET 0
 849  #endif
 850
 851  #ifndef DTP_OFFSET
 852  #define DTP_OFFSET 0
 853  #endif
 854
 855 +#ifdef TLS_ABOVE_TP
 856 +#define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread) + TP_OFFSET)
 857 +#define __pthread_self() ((pthread_t)(__get_tp() - sizeof(struct __pthread) - TP_OFFSET))
 858 +#else
 859 +#define TP_ADJ(p) (p)
 860 +#define __pthread_self() ((pthread_t)__get_tp())
 861 +#endif
 862 +
 863  #ifndef tls_mod_off_t
 864  #define tls_mod_off_t size_t
 865  #endif
 866 diff --git a/src/internal/syscall.h b/src/internal/syscall.h
 867 index 975a0031..d5f294d4 100644
 868 --- a/src/internal/syscall.h
 869 +++ b/src/internal/syscall.h
 870 @@ -2,6 +2,7 @@
 871  #define _INTERNAL_SYSCALL_H
 872
 873  #include <features.h>
 874 +#include <errno.h>
 875  #include <sys/syscall.h>
 876  #include "syscall_arch.h"
 877
 878 @@ -57,15 +58,22 @@ hidden long __syscall_ret(unsigned long),
 879  #define __syscall_cp(...) __SYSCALL_DISP(__syscall_cp,__VA_ARGS__)
 880  #define syscall_cp(...) __syscall_ret(__syscall_cp(__VA_ARGS__))
 881
 882 -#ifndef SYSCALL_USE_SOCKETCALL
 883 -#define __socketcall(nm,a,b,c,d,e,f) __syscall(SYS_##nm, a, b, c, d, e, f)
 884 -#define __socketcall_cp(nm,a,b,c,d,e,f) __syscall_cp(SYS_##nm, a, b, c, d, e, f)
 885 -#else
 886 -#define __socketcall(nm,a,b,c,d,e,f) __syscall(SYS_socketcall, __SC_##nm, \
 887 -    ((long [6]){ (long)a, (long)b, (long)c, (long)d, (long)e, (long)f }))
 888 -#define __socketcall_cp(nm,a,b,c,d,e,f) __syscall_cp(SYS_socketcall, __SC_##nm, \
 889 -    ((long [6]){ (long)a, (long)b, (long)c, (long)d, (long)e, (long)f }))
 890 -#endif
 891 +static inline long __alt_socketcall(int sys, int sock, int cp, long a, long b, long c, long d, long e, long f)
 892 +{
 893 +       long r;
 894 +       if (cp) r = __syscall_cp(sys, a, b, c, d, e, f);
 895 +       else r = __syscall(sys, a, b, c, d, e, f);
 896 +       if (r != -ENOSYS) return r;
 897 +#ifdef SYS_socketcall
 898 +       if (cp) r = __syscall_cp(SYS_socketcall, sock, ((long[6]){a, b, c, d, e, f}));
 899 +       else r = __syscall(SYS_socketcall, sock, ((long[6]){a, b, c, d, e, f}));
 900 +#endif
 901 +       return r;
 902 +}
 903 +#define __socketcall(nm, a, b, c, d, e, f) __alt_socketcall(SYS_##nm, __SC_##nm, 0, \
 904 +       (long)(a), (long)(b), (long)(c), (long)(d), (long)(e), (long)(f))
 905 +#define __socketcall_cp(nm, a, b, c, d, e, f) __alt_socketcall(SYS_##nm, __SC_##nm, 1, \
 906 +       (long)(a), (long)(b), (long)(c), (long)(d), (long)(e), (long)(f))
 907
 908  /* fixup legacy 16-bit junk */
 909
 910 @@ -338,6 +346,12 @@ hidden long __syscall_ret(unsigned long),
 911  #define __SC_recvmmsg    19
 912  #define __SC_sendmmsg    20
 913
 914 +/* This is valid only because all socket syscalls are made via
 915 + * socketcall, which always fills unused argument slots with zeros. */
 916 +#ifndef SYS_accept
 917 +#define SYS_accept SYS_accept4
 918 +#endif
 919 +
 920  #ifndef SO_RCVTIMEO_OLD
 921  #define SO_RCVTIMEO_OLD  20
 922  #endif
 923 diff --git a/src/linux/gettid.c b/src/linux/gettid.c
 924 new file mode 100644
 925 index 00000000..70767137
 926 --- /dev/null
 927 +++ b/src/linux/gettid.c
 928 @@ -0,0 +1,8 @@
 929 +#define _GNU_SOURCE
 930 +#include <unistd.h>
 931 +#include "pthread_impl.h"
 932 +
 933 +pid_t gettid(void)
 934 +{
 935 +       return __pthread_self()->tid;
 936 +}
 937 diff --git a/src/locale/locale_map.c b/src/locale/locale_map.c
 938 index 2321bac0..e7eede62 100644
 939 --- a/src/locale/locale_map.c
 940 +++ b/src/locale/locale_map.c
 941 @@ -67,7 +67,7 @@ const struct __locale_map *__get_locale(int cat, const char *val)
 942
 943         if (path) for (; *path; path=z+!!*z) {
 944                 z = __strchrnul(path, ':');
 945 -               l = z - path - !!*z;
 946 +               l = z - path;
 947                 if (l >= sizeof buf - n - 2) continue;
 948                 memcpy(buf, path, l);
 949                 buf[l] = '/';
 950 diff --git a/src/math/__math_invalidl.c b/src/math/__math_invalidl.c
 951 new file mode 100644
 952 index 00000000..1fca99de
 953 --- /dev/null
 954 +++ b/src/math/__math_invalidl.c
 955 @@ -0,0 +1,9 @@
 956 +#include <float.h>
 957 +#include "libm.h"
 958 +
 959 +#if LDBL_MANT_DIG != DBL_MANT_DIG
 960 +long double __math_invalidl(long double x)
 961 +{
 962 +       return (x - x) / (x - x);
 963 +}
 964 +#endif
 965 diff --git a/src/math/sqrt.c b/src/math/sqrt.c
 966 index f1f6d76c..5ba26559 100644
 967 --- a/src/math/sqrt.c
 968 +++ b/src/math/sqrt.c
 969 @@ -1,184 +1,158 @@
 970 -/* origin: FreeBSD /usr/src/lib/msun/src/e_sqrt.c */
 971 -/*
 972 - * ====================================================
 973 - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 974 - *
 975 - * Developed at SunSoft, a Sun Microsystems, Inc. business.
 976 - * Permission to use, copy, modify, and distribute this
 977 - * software is freely granted, provided that this notice
 978 - * is preserved.
 979 - * ====================================================
 980 - */
 981 -/* sqrt(x)
 982 - * Return correctly rounded sqrt.
 983 - *           ------------------------------------------
 984 - *           |  Use the hardware sqrt if you have one |
 985 - *           ------------------------------------------
 986 - * Method:
 987 - *   Bit by bit method using integer arithmetic. (Slow, but portable)
 988 - *   1. Normalization
 989 - *      Scale x to y in [1,4) with even powers of 2:
 990 - *      find an integer k such that  1 <= (y=x*2^(2k)) < 4, then
 991 - *              sqrt(x) = 2^k * sqrt(y)
 992 - *   2. Bit by bit computation
 993 - *      Let q  = sqrt(y) truncated to i bit after binary point (q = 1),
 994 - *           i                                                   0
 995 - *                                     i+1         2
 996 - *          s  = 2*q , and      y  =  2   * ( y - q  ).         (1)
 997 - *           i      i            i                 i
 998 - *
 999 - *      To compute q    from q , one checks whether
1000 - *                  i+1       i
1001 - *
1002 - *                            -(i+1) 2
1003 - *                      (q + 2      ) <= y.                     (2)
1004 - *                        i
1005 - *                                                            -(i+1)
1006 - *      If (2) is false, then q   = q ; otherwise q   = q  + 2      .
1007 - *                             i+1   i             i+1   i
1008 - *
1009 - *      With some algebric manipulation, it is not difficult to see
1010 - *      that (2) is equivalent to
1011 - *                             -(i+1)
1012 - *                      s  +  2       <= y                      (3)
1013 - *                       i                i
1014 - *
1015 - *      The advantage of (3) is that s  and y  can be computed by
1016 - *                                    i      i
1017 - *      the following recurrence formula:
1018 - *          if (3) is false
1019 - *
1020 - *          s     =  s  ,       y    = y   ;                    (4)
1021 - *           i+1      i          i+1    i
1022 - *
1023 - *          otherwise,
1024 - *                         -i                     -(i+1)
1025 - *          s     =  s  + 2  ,  y    = y  -  s  - 2             (5)
1026 - *           i+1      i          i+1    i     i
1027 - *
1028 - *      One may easily use induction to prove (4) and (5).
1029 - *      Note. Since the left hand side of (3) contain only i+2 bits,
1030 - *            it does not necessary to do a full (53-bit) comparison
1031 - *            in (3).
1032 - *   3. Final rounding
1033 - *      After generating the 53 bits result, we compute one more bit.
1034 - *      Together with the remainder, we can decide whether the
1035 - *      result is exact, bigger than 1/2ulp, or less than 1/2ulp
1036 - *      (it will never equal to 1/2ulp).
1037 - *      The rounding mode can be detected by checking whether
1038 - *      huge + tiny is equal to huge, and whether huge - tiny is
1039 - *      equal to huge for some floating point number "huge" and "tiny".
1040 - *
1041 - * Special cases:
1042 - *      sqrt(+-0) = +-0         ... exact
1043 - *      sqrt(inf) = inf
1044 - *      sqrt(-ve) = NaN         ... with invalid signal
1045 - *      sqrt(NaN) = NaN         ... with invalid signal for signaling NaN
1046 - */
1047 -
1048 +#include <stdint.h>
1049 +#include <math.h>
1050  #include "libm.h"
1051 +#include "sqrt_data.h"
1052
1053 -static const double tiny = 1.0e-300;
1054 +#define FENV_SUPPORT 1
1055
1056 -double sqrt(double x)
1057 +/* returns a*b*2^-32 - e, with error 0 <= e < 1.  */
1058 +static inline uint32_t mul32(uint32_t a, uint32_t b)
1059  {
1060 -       double z;
1061 -       int32_t sign = (int)0x80000000;
1062 -       int32_t ix0,s0,q,m,t,i;
1063 -       uint32_t r,t1,s1,ix1,q1;
1064 +       return (uint64_t)a*b >> 32;
1065 +}
1066
1067 -       EXTRACT_WORDS(ix0, ix1, x);
1068 +/* returns a*b*2^-64 - e, with error 0 <= e < 3.  */
1069 +static inline uint64_t mul64(uint64_t a, uint64_t b)
1070 +{
1071 +       uint64_t ahi = a>>32;
1072 +       uint64_t alo = a&0xffffffff;
1073 +       uint64_t bhi = b>>32;
1074 +       uint64_t blo = b&0xffffffff;
1075 +       return ahi*bhi + (ahi*blo >> 32) + (alo*bhi >> 32);
1076 +}
1077
1078 -       /* take care of Inf and NaN */
1079 -       if ((ix0&0x7ff00000) == 0x7ff00000) {
1080 -               return x*x + x;  /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */
1081 -       }
1082 -       /* take care of zero */
1083 -       if (ix0 <= 0) {
1084 -               if (((ix0&~sign)|ix1) == 0)
1085 -                       return x;  /* sqrt(+-0) = +-0 */
1086 -               if (ix0 < 0)
1087 -                       return (x-x)/(x-x);  /* sqrt(-ve) = sNaN */
1088 -       }
1089 -       /* normalize x */
1090 -       m = ix0>>20;
1091 -       if (m == 0) {  /* subnormal x */
1092 -               while (ix0 == 0) {
1093 -                       m -= 21;
1094 -                       ix0 |= (ix1>>11);
1095 -                       ix1 <<= 21;
1096 -               }
1097 -               for (i=0; (ix0&0x00100000) == 0; i++)
1098 -                       ix0<<=1;
1099 -               m -= i - 1;
1100 -               ix0 |= ix1>>(32-i);
1101 -               ix1 <<= i;
1102 -       }
1103 -       m -= 1023;    /* unbias exponent */
1104 -       ix0 = (ix0&0x000fffff)|0x00100000;
1105 -       if (m & 1) {  /* odd m, double x to make it even */
1106 -               ix0 += ix0 + ((ix1&sign)>>31);
1107 -               ix1 += ix1;
1108 -       }
1109 -       m >>= 1;      /* m = [m/2] */
1110 -
1111 -       /* generate sqrt(x) bit by bit */
1112 -       ix0 += ix0 + ((ix1&sign)>>31);
1113 -       ix1 += ix1;
1114 -       q = q1 = s0 = s1 = 0;  /* [q,q1] = sqrt(x) */
1115 -       r = 0x00200000;        /* r = moving bit from right to left */
1116 -
1117 -       while (r != 0) {
1118 -               t = s0 + r;
1119 -               if (t <= ix0) {
1120 -                       s0   = t + r;
1121 -                       ix0 -= t;
1122 -                       q   += r;
1123 -               }
1124 -               ix0 += ix0 + ((ix1&sign)>>31);
1125 -               ix1 += ix1;
1126 -               r >>= 1;
1127 -       }
1128 +double sqrt(double x)
1129 +{
1130 +       uint64_t ix, top, m;
1131
1132 -       r = sign;
1133 -       while (r != 0) {
1134 -               t1 = s1 + r;
1135 -               t  = s0;
1136 -               if (t < ix0 || (t == ix0 && t1 <= ix1)) {
1137 -                       s1 = t1 + r;
1138 -                       if ((t1&sign) == sign && (s1&sign) == 0)
1139 -                               s0++;
1140 -                       ix0 -= t;
1141 -                       if (ix1 < t1)
1142 -                               ix0--;
1143 -                       ix1 -= t1;
1144 -                       q1 += r;
1145 -               }
1146 -               ix0 += ix0 + ((ix1&sign)>>31);
1147 -               ix1 += ix1;
1148 -               r >>= 1;
1149 +       /* special case handling.  */
1150 +       ix = asuint64(x);
1151 +       top = ix >> 52;
1152 +       if (predict_false(top - 0x001 >= 0x7ff - 0x001)) {
1153 +               /* x < 0x1p-1022 or inf or nan.  */
1154 +               if (ix * 2 == 0)
1155 +                       return x;
1156 +               if (ix == 0x7ff0000000000000)
1157 +                       return x;
1158 +               if (ix > 0x7ff0000000000000)
1159 +                       return __math_invalid(x);
1160 +               /* x is subnormal, normalize it.  */
1161 +               ix = asuint64(x * 0x1p52);
1162 +               top = ix >> 52;
1163 +               top -= 52;
1164         }
1165
1166 -       /* use floating add to find out rounding direction */
1167 -       if ((ix0|ix1) != 0) {
1168 -               z = 1.0 - tiny; /* raise inexact flag */
1169 -               if (z >= 1.0) {
1170 -                       z = 1.0 + tiny;
1171 -                       if (q1 == (uint32_t)0xffffffff) {
1172 -                               q1 = 0;
1173 -                               q++;
1174 -                       } else if (z > 1.0) {
1175 -                               if (q1 == (uint32_t)0xfffffffe)
1176 -                                       q++;
1177 -                               q1 += 2;
1178 -                       } else
1179 -                               q1 += q1 & 1;
1180 -               }
1181 +       /* argument reduction:
1182 +          x = 4^e m; with integer e, and m in [1, 4)
1183 +          m: fixed point representation [2.62]
1184 +          2^e is the exponent part of the result.  */
1185 +       int even = top & 1;
1186 +       m = (ix << 11) | 0x8000000000000000;
1187 +       if (even) m >>= 1;
1188 +       top = (top + 0x3ff) >> 1;
1189 +
1190 +       /* approximate r ~ 1/sqrt(m) and s ~ sqrt(m) when m in [1,4)
1191 +
1192 +          initial estimate:
1193 +          7bit table lookup (1bit exponent and 6bit significand).
1194 +
1195 +          iterative approximation:
1196 +          using 2 goldschmidt iterations with 32bit int arithmetics
1197 +          and a final iteration with 64bit int arithmetics.
1198 +
1199 +          details:
1200 +
1201 +          the relative error (e = r0 sqrt(m)-1) of a linear estimate
1202 +          (r0 = a m + b) is |e| < 0.085955 ~ 0x1.6p-4 at best,
1203 +          a table lookup is faster and needs one less iteration
1204 +          6 bit lookup table (128b) gives |e| < 0x1.f9p-8
1205 +          7 bit lookup table (256b) gives |e| < 0x1.fdp-9
1206 +          for single and double prec 6bit is enough but for quad
1207 +          prec 7bit is needed (or modified iterations). to avoid
1208 +          one more iteration >=13bit table would be needed (16k).
1209 +
1210 +          a newton-raphson iteration for r is
1211 +            w = r*r
1212 +            u = 3 - m*w
1213 +            r = r*u/2
1214 +          can use a goldschmidt iteration for s at the end or
1215 +            s = m*r
1216 +
1217 +          first goldschmidt iteration is
1218 +            s = m*r
1219 +            u = 3 - s*r
1220 +            r = r*u/2
1221 +            s = s*u/2
1222 +          next goldschmidt iteration is
1223 +            u = 3 - s*r
1224 +            r = r*u/2
1225 +            s = s*u/2
1226 +          and at the end r is not computed only s.
1227 +
1228 +          they use the same amount of operations and converge at the
1229 +          same quadratic rate, i.e. if
1230 +            r1 sqrt(m) - 1 = e, then
1231 +            r2 sqrt(m) - 1 = -3/2 e^2 - 1/2 e^3
1232 +          the advantage of goldschmidt is that the mul for s and r
1233 +          are independent (computed in parallel), however it is not
1234 +          "self synchronizing": it only uses the input m in the
1235 +          first iteration so rounding errors accumulate. at the end
1236 +          or when switching to larger precision arithmetics rounding
1237 +          errors dominate so the first iteration should be used.
1238 +
1239 +          the fixed point representations are
1240 +            m: 2.30 r: 0.32, s: 2.30, d: 2.30, u: 2.30, three: 2.30
1241 +          and after switching to 64 bit
1242 +            m: 2.62 r: 0.64, s: 2.62, d: 2.62, u: 2.62, three: 2.62  */
1243 +
1244 +       static const uint64_t three = 0xc0000000;
1245 +       uint64_t r, s, d, u, i;
1246 +
1247 +       i = (ix >> 46) % 128;
1248 +       r = (uint32_t)__rsqrt_tab[i] << 16;
1249 +       /* |r sqrt(m) - 1| < 0x1.fdp-9 */
1250 +       s = mul32(m>>32, r);
1251 +       /* |s/sqrt(m) - 1| < 0x1.fdp-9 */
1252 +       d = mul32(s, r);
1253 +       u = three - d;
1254 +       r = mul32(r, u) << 1;
1255 +       /* |r sqrt(m) - 1| < 0x1.7bp-16 */
1256 +       s = mul32(s, u) << 1;
1257 +       /* |s/sqrt(m) - 1| < 0x1.7bp-16 */
1258 +       d = mul32(s, r);
1259 +       u = three - d;
1260 +       r = mul32(r, u) << 1;
1261 +       /* |r sqrt(m) - 1| < 0x1.3704p-29 (measured worst-case) */
1262 +       r = r << 32;
1263 +       s = mul64(m, r);
1264 +       d = mul64(s, r);
1265 +       u = (three<<32) - d;
1266 +       s = mul64(s, u);  /* repr: 3.61 */
1267 +       /* -0x1p-57 < s - sqrt(m) < 0x1.8001p-61 */
1268 +       s = (s - 2) >> 9; /* repr: 12.52 */
1269 +       /* -0x1.09p-52 < s - sqrt(m) < -0x1.fffcp-63 */
1270 +
1271 +       /* s < sqrt(m) < s + 0x1.09p-52,
1272 +          compute nearest rounded result:
1273 +          the nearest result to 52 bits is either s or s+0x1p-52,
1274 +          we can decide by comparing (2^52 s + 0.5)^2 to 2^104 m.  */
1275 +       uint64_t d0, d1, d2;
1276 +       double y, t;
1277 +       d0 = (m << 42) - s*s;
1278 +       d1 = s - d0;
1279 +       d2 = d1 + s + 1;
1280 +       s += d1 >> 63;
1281 +       s &= 0x000fffffffffffff;
1282 +       s |= top << 52;
1283 +       y = asdouble(s);
1284 +       if (FENV_SUPPORT) {
1285 +               /* handle rounding modes and inexact exception:
1286 +                  only (s+1)^2 == 2^42 m case is exact otherwise
1287 +                  add a tiny value to cause the fenv effects.  */
1288 +               uint64_t tiny = predict_false(d2==0) ? 0 : 0x0010000000000000;
1289 +               tiny |= (d1^d2) & 0x8000000000000000;
1290 +               t = asdouble(tiny);
1291 +               y = eval_as_double(y + t);
1292         }
1293 -       ix0 = (q>>1) + 0x3fe00000;
1294 -       ix1 = q1>>1;
1295 -       if (q&1)
1296 -               ix1 |= sign;
1297 -       INSERT_WORDS(z, ix0 + ((uint32_t)m << 20), ix1);
1298 -       return z;
1299 +       return y;
1300  }
1301 diff --git a/src/math/sqrt_data.c b/src/math/sqrt_data.c
1302 new file mode 100644
1303 index 00000000..61bc22f4
1304 --- /dev/null
1305 +++ b/src/math/sqrt_data.c
1306 @@ -0,0 +1,19 @@
1307 +#include "sqrt_data.h"
1308 +const uint16_t __rsqrt_tab[128] = {
1309 +0xb451,0xb2f0,0xb196,0xb044,0xaef9,0xadb6,0xac79,0xab43,
1310 +0xaa14,0xa8eb,0xa7c8,0xa6aa,0xa592,0xa480,0xa373,0xa26b,
1311 +0xa168,0xa06a,0x9f70,0x9e7b,0x9d8a,0x9c9d,0x9bb5,0x9ad1,
1312 +0x99f0,0x9913,0x983a,0x9765,0x9693,0x95c4,0x94f8,0x9430,
1313 +0x936b,0x92a9,0x91ea,0x912e,0x9075,0x8fbe,0x8f0a,0x8e59,
1314 +0x8daa,0x8cfe,0x8c54,0x8bac,0x8b07,0x8a64,0x89c4,0x8925,
1315 +0x8889,0x87ee,0x8756,0x86c0,0x862b,0x8599,0x8508,0x8479,
1316 +0x83ec,0x8361,0x82d8,0x8250,0x81c9,0x8145,0x80c2,0x8040,
1317 +0xff02,0xfd0e,0xfb25,0xf947,0xf773,0xf5aa,0xf3ea,0xf234,
1318 +0xf087,0xeee3,0xed47,0xebb3,0xea27,0xe8a3,0xe727,0xe5b2,
1319 +0xe443,0xe2dc,0xe17a,0xe020,0xdecb,0xdd7d,0xdc34,0xdaf1,
1320 +0xd9b3,0xd87b,0xd748,0xd61a,0xd4f1,0xd3cd,0xd2ad,0xd192,
1321 +0xd07b,0xcf69,0xce5b,0xcd51,0xcc4a,0xcb48,0xca4a,0xc94f,
1322 +0xc858,0xc764,0xc674,0xc587,0xc49d,0xc3b7,0xc2d4,0xc1f4,
1323 +0xc116,0xc03c,0xbf65,0xbe90,0xbdbe,0xbcef,0xbc23,0xbb59,
1324 +0xba91,0xb9cc,0xb90a,0xb84a,0xb78c,0xb6d0,0xb617,0xb560,
1325 +};
1326 diff --git a/src/math/sqrt_data.h b/src/math/sqrt_data.h
1327 new file mode 100644
1328 index 00000000..260c7f9c
1329 --- /dev/null
1330 +++ b/src/math/sqrt_data.h
1331 @@ -0,0 +1,13 @@
1332 +#ifndef _SQRT_DATA_H
1333 +#define _SQRT_DATA_H
1334 +
1335 +#include <features.h>
1336 +#include <stdint.h>
1337 +
1338 +/* if x in [1,2): i = (int)(64*x);
1339 +   if x in [2,4): i = (int)(32*x-64);
1340 +   __rsqrt_tab[i]*2^-16 is estimating 1/sqrt(x) with small relative error:
 1341 +   |__rsqrt_tab[i]*0x1p-16*sqrt(x) - 1| < 0x1.fdp-9 < 2^-8 */
1342 +extern hidden const uint16_t __rsqrt_tab[128];
1343 +
1344 +#endif
1345 diff --git a/src/math/sqrtf.c b/src/math/sqrtf.c
1346 index d6ace38a..740d81cb 100644
1347 --- a/src/math/sqrtf.c
1348 +++ b/src/math/sqrtf.c
1349 @@ -1,83 +1,83 @@
1350 -/* origin: FreeBSD /usr/src/lib/msun/src/e_sqrtf.c */
1351 -/*
1352 - * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
1353 - */
1354 -/*
1355 - * ====================================================
1356 - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
1357 - *
1358 - * Developed at SunPro, a Sun Microsystems, Inc. business.
1359 - * Permission to use, copy, modify, and distribute this
1360 - * software is freely granted, provided that this notice
1361 - * is preserved.
1362 - * ====================================================
1363 - */
1364 -
1365 +#include <stdint.h>
1366 +#include <math.h>
1367  #include "libm.h"
1368 +#include "sqrt_data.h"
1369
1370 -static const float tiny = 1.0e-30;
1371 +#define FENV_SUPPORT 1
1372
1373 -float sqrtf(float x)
1374 +static inline uint32_t mul32(uint32_t a, uint32_t b)
1375  {
1376 -       float z;
1377 -       int32_t sign = (int)0x80000000;
1378 -       int32_t ix,s,q,m,t,i;
1379 -       uint32_t r;
1380 +       return (uint64_t)a*b >> 32;
1381 +}
1382
1383 -       GET_FLOAT_WORD(ix, x);
1384 +/* see sqrt.c for more detailed comments.  */
1385
1386 -       /* take care of Inf and NaN */
1387 -       if ((ix&0x7f800000) == 0x7f800000)
1388 -               return x*x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */
1389 +float sqrtf(float x)
1390 +{
1391 +       uint32_t ix, m, m1, m0, even, ey;
1392
1393 -       /* take care of zero */
1394 -       if (ix <= 0) {
1395 -               if ((ix&~sign) == 0)
1396 -                       return x;  /* sqrt(+-0) = +-0 */
1397 -               if (ix < 0)
1398 -                       return (x-x)/(x-x);  /* sqrt(-ve) = sNaN */
1399 -       }
1400 -       /* normalize x */
1401 -       m = ix>>23;
1402 -       if (m == 0) {  /* subnormal x */
1403 -               for (i = 0; (ix&0x00800000) == 0; i++)
1404 -                       ix<<=1;
1405 -               m -= i - 1;
1406 +       ix = asuint(x);
1407 +       if (predict_false(ix - 0x00800000 >= 0x7f800000 - 0x00800000)) {
1408 +               /* x < 0x1p-126 or inf or nan.  */
1409 +               if (ix * 2 == 0)
1410 +                       return x;
1411 +               if (ix == 0x7f800000)
1412 +                       return x;
1413 +               if (ix > 0x7f800000)
1414 +                       return __math_invalidf(x);
1415 +               /* x is subnormal, normalize it.  */
1416 +               ix = asuint(x * 0x1p23f);
1417 +               ix -= 23 << 23;
1418         }
1419 -       m -= 127;  /* unbias exponent */
1420 -       ix = (ix&0x007fffff)|0x00800000;
1421 -       if (m&1)  /* odd m, double x to make it even */
1422 -               ix += ix;
1423 -       m >>= 1;  /* m = [m/2] */
1424
1425 -       /* generate sqrt(x) bit by bit */
1426 -       ix += ix;
1427 -       q = s = 0;       /* q = sqrt(x) */
1428 -       r = 0x01000000;  /* r = moving bit from right to left */
1429 +       /* x = 4^e m; with int e and m in [1, 4).  */
1430 +       even = ix & 0x00800000;
1431 +       m1 = (ix << 8) | 0x80000000;
1432 +       m0 = (ix << 7) & 0x7fffffff;
1433 +       m = even ? m0 : m1;
1434
1435 -       while (r != 0) {
1436 -               t = s + r;
1437 -               if (t <= ix) {
1438 -                       s = t+r;
1439 -                       ix -= t;
1440 -                       q += r;
1441 -               }
1442 -               ix += ix;
1443 -               r >>= 1;
1444 -       }
1445 +       /* 2^e is the exponent part of the return value.  */
1446 +       ey = ix >> 1;
1447 +       ey += 0x3f800000 >> 1;
1448 +       ey &= 0x7f800000;
1449 +
1450 +       /* compute r ~ 1/sqrt(m), s ~ sqrt(m) with 2 goldschmidt iterations.  */
1451 +       static const uint32_t three = 0xc0000000;
1452 +       uint32_t r, s, d, u, i;
1453 +       i = (ix >> 17) % 128;
1454 +       r = (uint32_t)__rsqrt_tab[i] << 16;
1455 +       /* |r*sqrt(m) - 1| < 0x1p-8 */
1456 +       s = mul32(m, r);
1457 +       /* |s/sqrt(m) - 1| < 0x1p-8 */
1458 +       d = mul32(s, r);
1459 +       u = three - d;
1460 +       r = mul32(r, u) << 1;
1461 +       /* |r*sqrt(m) - 1| < 0x1.7bp-16 */
1462 +       s = mul32(s, u) << 1;
1463 +       /* |s/sqrt(m) - 1| < 0x1.7bp-16 */
1464 +       d = mul32(s, r);
1465 +       u = three - d;
1466 +       s = mul32(s, u);
1467 +       /* -0x1.03p-28 < s/sqrt(m) - 1 < 0x1.fp-31 */
1468 +       s = (s - 1)>>6;
1469 +       /* s < sqrt(m) < s + 0x1.08p-23 */
1470
1471 -       /* use floating add to find out rounding direction */
1472 -       if (ix != 0) {
1473 -               z = 1.0f - tiny; /* raise inexact flag */
1474 -               if (z >= 1.0f) {
1475 -                       z = 1.0f + tiny;
1476 -                       if (z > 1.0f)
1477 -                               q += 2;
1478 -                       else
1479 -                               q += q & 1;
1480 -               }
1481 +       /* compute nearest rounded result.  */
1482 +       uint32_t d0, d1, d2;
1483 +       float y, t;
1484 +       d0 = (m << 16) - s*s;
1485 +       d1 = s - d0;
1486 +       d2 = d1 + s + 1;
1487 +       s += d1 >> 31;
1488 +       s &= 0x007fffff;
1489 +       s |= ey;
1490 +       y = asfloat(s);
1491 +       if (FENV_SUPPORT) {
1492 +               /* handle rounding and inexact exception. */
1493 +               uint32_t tiny = predict_false(d2==0) ? 0 : 0x01000000;
1494 +               tiny |= (d1^d2) & 0x80000000;
1495 +               t = asfloat(tiny);
1496 +               y = eval_as_float(y + t);
1497         }
1498 -       ix = (q>>1) + 0x3f000000;
1499 -       SET_FLOAT_WORD(z, ix + ((uint32_t)m << 23));
1500 -       return z;
1501 +       return y;
1502  }
1503 diff --git a/src/math/sqrtl.c b/src/math/sqrtl.c
1504 index 83a8f80c..1b9f19c7 100644
1505 --- a/src/math/sqrtl.c
1506 +++ b/src/math/sqrtl.c
1507 @@ -1,7 +1,259 @@
1508 +#include <stdint.h>
1509  #include <math.h>
1510 +#include <float.h>
1511 +#include "libm.h"
1512
1513 +#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
1514  long double sqrtl(long double x)
1515  {
1516 -       /* FIXME: implement in C, this is for LDBL_MANT_DIG == 64 only */
1517         return sqrt(x);
1518  }
1519 +#elif (LDBL_MANT_DIG == 113 || LDBL_MANT_DIG == 64) && LDBL_MAX_EXP == 16384
1520 +#include "sqrt_data.h"
1521 +
1522 +#define FENV_SUPPORT 1
1523 +
1524 +typedef struct {
1525 +       uint64_t hi;
1526 +       uint64_t lo;
1527 +} u128;
1528 +
1529 +/* top: 16 bit sign+exponent, x: significand.  */
1530 +static inline long double mkldbl(uint64_t top, u128 x)
1531 +{
1532 +       union ldshape u;
1533 +#if LDBL_MANT_DIG == 113
1534 +       u.i2.hi = x.hi;
1535 +       u.i2.lo = x.lo;
1536 +       u.i2.hi &= 0x0000ffffffffffff;
1537 +       u.i2.hi |= top << 48;
1538 +#elif LDBL_MANT_DIG == 64
1539 +       u.i.se = top;
1540 +       u.i.m = x.lo;
1541 +       /* force the top bit on non-zero (and non-subnormal) results.  */
1542 +       if (top & 0x7fff)
1543 +               u.i.m |= 0x8000000000000000;
1544 +#endif
1545 +       return u.f;
1546 +}
1547 +
1548 +/* return: top 16 bit is sign+exp and following bits are the significand.  */
1549 +static inline u128 asu128(long double x)
1550 +{
1551 +       union ldshape u = {.f=x};
1552 +       u128 r;
1553 +#if LDBL_MANT_DIG == 113
1554 +       r.hi = u.i2.hi;
1555 +       r.lo = u.i2.lo;
1556 +#elif LDBL_MANT_DIG == 64
1557 +       r.lo = u.i.m<<49;
1558 +       /* ignore the top bit: pseudo numbers are not handled. */
1559 +       r.hi = u.i.m>>15;
1560 +       r.hi &= 0x0000ffffffffffff;
1561 +       r.hi |= (uint64_t)u.i.se << 48;
1562 +#endif
1563 +       return r;
1564 +}
1565 +
1566 +/* returns a*b*2^-32 - e, with error 0 <= e < 1.  */
1567 +static inline uint32_t mul32(uint32_t a, uint32_t b)
1568 +{
1569 +       return (uint64_t)a*b >> 32;
1570 +}
1571 +
1572 +/* returns a*b*2^-64 - e, with error 0 <= e < 3.  */
1573 +static inline uint64_t mul64(uint64_t a, uint64_t b)
1574 +{
1575 +       uint64_t ahi = a>>32;
1576 +       uint64_t alo = a&0xffffffff;
1577 +       uint64_t bhi = b>>32;
1578 +       uint64_t blo = b&0xffffffff;
1579 +       return ahi*bhi + (ahi*blo >> 32) + (alo*bhi >> 32);
1580 +}
1581 +
1582 +static inline u128 add64(u128 a, uint64_t b)
1583 +{
1584 +       u128 r;
1585 +       r.lo = a.lo + b;
1586 +       r.hi = a.hi;
1587 +       if (r.lo < a.lo)
1588 +               r.hi++;
1589 +       return r;
1590 +}
1591 +
1592 +static inline u128 add128(u128 a, u128 b)
1593 +{
1594 +       u128 r;
1595 +       r.lo = a.lo + b.lo;
1596 +       r.hi = a.hi + b.hi;
1597 +       if (r.lo < a.lo)
1598 +               r.hi++;
1599 +       return r;
1600 +}
1601 +
1602 +static inline u128 sub64(u128 a, uint64_t b)
1603 +{
1604 +       u128 r;
1605 +       r.lo = a.lo - b;
1606 +       r.hi = a.hi;
1607 +       if (a.lo < b)
1608 +               r.hi--;
1609 +       return r;
1610 +}
1611 +
1612 +static inline u128 sub128(u128 a, u128 b)
1613 +{
1614 +       u128 r;
1615 +       r.lo = a.lo - b.lo;
1616 +       r.hi = a.hi - b.hi;
1617 +       if (a.lo < b.lo)
1618 +               r.hi--;
1619 +       return r;
1620 +}
1621 +
1622 +/* a<<n, 0 <= n <= 127 */
1623 +static inline u128 lsh(u128 a, int n)
1624 +{
1625 +       if (n == 0)
1626 +               return a;
1627 +       if (n >= 64) {
1628 +               a.hi = a.lo<<(n-64);
1629 +               a.lo = 0;
1630 +       } else {
1631 +               a.hi = (a.hi<<n) | (a.lo>>(64-n));
1632 +               a.lo = a.lo<<n;
1633 +       }
1634 +       return a;
1635 +}
1636 +
1637 +/* a>>n, 0 <= n <= 127 */
1638 +static inline u128 rsh(u128 a, int n)
1639 +{
1640 +       if (n == 0)
1641 +               return a;
1642 +       if (n >= 64) {
1643 +               a.lo = a.hi>>(n-64);
1644 +               a.hi = 0;
1645 +       } else {
1646 +               a.lo = (a.lo>>n) | (a.hi<<(64-n));
1647 +               a.hi = a.hi>>n;
1648 +       }
1649 +       return a;
1650 +}
1651 +
1652 +/* returns a*b exactly.  */
1653 +static inline u128 mul64_128(uint64_t a, uint64_t b)
1654 +{
1655 +       u128 r;
1656 +       uint64_t ahi = a>>32;
1657 +       uint64_t alo = a&0xffffffff;
1658 +       uint64_t bhi = b>>32;
1659 +       uint64_t blo = b&0xffffffff;
1660 +       uint64_t lo1 = ((ahi*blo)&0xffffffff) + ((alo*bhi)&0xffffffff) + (alo*blo>>32);
1661 +       uint64_t lo2 = (alo*blo)&0xffffffff;
1662 +       r.hi = ahi*bhi + (ahi*blo>>32) + (alo*bhi>>32) + (lo1>>32);
1663 +       r.lo = (lo1<<32) + lo2;
1664 +       return r;
1665 +}
1666 +
1667 +/* returns a*b*2^-128 - e, with error 0 <= e < 7.  */
1668 +static inline u128 mul128(u128 a, u128 b)
1669 +{
1670 +       u128 hi = mul64_128(a.hi, b.hi);
1671 +       uint64_t m1 = mul64(a.hi, b.lo);
1672 +       uint64_t m2 = mul64(a.lo, b.hi);
1673 +       return add64(add64(hi, m1), m2);
1674 +}
1675 +
1676 +/* returns a*b % 2^128.  */
1677 +static inline u128 mul128_tail(u128 a, u128 b)
1678 +{
1679 +       u128 lo = mul64_128(a.lo, b.lo);
1680 +       lo.hi += a.hi*b.lo + a.lo*b.hi;
1681 +       return lo;
1682 +}
1683 +
1684 +
1685 +/* see sqrt.c for detailed comments.  */
1686 +
1687 +long double sqrtl(long double x)
1688 +{
1689 +       u128 ix, ml;
1690 +       uint64_t top;
1691 +
1692 +       ix = asu128(x);
1693 +       top = ix.hi >> 48;
1694 +       if (predict_false(top - 0x0001 >= 0x7fff - 0x0001)) {
1695 +               /* x < 0x1p-16382 or inf or nan.  */
1696 +               if (2*ix.hi == 0 && ix.lo == 0)
1697 +                       return x;
1698 +               if (ix.hi == 0x7fff000000000000 && ix.lo == 0)
1699 +                       return x;
1700 +               if (top >= 0x7fff)
1701 +                       return __math_invalidl(x);
1702 +               /* x is subnormal, normalize it.  */
1703 +               ix = asu128(x * 0x1p112);
1704 +               top = ix.hi >> 48;
1705 +               top -= 112;
1706 +       }
1707 +
1708 +       /* x = 4^e m; with int e and m in [1, 4) */
1709 +       int even = top & 1;
1710 +       ml = lsh(ix, 15);
1711 +       ml.hi |= 0x8000000000000000;
1712 +       if (even) ml = rsh(ml, 1);
1713 +       top = (top + 0x3fff) >> 1;
1714 +
1715 +       /* r ~ 1/sqrt(m) */
1716 +       static const uint64_t three = 0xc0000000;
1717 +       uint64_t r, s, d, u, i;
1718 +       i = (ix.hi >> 42) % 128;
1719 +       r = (uint32_t)__rsqrt_tab[i] << 16;
1720 +       /* |r sqrt(m) - 1| < 0x1p-8 */
1721 +       s = mul32(ml.hi>>32, r);
1722 +       d = mul32(s, r);
1723 +       u = three - d;
1724 +       r = mul32(u, r) << 1;
1725 +       /* |r sqrt(m) - 1| < 0x1.7bp-16, switch to 64bit */
1726 +       r = r<<32;
1727 +       s = mul64(ml.hi, r);
1728 +       d = mul64(s, r);
1729 +       u = (three<<32) - d;
1730 +       r = mul64(u, r) << 1;
1731 +       /* |r sqrt(m) - 1| < 0x1.a5p-31 */
1732 +       s = mul64(u, s) << 1;
1733 +       d = mul64(s, r);
1734 +       u = (three<<32) - d;
1735 +       r = mul64(u, r) << 1;
1736 +       /* |r sqrt(m) - 1| < 0x1.c001p-59, switch to 128bit */
1737 +
1738 +       static const u128 threel = {.hi=three<<32, .lo=0};
1739 +       u128 rl, sl, dl, ul;
1740 +       rl.hi = r;
1741 +       rl.lo = 0;
1742 +       sl = mul128(ml, rl);
1743 +       dl = mul128(sl, rl);
1744 +       ul = sub128(threel, dl);
1745 +       sl = mul128(ul, sl); /* repr: 3.125 */
1746 +       /* -0x1p-116 < s - sqrt(m) < 0x3.8001p-125 */
1747 +       sl = rsh(sub64(sl, 4), 125-(LDBL_MANT_DIG-1));
1748 +       /* s < sqrt(m) < s + 1 ULP + tiny */
1749 +
1750 +       long double y;
1751 +       u128 d2, d1, d0;
1752 +       d0 = sub128(lsh(ml, 2*(LDBL_MANT_DIG-1)-126), mul128_tail(sl,sl));
1753 +       d1 = sub128(sl, d0);
1754 +       d2 = add128(add64(sl, 1), d1);
1755 +       sl = add64(sl, d1.hi >> 63);
1756 +       y = mkldbl(top, sl);
1757 +       if (FENV_SUPPORT) {
1758 +               /* handle rounding modes and inexact exception.  */
1759 +               top = predict_false((d2.hi|d2.lo)==0) ? 0 : 1;
1760 +               top |= ((d1.hi^d2.hi)&0x8000000000000000) >> 48;
1761 +               y += mkldbl(top, (u128){0});
1762 +       }
1763 +       return y;
1764 +}
1765 +#else
1766 +#error unsupported long double format
1767 +#endif
1768 diff --git a/src/network/h_errno.c b/src/network/h_errno.c
1769 index 4f700cea..638f7718 100644
1770 --- a/src/network/h_errno.c
1771 +++ b/src/network/h_errno.c
1772 @@ -1,9 +1,11 @@
1773  #include <netdb.h>
1774 +#include "pthread_impl.h"
1775
1776  #undef h_errno
1777  int h_errno;
1778
1779  int *__h_errno_location(void)
1780  {
1781 -       return &h_errno;
1782 +       if (!__pthread_self()->stack) return &h_errno;
1783 +       return &__pthread_self()->h_errno_val;
1784  }
1785 diff --git a/src/network/herror.c b/src/network/herror.c
1786 index 65f25ff3..87f8cff4 100644
1787 --- a/src/network/herror.c
1788 +++ b/src/network/herror.c
1789 @@ -4,5 +4,5 @@
1790
1791  void herror(const char *msg)
1792  {
1793 -       fprintf(stderr, "%s%s%s", msg?msg:"", msg?": ":"", hstrerror(h_errno));
1794 +       fprintf(stderr, "%s%s%s\n", msg?msg:"", msg?": ":"", hstrerror(h_errno));
1795  }
1796 diff --git a/src/network/lookup_name.c b/src/network/lookup_name.c
1797 index aae0d95a..aa558c19 100644
1798 --- a/src/network/lookup_name.c
1799 +++ b/src/network/lookup_name.c
1800 @@ -50,7 +50,7 @@ static int name_from_hosts(struct address buf[static MAXADDRS], char canon[stati
1801  {
1802         char line[512];
1803         size_t l = strlen(name);
1804 -       int cnt = 0, badfam = 0;
1805 +       int cnt = 0, badfam = 0, have_canon = 0;
1806         unsigned char _buf[1032];
1807         FILE _f, *f = __fopen_rb_ca("/etc/hosts", &_f, _buf, sizeof _buf);
1808         if (!f) switch (errno) {
1809 @@ -80,14 +80,19 @@ static int name_from_hosts(struct address buf[static MAXADDRS], char canon[stati
1810                         continue;
1811                 default:
1812                         badfam = EAI_NONAME;
1813 -                       continue;
1814 +                       break;
1815                 }
1816
1817 +               if (have_canon) continue;
1818 +
1819                 /* Extract first name as canonical name */
1820                 for (; *p && isspace(*p); p++);
1821                 for (z=p; *z && !isspace(*z); z++);
1822                 *z = 0;
1823 -               if (is_valid_hostname(p)) memcpy(canon, p, z-p+1);
1824 +               if (is_valid_hostname(p)) {
1825 +                       have_canon = 1;
1826 +                       memcpy(canon, p, z-p+1);
1827 +               }
1828         }
1829         __fclose_ca(f);
1830         return cnt ? cnt : badfam;
1831 diff --git a/src/network/res_query.c b/src/network/res_query.c
1832 index 2f4da2e2..506dc231 100644
1833 --- a/src/network/res_query.c
1834 +++ b/src/network/res_query.c
1835 @@ -1,3 +1,4 @@
1836 +#define _BSD_SOURCE
1837  #include <resolv.h>
1838  #include <netdb.h>
1839
1840 @@ -6,7 +7,20 @@ int res_query(const char *name, int class, int type, unsigned char *dest, int le
1841         unsigned char q[280];
1842         int ql = __res_mkquery(0, name, class, type, 0, 0, 0, q, sizeof q);
1843         if (ql < 0) return ql;
1844 -       return __res_send(q, ql, dest, len);
1845 +       int r = __res_send(q, ql, dest, len);
1846 +       if (r<12) {
1847 +               h_errno = TRY_AGAIN;
1848 +               return -1;
1849 +       }
1850 +       if ((dest[3] & 15) == 3) {
1851 +               h_errno = HOST_NOT_FOUND;
1852 +               return -1;
1853 +       }
1854 +       if ((dest[3] & 15) == 0 && !dest[6] && !dest[7]) {
1855 +               h_errno = NO_DATA;
1856 +               return -1;
1857 +       }
1858 +       return r;
1859  }
1860
1861  weak_alias(res_query, res_search);
1862 diff --git a/src/setjmp/aarch64/longjmp.s b/src/setjmp/aarch64/longjmp.s
1863 index 7c4655fa..0af9c50e 100644
1864 --- a/src/setjmp/aarch64/longjmp.s
1865 +++ b/src/setjmp/aarch64/longjmp.s
1866 @@ -18,7 +18,6 @@ longjmp:
1867         ldp d12, d13, [x0,#144]
1868         ldp d14, d15, [x0,#160]
1869
1870 -       mov x0, x1
1871 -       cbnz x1, 1f
1872 -       mov x0, #1
1873 -1:     br x30
1874 +       cmp w1, 0
1875 +       csinc w0, w1, wzr, ne
1876 +       br x30
1877 diff --git a/src/setjmp/i386/longjmp.s b/src/setjmp/i386/longjmp.s
1878 index 772d28dd..8188f06b 100644
1879 --- a/src/setjmp/i386/longjmp.s
1880 +++ b/src/setjmp/i386/longjmp.s
1881 @@ -6,15 +6,11 @@ _longjmp:
1882  longjmp:
1883         mov  4(%esp),%edx
1884         mov  8(%esp),%eax
1885 -       test    %eax,%eax
1886 -       jnz 1f
1887 -       inc     %eax
1888 -1:
1889 +       cmp       $1,%eax
1890 +       adc       $0, %al
1891         mov   (%edx),%ebx
1892         mov  4(%edx),%esi
1893         mov  8(%edx),%edi
1894         mov 12(%edx),%ebp
1895 -       mov 16(%edx),%ecx
1896 -       mov     %ecx,%esp
1897 -       mov 20(%edx),%ecx
1898 -       jmp *%ecx
1899 +       mov 16(%edx),%esp
1900 +       jmp *20(%edx)
1901 diff --git a/src/setjmp/x32/longjmp.s b/src/setjmp/x32/longjmp.s
1902 index e175a4b9..1b2661c3 100644
1903 --- a/src/setjmp/x32/longjmp.s
1904 +++ b/src/setjmp/x32/longjmp.s
1905 @@ -5,18 +5,14 @@
1906  .type longjmp,@function
1907  _longjmp:
1908  longjmp:
1909 -       mov %rsi,%rax           /* val will be longjmp return */
1910 -       test %rax,%rax
1911 -       jnz 1f
1912 -       inc %rax                /* if val==0, val=1 per longjmp semantics */
1913 -1:
1914 +       xor %eax,%eax
1915 +       cmp $1,%esi             /* CF = val ? 0 : 1 */
1916 +       adc %esi,%eax           /* eax = val + !val */
1917         mov (%rdi),%rbx         /* rdi is the jmp_buf, restore regs from it */
1918         mov 8(%rdi),%rbp
1919         mov 16(%rdi),%r12
1920         mov 24(%rdi),%r13
1921         mov 32(%rdi),%r14
1922         mov 40(%rdi),%r15
1923 -       mov 48(%rdi),%rdx       /* this ends up being the stack pointer */
1924 -       mov %rdx,%rsp
1925 -       mov 56(%rdi),%rdx       /* this is the instruction pointer */
1926 -       jmp *%rdx               /* goto saved address without altering rsp */
1927 +       mov 48(%rdi),%rsp
1928 +       jmp *56(%rdi)           /* goto saved address without altering rsp */
1929 diff --git a/src/setjmp/x32/setjmp.s b/src/setjmp/x32/setjmp.s
1930 index 98f58b8d..d95e4853 100644
1931 --- a/src/setjmp/x32/setjmp.s
1932 +++ b/src/setjmp/x32/setjmp.s
1933 @@ -18,5 +18,5 @@ setjmp:
1934         mov %rdx,48(%rdi)
1935         mov (%rsp),%rdx         /* save return addr ptr for new rip */
1936         mov %rdx,56(%rdi)
1937 -       xor %rax,%rax           /* always return 0 */
1938 +       xor %eax,%eax           /* always return 0 */
1939         ret
1940 diff --git a/src/setjmp/x86_64/longjmp.s b/src/setjmp/x86_64/longjmp.s
1941 index e175a4b9..1b2661c3 100644
1942 --- a/src/setjmp/x86_64/longjmp.s
1943 +++ b/src/setjmp/x86_64/longjmp.s
1944 @@ -5,18 +5,14 @@
1945  .type longjmp,@function
1946  _longjmp:
1947  longjmp:
1948 -       mov %rsi,%rax           /* val will be longjmp return */
1949 -       test %rax,%rax
1950 -       jnz 1f
1951 -       inc %rax                /* if val==0, val=1 per longjmp semantics */
1952 -1:
1953 +       xor %eax,%eax
1954 +       cmp $1,%esi             /* CF = val ? 0 : 1 */
1955 +       adc %esi,%eax           /* eax = val + !val */
1956         mov (%rdi),%rbx         /* rdi is the jmp_buf, restore regs from it */
1957         mov 8(%rdi),%rbp
1958         mov 16(%rdi),%r12
1959         mov 24(%rdi),%r13
1960         mov 32(%rdi),%r14
1961         mov 40(%rdi),%r15
1962 -       mov 48(%rdi),%rdx       /* this ends up being the stack pointer */
1963 -       mov %rdx,%rsp
1964 -       mov 56(%rdi),%rdx       /* this is the instruction pointer */
1965 -       jmp *%rdx               /* goto saved address without altering rsp */
1966 +       mov 48(%rdi),%rsp
1967 +       jmp *56(%rdi)           /* goto saved address without altering rsp */
1968 diff --git a/src/setjmp/x86_64/setjmp.s b/src/setjmp/x86_64/setjmp.s
1969 index 98f58b8d..d95e4853 100644
1970 --- a/src/setjmp/x86_64/setjmp.s
1971 +++ b/src/setjmp/x86_64/setjmp.s
1972 @@ -18,5 +18,5 @@ setjmp:
1973         mov %rdx,48(%rdi)
1974         mov (%rsp),%rdx         /* save return addr ptr for new rip */
1975         mov %rdx,56(%rdi)
1976 -       xor %rax,%rax           /* always return 0 */
1977 +       xor %eax,%eax           /* always return 0 */
1978         ret
1979 diff --git a/src/termios/tcgetwinsize.c b/src/termios/tcgetwinsize.c
1980 new file mode 100644
1981 index 00000000..9b3a65a4
1982 --- /dev/null
1983 +++ b/src/termios/tcgetwinsize.c
1984 @@ -0,0 +1,8 @@
1985 +#include <termios.h>
1986 +#include <sys/ioctl.h>
1987 +#include "syscall.h"
1988 +
1989 +int tcgetwinsize(int fd, struct winsize *wsz)
1990 +{
1991 +       return syscall(SYS_ioctl, fd, TIOCGWINSZ, wsz);
1992 +}
1993 diff --git a/src/termios/tcsetwinsize.c b/src/termios/tcsetwinsize.c
1994 new file mode 100644
1995 index 00000000..e01d0e25
1996 --- /dev/null
1997 +++ b/src/termios/tcsetwinsize.c
1998 @@ -0,0 +1,8 @@
1999 +#include <termios.h>
2000 +#include <sys/ioctl.h>
2001 +#include "syscall.h"
2002 +
2003 +int tcsetwinsize(int fd, const struct winsize *wsz)
2004 +{
2005 +       return syscall(SYS_ioctl, fd, TIOCSWINSZ, wsz);
2006 +}
2007 diff --git a/src/thread/i386/__set_thread_area.s b/src/thread/i386/__set_thread_area.s
2008 index c2c21dd5..aa6852be 100644
2009 --- a/src/thread/i386/__set_thread_area.s
2010 +++ b/src/thread/i386/__set_thread_area.s
2011 @@ -28,6 +28,7 @@ __set_thread_area:
2012         ret
2013  2:
2014         mov %ebx,%ecx
2015 +       xor %eax,%eax
2016         xor %ebx,%ebx
2017         xor %edx,%edx
2018         mov %ebx,(%esp)
2019 diff --git a/src/thread/pthread_create.c b/src/thread/pthread_create.c
2020 index 10f1b7d8..55744155 100644
2021 --- a/src/thread/pthread_create.c
2022 +++ b/src/thread/pthread_create.c
2023 @@ -314,7 +314,7 @@ int __pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict att
2024                 new->detach_state = DT_JOINABLE;
2025         }
2026         new->robust_list.head = &new->robust_list.head;
2027 -       new->CANARY = self->CANARY;
2028 +       new->canary = self->canary;
2029         new->sysinfo = self->sysinfo;
2030
2031         /* Setup argument structure for the new thread on its stack.