From 2ccdea26f290f6990606f4a43de5272afa1a784d Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 7 Jan 2013 11:20:53 -0600 Subject: [PATCH] Fix spelling errors in sysdeps/powerpc files. --- ChangeLog | 46 ++++++++++++++++++++++++++ sysdeps/powerpc/fpu/feholdexcpt.c | 2 +- sysdeps/powerpc/fpu/feupdateenv.c | 4 +-- sysdeps/powerpc/fpu/math_ldbl.h | 2 +- sysdeps/powerpc/powerpc32/bits/atomic.h | 4 +-- sysdeps/powerpc/powerpc32/cell/memcpy.S | 4 +-- sysdeps/powerpc/powerpc32/dl-machine.c | 2 +- sysdeps/powerpc/powerpc32/dl-start.S | 2 +- sysdeps/powerpc/powerpc32/memset.S | 2 +- sysdeps/powerpc/powerpc32/power4/fpu/mpa.c | 6 ++-- sysdeps/powerpc/powerpc32/power4/fpu/slowpow.c | 2 +- sysdeps/powerpc/powerpc32/power4/fpu/w_sqrt.S | 2 +- sysdeps/powerpc/powerpc32/power4/fpu/w_sqrtf.S | 2 +- sysdeps/powerpc/powerpc32/power4/hp-timing.h | 2 +- sysdeps/powerpc/powerpc32/power4/memcmp.S | 4 +-- sysdeps/powerpc/powerpc32/power4/strncmp.S | 2 +- sysdeps/powerpc/powerpc32/power5/fpu/w_sqrt.S | 2 +- sysdeps/powerpc/powerpc32/power5/fpu/w_sqrtf.S | 2 +- sysdeps/powerpc/powerpc32/power6/memcpy.S | 16 ++++----- sysdeps/powerpc/powerpc32/power7/memchr.S | 2 +- sysdeps/powerpc/powerpc32/power7/memcmp.S | 4 +-- sysdeps/powerpc/powerpc32/power7/memrchr.S | 2 +- sysdeps/powerpc/powerpc32/power7/strcasecmp.S | 4 +-- sysdeps/powerpc/powerpc32/power7/strncmp.S | 2 +- sysdeps/powerpc/powerpc32/strncmp.S | 2 +- sysdeps/powerpc/powerpc64/bits/atomic.h | 4 +-- sysdeps/powerpc/powerpc64/cell/memcpy.S | 4 +-- sysdeps/powerpc/powerpc64/dl-machine.h | 6 ++-- sysdeps/powerpc/powerpc64/fpu/s_ceill.S | 4 +-- sysdeps/powerpc/powerpc64/fpu/s_nearbyintl.S | 8 ++--- sysdeps/powerpc/powerpc64/hp-timing.h | 2 +- sysdeps/powerpc/powerpc64/memcpy.S | 18 +++++----- sysdeps/powerpc/powerpc64/power4/fpu/mpa.c | 6 ++-- sysdeps/powerpc/powerpc64/power4/fpu/slowpow.c | 2 +- sysdeps/powerpc/powerpc64/power4/memcmp.S | 26 +++++++-------- sysdeps/powerpc/powerpc64/power4/memcpy.S | 18 +++++----- sysdeps/powerpc/powerpc64/power4/strncmp.S | 2 +- sysdeps/powerpc/powerpc64/power6/memcpy.S | 28 ++++++++-------- sysdeps/powerpc/powerpc64/power7/memcmp.S | 26 +++++++-------- sysdeps/powerpc/powerpc64/power7/memrchr.S | 2 +- sysdeps/powerpc/powerpc64/power7/strcasecmp.S | 4 +-- sysdeps/powerpc/powerpc64/power7/strncmp.S | 2 +- sysdeps/powerpc/powerpc64/strncmp.S | 2 +- 43 files changed, 167 insertions(+), 121 deletions(-) diff --git a/ChangeLog b/ChangeLog index 1d0ff3e631..b0e45aea5b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,49 @@ +2013-01-07 Anton Blanchard + + * sysdeps/powerpc/fpu/feholdexcpt.c: Fixed spelling errors. + * sysdeps/powerpc/fpu/feupdateenv.c: Likewise. + * sysdeps/powerpc/fpu/math_ldbl.h: Likewise. + * sysdeps/powerpc/powerpc32/bits/atomic.h: Likewise. + * sysdeps/powerpc/powerpc32/cell/memcpy.S: Likewise. + * sysdeps/powerpc/powerpc32/dl-machine.c: Likewise. + * sysdeps/powerpc/powerpc32/dl-start.S: Likewise. + * sysdeps/powerpc/powerpc32/memset.S: Likewise. + * sysdeps/powerpc/powerpc32/power4/fpu/mpa.c: Likewise. + * sysdeps/powerpc/powerpc32/power4/fpu/slowpow.c: Likewise. + * sysdeps/powerpc/powerpc32/power4/fpu/w_sqrt.S: Likewise. + * sysdeps/powerpc/powerpc32/power4/fpu/w_sqrtf.S: Likewise. + * sysdeps/powerpc/powerpc32/power4/hp-timing.h: Likewise. + * sysdeps/powerpc/powerpc32/power4/memcmp.S: Likewise. + * sysdeps/powerpc/powerpc32/power4/strncmp.S: Likewise. + * sysdeps/powerpc/powerpc32/power5/fpu/w_sqrt.S: Likewise. + * sysdeps/powerpc/powerpc32/power5/fpu/w_sqrtf.S: Likewise. + * sysdeps/powerpc/powerpc32/power6/memcpy.S: Likewise. + * sysdeps/powerpc/powerpc32/power7/memchr.S: Likewise. + * sysdeps/powerpc/powerpc32/power7/memcmp.S: Likewise. + * sysdeps/powerpc/powerpc32/power7/memrchr.S: Likewise. + * sysdeps/powerpc/powerpc32/power7/strcasecmp.S: Likewise. + * sysdeps/powerpc/powerpc32/power7/strncmp.S: Likewise. + * sysdeps/powerpc/powerpc32/strncmp.S: Likewise. + * sysdeps/powerpc/powerpc64/bits/atomic.h: Likewise. + * sysdeps/powerpc/powerpc64/cell/memcpy.S: Likewise. + * sysdeps/powerpc/powerpc64/dl-machine.h: Likewise. + * sysdeps/powerpc/powerpc64/fpu/s_ceill.S: Likewise. + * sysdeps/powerpc/powerpc64/fpu/s_nearbyintl.S: Likewise. + * sysdeps/powerpc/powerpc64/hp-timing.h: Likewise. + * sysdeps/powerpc/powerpc64/memcpy.S: Likewise. + * sysdeps/powerpc/powerpc64/power4/fpu/mpa.c: Likewise. + * sysdeps/powerpc/powerpc64/power4/fpu/slowpow.c: Likewise. + * sysdeps/powerpc/powerpc64/power4/memcmp.S: Likewise. + * sysdeps/powerpc/powerpc64/power4/memcpy.S: Likewise. + * sysdeps/powerpc/powerpc64/power4/strncmp.S: Likewise. + * sysdeps/powerpc/powerpc64/power6/memcpy.S: Likewise. + * sysdeps/powerpc/powerpc64/power7/memchr.S: Likewise. + * sysdeps/powerpc/powerpc64/power7/memcmp.S: Likewise. + * sysdeps/powerpc/powerpc64/power7/memrchr.S: Likewise. + * sysdeps/powerpc/powerpc64/power7/strcasecmp.S: Likewise. + * sysdeps/powerpc/powerpc64/power7/strncmp.S: Likewise. + * sysdeps/powerpc/powerpc64/strncmp.S: Likewise. + 2013-01-07 Joseph Myers * malloc/malloc.h (__MALLOC_P): Remove all definitions. diff --git a/sysdeps/powerpc/fpu/feholdexcpt.c b/sysdeps/powerpc/fpu/feholdexcpt.c index c91645560a..671724b287 100644 --- a/sysdeps/powerpc/fpu/feholdexcpt.c +++ b/sysdeps/powerpc/fpu/feholdexcpt.c @@ -33,7 +33,7 @@ feholdexcept (fenv_t *envp) new.l[1] = old.l[1] & 7; new.l[0] = old.l[0]; - /* If the old env had any eabled exceptions, then mask SIGFPE in the + /* If the old env had any enabled exceptions, then mask SIGFPE in the MSR FE0/FE1 bits. This may allow the FPU to run faster because it always takes the default action and can not generate SIGFPE. */ if ((old.l[1] & _FPU_MASK_ALL) != 0) diff --git a/sysdeps/powerpc/fpu/feupdateenv.c b/sysdeps/powerpc/fpu/feupdateenv.c index 30f8a6b6c6..66f2826398 100644 --- a/sysdeps/powerpc/fpu/feupdateenv.c +++ b/sysdeps/powerpc/fpu/feupdateenv.c @@ -37,14 +37,14 @@ __feupdateenv (const fenv_t *envp) unchanged. */ new.l[1] = (old.l[1] & 0x1FFFFF00) | (new.l[1] & 0x1FF80FFF); - /* If the old env has no eabled exceptions and the new env has any enabled + /* If the old env has no enabled exceptions and the new env has any enabled exceptions, then unmask SIGFPE in the MSR FE0/FE1 bits. This will put the hardware into "precise mode" and may cause the FPU to run slower on some hardware. */ if ((old.l[1] & _FPU_MASK_ALL) == 0 && (new.l[1] & _FPU_MASK_ALL) != 0) (void)__fe_nomask_env (); - /* If the old env had any eabled exceptions and the new env has no enabled + /* If the old env had any enabled exceptions and the new env has no enabled exceptions, then mask SIGFPE in the MSR FE0/FE1 bits. This may allow the FPU to run faster because it always takes the default action and can not generate SIGFPE. */ diff --git a/sysdeps/powerpc/fpu/math_ldbl.h b/sysdeps/powerpc/fpu/math_ldbl.h index 6cd6d0bdfe..20224e6646 100644 --- a/sysdeps/powerpc/fpu/math_ldbl.h +++ b/sysdeps/powerpc/fpu/math_ldbl.h @@ -27,7 +27,7 @@ ldbl_extract_mantissa (int64_t *hi64, u_int64_t *lo64, int *exp, long double x) lo |= (1ULL << 52); lo = lo << 7; /* pre-shift lo to match ieee854. */ /* The lower double is normalized separately from the upper. We - may need to adjust the lower manitissa to reflect this. */ + may need to adjust the lower mantissa to reflect this. */ ediff = eldbl.ieee.exponent - eldbl.ieee.exponent2; if (ediff > 53) lo = lo >> (ediff-53); diff --git a/sysdeps/powerpc/powerpc32/bits/atomic.h b/sysdeps/powerpc/powerpc32/bits/atomic.h index 2f441ed985..3e3a1effe5 100644 --- a/sysdeps/powerpc/powerpc32/bits/atomic.h +++ b/sysdeps/powerpc/powerpc32/bits/atomic.h @@ -21,7 +21,7 @@ This is a hint to the hardware to expect additional updates adjacent to the lock word or not. If we are acquiring a Mutex, the hint should be true. Otherwise we releasing a Mutex or doing a simple - atomic operation. In that case we don't expect addtional updates + atomic operation. In that case we don't expect additional updates adjacent to the lock word after the Store Conditional and the hint should be false. */ @@ -35,7 +35,7 @@ /* * The 32-bit exchange_bool is different on powerpc64 because the subf - * does signed 64-bit arthmatic while the lwarx is 32-bit unsigned + * does signed 64-bit arithmetic while the lwarx is 32-bit unsigned * (a load word and zero (high 32) form). So powerpc64 has a slightly * different version in sysdeps/powerpc/powerpc64/bits/atomic.h. */ diff --git a/sysdeps/powerpc/powerpc32/cell/memcpy.S b/sysdeps/powerpc/powerpc32/cell/memcpy.S index 5fbdab1db4..6d7d4ce5db 100644 --- a/sysdeps/powerpc/powerpc32/cell/memcpy.S +++ b/sysdeps/powerpc/powerpc32/cell/memcpy.S @@ -34,7 +34,7 @@ * latency to memory is >400 clocks * To improve copy performance we need to prefetch source data * far ahead to hide this latency - * For best performance instructionforms ending in "." like "andi." + * For best performance instruction forms ending in "." like "andi." * should be avoided as the are implemented in microcode on CELL. * The below code is loop unrolled for the CELL cache line of 128 bytes */ @@ -146,7 +146,7 @@ EALIGN (BP_SYM (memcpy), 5, 0) lfd fp9, 0x08(r4) dcbz r11,r6 lfd fp10, 0x10(r4) /* 4 register stride copy is optimal */ - lfd fp11, 0x18(r4) /* to hide 1st level cache lantency. */ + lfd fp11, 0x18(r4) /* to hide 1st level cache latency. */ lfd fp12, 0x20(r4) stfd fp9, 0x08(r6) stfd fp10, 0x10(r6) diff --git a/sysdeps/powerpc/powerpc32/dl-machine.c b/sysdeps/powerpc/powerpc32/dl-machine.c index f9f2a5d8f3..bd42fdf7d5 100644 --- a/sysdeps/powerpc/powerpc32/dl-machine.c +++ b/sysdeps/powerpc/powerpc32/dl-machine.c @@ -113,7 +113,7 @@ __elf_preferred_address (struct link_map *loader, size_t maplength, /* Otherwise, quickly look for a suitable gap between 0x3FFFF and 0x70000000. 0x3FFFF is so that references off NULL pointers will cause a segfault, 0x70000000 is just paranoia (it should always - be superceded by the program's load address). */ + be superseded by the program's load address). */ low = 0x0003FFFF; high = 0x70000000; for (nsid = 0; nsid < DL_NNS; ++nsid) diff --git a/sysdeps/powerpc/powerpc32/dl-start.S b/sysdeps/powerpc/powerpc32/dl-start.S index 01484e8e94..fa9c9bc4ae 100644 --- a/sysdeps/powerpc/powerpc32/dl-start.S +++ b/sysdeps/powerpc/powerpc32/dl-start.S @@ -74,7 +74,7 @@ _dl_start_user: slwi r5,r3,2 add r6,r4,r5 addi r5,r6,4 -/* pass the auxilary vector in r6. This is passed to us just after _envp. */ +/* pass the auxiliary vector in r6. This is passed to us just after _envp. */ 2: lwzu r0,4(r6) cmpwi r0,0 bne 2b diff --git a/sysdeps/powerpc/powerpc32/memset.S b/sysdeps/powerpc/powerpc32/memset.S index 2e86d1c910..45c79d858b 100644 --- a/sysdeps/powerpc/powerpc32/memset.S +++ b/sysdeps/powerpc/powerpc32/memset.S @@ -275,7 +275,7 @@ L(checklinesize): beq cr1,L(nondcbz) /* If the cache line size is 32 bytes then goto to L(zloopstart), - which is coded specificly for 32-byte lines (and 601). */ + which is coded specifically for 32-byte lines (and 601). */ cmplwi cr1,rCLS,32 beq cr1,L(zloopstart) diff --git a/sysdeps/powerpc/powerpc32/power4/fpu/mpa.c b/sysdeps/powerpc/powerpc32/power4/fpu/mpa.c index f167969ea3..b6f8341afa 100644 --- a/sysdeps/powerpc/powerpc32/power4/fpu/mpa.c +++ b/sysdeps/powerpc/powerpc32/power4/fpu/mpa.c @@ -409,9 +409,9 @@ void __mul(const mp_no *x, const mp_no *y, mp_no *z, int p) { if (k > p2) {i1=k-p2; i2=p2+1; } else {i1=1; i2=k; } #if 1 - /* rearange this inner loop to allow the fmadd instructions to be + /* rearrange this inner loop to allow the fmadd instructions to be independent and execute in parallel on processors that have - dual symetrical FP pipelines. */ + dual symmetrical FP pipelines. */ if (i1 < (i2-1)) { /* make sure we have at least 2 iterations */ @@ -437,7 +437,7 @@ void __mul(const mp_no *x, const mp_no *y, mp_no *z, int p) { zk += x->d[i1]*y->d[i1]; } #else - /* The orginal code. */ + /* The original code. */ for (i=i1,j=i2-1; i400 clocks * To improve copy performance we need to prefetch source data * far ahead to hide this latency - * For best performance instructionforms ending in "." like "andi." + * For best performance instruction forms ending in "." like "andi." * should be avoided as the are implemented in microcode on CELL. * The below code is loop unrolled for the CELL cache line of 128 bytes */ @@ -146,7 +146,7 @@ EALIGN (BP_SYM (memcpy), 5, 0) ld r9, 0x08(r4) dcbz r11,r6 ld r7, 0x10(r4) /* 4 register stride copy is optimal */ - ld r8, 0x18(r4) /* to hide 1st level cache lantency. */ + ld r8, 0x18(r4) /* to hide 1st level cache latency. */ ld r0, 0x20(r4) std r9, 0x08(r6) std r7, 0x10(r6) diff --git a/sysdeps/powerpc/powerpc64/dl-machine.h b/sysdeps/powerpc/powerpc64/dl-machine.h index 7b3e4ddc67..14ade21d96 100644 --- a/sysdeps/powerpc/powerpc64/dl-machine.h +++ b/sysdeps/powerpc/powerpc64/dl-machine.h @@ -202,7 +202,7 @@ BODY_PREFIX "_dl_start_user:\n" \ " sldi 5,3,3\n" \ " add 6,4,5\n" \ " addi 5,6,8\n" \ -/* Pass the auxilary vector in r6. This is passed to us just after \ +/* Pass the auxiliary vector in r6. This is passed to us just after \ _envp. */ \ "2: ldu 0,8(6)\n" \ " cmpdi 0,0\n" \ @@ -322,13 +322,13 @@ elf_machine_runtime_setup (struct link_map *map, int lazy, int profile) /* Relocate the DT_PPC64_GLINK entry in the _DYNAMIC section. elf_get_dynamic_info takes care of the standard entries but doesn't know exactly what to do with processor specific - entires. */ + entries. */ if (info[DT_PPC64(GLINK)] != NULL) info[DT_PPC64(GLINK)]->d_un.d_ptr += l_addr; if (lazy) { - /* The function descriptor of the appropriate trampline + /* The function descriptor of the appropriate trampoline routine is used to set the 1st and 2nd doubleword of the plt_reserve. */ Elf64_FuncDesc *resolve_fd; diff --git a/sysdeps/powerpc/powerpc64/fpu/s_ceill.S b/sysdeps/powerpc/powerpc64/fpu/s_ceill.S index 24fd521f0f..bffac3962c 100644 --- a/sysdeps/powerpc/powerpc64/fpu/s_ceill.S +++ b/sysdeps/powerpc/powerpc64/fpu/s_ceill.S @@ -31,7 +31,7 @@ PowerPC64 long double uses the IBM extended format which is represented two 64-floating point double values. The values are non-overlapping giving an effective precision of 106 bits. The first - double contains the high order bits of mantisa and is always ceiled + double contains the high order bits of mantissa and is always ceiled to represent a normal ceiling of long double to double. Since the long double value is sum of the high and low values, the low double normally has the opposite sign to compensate for the this ceiling. @@ -40,7 +40,7 @@ 1) |x| < 2**52, all the integer bits are in the high double. ceil the high double and set the low double to -0.0. 2) |x| >= 2**52, ceiling involves both doubles. - See the comment before lable .L2 for details. + See the comment before label .L2 for details. */ ENTRY (__ceill) diff --git a/sysdeps/powerpc/powerpc64/fpu/s_nearbyintl.S b/sysdeps/powerpc/powerpc64/fpu/s_nearbyintl.S index 9cf03ccd7a..b235d9b99c 100644 --- a/sysdeps/powerpc/powerpc64/fpu/s_nearbyintl.S +++ b/sysdeps/powerpc/powerpc64/fpu/s_nearbyintl.S @@ -26,16 +26,16 @@ .section ".text" /* long double [fp1,fp2] nearbyintl (long double x [fp1,fp2]) - IEEE 1003.1 nearbyintl function. nearbyintl is simular to the rintl + IEEE 1003.1 nearbyintl function. nearbyintl is similar to the rintl but does raise the "inexact" exception. This implementation is - based on rintl but explicitly maskes the inexact exception on entry + based on rintl but explicitly masks the inexact exception on entry and clears any pending inexact before restoring the exception mask on exit. PowerPC64 long double uses the IBM extended format which is represented two 64-floating point double values. The values are non-overlapping giving an effective precision of 106 bits. The first - double contains the high order bits of mantisa and is always rounded + double contains the high order bits of mantissa and is always rounded to represent a normal rounding of long double to double. Since the long double value is sum of the high and low values, the low double normally has the opposite sign to compensate for the this rounding. @@ -44,7 +44,7 @@ 1) |x| < 2**52, all the integer bits are in the high double. floor the high double and set the low double to -0.0. 2) |x| >= 2**52, Rounding involves both doubles. - See the comment before lable .L2 for details. + See the comment before label .L2 for details. */ ENTRY (__nearbyintl) mffs fp11 /* Save current FPSCR. */ diff --git a/sysdeps/powerpc/powerpc64/hp-timing.h b/sysdeps/powerpc/powerpc64/hp-timing.h index 12053a4206..e73ad5a664 100644 --- a/sysdeps/powerpc/powerpc64/hp-timing.h +++ b/sysdeps/powerpc/powerpc64/hp-timing.h @@ -82,7 +82,7 @@ typedef unsigned long long int hp_timing_t; /* That's quite simple. Use the `mftb' instruction. Note that the value might not be 100% accurate since there might be some more instructions running in this moment. This could be changed by using a barrier like - 'lwsync' right before the `mftb' instruciton. But we are not interested + 'lwsync' right before the `mftb' instruction. But we are not interested in accurate clock cycles here so we don't do this. */ #ifdef _ARCH_PWR4 #define HP_TIMING_NOW(Var) __asm__ __volatile__ ("mfspr %0,268" : "=r" (Var)) diff --git a/sysdeps/powerpc/powerpc64/memcpy.S b/sysdeps/powerpc/powerpc64/memcpy.S index 82a40f39f6..7c1b656be1 100644 --- a/sysdeps/powerpc/powerpc64/memcpy.S +++ b/sysdeps/powerpc/powerpc64/memcpy.S @@ -28,11 +28,11 @@ with the appropriate combination of byte and halfword load/stores. There is minimal effort to optimize the alignment of short moves. The 64-bit implementations of POWER3 and POWER4 do a reasonable job - of handling unligned load/stores that do not cross 32-byte boundries. + of handling unaligned load/stores that do not cross 32-byte boundaries. Longer moves (>= 32-bytes) justify the effort to get at least the destination doubleword (8-byte) aligned. Further optimization is - posible when both source and destination are doubleword aligned. + possible when both source and destination are doubleword aligned. Each case has a optimized unrolled loop. */ EALIGN (BP_SYM (memcpy), 5, 0) @@ -43,9 +43,9 @@ EALIGN (BP_SYM (memcpy), 5, 0) std 3,-16(1) std 31,-8(1) cfi_offset(31,-8) - andi. 11,3,7 /* check alignement of dst. */ + andi. 11,3,7 /* check alignment of dst. */ clrldi 0,0,61 /* Number of bytes until the 1st doubleword of dst. */ - clrldi 10,4,61 /* check alignement of src. */ + clrldi 10,4,61 /* check alignment of src. */ cmpldi cr6,5,8 ble- cr1,.L2 /* If move < 32 bytes use short move code. */ cmpld cr6,10,11 @@ -56,7 +56,7 @@ EALIGN (BP_SYM (memcpy), 5, 0) beq .L0 subf 31,0,5 - /* Move 0-7 bytes as needed to get the destination doubleword alligned. */ + /* Move 0-7 bytes as needed to get the destination doubleword aligned. */ 1: bf 31,2f lbz 6,0(12) addi 12,12,1 @@ -73,10 +73,10 @@ EALIGN (BP_SYM (memcpy), 5, 0) stw 6,0(3) addi 3,3,4 0: - clrldi 10,12,61 /* check alignement of src again. */ + clrldi 10,12,61 /* check alignment of src again. */ srdi 9,31,3 /* Number of full double words remaining. */ - /* Copy doublewords from source to destination, assumpting the + /* Copy doublewords from source to destination, assuming the destination is aligned on a doubleword boundary. At this point we know there are at least 25 bytes left (32-7) to copy. @@ -152,7 +152,7 @@ EALIGN (BP_SYM (memcpy), 5, 0) add 12,12,0 /* At this point we have a tail of 0-7 bytes and we know that the - destiniation is double word aligned. */ + destination is double word aligned. */ 4: bf 29,2f lwz 6,0(12) addi 12,12,4 @@ -282,7 +282,7 @@ EALIGN (BP_SYM (memcpy), 5, 0) bne cr6,4f /* Would have liked to use use ld/std here but the 630 processors are slow for load/store doubles that are not at least word aligned. - Unaligned Load/Store word execute with only a 1 cycle penaltity. */ + Unaligned Load/Store word execute with only a 1 cycle penalty. */ lwz 6,0(4) lwz 7,4(4) stw 6,0(3) diff --git a/sysdeps/powerpc/powerpc64/power4/fpu/mpa.c b/sysdeps/powerpc/powerpc64/power4/fpu/mpa.c index f167969ea3..b6f8341afa 100644 --- a/sysdeps/powerpc/powerpc64/power4/fpu/mpa.c +++ b/sysdeps/powerpc/powerpc64/power4/fpu/mpa.c @@ -409,9 +409,9 @@ void __mul(const mp_no *x, const mp_no *y, mp_no *z, int p) { if (k > p2) {i1=k-p2; i2=p2+1; } else {i1=1; i2=k; } #if 1 - /* rearange this inner loop to allow the fmadd instructions to be + /* rearrange this inner loop to allow the fmadd instructions to be independent and execute in parallel on processors that have - dual symetrical FP pipelines. */ + dual symmetrical FP pipelines. */ if (i1 < (i2-1)) { /* make sure we have at least 2 iterations */ @@ -437,7 +437,7 @@ void __mul(const mp_no *x, const mp_no *y, mp_no *z, int p) { zk += x->d[i1]*y->d[i1]; } #else - /* The orginal code. */ + /* The original code. */ for (i=i1,j=i2-1; i= 32-bytes) justify the effort to get at least the destination doubleword (8-byte) aligned. Further optimization is - posible when both source and destination are doubleword aligned. + possible when both source and destination are doubleword aligned. Each case has a optimized unrolled loop. */ .machine power4 @@ -44,9 +44,9 @@ EALIGN (BP_SYM (memcpy), 5, 0) std 3,-16(1) std 31,-8(1) cfi_offset(31,-8) - andi. 11,3,7 /* check alignement of dst. */ + andi. 11,3,7 /* check alignment of dst. */ clrldi 0,0,61 /* Number of bytes until the 1st doubleword of dst. */ - clrldi 10,4,61 /* check alignement of src. */ + clrldi 10,4,61 /* check alignment of src. */ cmpldi cr6,5,8 ble- cr1,.L2 /* If move < 32 bytes use short move code. */ cmpld cr6,10,11 @@ -57,7 +57,7 @@ EALIGN (BP_SYM (memcpy), 5, 0) beq .L0 subf 31,0,5 - /* Move 0-7 bytes as needed to get the destination doubleword alligned. */ + /* Move 0-7 bytes as needed to get the destination doubleword aligned. */ 1: bf 31,2f lbz 6,0(12) addi 12,12,1 @@ -74,10 +74,10 @@ EALIGN (BP_SYM (memcpy), 5, 0) stw 6,0(3) addi 3,3,4 0: - clrldi 10,12,61 /* check alignement of src again. */ + clrldi 10,12,61 /* check alignment of src again. */ srdi 9,31,3 /* Number of full double words remaining. */ - /* Copy doublewords from source to destination, assumpting the + /* Copy doublewords from source to destination, assuming the destination is aligned on a doubleword boundary. At this point we know there are at least 25 bytes left (32-7) to copy. @@ -154,7 +154,7 @@ EALIGN (BP_SYM (memcpy), 5, 0) add 12,12,0 /* At this point we have a tail of 0-7 bytes and we know that the - destiniation is double word aligned. */ + destination is double word aligned. */ 4: bf 29,2f lwz 6,0(12) addi 12,12,4 @@ -284,7 +284,7 @@ EALIGN (BP_SYM (memcpy), 5, 0) bne cr6,4f /* Would have liked to use use ld/std here but the 630 processors are slow for load/store doubles that are not at least word aligned. - Unaligned Load/Store word execute with only a 1 cycle penaltity. */ + Unaligned Load/Store word execute with only a 1 cycle penalty. */ lwz 6,0(4) lwz 7,4(4) stw 6,0(3) diff --git a/sysdeps/powerpc/powerpc64/power4/strncmp.S b/sysdeps/powerpc/powerpc64/power4/strncmp.S index 0940571e8d..19877fa785 100644 --- a/sysdeps/powerpc/powerpc64/power4/strncmp.S +++ b/sysdeps/powerpc/powerpc64/power4/strncmp.S @@ -52,7 +52,7 @@ EALIGN (BP_SYM(strncmp), 4, 0) cmpldi cr1, rN, 0 lis rFEFE, -0x101 bne L(unaligned) -/* We are doubleword alligned so set up for two loops. first a double word +/* We are doubleword aligned so set up for two loops. first a double word loop, then fall into the byte loop if any residual. */ srdi. rTMP, rN, 3 clrldi rN, rN, 61 diff --git a/sysdeps/powerpc/powerpc64/power6/memcpy.S b/sysdeps/powerpc/powerpc64/power6/memcpy.S index 57f4d06990..64f5b2f427 100644 --- a/sysdeps/powerpc/powerpc64/power6/memcpy.S +++ b/sysdeps/powerpc/powerpc64/power6/memcpy.S @@ -28,16 +28,16 @@ with the appropriate combination of byte and halfword load/stores. There is minimal effort to optimize the alignment of short moves. The 64-bit implementations of POWER3 and POWER4 do a reasonable job - of handling unligned load/stores that do not cross 32-byte boundries. + of handling unaligned load/stores that do not cross 32-byte boundaries. Longer moves (>= 32-bytes) justify the effort to get at least the destination doubleword (8-byte) aligned. Further optimization is - posible when both source and destination are doubleword aligned. + possible when both source and destination are doubleword aligned. Each case has a optimized unrolled loop. - For POWER6 unaligned loads will take a 20+ cycle hicup for any + For POWER6 unaligned loads will take a 20+ cycle hiccup for any L1 cache miss that crosses a 32- or 128-byte boundary. Store - is more forgiving and does not take a hicup until page or + is more forgiving and does not take a hiccup until page or segment boundaries. So we require doubleword alignment for the source but may take a risk and only require word alignment for the destination. */ @@ -50,9 +50,9 @@ EALIGN (BP_SYM (memcpy), 7, 0) neg 0,3 std 3,-16(1) std 31,-8(1) - andi. 11,3,7 /* check alignement of dst. */ + andi. 11,3,7 /* check alignment of dst. */ clrldi 0,0,61 /* Number of bytes until the 1st doubleword of dst. */ - clrldi 10,4,61 /* check alignement of src. */ + clrldi 10,4,61 /* check alignment of src. */ cmpldi cr6,5,8 ble- cr1,.L2 /* If move < 32 bytes use short move code. */ mtcrf 0x01,0 @@ -61,8 +61,8 @@ EALIGN (BP_SYM (memcpy), 7, 0) beq .L0 subf 5,0,5 - /* Move 0-7 bytes as needed to get the destination doubleword alligned. - Duplicate some code to maximize fall-throught and minimize agen delays. */ + /* Move 0-7 bytes as needed to get the destination doubleword aligned. + Duplicate some code to maximize fall-through and minimize agen delays. */ 1: bf 31,2f lbz 6,0(4) stb 6,0(3) @@ -95,10 +95,10 @@ EALIGN (BP_SYM (memcpy), 7, 0) add 4,4,0 add 3,3,0 - clrldi 10,4,61 /* check alignement of src again. */ + clrldi 10,4,61 /* check alignment of src again. */ srdi 9,5,3 /* Number of full double words remaining. */ - /* Copy doublewords from source to destination, assumpting the + /* Copy doublewords from source to destination, assuming the destination is aligned on a doubleword boundary. At this point we know there are at least 25 bytes left (32-7) to copy. @@ -130,7 +130,7 @@ EALIGN (BP_SYM (memcpy), 7, 0) load, load, store, store every 2 cycles. The following code is sensitive to cache line alignment. Do not - make any change with out first making sure thay don't result in + make any change with out first making sure they don't result in splitting ld/std pairs across a cache line. */ mtcrf 0x02,5 @@ -329,7 +329,7 @@ L(das_tail): L(das_tail2): /* At this point we have a tail of 0-7 bytes and we know that the - destiniation is double word aligned. */ + destination is double word aligned. */ 4: bf 29,2f lwz 6,0(4) stw 6,0(3) @@ -537,7 +537,7 @@ L(dus_tailX): .LE8: mr 12,4 bne cr6,L(dus_4) -/* Exactly 8 bytes. We may cross a 32-/128-byte boundry and take a ~20 +/* Exactly 8 bytes. We may cross a 32-/128-byte boundary and take a ~20 cycle delay. This case should be rare and any attempt to avoid this would take most of 20 cycles any way. */ ld 6,0(4) @@ -1146,7 +1146,7 @@ L(du_done): add 3,3,0 add 12,12,0 /* At this point we have a tail of 0-7 bytes and we know that the - destiniation is double word aligned. */ + destination is double word aligned. */ 4: bf 29,2f lwz 6,0(12) addi 12,12,4 diff --git a/sysdeps/powerpc/powerpc64/power7/memcmp.S b/sysdeps/powerpc/powerpc64/power7/memcmp.S index 80687f86da..a7caa48946 100644 --- a/sysdeps/powerpc/powerpc64/power7/memcmp.S +++ b/sysdeps/powerpc/powerpc64/power7/memcmp.S @@ -55,7 +55,7 @@ EALIGN (BP_SYM(memcmp),4,0) beq- cr6,L(zeroLength) dcbt 0,rSTR1 dcbt 0,rSTR2 -/* If less than 8 bytes or not aligned, use the unalligned +/* If less than 8 bytes or not aligned, use the unaligned byte loop. */ blt cr1,L(bytealigned) std rWORD8,-8(r1) @@ -64,7 +64,7 @@ EALIGN (BP_SYM(memcmp),4,0) cfi_offset(rWORD7,-16) bne L(unaligned) /* At this point we know both strings have the same alignment and the - compare length is at least 8 bytes. rBITDIF containes the low order + compare length is at least 8 bytes. rBITDIF contains the low order 3 bits of rSTR1 and cr5 contains the result of the logical compare of rBITDIF to 0. If rBITDIF == 0 then we are already double word aligned and can perform the DWaligned loop. @@ -72,7 +72,7 @@ EALIGN (BP_SYM(memcmp),4,0) Otherwise we know the two strings have the same alignment (but not yet DW). So we can force the string addresses to the next lower DW boundary and special case this first DW word using shift left to - ellimiate bits preceeding the first byte. Since we want to join the + eliminate bits preceding the first byte. Since we want to join the normal (DWaligned) compare loop, starting at the second double word, we need to adjust the length (rN) and special case the loop versioning for the first DW. This insures that the loop count is @@ -154,8 +154,8 @@ L(DWaligned): L(dP1): mtctr rTMP /* Normally we'd use rWORD7/rWORD8 here, but since we might exit early - (8-15 byte compare), we want to use only volitile registers. This - means we can avoid restoring non-volitile registers since we did not + (8-15 byte compare), we want to use only volatile registers. This + means we can avoid restoring non-volatile registers since we did not change any on the early exit path. The key here is the non-early exit path only cares about the condition code (cr5), not about which register pair was used. */ @@ -217,7 +217,7 @@ L(dP2e): bne cr5,L(dLcr5) b L(dLoop2) /* Again we are on a early exit path (16-23 byte compare), we want to - only use volitile registers and avoid restoring non-volitile + only use volatile registers and avoid restoring non-volatile registers. */ .align 4 L(dP2x): @@ -258,7 +258,7 @@ L(dP3e): bne cr6,L(dLcr6) b L(dLoop1) /* Again we are on a early exit path (24-31 byte compare), we want to - only use volitile registers and avoid restoring non-volitile + only use volatile registers and avoid restoring non-volatile registers. */ .align 4 L(dP3x): @@ -342,7 +342,7 @@ L(d04): beq L(zeroLength) /* At this point we have a remainder of 1 to 7 bytes to compare. Since we are aligned it is safe to load the whole double word, and use - shift right double to elliminate bits beyond the compare length. */ + shift right double to eliminate bits beyond the compare length. */ L(d00): ld rWORD1,8(rSTR1) ld rWORD2,8(rSTR2) @@ -498,15 +498,15 @@ L(zeroLength): .align 4 /* At this point we know the strings have different alignment and the - compare length is at least 8 bytes. rBITDIF containes the low order + compare length is at least 8 bytes. rBITDIF contains the low order 3 bits of rSTR1 and cr5 contains the result of the logical compare of rBITDIF to 0. If rBITDIF == 0 then rStr1 is double word aligned and can perform the DWunaligned loop. - Otherwise we know that rSTR1 is not aready DW aligned yet. + Otherwise we know that rSTR1 is not already DW aligned yet. So we can force the string addresses to the next lower DW boundary and special case this first DW word using shift left to - ellimiate bits preceeding the first byte. Since we want to join the + eliminate bits preceding the first byte. Since we want to join the normal (DWaligned) compare loop, starting at the second double word, we need to adjust the length (rN) and special case the loop versioning for the first DW. This insures that the loop count is @@ -539,7 +539,7 @@ L(unaligned): clrrdi rSTR2,rSTR2,3 std r26,-48(r1) cfi_offset(r26,-48) -/* Compute the leaft/right shift counts for the unalign rSTR2, +/* Compute the left/right shift counts for the unaligned rSTR2, compensating for the logical (DW aligned) start of rSTR1. */ clrldi rSHL,r27,61 clrrdi rSTR1,rSTR1,3 @@ -878,7 +878,7 @@ L(du14): sldi. rN,rN,3 bne cr5,L(duLcr5) /* At this point we have a remainder of 1 to 7 bytes to compare. We use - shift right double to elliminate bits beyond the compare length. + shift right double to eliminate bits beyond the compare length. This allows the use of double word subtract to compute the final result. diff --git a/sysdeps/powerpc/powerpc64/power7/memrchr.S b/sysdeps/powerpc/powerpc64/power7/memrchr.S index 624d74fd38..d3ffe4c087 100644 --- a/sysdeps/powerpc/powerpc64/power7/memrchr.S +++ b/sysdeps/powerpc/powerpc64/power7/memrchr.S @@ -52,7 +52,7 @@ L(proceed): cmpb r10,r12,r4 /* Check for BYTE in DWORD1. */ sld r10,r10,r0 srd r10,r10,r0 - cmpldi cr7,r10,0 /* If r10 == 0, no BYTE's have been found. */ + cmpldi cr7,r10,0 /* If r10 == 0, no BYTEs have been found. */ bne cr7,L(done) /* Are we done already? */ diff --git a/sysdeps/powerpc/powerpc64/power7/strcasecmp.S b/sysdeps/powerpc/powerpc64/power7/strcasecmp.S index da24588c70..6323154eaa 100644 --- a/sysdeps/powerpc/powerpc64/power7/strcasecmp.S +++ b/sysdeps/powerpc/powerpc64/power7/strcasecmp.S @@ -40,8 +40,8 @@ ENTRY (BP_SYM (__STRCMP)) #define rSTR1 r5 /* 1st string */ #define rSTR2 r4 /* 2nd string */ #define rLOCARG r5 /* 3rd argument: locale_t */ -#define rCHAR1 r6 /* Byte readed from 1st string */ -#define rCHAR2 r7 /* Byte readed from 2nd string */ +#define rCHAR1 r6 /* Byte read from 1st string */ +#define rCHAR2 r7 /* Byte read from 2nd string */ #define rADDR1 r8 /* Address of tolower(rCHAR1) */ #define rADDR2 r12 /* Address of tolower(rCHAR2) */ #define rLWR1 r8 /* Word tolower(rCHAR1) */ diff --git a/sysdeps/powerpc/powerpc64/power7/strncmp.S b/sysdeps/powerpc/powerpc64/power7/strncmp.S index 7aaad4ffe9..25a6baf479 100644 --- a/sysdeps/powerpc/powerpc64/power7/strncmp.S +++ b/sysdeps/powerpc/powerpc64/power7/strncmp.S @@ -56,7 +56,7 @@ EALIGN (BP_SYM(strncmp),5,0) cmpldi cr1,rN,0 lis rFEFE,-0x101 bne L(unaligned) -/* We are doubleword alligned so set up for two loops. first a double word +/* We are doubleword aligned so set up for two loops. first a double word loop, then fall into the byte loop if any residual. */ srdi. rTMP,rN,3 clrldi rN,rN,61 diff --git a/sysdeps/powerpc/powerpc64/strncmp.S b/sysdeps/powerpc/powerpc64/strncmp.S index 4c1938ef71..89a3246fdc 100644 --- a/sysdeps/powerpc/powerpc64/strncmp.S +++ b/sysdeps/powerpc/powerpc64/strncmp.S @@ -50,7 +50,7 @@ EALIGN (BP_SYM(strncmp), 4, 0) cmpldi cr1, rN, 0 lis rFEFE, -0x101 bne L(unaligned) -/* We are doubleword alligned so set up for two loops. first a double word +/* We are doubleword aligned so set up for two loops. first a double word loop, then fall into the byte loop if any residual. */ srdi. rTMP, rN, 3 clrldi rN, rN, 61 -- 2.11.4.GIT