From a556fd061e70b61d76ac9116d2e0577c7e0d86b2 Mon Sep 17 00:00:00 2001
From: Matthew Dillon
Date: Fri, 15 Jul 2016 13:20:32 -0700
Subject: [PATCH] kernel - Enhance CPUMASK and atomic ops

* Add atomic_testandset_long()
  Add atomic_testandclear_long()

* Add atomic_cmpxchg_long_test().  This is for debugging only; it uses
  the 'z' flag instead of comparing old-vs-result, but the two checks
  should have the same effect.

* Add macros for atomic_store_rel_cpumask() and atomic_load_acq_cpumask().

* Add ATOMIC_CPUMASK_TESTANDSET()
  Add ATOMIC_CPUMASK_TESTANDCLR()
  Add ATOMIC_CPUMASK_COPY()
---
 sys/cpu/x86_64/include/atomic.h | 55 ++++++++++++++++++++++++++++++++++++++++-
 sys/cpu/x86_64/include/types.h  | 19 +++++++++++---
 2 files changed, 70 insertions(+), 4 deletions(-)
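Usage sketch (commentary only, not part of the patch; the mask and cpu
names below are illustrative assumptions): the new test-and-set macro
lets a CPU atomically claim its bit in a cpumask_t and learn whether the
bit was already set, while TESTANDCLR releases the bit and reports its
prior state.

	/*
	 * Hypothetical example.  ATOMIC_CPUMASK_TESTANDSET() selects
	 * ary[(cpu >> 6) & 3] and operates on bit (cpu & 63) through the
	 * new atomic_testandset_long(), returning the bit's old value.
	 */
	static cpumask_t example_mask;		/* assumed global mask */

	static void
	example_claim_cpu(int cpu)
	{
		if (ATOMIC_CPUMASK_TESTANDSET(example_mask, cpu) == 0) {
			/* bit was clear; this CPU just claimed it */
		}
		/* release the bit and observe its previous state */
		(void)ATOMIC_CPUMASK_TESTANDCLR(example_mask, cpu);
	}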
%1,%0"); #define atomic_set_cpumask atomic_set_long #define atomic_clear_cpumask atomic_clear_long #define atomic_cmpset_cpumask atomic_cmpset_long +#define atomic_store_rel_cpumask atomic_store_rel_long +#define atomic_load_acq_cpumask atomic_load_acq_long /* Operations on 8-bit bytes. */ #define atomic_set_8 atomic_set_char diff --git a/sys/cpu/x86_64/include/types.h b/sys/cpu/x86_64/include/types.h index d4557910af..fc1d57081a 100644 --- a/sys/cpu/x86_64/include/types.h +++ b/sys/cpu/x86_64/include/types.h @@ -94,9 +94,9 @@ typedef struct { #define CPUMASK_SIMPLE(cpu) ((__uint64_t)1 << (cpu)) #define CPUMASK_ADDR(mask, cpu) \ - ((cpu) < 64) ? &(mask).ary[0] : \ - ((cpu) < 128) ? &(mask).ary[1] : \ - ((cpu) < 192) ? &(mask).ary[2] : &(mask).ary[3]))) + (((cpu) < 64) ? &(mask).ary[0] : \ + (((cpu) < 128) ? &(mask).ary[1] : \ + (((cpu) < 192) ? &(mask).ary[2] : &(mask).ary[3]))) #define BSRCPUMASK(val) ((val).ary[3] ? 192 + bsrq((val).ary[3]) : \ ((val).ary[2] ? 128 + bsrq((val).ary[2]) : \ @@ -255,6 +255,12 @@ typedef struct { atomic_clear_cpumask(&(mask).ary[((i) >> 6) & 3], \ CPUMASK_SIMPLE((i) & 63)) +#define ATOMIC_CPUMASK_TESTANDSET(mask, i) \ + atomic_testandset_long(&(mask).ary[((i) >> 6) & 3], (i)) + +#define ATOMIC_CPUMASK_TESTANDCLR(mask, i) \ + atomic_testandclear_long(&(mask).ary[((i) >> 6) & 3], (i)) + #define ATOMIC_CPUMASK_ORMASK(mask, val) do { \ atomic_set_cpumask(&(mask).ary[0], (val).ary[0]); \ atomic_set_cpumask(&(mask).ary[1], (val).ary[1]); \ @@ -269,6 +275,13 @@ typedef struct { atomic_clear_cpumask(&(mask).ary[3], (val).ary[3]); \ } while(0) +#define ATOMIC_CPUMASK_COPY(mask, val) do { \ + atomic_store_rel_cpumask(&(mask).ary[0], (val).ary[0]);\ + atomic_store_rel_cpumask(&(mask).ary[1], (val).ary[1]);\ + atomic_store_rel_cpumask(&(mask).ary[2], (val).ary[2]);\ + atomic_store_rel_cpumask(&(mask).ary[3], (val).ary[3]);\ + } while(0) + #endif #define CPULOCK_EXCLBIT 0 /* exclusive lock bit number */ -- 2.11.4.GIT