--- //depot/projects/smpng/sys/amd64/include/atomic.h	2011-01-05 17:06:25.000000000 +0000
+++ //depot/user/jhb/ktrace/amd64/include/atomic.h	2011-01-05 22:08:56.000000000 +0000
@@ -81,7 +81,7 @@
 u_int	atomic_fetchadd_int(volatile u_int *p, u_int v);
 u_long	atomic_fetchadd_long(volatile u_long *p, u_long v);
 
-#define	ATOMIC_STORE_LOAD(TYPE, LOP, SOP)			\
+#define	ATOMIC_STORE_LOAD(TYPE)					\
 u_##TYPE	atomic_load_acq_##TYPE(volatile u_##TYPE *p);	\
 void		atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)
 
@@ -213,13 +213,12 @@
 #if defined(_KERNEL) && !defined(SMP)
 
 /*
- * We assume that a = b will do atomic loads and stores.  However, on a
- * PentiumPro or higher, reads may pass writes, so for that case we have
- * to use a serializing instruction (i.e. with LOCK) to do the load in
- * SMP kernels.  For UP kernels, however, the cache of the single processor
+ * We assume that a = b will do atomic loads and stores.  However, reads
+ * may pass writes, so we have to use fences in SMP kernels to preserve
+ * ordering.  For UP kernels, however, the cache of the single processor
  * is always consistent, so we only need to take care of compiler.
  */
-#define	ATOMIC_STORE_LOAD(TYPE, LOP, SOP)		\
+#define	ATOMIC_STORE_LOAD(TYPE)				\
 static __inline u_##TYPE				\
 atomic_load_acq_##TYPE(volatile u_##TYPE *p)		\
 {							\
@@ -240,32 +239,22 @@
 
 #else /* !(_KERNEL && !SMP) */
 
-#define	ATOMIC_STORE_LOAD(TYPE, LOP, SOP)		\
+#define	ATOMIC_STORE_LOAD(TYPE)				\
 static __inline u_##TYPE				\
 atomic_load_acq_##TYPE(volatile u_##TYPE *p)		\
 {							\
-	u_##TYPE res;					\
+	u_##TYPE v;					\
 							\
-	__asm __volatile(MPLOCKED LOP			\
-	: "=a" (res),			/* 0 */		\
-	  "=m" (*p)			/* 1 */		\
-	: "m" (*p)			/* 2 */		\
-	: "memory", "cc");				\
-							\
-	return (res);					\
+	v = *p;						\
+	__asm __volatile("lfence" ::: "memory");	\
+	return (v);					\
 }							\
 							\
-/*							\
- * The XCHG instruction asserts LOCK automagically.	\
- */							\
 static __inline void					\
 atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
 {							\
-	__asm __volatile(SOP				\
-	: "=m" (*p),			/* 0 */		\
-	  "+r" (v)			/* 1 */		\
-	: "m" (*p)			/* 2 */		\
-	: "memory");					\
+	__asm __volatile("sfence" ::: "memory");	\
+	*p = v;						\
}							\
 struct __hack
 
@@ -293,10 +282,10 @@
 ATOMIC_ASM(add,	     long,  "addq %1,%0",  "ir",  v);
 ATOMIC_ASM(subtract, long,  "subq %1,%0",  "ir",  v);
 
-ATOMIC_STORE_LOAD(char,	"cmpxchgb %b0,%1", "xchgb %b1,%0");
-ATOMIC_STORE_LOAD(short,"cmpxchgw %w0,%1", "xchgw %w1,%0");
-ATOMIC_STORE_LOAD(int,	"cmpxchgl %0,%1",  "xchgl %1,%0");
-ATOMIC_STORE_LOAD(long,	"cmpxchgq %0,%1",  "xchgq %1,%0");
+ATOMIC_STORE_LOAD(char);
+ATOMIC_STORE_LOAD(short);
+ATOMIC_STORE_LOAD(int);
+ATOMIC_STORE_LOAD(long);
 
 #undef ATOMIC_ASM
 #undef ATOMIC_STORE_LOAD
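
For reference, the rewritten SMP case of ATOMIC_STORE_LOAD(int) expands to the
following pair of inlines.  This is just the macro above written out by hand
for one type, to make the before/after comparison easier to read:

static __inline u_int
atomic_load_acq_int(volatile u_int *p)
{
	u_int v;

	v = *p;					/* plain load... */
	__asm __volatile("lfence" ::: "memory");	/* ...then a load fence */
	return (v);
}

static __inline void
atomic_store_rel_int(volatile u_int *p, u_int v)
{

	__asm __volatile("sfence" ::: "memory");	/* store fence first... */
	*p = v;					/* ...then the plain store */
}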
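
And a minimal usage sketch of the acquire/release pair, assuming the usual
flag-publication pattern; the names data, ready, producer() and consumer()
are hypothetical and not part of this change:

#include <machine/atomic.h>

static u_int data;		/* payload published by the producer */
static volatile u_int ready;	/* flag ordered by the acq/rel pair */

static void
producer(void)
{

	data = 42;			/* plain store of the payload */
	/* sfence, then store: the write to data is ordered before ready */
	atomic_store_rel_int(&ready, 1);
}

static void
consumer(void)
{

	while (atomic_load_acq_int(&ready) == 0)
		;			/* spin: load of ready, then lfence */
	/* data is guaranteed to read 42 here */
}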