Re: test_and_set_bit implementation

From: Zoltan Menyhart <Zoltan.Menyhart_at_bull.net>
Date: 2006-12-13 21:02:48
Matthew Wilcox wrote:

> The original code and your rewrite both access memory twice in the loop.
> Why don't we do it with one memory reference per loop instead?
> 
> {
> 	CMPXCHG_BUGCHECK_DECL
> 
> 	u32 *m = (u32 *)addr + (nr >> 5);
> 	u32 bit = 1 << (nr & 31);
> 
> 	u32 old = *m;
> 	while (!(old & bit)) {
> 		u32 new = old | bit;
> 		u32 prev = cmpxchg_acq(m, old, new);
> 		CMPXCHG_BUGCHECK(m);
> 		if (prev == old)
> 			return 1;
> 		old = prev;
> 	}
> 
> 	return 0;
> }
> 
> Looking at the disassembly of grab_block() in fs/ext2/balloc.c, I don't
> see much difference.  The ld4.acq turns into a regular ld4 (because
> 'm' is no longer tagged as volatile), and is hoisted out of the loop.
> Interestingly, gcc chooses to reorder the tests, and make the loop four
> bundles long instead of three, but will 'goto repeat' in two bundles
> instead of four.  Using likely()/unlikely() doesn't persuade gcc to
> change the order of the two branches, so I assume it actually is better
> to do it this way.

I like this code with the following slight modifications:
- let's keep "m" as pointer to volatile
- let's keep on using "__u32" types
- not sure we need "new"
- return the old bit

	volatile __u32 *m = (volatile __u32 *)addr + (nr >> 5);
	__u32 bit = 1 << (nr & 31);

	__u32 old = *m;
	while (!(old & bit)) {
		__u32 prev = cmpxchg_acq(m, old, old | bit);
		CMPXCHG_BUGCHECK(m);
		if (prev == old)
			return 0;
		old = prev;
	}
	return 1;

It compiles to:

   0:         and r15=31,r32
   6:         mov r14=1
   c:         extr r32=r32,5,27;;
  10:         shladd r18=r32,2,r33;;
  16:         ld4.acq r16=[r18]
  1c:         shl r17=r14,r15;;
  20:         and r14=r17,r16;;
  26:         nop.m 0x0
  2c:         cmp4.eq p7,p6=0,r14
  30:         nop.b 0x0
  36:         nop.b 0x0
  3c:   (p06) br.cond.dpnt.few a0 <+0xa0>
  40:         nop.m 0x0
  46:         zxt4 r14=r16
  4c:         nop.b 0x0;;
  50:         mov.m ar.ccv=r14;;
  56:         nop.m 0x0
  5c:         or r14=r17,r16
  60:         nop.m 0x0;;
  66:         cmpxchg4.acq r14=[r18],r14,ar.ccv
  6c:         nop.i 0x0;;
  70:         cmp4.eq p7,p6=r14,r16
  76:         nop.f 0x0
  7c:         and r15=r17,r14
  80:         mov r16=r14
  86:         nop.f 0x0
  8c:   (p07) br.cond.dpnt.few b0 <+0xb0>;;
  90:         nop.m 0x0
  96:         cmp4.eq p7,p6=0,r15
  9c:   (p07) br.cond.dptk.few 40 <+0x40>
  a0:         nop.m 0x0
  a6:         mov r8=1
  ac:         br.ret.sptk.many b0;;
  b0:         nop.m 0x0
  b6:         mov r8=r0
  bc:         br.ret.sptk.many b0;;

It seems to be o.k., thanks.

Zoltán Menyhárt


-
To unsubscribe from this list: send the line "unsubscribe linux-ia64" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Received on Wed Dec 13 21:31:45 2006

This archive was generated by hypermail 2.1.8 : 2006-12-13 21:31:58 EST