accessed/dirty bit handler tuning

From: Zoltan Menyhart <Zoltan.Menyhart_at_bull.net>
Date: 2006-03-16 00:29:30
This patch is based on Christoph's patch entitled "Fix race in the
accessed/dirty bit handlers".

- It adds the missing "srlz.d"
- It uses some "nta" and "bias" cache hints
- It slightly reorganizes the routines for some minor performance improvements

Thanks,

Zoltan



Signed-off-by: Zoltan Menyhart <Zoltan.Menyhart@bull.net>

Index: linux-2.6.16-rc5-mm3/arch/ia64/kernel/ivt.S
===================================================================
--- old/arch/ia64/kernel/ivt.S	2006-03-15 12:01:23.000000000 +0100
+++ new/arch/ia64/kernel/ivt.S	2006-03-15 14:11:46.000000000 +0100
@@ -557,29 +557,59 @@ ENTRY(dirty_bit)
 #ifdef CONFIG_SMP
 	mov r28=ar.ccv				// save ar.ccv
 	;;
-1:	ld8 r18=[r17]
-	;;					// avoid RAW on r18
+	/*
+	 * The atomic instructions are handled exclusively by the L2 (L2D) cache.
+	 * "bias" is a hint to acquire exclusive ownership.
+	 * "nta": allocate the cache line only in L2 and to bias it to be replaced.
+	 */
+1:	ld8.bias.nta r18 = [r17]
+	;;
 	mov ar.ccv=r18				// set compare value for cmpxchg
 	or r25=_PAGE_D|_PAGE_A,r18		// set the dirty and accessed bits
 	tbit.z p7,p6 = r18,_PAGE_P_BIT		// Check present bit
-	;;
-(p6)	cmpxchg8.acq r26=[r17],r25,ar.ccv	// Only update if page is present
-	mov r24=PAGE_SHIFT<<2
-	;;
-(p6)	cmp.eq p6,p7=r26,r18			// Only compare if page is present
-	;;
-(p6)	itc.d r25				// install updated PTE
+	mov r24 = PAGE_SHIFT << 2
 	;;
 	/*
-	 * Tell the assemblers dependency-violation checker that the above "itc" instructions
-	 * cannot possibly affect the following loads:
+	 * "nta" is a hint not to allocate the cache line elsewhere than in L2,
+	 * to bias it to be replaced and not to write it back into L3.
+	 *
+	 * We do not care about the result of "cmpxchg". It only makes sure we do not
+	 * overwrite a PTE that has been modified by someone else in the meantime.
+	 * We'll read back the in-memory PTE later.
 	 */
-	dv_serialize_data
-
-	ld8 r18=[r17]				// read PTE again
+(p6)	cmpxchg8.acq.nta r26 = [r17],r25,ar.ccv	// Only update if page is present
+	/*
+	 * We load the new translation independently of the success of "cmpxchg". 
+	 * Should "cmpxchg" have failed, we'll purge the new translation later.
+	 */
+(p6)	itc.d r25				// Install updated PTE if page is present
+	;;					// "itc" must be the last in the group
+	/*
+	 * We make sure that "itc" is visible to generated purges (like "ptc.ga")
+	 * before we re-read the PTE.
+	 * (No, we are not going to use the freshly inserted translation for the next
+	 * "ld".)
+	 * A simple ";;" does not make sure that the purges / invalidations go all the
+	 * way down. E.g. in case of page size of 64 K, up to 16 L1 DTLB entries may be
+	 * purged and all the L1D cache lines brought in via these translations need to
+	 * be invalidated.
+	 */
+(p6)	srlz.d
+	/*
+	 * No need for ";;", the following "ld" can be in the same group as "srlz.d".
+	 */
+(p6)	ld8.nta r18 = [r17]			// Read PTE again
 	;;
-	cmp.eq p6,p7=r18,r25			// is it same as the newly installed
+(p6)	cmp.eq p0, p7 = r18, r25		// Is it same as we wanted to install?
 	;;
+	/*
+	 * The new translation (or the old one if "p6" is off) gets purged if:
+	 * - the page is not present
+	 * - the in memory PTE is not what we wanted to write out because:
+	 *   + someone else has modified it after our successful "cmpxchg"
+	 *   + "cmpxchg" has failed (with the exception when someone else has set the
+	 *     very same dirty bit as we wanted to => our new translation is correct)
+	 */
 (p7)	ptc.l r16,r24
 	mov b0=r29				// restore b0
 	mov ar.ccv=r28
@@ -602,7 +632,10 @@ END(dirty_bit)
 // 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27)
 ENTRY(iaccess_bit)
 	DBG_FAULT(9)
-	// Like Entry 8, except for instruction access
+	/*
+	 * Like Entry 8, except for instruction access.
+	 * For the remarks on cache hints and synchronization issues see there.
+	 */
 	mov r16=cr.ifa				// get the address that caused the fault
 	movl r30=1f				// load continuation point in case of nested fault
 	mov r31=pr				// save predicates
@@ -623,28 +656,20 @@ ENTRY(iaccess_bit)
 #ifdef CONFIG_SMP
 	mov r28=ar.ccv				// save ar.ccv
 	;;
-1:	ld8 r18=[r17]
+1:	ld8.bias.nta r18 = [r17]
 	;;
 	mov ar.ccv=r18				// set compare value for cmpxchg
 	or r25=_PAGE_A,r18			// set the accessed bit
 	tbit.z p7,p6 = r18,_PAGE_P_BIT	 	// Check present bit
+	mov r24 = PAGE_SHIFT << 2
 	;;
-(p6)	cmpxchg8.acq r26=[r17],r25,ar.ccv	// Only if page present
-	mov r24=PAGE_SHIFT<<2
-	;;
-(p6)	cmp.eq p6,p7=r26,r18			// Only if page present
-	;;
-(p6)	itc.i r25				// install updated PTE
+(p6)	cmpxchg8.acq.nta r26 = [r17],r25,ar.ccv	// Only update if page is present
+(p6)	itc.i r25				// Install updated PTE if page is present
 	;;
-	/*
-	 * Tell the assemblers dependency-violation checker that the above "itc" instructions
-	 * cannot possibly affect the following loads:
-	 */
-	dv_serialize_data
-
-	ld8 r18=[r17]				// read PTE again
+(p6)	srlz.d
+(p6)	ld8.nta r18 = [r17]			// Read PTE again
 	;;
-	cmp.eq p6,p7=r18,r25			// is it same as the newly installed
+(p6)	cmp.eq p0, p7 = r18, r25		// Is it same as we wanted to install?
 	;;
 (p7)	ptc.l r16,r24
 	mov b0=r29				// restore b0
@@ -668,7 +693,10 @@ END(iaccess_bit)
 // 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55)
 ENTRY(daccess_bit)
 	DBG_FAULT(10)
-	// Like Entry 8, except for data access
+	/*
+	 * Like Entry 8, except for data access.
+	 * For the remarks on cache hints and synchronization issues see there.
+	 */
 	mov r16=cr.ifa				// get the address that caused the fault
 	movl r30=1f				// load continuation point in case of nested fault
 	;;
@@ -678,27 +706,20 @@ ENTRY(daccess_bit)
 #ifdef CONFIG_SMP
 	mov r28=ar.ccv				// save ar.ccv
 	;;
-1:	ld8 r18=[r17]
-	;;					// avoid RAW on r18
+1:	ld8.bias.nta r18 = [r17]
+	;;
 	mov ar.ccv=r18				// set compare value for cmpxchg
 	or r25=_PAGE_A,r18			// set the dirty bit
 	tbit.z p7,p6 = r18,_PAGE_P_BIT		// Check present bit
+	mov r24 = PAGE_SHIFT << 2
 	;;
-(p6)	cmpxchg8.acq r26=[r17],r25,ar.ccv	// Only if page is present
-	mov r24=PAGE_SHIFT<<2
-	;;
-(p6)	cmp.eq p6,p7=r26,r18			// Only if page is present
-	;;
-(p6)	itc.d r25				// install updated PTE
-	/*
-	 * Tell the assemblers dependency-violation checker that the above "itc" instructions
-	 * cannot possibly affect the following loads:
-	 */
-	dv_serialize_data
+(p6)	cmpxchg8.acq.nta r26 = [r17],r25,ar.ccv	// Only update if page is present
+(p6)	itc.d r25				// Install updated PTE if page is present
 	;;
-	ld8 r18=[r17]				// read PTE again
+(p6)	srlz.d
+(p6)	ld8.nta r18 = [r17]			// Read PTE again
 	;;
-	cmp.eq p6,p7=r18,r25			// is it same as the newly installed
+(p6)	cmp.eq p0, p7 = r18, r25		// Is it same as we wanted to install?
 	;;
 (p7)	ptc.l r16,r24
 	mov ar.ccv=r28

-
To unsubscribe from this list: send the line "unsubscribe linux-ia64" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Received on Thu Mar 16 00:34:57 2006

This archive was generated by hypermail 2.1.8 : 2006-03-16 00:35:06 EST