Re: accessed/dirty bit handler tuning

From: Luck, Tony <tony.luck_at_intel.com>
Date: 2006-03-30 09:57:40
Ian,

Yes ... I think I goofed when mailing to Zoltan and the list ... the copy
never showed up on the list.  Here is the version of the patch:

-Tony

---

diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S
index 829a43c..86123c1 100644
--- a/arch/ia64/kernel/ivt.S
+++ b/arch/ia64/kernel/ivt.S
@@ -552,48 +552,56 @@ ENTRY(dirty_bit)
 	movl r30=1f				// load continuation point in case of nested fault
 	;;
 	thash r17=r16				// compute virtual address of L3 PTE
+	mov r31=pr
 	mov r29=b0				// save b0 in case of nested fault
-	mov r31=pr				// save pr
 #ifdef CONFIG_SMP
 	mov r28=ar.ccv				// save ar.ccv
 	;;
-1:	ld8 r18=[r17]
-	;;					// avoid RAW on r18
+1:	ld8.bias.nta r18=[r17]
+	;;
 	mov ar.ccv=r18				// set compare value for cmpxchg
 	or r25=_PAGE_D|_PAGE_A,r18		// set the dirty and accessed bits
 	tbit.z p7,p6 = r18,_PAGE_P_BIT		// Check present bit
 	;;
-(p6)	cmpxchg8.acq r26=[r17],r25,ar.ccv	// Only update if page is present
-	mov r24=PAGE_SHIFT<<2
-	;;
-(p6)	cmp.eq p6,p7=r26,r18			// Only compare if page is present
-	;;
+	/*
+	 * We do not test for the result of "cmpxchg". It only makes sure we do not
+	 * overwrite a PTE that has been modified by someone else in the meantime.
+	 * We'll read back the in memory PTE later.
+	 */
+(p6)	cmpxchg8.acq.nta r26=[r17],r25,ar.ccv	// Only update if page is present
 (p6)	itc.d r25				// install updated PTE
 	;;
 	/*
-	 * Tell the assemblers dependency-violation checker that the above "itc" instructions
-	 * cannot possibly affect the following loads:
+	 * We make sure itc.d completes before re-reading the PTE.
 	 */
-	dv_serialize_data
-
-	ld8 r18=[r17]				// read PTE again
+(p6)	srlz.d
+(p6)	ld8.nta r18=[r17]				// Read PTE again
 	;;
-	cmp.eq p6,p7=r18,r25			// is it same as the newly installed
+(p6)	cmp.eq p0,p7=r18,r25			// Is it same as we wanted to install?
+	mov r24=PAGE_SHIFT << 2
 	;;
+	/*
+	 * The new translation (or the old one if "p6" is off) gets purged if:
+	 * - the page is not present
+	 * - the in memory PTE is not what we wanted to write out because:
+	 *   + someone else has modified it after our successful "cmpxchg"
+	 *   + "cmpxchg" has failed (with the exception when someone else has set the
+	 *     very same dirty bit as we wanted to => our new translation is correct)
+	 */
 (p7)	ptc.l r16,r24
 	mov b0=r29				// restore b0
 	mov ar.ccv=r28
 #else
 	;;
 1:	ld8 r18=[r17]
-	;;					// avoid RAW on r18
+	;;
 	or r18=_PAGE_D|_PAGE_A,r18		// set the dirty and accessed bits
 	mov b0=r29				// restore b0
 	;;
 	st8 [r17]=r18				// store back updated PTE
 	itc.d r18				// install updated PTE
 #endif
-	mov pr=r31,-1				// restore pr
+	mov pr=r31,-1
 	rfi
 END(dirty_bit)
 
@@ -602,7 +610,10 @@ END(dirty_bit)
 // 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27)
 ENTRY(iaccess_bit)
 	DBG_FAULT(9)
-	// Like Entry 8, except for instruction access
+	/*
+	 * Like Entry 8, except for instruction access.
+	 * For the remarks on cache hints and synchronization issues see there.
+	 */
 	mov r16=cr.ifa				// get the address that caused the fault
 	movl r30=1f				// load continuation point in case of nested fault
 	mov r31=pr				// save predicates
@@ -623,33 +634,25 @@ #endif /* CONFIG_ITANIUM */
 #ifdef CONFIG_SMP
 	mov r28=ar.ccv				// save ar.ccv
 	;;
-1:	ld8 r18=[r17]
+1:	ld8.bias.nta r18=[r17]
 	;;
 	mov ar.ccv=r18				// set compare value for cmpxchg
 	or r25=_PAGE_A,r18			// set the accessed bit
-	tbit.z p7,p6 = r18,_PAGE_P_BIT	 	// Check present bit
+	tbit.z p7,p6=r18,_PAGE_P_BIT		// Check present bit
 	;;
-(p6)	cmpxchg8.acq r26=[r17],r25,ar.ccv	// Only if page present
-	mov r24=PAGE_SHIFT<<2
+(p6)	cmpxchg8.acq.nta r26=[r17],r25,ar.ccv	// Only update if page is present
+(p6)	itc.i r25				// Install updated PTE if page is present
 	;;
-(p6)	cmp.eq p6,p7=r26,r18			// Only if page present
-	;;
-(p6)	itc.i r25				// install updated PTE
-	;;
-	/*
-	 * Tell the assemblers dependency-violation checker that the above "itc" instructions
-	 * cannot possibly affect the following loads:
-	 */
-	dv_serialize_data
-
-	ld8 r18=[r17]				// read PTE again
+(p6)	srlz.d
+(p6)	ld8.nta r18=[r17]				// Read PTE again
 	;;
-	cmp.eq p6,p7=r18,r25			// is it same as the newly installed
+(p6)	cmp.eq p0,p7=r18,r25			// Is it same as we wanted to install?
+	mov r24=PAGE_SHIFT << 2
 	;;
 (p7)	ptc.l r16,r24
 	mov b0=r29				// restore b0
 	mov ar.ccv=r28
-#else /* !CONFIG_SMP */
+#else
 	;;
 1:	ld8 r18=[r17]
 	;;
@@ -658,7 +661,7 @@ #else /* !CONFIG_SMP */
 	;;
 	st8 [r17]=r18				// store back updated PTE
 	itc.i r18				// install updated PTE
-#endif /* !CONFIG_SMP */
+#endif
 	mov pr=r31,-1
 	rfi
 END(iaccess_bit)
@@ -668,50 +671,47 @@ END(iaccess_bit)
 // 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55)
 ENTRY(daccess_bit)
 	DBG_FAULT(10)
-	// Like Entry 8, except for data access
+	/*
+	 * Like Entry 8, except for data access.
+	 * For the remarks on cache hints and synchronization issues see there.
+	 */
 	mov r16=cr.ifa				// get the address that caused the fault
 	movl r30=1f				// load continuation point in case of nested fault
 	;;
 	thash r17=r16				// compute virtual address of L3 PTE
 	mov r31=pr
-	mov r29=b0				// save b0 in case of nested fault)
+	mov r29=b0				// save b0 in case of nested fault
 #ifdef CONFIG_SMP
 	mov r28=ar.ccv				// save ar.ccv
-	;;
-1:	ld8 r18=[r17]
-	;;					// avoid RAW on r18
-	mov ar.ccv=r18				// set compare value for cmpxchg
-	or r25=_PAGE_A,r18			// set the dirty bit
-	tbit.z p7,p6 = r18,_PAGE_P_BIT		// Check present bit
 	;;
-(p6)	cmpxchg8.acq r26=[r17],r25,ar.ccv	// Only if page is present
-	mov r24=PAGE_SHIFT<<2
+1:	ld8.bias.nta r18=[r17]
 	;;
-(p6)	cmp.eq p6,p7=r26,r18			// Only if page is present
+	mov ar.ccv=r18				// set compare value for cmpxchg
+	or r25=_PAGE_A,r18			// set the accessed bit
+	tbit.z p7,p6=r18,_PAGE_P_BIT		// Check present bit
 	;;
-(p6)	itc.d r25				// install updated PTE
-	/*
-	 * Tell the assemblers dependency-violation checker that the above "itc" instructions
-	 * cannot possibly affect the following loads:
-	 */
-	dv_serialize_data
+(p6)	cmpxchg8.acq.nta r26=[r17],r25,ar.ccv	// Only update if page is present
+(p6)	itc.d r25				// Install updated PTE if page is present
 	;;
-	ld8 r18=[r17]				// read PTE again
+(p6)	srlz.d
+(p6)	ld8.nta r18=[r17]				// Read PTE again
 	;;
-	cmp.eq p6,p7=r18,r25			// is it same as the newly installed
+(p6)	cmp.eq p0,p7=r18,r25			// Is it same as we wanted to install?
+	mov r24=PAGE_SHIFT << 2
 	;;
 (p7)	ptc.l r16,r24
+	mov b0=r29				// restore b0
 	mov ar.ccv=r28
 #else
 	;;
 1:	ld8 r18=[r17]
-	;;					// avoid RAW on r18
+	;;
 	or r18=_PAGE_A,r18			// set the accessed bit
+	mov b0=r29				// restore b0
 	;;
 	st8 [r17]=r18				// store back updated PTE
 	itc.d r18				// install updated PTE
 #endif
-	mov b0=r29				// restore b0
 	mov pr=r31,-1
 	rfi
 END(daccess_bit)
-
To unsubscribe from this list: send the line "unsubscribe linux-ia64" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Received on Thu Mar 30 10:00:06 2006

This archive was generated by hypermail 2.1.8 : 2006-03-30 10:02:19 EST