Re: [Linux-ia64] [PATCH] Performance Patch for TLB misses

From: Mario Smarduch <cms063_at_email.mot.com>
Date: 2002-11-08 04:14:54
Christian Hildner wrote:

> Hi,
>
> I created a patch that will (hopefully) increase the performance for TLB
> miss handling.
>
> - enlarge the distance between rsm psr.dt and srlz.d (the movl
> instructions in between do not need data access)
>
> - let the processor check for the case of page not present (because for
> useable systems the common case is that the page is present, so optimize
> for that and drop the conditional branch)
>
> - let the processor also check for privilege level because it's very
> very rare

Wouldn't removal of this possibly cause cached and uncached mappings
to the same page?

- mario.

>
>
> The second modification will result (rarely) in the insertion of an
> unused TLB entry. But this entry will be overwritten with the useable
> entry when the page is swapped in. The time needed for the insertion of
> the TLB entry and the additional interruption will be negligible
> compared to the time needed for swapping in.
>
> Since I'm still waiting for an Itanium 2 (Itanium 1 does not have any
> relevance nowadays), I have done no performance testing. All Itanium 2
> owners are invited to test it.
>
> Please tell me your comments.
>
> Christian
>
>   ------------------------------------------------------------------------
> --- ivt.S_orig  Tue Mar 19 11:22:28 2002
> +++ ivt.S  Thu Nov  7 13:53:02 2002
> @@ -114,8 +114-,8 @@
>          *      - the faulting virtual address has no L1, L2, or L3 mapping
>          */
>         mov r16=cr.ifa                          // get address that caused the TLB miss
> -       ;;
>         rsm psr.dt                              // use physical addressing for data
> +       ;;
>         mov r31=pr                              // save the predicate registers
>         mov r19=IA64_KR(PT_BASE)                // get page table base address
>         shl r21=r16,3                           // shift bit 60 into sign bit
> @@ -125,7 +124-,6 @@
>         shr.u r18=r16,PGDIR_SHIFT               // get bits 33-63 of the faulting address
>         ;;
>  (p7)   dep r17=r17,r19,(PAGE_SHIFT-3),3        // put region number bits in place
> -       srlz.d                                  // ensure "rsm psr.dt" has taken effect
>  (p6)   movl r19=__pa(SWAPPER_PGD_ADDR)         // region 5 is rooted at swapper_pg_dir
>  (p6)   shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT
>  (p7)   shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3
> @@ -135,6 +134-,7 @@
>         cmp.eq p7,p6=0,r21                      // unused address bits all zeroes?
>         shr.u r18=r16,PMD_SHIFT                 // shift L2 index into position
>         ;;
> +       srlz.d                                  // ensure "rsm psr.dt" has taken effect
>         ld8 r17=[r17]                           // fetch the L1 entry (may be 0)
>         ;;
>  (p7)   cmp.eq p6,p7=r17,r0                     // was L1 entry NULL?
> @@ -212,9 +210-,6 @@
>  1:     ld8 r18=[r17]                           // read L3 PTE
>         ;;
>         mov b0=r29
> -       tbit.z p6,p0=r18,_PAGE_P_BIT            // page present bit cleared?
> -(p6)   br.cond.spnt page_fault
> -       ;;
>         itc.i r18
>         ;;
>  #ifdef CONFIG_SMP
> @@ -250,9 +244-,6 @@
>  1:     ld8 r18=[r17]                           // read L3 PTE
>         ;;
>         mov b0=r29
> -       tbit.z p6,p0=r18,_PAGE_P_BIT            // page present bit cleared?
> -(p6)   br.cond.spnt page_fault
> -       ;;
>         itc.d r18
>         ;;
>  #ifdef CONFIG_SMP
> @@ -274,12 +268-,11 @@
>         DBG_FAULT(3)
>         mov r16=cr.ifa          // get address that caused the TLB miss
>         movl r17=PAGE_KERNEL
> -       mov r21=cr.ipsr
>         movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
>         mov r31=pr
>         ;;
>  #ifdef CONFIG_DISABLE_VHPT
> -       shr.u r22=r16,61                        // get the region number into r21
> +       shr.u r22=r16,61                        // get the region number
>         ;;
>         cmp.gt p8,p0=6,r22                      // user mode
>         ;;
> @@ -289,16 +280-,13 @@
>  (p8)   mov r29=b0                              // save b0
>  (p8)   br.cond.dptk itlb_fault
>  #endif
> -       extr.u r23=r21,IA64_PSR_CPL0_BIT,2      // extract psr.cpl
>         and r19=r19,r16         // clear ed, reserved bits, and PTE control bits
>         shr.u r18=r16,57        // move address bit 61 to bit 4
>         ;;
>         andcm r18=0x10,r18      // bit 4=~address-bit(61)
> -       cmp.ne p8,p0=r0,r23     // psr.cpl != 0?
>         or r19=r17,r19          // insert PTE control bits into r19
>         ;;
>         or r19=r19,r18          // set bit 4 (uncached) if the access was to region 6
> -(p8)   br.cond.spnt page_fault
>         ;;
>         itc.i r19               // insert the TLB entry
>         mov pr=r31,-1
> @@ -318,7 +308-,7 @@
>         mov r31=pr
>         ;;
>  #ifdef CONFIG_DISABLE_VHPT
> -       shr.u r22=r16,61                        // get the region number into r21
> +       shr.u r22=r16,61                        // get the region number
>         ;;
>         cmp.gt p8,p0=6,r22                      // access to region 0-5
>         ;;
> @@ -328,15 +318-,11 @@
>  (p8)   mov r29=b0                              // save b0
>  (p8)   br.cond.dptk dtlb_fault
>  #endif
> -       extr.u r23=r21,IA64_PSR_CPL0_BIT,2      // extract psr.cpl
>         tbit.nz p6,p7=r20,IA64_ISR_SP_BIT       // is speculation bit on?
>         and r19=r19,r16         // clear ed, reserved bits, and PTE control bits
>         shr.u r18=r16,57        // move address bit 61 to bit 4
>         ;;
>         andcm r18=0x10,r18      // bit 4=~address-bit(61)
> -       cmp.ne p8,p0=r0,r23
> -(p8)   br.cond.spnt page_fault
> -
>         dep r21=-1,r21,IA64_PSR_ED_BIT,1
>         or r19=r19,r17          // insert PTE control bits into r19
>         ;;
> @@ -1412,3 +138-,5 @@
>  // 0x7f00 Entry 67 (size 16 bundles) Reserved
>         DBG_FAULT(67)
>         FAULT(67)
Received on Thu Nov 07 09:15:06 2002

This archive was generated by hypermail 2.1.8 : 2005-08-02 09:20:10 EST