Following up on the previous discussion, this patch optimizes how we handle r8/r10 in the syscall return path. If there is no pending work to be done, we skip storing/loading r8/r10, cutting out 4 memory references in the fast path. This results in a net saving of 4 cycles. Signed-off-by: Ken Chen <kenneth.w.chen@intel.com> Signed-off-by: Rohit Seth <rohit.seth@intel.com> --- linux-ia64-release/arch/ia64/kernel/entry.S.orig 2005-01-26 11:41:24.000000000 -0800 +++ linux-ia64-release/arch/ia64/kernel/entry.S 2005-01-26 12:31:52.000000000 -0800 @@ -558,7 +558,8 @@ GLOBAL_ENTRY(ia64_trace_syscall) .mem.offset 0,0; st8.spill [r2]=r8 // store return value in slot for r8 .mem.offset 8,0; st8.spill [r3]=r10 // clear error indication in slot for r10 br.call.sptk.many rp=syscall_trace_leave // give parent a chance to catch return value -.ret3: br.cond.sptk ia64_leave_syscall + cmp.eq p9,p8=r0,r0 + br.cond.sptk ia64_leave_syscall strace_error: ld8 r3=[r2] // load pt_regs.r8 @@ -619,12 +620,10 @@ END(ia64_ret_from_clone) // fall through GLOBAL_ENTRY(ia64_ret_from_syscall) PT_REGS_UNWIND_INFO(0) + cmp.eq p8,p9=r0,r0 // p8: ret val in live reg, p9: ret val in pt_regs cmp.ge p6,p7=r8,r0 // syscall executed successfully? adds r2=PT(R8)+16,sp // r2 = &pt_regs.r8 - adds r3=PT(R10)+16,sp // r3 = &pt_regs.r10 - ;; -(p6) st8 [r2]=r8 // store return value in slot for r8 -(p6) st8 [r3]=r0 // clear error indication in slot for r10 + mov r10=r0 // clear error indication in r10 (p7) br.cond.spnt handle_syscall_error // handle potential syscall failure END(ia64_ret_from_syscall) // fall through @@ -715,10 +714,10 @@ ENTRY(ia64_leave_syscall) ;; mov r16=ar.bsp // M2 get existing backing store pointer (p6) cmp4.ne.unc p6,p0=r15, r0 // any special work pending?
-(p6) br.cond.spnt .work_pending +(p6) br.cond.spnt .work_pending_syscall ;; // start restoring the state saved on the kernel stack (struct pt_regs): - ld8 r8=[r2],16 +(p9) ld8 r8=[r2],16 ld8 r9=[r3],16 mov f6=f0 // clear f6 ;; @@ -726,9 +725,10 @@ ENTRY(ia64_leave_syscall) rsm psr.i | psr.ic // M2 initiate turning off of interrupt and interruption collection mov f9=f0 // clear f9 - ld8 r10=[r2],16 + .pred.rel.mutex p8,p9 +(p9) ld8 r10=[r2],16 ld8 r11=[r3],16 - mov f7=f0 // clear f7 +(p8) add r2=32,r2 ;; ld8 r29=[r2],16 // load cr.ipsr ld8 r28=[r3],16 // load cr.iip @@ -760,7 +760,7 @@ ENTRY(ia64_leave_syscall) ;; srlz.d // M0 ensure interruption collection is off ld8.fill r13=[r3],16 - nop.i 0 + mov f7=f0 // clear f7 ;; ld8.fill r12=[r2] // restore r12 (sp) ld8.fill r15=[r3] // restore r15 @@ -770,8 +770,8 @@ ENTRY(ia64_leave_syscall) (pUStk) st1 [r14]=r17 mov b6=r18 // I0 restore b6 ;; - shr.u r18=r19,16 // I0|1 get byte size of existing "dirty" partition mov r14=r0 // clear r14 + shr.u r18=r19,16 // I0|1 get byte size of existing "dirty" partition (pKStk) br.cond.dpnt.many skip_rbs_switch mov.m ar.ccv=r0 // clear ar.ccv @@ -1083,6 +1083,10 @@ skip_rbs_switch: * On exit: * p6 = TRUE if work-pending-check needs to be redone */ +.work_pending_syscall: +(p8) st8 [r2]=r8,16 + ;; +(p8) st8 [r2]=r10,16 .work_pending: tbit.nz p6,p0=r31,TIF_SIGDELAYED // signal delayed from MCA/INIT/NMI/PMI context? 
(p6) br.cond.sptk.few .sigdelayed @@ -1104,12 +1108,14 @@ skip_rbs_switch: ;; (pKStk) st4 [r20]=r0 // preempt_count() <- 0 #endif + cmp.eq p9,p8=r0,r0 (pLvSys)br.cond.sptk.many .work_processed_syscall // re-check br.cond.sptk.many .work_processed_kernel // re-check .notify: (pUStk) br.call.spnt.many rp=notify_resume_user .ret10: cmp.ne p6,p0=r0,r0 // p6 <- 0 + cmp.eq p9,p8=r0,r0 (pLvSys)br.cond.sptk.many .work_processed_syscall // don't re-check br.cond.sptk.many .work_processed_kernel // don't re-check @@ -1121,6 +1127,7 @@ skip_rbs_switch: .sigdelayed: br.call.sptk.many rp=do_sigdelayed cmp.eq p6,p0=r0,r0 // p6 <- 1, always re-check + cmp.eq p9,p8=r0,r0 (pLvSys)br.cond.sptk.many .work_processed_syscall // re-check br.cond.sptk.many .work_processed_kernel // re-check @@ -1135,17 +1142,11 @@ ENTRY(handle_syscall_error) */ PT_REGS_UNWIND_INFO(0) ld8 r3=[r2] // load pt_regs.r8 - sub r9=0,r8 // negate return value to get errno ;; - mov r10=-1 // return -1 in pt_regs.r10 to indicate error cmp.eq p6,p7=r3,r0 // is pt_regs.r8==0? - adds r3=16,r2 // r3=&pt_regs.r10 - ;; -(p6) mov r9=r8 -(p6) mov r10=0 ;; - st8 [r2]=r9 // store errno in pt_regs.r8 - st8 [r3]=r10 // store error indication in pt_regs.r10 +(p7) mov r10=-1 +(p7) sub r8=0,r8 // negate return value to get errno br.cond.sptk ia64_leave_syscall END(handle_syscall_error) - To unsubscribe from this list: send the line "unsubscribe linux-ia64" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Received on Wed Jan 26 16:03:18 2005
This archive was generated by hypermail 2.1.8 : 2005-08-02 09:20:35 EST