Re: [Linux-ia64] unaligned access by loadpair instruction

From: Hideki Yamamoto <hideki_at_hpc.bs1.fc.nec.co.jp>
Date: 2002-12-10 22:12:20
 Hi David,

>   Tony> I'd have to reconstruct from scratch (can't do it from memory,
>   Tony> those neurons have been re-assigned :-(
> OK, it looks like the fix is pretty straight-forward.  The patch below
> _should_ work, though I haven't tested it extensively.
> 
> Hideki, can you try it out?  BTW: I think your test program is buggy.

 OK, I will try running it on a kernel with the patch
 you sent applied.

> The core-loop isn't right because br.ctop renames by one register
> position, not two.  I attached a version of the test program which
> does what you wanted.

 Sorry, I still do not understand why my program is buggy,
 even after looking at your program. The increment value
 in my program is 8 bytes on purpose. :-)

 Thank you for sending the patch.

End of my email
--
Yours faithfully,
Hideki Yamamoto   (V).v.(V) # Empowered by Innovation

> 
> ===== arch/ia64/kernel/unaligned.c 1.6 vs edited =====
> --- 1.6/arch/ia64/kernel/unaligned.c	Thu Mar 14 00:28:41 2002
> +++ edited/arch/ia64/kernel/unaligned.c	Mon Dec  9 18:24:54 2002
> @@ -486,7 +486,21 @@
>  	DPRINT("*0x%lx=0x%lx NaT=%d new unat: %p=%lx\n", addr, val, nat, (void *) unat,*unat);
>  }
>  
> -#define IA64_FPH_OFFS(r) (r - IA64_FIRST_ROTATING_FR)
> +/*
> + * Return the (rotated) index for floating point register REGNUM (REGNUM must be in the
> + * range from 32-127; the result is in the range from 0-95).
> + */
> +static inline unsigned long
> +fph_index (struct pt_regs *regs, long regnum)
> +{
> +	unsigned long rrb_fr = (regs->cr_ifs >> 25) & 0x7f;
> +
> +	regnum -= IA64_FIRST_ROTATING_FR;
> +	regnum += rrb_fr;
> +	if (regnum >= 96)
> +		regnum -= 96;
> +	return regnum;
> +}
>  
>  static void
>  setfpreg (unsigned long regnum, struct ia64_fpreg *fpval, struct pt_regs *regs)
> @@ -507,7 +521,7 @@
>  	 */
>  	if (regnum >= IA64_FIRST_ROTATING_FR) {
>  		ia64_sync_fph(current);
> -		current->thread.fph[IA64_FPH_OFFS(regnum)] = *fpval;
> +		current->thread.fph[fph_index(regs, regnum)] = *fpval;
>  	} else {
>  		/*
>  		 * pt_regs or switch_stack ?
> @@ -566,7 +580,7 @@
>  	 */
>  	if (regnum >= IA64_FIRST_ROTATING_FR) {
>  		ia64_flush_fph(current);
> -		*fpval = current->thread.fph[IA64_FPH_OFFS(regnum)];
> +		*fpval = current->thread.fph[fph_index(regs, regnum)];
>  	} else {
>  		/*
>  		 * f0 = 0.0, f1= 1.0. Those registers are constant and are thus
> ----------------------------------------------------
> #define n 100
> 
> double d[n],d2[n+1];
> 
> main() {
>     int i,j;
> 
>     for (i = 0; i < n; i++)  {
> 	    d[i] = i;
> 	    d2[i] = 0.0;
>     }
>     copy_by_loadpair(&d, &d2, n/2-1);
>     for (i = 0; i < n; i++) {
> 	    if (d2[i] != i)
> 		    printf("d2[%d] = %f, should be d[%d]=%f\n",
> 			   i, d2[i], i, d[i]);
>     }
> }
> 
> ----------------------------------------------------
> 	.file	"a.c"
> 	.pred.safe_across_calls p1-p5,p16-p63
> .text
> 	.align 16
> 	.global copy_by_loadpair
> 	.proc copy_by_loadpair
> copy_by_loadpair:
>         alloc   r8=ar.pfs,3,6,0,0 ;;
>         mov     r15=r32
>         mov     r2=r33
>         add     r3=8,r33
>         mov     ar.lc=r34
>         mov     pr.rot=0x10000
>         mov     ar.ec=5 ;;
> L1:
>   (p16) ldfpd   f32,f37=[r15],16
>   (p20) stfd    [r2]=f36,16
>   (p20) stfd    [r3]=f41,16
>         br.ctop.sptk    L1;;
>         br.ret.sptk.many        b0 ;;
> 	.endp get_by_loadpair
> 
> _______________________________________________
> Linux-IA64 mailing list
> Linux-IA64@linuxia64.org
> http://lists.linuxia64.org/lists/listinfo/linux-ia64
> 
Received on Tue Dec 10 03:16:45 2002

This archive was generated by hypermail 2.1.8 : 2005-08-02 09:20:11 EST