pgd/pmd page error of kernel 2.6.8-rc3 when CONFIG_PREEMPT=y

From: Zhang, Yanmin <yanmin.zhang_at_intel.com>
Date: 2004-08-09 14:35:43
There is a bug in kernel 2.6.8-rc3 when CONFIG_PREEMPT=y. See the error
log below.

Aug  6 09:23:53 tpcc-el4a3 kernel: mm/memory.c:449: bad pgd
e0000003f45f4000.
Aug  6 09:23:53 tpcc-el4a3 kernel: mm/memory.c:128: bad pgd
e000000108a00000.
Aug  6 09:23:54 tpcc-el4a3 kernel: kernel BUG at mm/page_alloc.c:792!
Aug  6 09:23:54 tpcc-el4a3 kernel: rm[16480]: bugcheck! 0 [1]
Aug  6 09:23:54 tpcc-el4a3 kernel: Modules linked in:
Aug  6 09:23:54 tpcc-el4a3 kernel:
Aug  6 09:23:54 tpcc-el4a3 kernel: Pid: 16480, CPU 1, comm:
rm
Aug  6 09:23:54 tpcc-el4a3 kernel: psr : 0000101008026018 ifs :
8000000000000288 ip  : [<a0000001000e6ca0>]    Not tainted
Aug  6 09:23:54 tpcc-el4a3 kernel: ip is at __free_pages+0x140/0x160
Aug  6 09:23:54 tpcc-el4a3 kernel: unat: 0000000000000000 pfs :
0000000000000288 rsc : 0000000000000003
Aug  6 09:23:54 tpcc-el4a3 kernel: rnat: 0009804c8a70033f bsps:
00000000000bf2fd pr  : 5666a5a655a6a969
Aug  6 09:23:54 tpcc-el4a3 kernel: ldrs: 0000000000000000 ccv :
0000000000000004 fpsr: 0009804c8a70033f
Aug  6 09:23:56 tpcc-el4a3 kernel: csd : 0000000000000000 ssd :
0000000000000000
Aug  6 09:23:56 tpcc-el4a3 kernel: b0  : a0000001000e6ca0 b6  :
a0000001000e42a0 b7  : a00000010008dae0
Aug  6 09:23:56 tpcc-el4a3 kernel: f6  : 1003e0fc0fc0fc0fc0fc1 f7  :
0ffdc8dc0000000000000
Aug  6 09:23:56 tpcc-el4a3 kernel: f8  : 1003e0000000000000280 f9  :
1003e00000000000028a0
Aug  6 09:23:56 tpcc-el4a3 kernel: f10 : 1003e0000000010400000 f11 :
1003e000000003c893320
Aug  6 09:23:56 tpcc-el4a3 kernel: r1  : a000000100aba240 r2  :
0000000000000001 r3  : e000000108c70ec0
Aug  6 09:23:56 tpcc-el4a3 kernel: r8  : 0000000000000023 r9  :
0000000000000001 r10 : 0000000000000002
Aug  6 09:23:56 tpcc-el4a3 kernel: r11 : 0000000000000003 r12 :
e000000108c77d30 r13 : e000000108c70000
Aug  6 09:23:56 tpcc-el4a3 kernel: r14 : 0000000000000004 r15 :
0000000000004000 r16 : e000000108c70ea0
Aug  6 09:23:57 tpcc-el4a3 kernel: r17 : e000000105fe7de8 r18 :
0000000000000002 r19 : 0000000000000004
Aug  6 09:23:57 tpcc-el4a3 kernel: r20 : 0000000000000001 r21 :
0000000000000000 r22 : 0000000000004000
Aug  6 09:23:57 tpcc-el4a3 kernel: r23 : e000000108c77cf0 r24 :
0000000000000003 r25 : e000000108c70eb4
Aug  6 09:23:57 tpcc-el4a3 kernel: r26 : e000000108c70eb0 r27 :
e000000108c70ec0 r28 : 0000000000000000
Aug  6 09:23:57 tpcc-el4a3 kernel: r29 : e000000108c70ec0 r30 :
0000001008022018 r31 : 0000000000000004
Aug  6 09:23:57 tpcc-el4a3 kernel:
Aug  6 09:23:57 tpcc-el4a3 kernel: Call Trace:
Aug  6 09:23:57 tpcc-el4a3 kernel:  [<a000000100015d60>]
show_stack+0x80/0xa0
Aug  6 09:23:58 tpcc-el4a3 kernel:
sp=e000000108c77900 bsp=e000000108c71370
Aug  6 09:23:58 tpcc-el4a3 kernel:  [<a00000010003c0a0>] die+0x1c0/0x2a0
Aug  6 09:23:58 tpcc-el4a3 kernel:
sp=e000000108c77ad0 bsp=e000000108c71338
Aug  6 09:23:58 tpcc-el4a3 kernel:  [<a00000010003c400>]
ia64_bad_break+0x220/0x340
Aug  6 09:23:58 tpcc-el4a3 kernel:
sp=e000000108c77ad0 bsp=e000000108c71310
Aug  6 09:23:58 tpcc-el4a3 kernel:  [<a00000010000e4e0>]
ia64_leave_kernel+0x0/0x270
Aug  6 09:23:58 tpcc-el4a3 kernel:
sp=e000000108c77b60 bsp=e000000108c71310
Aug  6 09:23:58 tpcc-el4a3 kernel:  [<a0000001000e6ca0>]
__free_pages+0x140/0x160
Aug  6 09:23:58 tpcc-el4a3 kernel:
sp=e000000108c77d30 bsp=e000000108c712d0
Aug  6 09:23:58 tpcc-el4a3 kernel:  [<a000000100058fa0>]
check_pgt_cache+0x120/0x160
Aug  6 09:23:58 tpcc-el4a3 kernel:
sp=e000000108c77d30 bsp=e000000108c71298
Aug  6 09:23:58 tpcc-el4a3 kernel:  [<a00000010010c740>]
unmap_region+0x2c0/0x400
Aug  6 09:23:58 tpcc-el4a3 kernel:
sp=e000000108c77d30 bsp=e000000108c71230
Aug  6 09:23:58 tpcc-el4a3 kernel:  [<a00000010010ce50>]
do_munmap+0x290/0x3a0
Aug  6 09:23:58 tpcc-el4a3 kernel:
sp=e000000108c77df0 bsp=e000000108c711e8
Aug  6 09:23:58 tpcc-el4a3 kernel:  [<a00000010010aeb0>]
do_mmap_pgoff+0x430/0xd60
Aug  6 09:23:58 tpcc-el4a3 kernel:
sp=e000000108c77e00 bsp=e000000108c71138
Aug  6 09:23:58 tpcc-el4a3 kernel:  [<a00000010003a350>]
sys_mmap+0x210/0x320
Aug  6 09:23:58 tpcc-el4a3 kernel:
sp=e000000108c77e30 bsp=e000000108c710a8
Aug  6 09:23:58 tpcc-el4a3 kernel:  [<a00000010000e340>]
ia64_ret_from_syscall+0x0/0x20
Aug  6 09:23:58 tpcc-el4a3 kernel:
sp=e000000108c77e30 bsp=e000000108c710a8



Basically, the per-CPU data 'pgtable_cache_size', 'pgd_quicklist', and
'pmd_quicklist' are not protected against preemption. While the current
thread is accessing them, it may be preempted, and another thread
scheduled on the same CPU may then modify them.

The patch below fixes the bug.

diff -Nraup a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
--- a/arch/ia64/mm/init.c	2004-08-06 12:46:36.765790499 +0800
+++ b/arch/ia64/mm/init.c	2004-08-06 12:47:27.169110194 +0800
@@ -63,6 +63,7 @@ check_pgt_cache (void)
 	low = pgt_cache_water[0];
 	high = pgt_cache_water[1];
 
+	preempt_disable();
 	if (pgtable_cache_size > (u64) high) {
 		do {
 			if (pgd_quicklist)
@@ -71,6 +72,7 @@ check_pgt_cache (void)
 				free_page((unsigned long)pmd_alloc_one_fast(0, 0));
 		} while (pgtable_cache_size > (u64) low);
 	}
+	preempt_enable();
 }
 
 void
diff -Nraup a/include/asm-ia64/pgalloc.h b/include/asm-ia64/pgalloc.h
--- a/include/asm-ia64/pgalloc.h	2004-08-06 12:46:48.180829421 +0800
+++ b/include/asm-ia64/pgalloc.h	2004-08-06 12:47:27.170086756 +0800
@@ -37,14 +37,20 @@
 static inline pgd_t*
 pgd_alloc_one_fast (struct mm_struct *mm)
 {
-	unsigned long *ret = pgd_quicklist;
+	unsigned long *ret = NULL;
 
+	preempt_disable();
+
+	ret = pgd_quicklist;
 	if (likely(ret != NULL)) {
 		pgd_quicklist = (unsigned long *)(*ret);
 		ret[0] = 0;
 		--pgtable_cache_size;
 	} else
 		ret = NULL;
+
+	preempt_enable();
+
 	return (pgd_t *) ret;
 }
 
@@ -65,9 +71,11 @@ pgd_alloc (struct mm_struct *mm)
 static inline void
 pgd_free (pgd_t *pgd)
 {
+	preempt_disable();
 	*(unsigned long *)pgd = (unsigned long) pgd_quicklist;
 	pgd_quicklist = (unsigned long *) pgd;
 	++pgtable_cache_size;
+	preempt_enable();
 }
 
 static inline void
@@ -80,13 +88,19 @@ pgd_populate (struct mm_struct *mm, pgd_
 static inline pmd_t*
 pmd_alloc_one_fast (struct mm_struct *mm, unsigned long addr)
 {
-	unsigned long *ret = (unsigned long *)pmd_quicklist;
+	unsigned long *ret = NULL;
 
+	preempt_disable();
+
+	ret = (unsigned long *)pmd_quicklist;
 	if (likely(ret != NULL)) {
 		pmd_quicklist = (unsigned long *)(*ret);
 		ret[0] = 0;
 		--pgtable_cache_size;
 	}
+
+	preempt_enable();
+
 	return (pmd_t *)ret;
 }
 
@@ -103,9 +117,11 @@ pmd_alloc_one (struct mm_struct *mm, uns
 static inline void
 pmd_free (pmd_t *pmd)
 {
+	preempt_disable();
 	*(unsigned long *)pmd = (unsigned long) pmd_quicklist;
 	pmd_quicklist = (unsigned long *) pmd;
 	++pgtable_cache_size;
+	preempt_enable();
 }
 
 #define __pmd_free_tlb(tlb, pmd)	pmd_free(pmd)


Signed-off-by: Zhang Yanmin <yanmin.zhang@intel.com>
Signed-off-by: Zou Nanhai <nanhai.zou@intel.com>

-
To unsubscribe from this list: send the line "unsubscribe linux-ia64" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Received on Mon Aug 9 00:36:25 2004

This archive was generated by hypermail 2.1.8 : 2005-08-02 09:20:29 EST