arch/ia64/Kconfig | 6 arch/ia64/kernel/efi.c | 12 + arch/ia64/kernel/ivt.S | 35 ++- arch/ia64/kernel/mca.c | 13 + arch/ia64/mm/fault.c | 6 arch/ia64/mm/init.c | 32 +++ fs/exec.c | 5 include/asm-ia64/ivt.h | 32 ++- include/asm-ia64/page.h | 35 ++- include/asm-ia64/pgalloc.h | 28 ++- include/asm-ia64/pgtable.h | 201 ++++++++++++++++++++-- include/linux/kernel.h | 8 include/linux/mm.h | 19 ++ kernel/sysctl.c | 9 + mm/Makefile | 1 mm/fremap.c | 4 mm/highmem.c | 5 mm/memory.c | 105 ++++++++++- mm/mmap.c | 33 +++ mm/mremap.c | 4 mm/page_alloc.c | 5 mm/super_page.c | 403 +++++++++++++++++++++++++++++++++++++++++++++ mm/swapfile.c | 5 mm/vmalloc.c | 4 24 files changed, 933 insertions(+), 77 deletions(-) Index: linux-2.6/mm/super_page.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6/mm/super_page.c 2005-09-23 12:05:17.000000000 +1000 @@ -0,0 +1,422 @@ +/* + * Linux Super Page internal functions. + */ + +#define SUPER_PAGE_DEBUG 0 + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include /* required for EXPORT_SYMBOL */ + +/* We use arbitrary high number for the sysctl. You may have to change it.*/ +#define CTL_SUPER_PAGE 4558 + +#define CTL_SET_ON 1 +#define CTL_SET_NR 2 +#define CTL_SET_ALIGN 3 +#define CTL_SET_BITMASK 4 +#define CTL_SET_LOGRES 5 + +int super_page_on = 0; /* We start without super_page at first. */ +int super_page_nr = SUPER_PAGE_NR; +int super_page_vm_align = 0; /* We start without super_page align at first. */ +int super_page_tail_align = 0; /* We start without super_page tail align at first. 
*/ +int super_page_bitmask = (1<mm; + + spin_lock(&mm->page_table_lock); + + sp_printk("[set_sp_range] setting an order %d superpage at %lx\n", order, address); + + pgd = pgd_offset(mm, address); + if (!pgd) { + sp_printk(KERN_ERR "pgd_none() failed\n"); + goto out_error; + } + + pud = pud_alloc(mm, pgd, address); + if (!pud) { + sp_printk(KERN_ERR "pud_none() failed %p (%lx)\n", pud, *(unsigned long*)pud); + goto out_error; + } + + pmd = pmd_alloc(mm, pud, address); + if (!pmd) { + sp_printk(KERN_ERR "pmd_none() failed\n"); + goto out_error; + } + + //address &= ~PGDIR_MASK; + pte = pte_alloc_map(mm, pmd, address); + + if (!pte) + goto out_error; + + /* Find consecutive ptes */ + for (i = 0; i < (1 << order); i++) + if (!pte_none(*(pte + i))) + goto out_error; + +#if 1 + for (i = 0; i < (1 << order); i++) + { + itir_ps_modify(pte, order); + sp_printk(KERN_ERR "[set_sp_range] itir_ps_modify(%p, %d) (itir is %lx)\n", + pte, order, pte_itir(*pte)); + pte += 1; + } +#endif + pte_unmap(pte); + + spin_unlock(&mm->page_table_lock); + + return; + out_error: + spin_unlock(&mm->page_table_lock); + sp_printk(KERN_WARNING "[set_sp_range] failed?\n"); + return; +} + +/** + * make_ptes_present - blah + * @addr: start address + * @end: end address + * + * When given fresh pages from mmap or similar we can set up a + * superpage to cover them. 
+ */ + + +static int find_biggest_superpage(unsigned long start, unsigned long end) +{ + unsigned long nbits; + unsigned long size = end - start; + unsigned long mask = 0x115557000UL; + + nbits = ia64_fls(size + 0xfff); + while (unlikely (((1UL << nbits) & mask) == 0) && (nbits < ia64_fls(mask))) + ++nbits; + if (nbits > ia64_fls(mask)) + nbits = ia64_fls(mask); + sp_printk("[find_biggest_superpage] start:%lx end:%lx nbits:%d\n", start, end, nbits); + return nbits - PAGE_SHIFT; +} + + + +/* static int find_biggest_superpage(unsigned long start, unsigned long end) */ +/* { */ +/* unsigned order = 7; */ + +/* /\* Find alignment of start of region *\/ */ +/* while (order && (start & ((PAGE_SIZE << order)-1))) */ +/* order--; */ + +/* /\* Shrink until it fits *\/ */ +/* while (order && (start + (PAGE_SIZE<<order) > end)) */ +/* order--; */ +/* sp_printk("[find_biggest_superpage] %d @ %lx\n", order, start); */ +/* return order; */ +/* } */ + + + +/* + * Simplistic new page table allocation for sys_brk.. + * Only GH bit != 0 tables will be allocated. (Note GH is the granularity hint bit in an alpha pte) + * At this time, we will not allocate real storage, it remains + * for the page_fault handler. 
+ */ + +int make_ptes_present(unsigned long addr, unsigned long end) +{ + unsigned long start; + int order = 0; + if (addr >= end) + BUG(); + + start = addr; + sp_printk("[make_ptes_present] begin allocation\n"); + while (start < end) + { + order = find_biggest_superpage(start, end); + set_sp_range(start, order); + start += PAGE_SIZE << order; + } + sp_printk("[make_ptes_present] done\n"); + + return 0; +} + +/** + * adj_sp_pte - blah + * @mm: + * @zap: + * @address: + * @order: the superpage order + * + * Short + * + * Long + */ +void adj_sp_pte(struct mm_struct *mm, int zap, + unsigned long address, int order) + { + int i; + int downgrade = 0; + pgd_t *pgd; + pmd_t *pmd; + pud_t *pud; + pte_t *pte; + + sp_printk("[adj_sp_pte] staring zap = %d, address = %lx, order =%d\n", zap, address, order); + + pgd = pgd_offset(mm, address); + pud = pud_offset(pgd, address); + if (pud_none(*pud)) + return; + pmd = pmd_offset(pud, address); + if (pmd_none(*pmd)) + return; + pte = pte_offset_map(pmd, address); + if (pte_none(*pte)) + return; + + /* for all pages in order size superpage check if the order + * of that page is bigger than the given order, and if so + * downgrade it. 
+ */ + + sp_printk("[adj_sp_pte] address %lx, pte %p\n", address, pte); + + if (zap) + { + sp_printk("[adj_sp_pte] zapping, just calling clear_pte_sp()\n"); + clear_pte_sp(pte); + return; + } + if (downgrade) + clear_pmd_sp(pmd); + + for (i = 0; i < (1 << order) ; i++) + { + retry: + if (pte_to_sp(pte[i]) > order) + { + sp_printk("[adj_sp_pte] %p order (%d) > order %d\n", + (pte + i), + (int)pte_to_sp(pte[i]), + order); + + down_pte_sp(pte+i); + downgrade=1; + goto retry; + } + sp_printk("[adj_sp_pte] next page\n"); + } + + return; + } + +/* + * sys_mprotect -> mprotect_fixup -> change_protection -> adj_sp_range + */ +void adj_sp_range(struct mm_struct *mm, int zap, + unsigned long addr, unsigned long end) +{ + BUG_ON(addr >= end); + + sp_printk("[adj_sp_range] zap:%d mm:%p, addr:%0lx, end:%0lx\n",zap,mm,addr,end); + sp_printk("[adj_sp_range] %lu pages in that range \n", (end-addr)/PAGE_SIZE); + + adj_sp_pte(mm, zap, addr, 0); + + return; +} + +void __break_area (struct page *page, int order) +{ + int i; + int size = 1 << order; + + for (i = 0; i < size; i++) { + set_page_count(page + i, 1); + } + return; +} + +/* break down a superpage */ +void down_pte_sp(pte_t *pte) +{ + int order; + pte_t *addr; + pte_t *end; + + order = pte_to_sp(*pte); + + sp_printk("[down_pte_sp] current order for %p is %d\n", pte, order); + + /* find start of the current superpage */ + addr = (pte_t *)((unsigned long) pte & + ~((1UL << (order + SIZEOF_PTE_LOG2)) - 1)); + end = addr + (1 << order); + /* for each page in the current superpage make order one less */ + for (; addr < end; addr++) { + sp_printk("[down_pte_sp] downgrade %p from %d to %d\n", + addr, order, order -1); + BUG_ON(pte_none(*addr)); + itir_ps_modify(addr, order - 1); + } +} + +/* clear_pte_sp + * @pteptr: + * @index: + */ +void clear_pte_sp(pte_t *pte) +{ + int order; + pte_t *addr; + pte_t *end; + + order = pte_to_sp(*pte); + if (order == 0) + return; + sp_printk("[clear_pte_sp] start (pte %p, SIZEOF_PTE_LOG2 %d, order 
%d)\n", pte, SIZEOF_PTE_LOG2, order); + + /* + * we then mask out the lower bits of this size so we are back + * at the start of the superpage "cluster" + */ + addr = (pte_t *)((unsigned long) pte & + ~((1UL << (order + SIZEOF_PTE_LOG2)) - 1)); + + end = addr + (1 << order); + /* + * now we should have 2^order pages in our cluster, so go + * through them one by one and set the order back to zero.. + */ + for (; addr < end; addr++) + { + sp_printk("[clear_pte_sp] degrade %p (was %d)\n", addr, + (int)pte_to_sp(*addr)); + if (!pte_none(*addr)) + itir_ps_modify(addr, 0); + else + pte_itir(*addr) = 0; + } +} Index: linux-2.6/arch/ia64/kernel/ivt.S =================================================================== --- linux-2.6.orig/arch/ia64/kernel/ivt.S 2005-09-19 10:05:51.000000000 +1000 +++ linux-2.6/arch/ia64/kernel/ivt.S 2005-09-22 09:46:02.000000000 +1000 @@ -70,6 +70,9 @@ # define DBG_FAULT(i) #endif +# define CALL_DEBUG(i) mov r16=ar.k2;; mov ar.k2=(i);; br.cond.sptk debug_fault;; mov ar.k2 = r16 +# define CALL_DEBUG_P(pr, i) (pr)mov r16=ar.k2;; (pr)mov ar.k2=(i);; (pr)br.cond.sptk debug_fault;; (pr)mov ar.k2 = r16 + #include "minstate.h" #define FAULT(n) \ @@ -107,6 +110,7 @@ * permanently mapped. 
*/ #ifdef CONFIG_IA64_LONG_FORMAT_VHPT + CALL_DEBUG(0) FAULT(0) #else mov r16=cr.ifa // get address that caused the TLB miss @@ -231,9 +235,12 @@ mov r29=b0 // save b0 mov r31=pr // save predicates .itlb_fault: - LOAD_PTE_MISS(r16,r17,r18,r22,page_fault) // find PTE and check present bit + LOAD_PTE_MISS(r16,r17,r18,r20,r22,page_fault) // find PTE and check present bit + ;; + VHPT_INSERT(r16,r17,r18,r20,r22) + ;; + mov cr.itir=r20 ;; - VHPT_INSERT(r16,r17,r18,r22) itc.i r18 #ifdef CONFIG_SMP /* @@ -252,6 +259,7 @@ (p7) ptc.l r16,r20 #endif mov pr=r31,-1 + CALL_DEBUG(1) rfi END(itlb_miss) @@ -269,11 +277,17 @@ mov r16=cr.ifa // get virtual address mov r29=b0 // save b0 mov r31=pr // save predicates + .dtlb_fault: - LOAD_PTE_MISS(r16,r17,r18,r22,page_fault) // find PTE and check present bit + /* va, ppte, pte, tir, hpte, failfn) */ + LOAD_PTE_MISS(r16,r17,r18,r20,r22,page_fault) // find PTE and check present bit + ;; + VHPT_INSERT(r16,r17,r18,r20,r22) + ;; + mov cr.itir=r20 ;; - VHPT_INSERT(r16,r17,r18,r22) itc.d r18 + CALL_DEBUG(2) #ifdef CONFIG_SMP /* * Tell the assemblers dependency-violation checker that the above "itc" instructions @@ -291,6 +305,7 @@ (p7) ptc.l r16,r20 #endif mov pr=r31,-1 +// CALL_DEBUG(2) rfi END(dtlb_miss) @@ -329,6 +344,8 @@ ;; itc.i r19 // insert the TLB entry mov pr=r31,-1 + ;; + CALL_DEBUG_P(p8, 3) rfi END(alt_itlb_miss) @@ -353,7 +370,7 @@ ;; (p8) mov cr.iha=r17 (p8) mov r29=b0 // save b0 -(p8) br.cond.dptk .dtlb_fault +(p8) br.cond.dptk dtlb_fault #endif extr.u r23=r21,IA64_PSR_CPL0_BIT,2 // extract psr.cpl and r22=IA64_ISR_CODE_MASK,r20 // get the isr.code field @@ -375,6 +392,7 @@ ;; (p7) itc.d r19 // insert the TLB entry mov pr=r31,-1 + ;; rfi END(alt_dtlb_miss) @@ -409,6 +427,7 @@ */ #ifdef CONFIG_IA64_LONG_FORMAT_VHPT DBG_FAULT(5) + CALL_DEBUG(5) FAULT(5) #else FIND_PTE(r16,r17,p6,p7) @@ -460,7 +479,34 @@ DBG_FAULT(7) FAULT(7) END(dkey_miss) - + //----------------------------------------------------------------------------------- + 
// call do_page_fault (predicates are in r31, psr.dt may be off, r16 is faulting address) +ENTRY(debug_fault) + ssm psr.dt + ;; + srlz.i + ;; + SAVE_MIN_WITH_COVER + alloc r15=ar.pfs,0,0,4,0 + mov out0=cr.ifa + mov out1=cr.itir + mov out3=ar.k2 + adds r3=8,r2 // set up second base pointer + ;; + ssm psr.ic | PSR_DEFAULT_BITS + ;; + srlz.i // guarantee that interruption collectin is on + ;; +(p15) ssm psr.i // restore psr.i + movl r14=ia64_leave_kernel + ;; + SAVE_REST + mov rp=r14 + ;; + adds out2=16,r12 // out2 = pointer to pt_regs + br.call.sptk.many b6=ia64_do_debug_fault// ignore return address +END(debug_fault) + .org ia64_ivt+0x2000 ///////////////////////////////////////////////////////////////////////////////////////// // 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54) @@ -479,7 +525,7 @@ mov r29=b0 // save b0 in case of nested fault mov r31=pr // save pr ;; - LOAD_PTE_FAULT(r16,r17,r18,r22,.dtlb_fault) + LOAD_PTE_FAULT(r16,r17,r18,r20,r22,.dtlb_fault) #ifdef CONFIG_SMP mov r28=ar.ccv // save ar.ccv @@ -492,7 +538,7 @@ ;; cmp.eq p6,p7=r26,r18 ;; - VHPT_UPDATE(p6,r18,r22) + VHPT_UPDATE(p6,r18,r20,r22) (p6) itc.d r25 // install updated PTE ;; /* @@ -515,10 +561,12 @@ mov b0=r29 // restore b0 ;; st8 [r17]=r18 // store back updated PTE - VHPT_UPDATE(p0,r18,r22) + VHPT_UPDATE(p0,r18,r20,r22) itc.d r18 // install updated PTE #endif mov pr=r31,-1 // restore pr + ;; + CALL_DEBUG(8) rfi END(dirty_bit) @@ -543,7 +591,7 @@ (p6) mov r16=r18 // if so, use cr.iip instead of cr.ifa #endif /* CONFIG_ITANIUM */ ;; - LOAD_PTE_FAULT(r16,r17,r18,r22,.itlb_fault) + LOAD_PTE_FAULT(r16,r17,r18,r20,r22,.itlb_fault) ;; mov b0=r29 // restore b0 #ifdef CONFIG_SMP @@ -557,7 +605,7 @@ ;; cmp.eq p6,p7=r26,r18 ;; - VHPT_UPDATE(p6,r18,r22) + VHPT_UPDATE(p6,r18,r20,r22) (p6) itc.i r25 // install updated PTE ;; /* @@ -579,12 +627,14 @@ or r18=_PAGE_A,r18 // set the accessed bit ;; st8 [r17]=r18 // store back updated PTE - VHPT_UPDATE(p0,r18,r22) + VHPT_UPDATE(p0,r18,r20,r22) itc.i r18 // 
install updated PTE #endif /* !CONFIG_SMP */ mov pr=r31,-1 ;; mov b0=r29 + ;; + CALL_DEBUG(9) rfi END(iaccess_bit) @@ -598,7 +648,7 @@ mov r29=b0 // save b0 mov r31=pr ;; - LOAD_PTE_FAULT(r16,r17,r18,r22,.dtlb_fault) + LOAD_PTE_FAULT(r16,r17,r18,r20,r22,.dtlb_fault) #ifdef CONFIG_SMP mov r28=ar.ccv // save ar.ccv ;; // avoid RAW on r18 @@ -610,7 +660,7 @@ ;; cmp.eq p6,p7=r26,r18 ;; - VHPT_UPDATE(p6,r18,r22) + VHPT_UPDATE(p6,r18,r20,r22) (p6) itc.d r25 // install updated PTE /* * Tell the assemblers dependency-violation checker that the above "itc" instructions @@ -632,11 +682,13 @@ or r18=_PAGE_A,r18 // set the accessed bit ;; st8 [r17]=r18 // store back updated PTE - VHPT_UPDATE(p0,r18,r22) + VHPT_UPDATE(p0,r18,r20,r22) itc.d r18 // install updated PTE #endif mov b0=r29 // restore b0 mov pr=r31,-1 + ;; + CALL_DEBUG(10) rfi END(daccess_bit) @@ -1191,6 +1243,7 @@ ;; mov r19=24 // fault number br.sptk.many dispatch_to_fault_handler + br.cond.sptk debug_fault END(general_exception) .org ia64_ivt+0x5500 Index: linux-2.6/arch/ia64/mm/init.c =================================================================== --- linux-2.6.orig/arch/ia64/mm/init.c 2005-09-19 10:05:51.000000000 +1000 +++ linux-2.6/arch/ia64/mm/init.c 2005-09-19 10:06:14.000000000 +1000 @@ -51,6 +51,11 @@ unsigned long MAX_DMA_ADDRESS = PAGE_OFFSET + 0x100000000UL; +#ifdef CONFIG_SUPER_PAGE +/* TODO small array for testing */ +int super_page_order[SUPER_PAGE_NR] = {0,1,2,3,4,5,6,7,8}; +#endif + #ifdef CONFIG_VIRTUAL_MEM_MAP unsigned long vmalloc_end = VMALLOC_END_INIT; EXPORT_SYMBOL(vmalloc_end); @@ -118,8 +123,10 @@ if (!pte_exec(pte)) return; /* not an executable page... 
*/ + sp_printk("[update_mmu_cache] pte->%lx, itir->%lx\n", (unsigned long)pte_val(pte), pte_itir(pte)); + page = pte_page(pte); - addr = (unsigned long) page_address(page); + addr = (unsigned long)page_address(page); if (test_bit(PG_arch_1, &page->flags)) return; /* i-cache is already coherent with d-cache */ @@ -274,6 +281,8 @@ pmd_t *pmd; pte_t *pte; + printk("[put_kernel_page] *page->%p addr %lx\n", page, address); + if (!PageReserved(page)) printk(KERN_ERR "put_kernel_page: page at 0x%p not in reserved memory\n", page_address(page)); @@ -296,7 +305,11 @@ pte_unmap(pte); goto out; } +#ifdef CONFIG_SUPER_PAGE + set_pte(pte, mk_pte(page, pgprot, 0)); +#else set_pte(pte, mk_pte(page, pgprot)); +#endif /* CONFIG_SUPER_PAGE */ pte_unmap(pte); } out: spin_unlock(&init_mm.page_table_lock); @@ -414,14 +427,25 @@ /* Pin mapping for percpu area into TLB */ psr = ia64_clear_ic(); +#ifdef CONFIG_SUPER_PAGE + ia64_itr(0x2, IA64_TR_PERCPU_DATA, PERCPU_ADDR, + pte_val(pfn_pte(__pa(my_cpu_data) >> PAGE_SHIFT, PAGE_KERNEL, 0)), + PERCPU_PAGE_SHIFT); +#else ia64_itr(0x2, IA64_TR_PERCPU_DATA, PERCPU_ADDR, pte_val(pfn_pte(__pa(my_cpu_data) >> PAGE_SHIFT, PAGE_KERNEL)), PERCPU_PAGE_SHIFT); +#endif /* CONFIG_SUPER_PAGE */ #ifdef CONFIG_IA64_LONG_FORMAT_VHPT /* Insert the permanent translation for the VHPT */ +# ifdef CONFIG_SUPER_PAGE + ia64_itr(0x2, IA64_TR_LONG_VHPT, LONG_VHPT_BASE, + pte_val(pfn_pte(__pa(vhpt_base[cpu]) >> PAGE_SHIFT, PAGE_KERNEL, 0)), long_vhpt_bits); +# else ia64_itr(0x2, IA64_TR_LONG_VHPT, LONG_VHPT_BASE, pte_val(pfn_pte(__pa(vhpt_base[cpu]) >> PAGE_SHIFT, PAGE_KERNEL)), long_vhpt_bits); +# endif #endif ia64_set_psr(psr); @@ -528,10 +552,14 @@ if (pmd_none(*pmd)) pmd_populate_kernel(&init_mm, pmd, alloc_bootmem_pages_node(NODE_DATA(node), PAGE_SIZE)); pte = pte_offset_kernel(pmd, address); - if (pte_none(*pte)) +#ifdef CONFIG_SUPER_PAGE + set_pte(pte, pfn_pte(__pa(alloc_bootmem_pages_node(NODE_DATA(node), PAGE_SIZE)) >> PAGE_SHIFT, + PAGE_KERNEL, PAGE_SHIFT)); 
+#else set_pte(pte, pfn_pte(__pa(alloc_bootmem_pages_node(NODE_DATA(node), PAGE_SIZE)) >> PAGE_SHIFT, PAGE_KERNEL)); +#endif /* CONFIG_SUPER_PAGE */ } return 0; } Index: linux-2.6/fs/exec.c =================================================================== --- linux-2.6.orig/fs/exec.c 2005-09-19 09:52:51.000000000 +1000 +++ linux-2.6/fs/exec.c 2005-09-19 10:06:14.000000000 +1000 @@ -332,8 +332,13 @@ } inc_mm_counter(mm, rss); lru_cache_add_active(page); +#ifdef CONFIG_SUPER_PAGE + set_pte_at(mm, address, pte, pte_mkdirty(pte_mkwrite(mk_pte( + page, vma->vm_page_prot, 0)))); +#else set_pte_at(mm, address, pte, pte_mkdirty(pte_mkwrite(mk_pte( page, vma->vm_page_prot)))); +#endif /* CONFIG_SUPER_PAGE */ page_add_anon_rmap(page, vma, address); pte_unmap(pte); spin_unlock(&mm->page_table_lock); Index: linux-2.6/include/asm-ia64/pgalloc.h =================================================================== --- linux-2.6.orig/include/asm-ia64/pgalloc.h 2005-09-19 09:52:59.000000000 +1000 +++ linux-2.6/include/asm-ia64/pgalloc.h 2005-09-19 11:49:50.000000000 +1000 @@ -27,6 +27,16 @@ DECLARE_PER_CPU(long, __pgtable_quicklist_size); #define pgtable_quicklist_size __ia64_per_cpu_var(__pgtable_quicklist_size) +static inline int page_is_zero(void *p) +{ + int i; + long *pp = (long *)p; + for (i = 0; i < (PAGE_SIZE >> 3); i++) + if (*pp++ != 0UL) + return 0; + return 1; +} + static inline long pgtable_quicklist_total_size(void) { long ql_size = 0; @@ -47,7 +57,15 @@ ret = pgtable_quicklist; if (likely(ret != NULL)) { pgtable_quicklist = (unsigned long *)(*ret); - ret[0] = 0; +/** #### EVIL HACK + * pages are meant to be zeroed when they're put onto the quicklist. + * The superpage patch violates this asumption -- asj_sp_range() sets + * all ITIR fields ot 0x38 + * Bzero the page here to be able to make progress. 
+ * ##### EVIL HACK + */ +// ret[0] = 0; + memset(ret, 0, PAGE_SIZE); --pgtable_quicklist_size; preempt_enable(); } else { @@ -68,7 +86,7 @@ return; } #endif - +// BUG_ON(!page_is_zero(pgtable_entry)); preempt_disable(); *(unsigned long *)pgtable_entry = (unsigned long)pgtable_quicklist; pgtable_quicklist = (unsigned long *)pgtable_entry; @@ -125,7 +143,11 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr) { - return pgtable_quicklist_alloc(); + pte_t *pte = pgtable_quicklist_alloc(); +#ifdef CONFIG_SUPER_PAGE +// itir_ps_modify(pte, 0); +#endif + return pte; } static inline void pte_free(struct page *pte) Index: linux-2.6/include/asm-ia64/page.h =================================================================== --- linux-2.6.orig/include/asm-ia64/page.h 2005-09-19 10:05:51.000000000 +1000 +++ linux-2.6/include/asm-ia64/page.h 2005-09-19 10:06:14.000000000 +1000 @@ -47,7 +47,7 @@ #define PERCPU_PAGE_SHIFT 16 /* log2() of max. size of per-CPU area */ #define PERCPU_PAGE_SIZE (__IA64_UL_CONST(1) << PERCPU_PAGE_SHIFT) -#define RGN_MAP_LIMIT ((1UL << (4*PAGE_SHIFT - 12)) - PAGE_SIZE) /* per region addr limit */ +#define RGN_MAP_LIMIT ((1UL << (PGDIR_SHIFT + PGD_INDEX_BITS - 3)) - PAGE_SIZE) /* per region addr limit */ #ifdef CONFIG_HUGETLB_PAGE @@ -87,17 +87,6 @@ flush_dcache_page(page); \ } while (0) - -#define alloc_zeroed_user_highpage(vma, vaddr) \ -({ \ - struct page *page = alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr); \ - if (page) \ - flush_dcache_page(page); \ - page; \ -}) - -#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE - #define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) #ifdef CONFIG_VIRTUAL_MEM_MAP @@ -170,6 +159,12 @@ typedef struct { unsigned long pte, itir, tag, ig; } long_pte_t; #endif +#ifdef CONFIG_IA64_LONG_FORMAT_VHPT +typedef struct { unsigned long pte; unsigned long itir; } pte_t; +#else +typedef struct { unsigned long pte; } pte_t; +#endif + # endif /* __KERNEL__ */ #endif /* 
!__ASSEMBLY__ */ @@ -177,17 +172,26 @@ /* * These are used to make use of C type-checking.. */ - typedef struct { unsigned long pte; } pte_t; typedef struct { unsigned long pmd; } pmd_t; typedef struct { unsigned long pgd; } pgd_t; typedef struct { unsigned long pgprot; } pgprot_t; +#ifdef CONFIG_IA64_LONG_FORMAT_VHPT +# define pte_itir(x) ((x).itir) +#else +# define pte_itir(x) (PAGE_SHIFT<<2) +#endif + # define pte_val(x) ((x).pte) # define pmd_val(x) ((x).pmd) # define pgd_val(x) ((x).pgd) # define pgprot_val(x) ((x).pgprot) -# define __pte(x) ((pte_t) { (x) } ) +#ifdef CONFIG_IA64_LONG_FORMAT_VHPT +# define __pte(pte, itir) ((pte_t){(pte), (itir)}) +#else +# define __pte(pte, itir) ((pte_t){(pte)}) +#endif # define __pgprot(x) ((pgprot_t) { (x) } ) #else /* !STRICT_MM_TYPECHECKS */ @@ -195,18 +199,15 @@ * .. while these make it easier on the compiler */ # ifndef __ASSEMBLY__ - typedef unsigned long pte_t; typedef unsigned long pmd_t; typedef unsigned long pgd_t; typedef unsigned long pgprot_t; # endif -# define pte_val(x) (x) # define pmd_val(x) (x) # define pgd_val(x) (x) # define pgprot_val(x) (x) -# define __pte(x) (x) # define __pgd(x) (x) # define __pgprot(x) (x) #endif /* !STRICT_MM_TYPECHECKS */ Index: linux-2.6/include/asm-ia64/pgtable.h =================================================================== --- linux-2.6.orig/include/asm-ia64/pgtable.h 2005-09-19 10:05:51.000000000 +1000 +++ linux-2.6/include/asm-ia64/pgtable.h 2005-09-23 12:24:03.000000000 +1000 @@ -66,6 +66,12 @@ /* Mask of bits which may be changed by pte_modify(); the odd bits are there for _PAGE_PROTNONE */ #define _PAGE_CHG_MASK (_PAGE_P | _PAGE_PROTNONE | _PAGE_PL_MASK | _PAGE_AR_MASK | _PAGE_ED) +/* ITIR register fields */ +#define _ITIR_KEY_MASK ((__IA64_UL(1) << 24) - 1) +#define _ITIR_KEY_SHIFT 8 +#define _ITIR_PS_SHIFT 2 +#define _ITIR_PS_MASK (0x003F << _ITIR_PS_SHIFT) + #define _PAGE_SIZE_4K 12 #define _PAGE_SIZE_8K 13 #define _PAGE_SIZE_16K 14 @@ -88,10 +94,14 @@ * * 
PGDIR_SHIFT determines what a first-level page table entry can map. */ -#define PGDIR_SHIFT (PAGE_SHIFT + 2*(PAGE_SHIFT-3)) -#define PGDIR_SIZE (__IA64_UL(1) << PGDIR_SHIFT) +#define PGDIR_SHIFT (PAGE_SHIFT + PTE_INDEX_BITS + PMD_INDEX_BITS) +#define PGDIR_SIZE (1UL << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE-1)) -#define PTRS_PER_PGD (1UL << (PAGE_SHIFT-3)) + +#define PGD_ENTRY_BITS 3 +#define PGD_INDEX_BITS (PAGE_SHIFT - PGD_ENTRY_BITS) +#define PTRS_PER_PGD (1UL << PGD_INDEX_BITS) + #define USER_PTRS_PER_PGD (5*PTRS_PER_PGD/8) /* regions 0-4 are user regions */ #define FIRST_USER_ADDRESS 0 @@ -101,15 +111,23 @@ * PMD_SHIFT determines the size of the area a second-level page table * can map. */ -#define PMD_SHIFT (PAGE_SHIFT + (PAGE_SHIFT-3)) +#define PMD_SHIFT (PAGE_SHIFT + PTE_INDEX_BITS) #define PMD_SIZE (1UL << PMD_SHIFT) #define PMD_MASK (~(PMD_SIZE-1)) -#define PTRS_PER_PMD (1UL << (PAGE_SHIFT-3)) +#define PMD_ENTRY_BITS 3 +#define PMD_INDEX_BITS (PAGE_SHIFT - PMD_ENTRY_BITS) +#define PTRS_PER_PMD (1UL << PMD_INDEX_BITS) /* * Definitions for third level: */ -#define PTRS_PER_PTE (__IA64_UL(1) << (PAGE_SHIFT-3)) +#ifdef CONFIG_IA64_LONG_FORMAT_VHPT +#define PTE_ENTRY_BITS 4 +#else +#define PTE_ENTRY_BITS 3 +#endif +#define PTE_INDEX_BITS (PAGE_SHIFT - PTE_ENTRY_BITS) +#define PTRS_PER_PTE (1UL << PTE_INDEX_BITS) /* * All the normal masks have the "page accessed" bits on, as any time @@ -203,6 +221,11 @@ */ #define set_pte(ptep, pteval) (*(ptep) = (pteval)) #define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval) +#ifdef CONFIG_SUPER_PAGE +# define set_pte_itir(ptep, pteval, _itir) \ + do {(*(ptep) = (pteval)); \ + ((ptep)->itir) = (_itir);} while (0) +#endif #define VMALLOC_START (RGN_BASE(RGN_GATE) + 0x200000000UL) #ifdef CONFIG_VIRTUAL_MEM_MAP @@ -217,6 +240,145 @@ #define kc_vaddr_to_offset(v) ((v) - RGN_BASE(RGN_GATE)) #define kc_offset_to_vaddr(o) ((o) + RGN_BASE(RGN_GATE)) + +/* Extract pfn from pte. 
*/ +#define pte_pfn(_pte) ((pte_val(_pte) & _PFN_MASK) >> PAGE_SHIFT) + +#if CONFIG_SUPER_PAGE + +extern pgprot_t super_page_prot[]; + +/* SUPER_PAGE_NR gives the number of entries in super_page_order[] + * -- see arch/ia64/mm/init.c + */ +#define SUPER_PAGE_NR 9 /*** XXX : I don't think we need this at all */ + +/* TODO -- check following values */ +#define SUPER_PAGE_SHIFT 2 +#define SUPER_PAGE_MASK (__IA64_UL(0x3F) << SUPER_PAGE_SHIFT) +#define SIZEOF_PTR_LOG2 3 +#ifdef CONFIG_IA64_LONG_FORMAT_VHPT +#define SIZEOF_PTE_LOG2 4 +#else +#define SIZEOF_PTE_LOG2 SIZEOF_PTR_LOG2 +#endif + +void down_pte_sp(pte_t *pte); +void clear_pte_sp(pte_t *pte); +extern int super_page_order[]; + +static inline int pte_none(pte_t pte) +{ + return !(pte_val(pte)); +} + +#define pte_to_prot(pte) (pte_val(pte) & ~_PFN_MASK) + +/* find the superpage order; take the page size from itir and then subtract the system page size from it */ +#define itir_to_sp(itir) ((((itir) & SUPER_PAGE_MASK) >> SUPER_PAGE_SHIFT) - PAGE_SHIFT) +#define pte_to_sp(pte) (itir_to_sp(pte_itir(pte))) + +/* Test to see the order of this pte */ +extern inline int pte_to_sp_index(pte_t pte) +{ + int i, spo; + + /* pagesz = itir_to_sp(pte_itir(pte)) - PAGE_SHIFT; */ + spo = itir_to_sp(pte_itir(pte)); +// sp_printk(KERN_ERR "[pte_to_sp_index] pte == %lx | spo == %d\n", (unsigned long)pte.pte, spo); + for (i=0; i < SUPER_PAGE_NR; i++) { + /* TODO super_page_prot[] not used */ + if (super_page_order[i] == spo) + return i; + } + BUG_ON(1); /* shouldn't get here! */ + return 0; +} + +extern inline int order_to_sp_index(int order) +{ + int i, spo; + + for ( i=0; i < SUPER_PAGE_NR; i++) + if (super_page_order[i] == spo) + return i; + BUG_ON(1); /* shouldn't get here! 
*/ +// sp_printk("order_to_sp_index: not in super_page_order[].\n"); +//XXX show_stack(NULL); + return 0; +} + +#ifdef CONFIG_IA64_LONG_FORMAT_VHPT +extern inline void pte_clear(pte_t *ptep) +{ + pte_t pte; + pte_val(pte) = 0UL; + pte_itir(pte) = 0UL; + set_pte(ptep, pte); +} +#else +extern inline void pte_clear(pte_t *ptep) +{ + pte_t pte; + pte_val(pte) = 0UL; + pte_itir(pte) = 0UL; + set_pte(ptep, pte); +} +#endif + +/* pteval is type pte_t */ +#define set_pte_raw(pteptr, pteval) set_pte(pteptr, pteval) + +/* page size is 2^PAGE_SHIFT, so we can add exponents to increase page size */ +#define itir_ps_modify(pteptr, order) \ + (pte_itir(*(pteptr)) = (pte_itir(*(pteptr)) & ~SUPER_PAGE_MASK) | (((unsigned long)(PAGE_SHIFT + order) << SUPER_PAGE_SHIFT) & SUPER_PAGE_MASK)) + +#define clear_pmd_sp(pmd) do {} while(0) +#define super_page_populate(mm, adr, page, prot, index) do {} while (0) + + + +#define pfn_pte(pfn, pgprot, order) \ +({ \ + pte_t __pte; \ + pte_val(__pte) = ((pfn) << PAGE_SHIFT) | pgprot_val(pgprot); \ + pte_itir(__pte) = (((unsigned long)(order + PAGE_SHIFT) << _ITIR_PS_SHIFT) & _ITIR_PS_MASK); \ + __pte; \ +}) + +#define mk_pte(page, pgprot, order) pfn_pte(page_to_pfn(page), (pgprot), (order)) + + /* This takes a physical page address that is used by the remapping functions */ +#define mk_pte_phys(physpage, pgprot, order) \ +({ \ + pte_t __pte; \ + pte_val(__pte) = (physpage) + pgprot_val(pgprot); \ + pte_itir(__pte) = ((unsigned long)((PAGE_SHIFT + order) << _ITIR_PS_SHIFT)); \ + __pte; \ +}) + +#define page_pte_prot(page,prot,order) mk_pte(page, prot, order) +#define page_pte(page) page_pte_prot(page, __pgprot(0), PAGE_SHIFT) + +#define itir_ps_mod(pteptr, order) \ + (pte_itir(*pteptr) = (pte_itir(*pteptr) & ~((_ITIR_PS_MASK) | (((unsigned long)(order) << _ITIR_PS_SHIFT) & _ITIR_PS_MASK)) + +#define pte_modify(_pte, newprot) \ + (__pte(((pte_val(_pte) & ~_PAGE_CHG_MASK) | \ + (pgprot_val(newprot) & _PAGE_CHG_MASK)), \ + ((unsigned long)PAGE_SHIFT 
<< _ITIR_PS_SHIFT))) + +#define pte_modify_order(_pte, newprot, order) \ + (__pte(((pte_val(_pte) & ~_PAGE_CHG_MASK) | \ + (pgprot_val(newprot) & _PAGE_CHG_MASK)), \ + ((unsigned long)order << _ITIR_PS_SHIFT))) + + + +#else /* not CONFIG_SUPER_PAGE */ + +#define pte_none(pte) (!pte_val(pte)) + /* * Conversion functions: convert page frame number (pfn) and a protection value to a page * table entry (pte). @@ -224,9 +386,6 @@ #define pfn_pte(pfn, pgprot) \ ({ pte_t __pte; pte_val(__pte) = ((pfn) << PAGE_SHIFT) | pgprot_val(pgprot); __pte; }) -/* Extract pfn from pte. */ -#define pte_pfn(_pte) ((pte_val(_pte) & _PFN_MASK) >> PAGE_SHIFT) - #define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) /* This takes a physical page address that is used by the remapping functions */ @@ -239,6 +398,9 @@ #define page_pte_prot(page,prot) mk_pte(page, prot) #define page_pte(page) page_pte_prot(page, __pgprot(0)) +#endif /* CONFIG_SUPER_PAGE */ + + #define pte_none(pte) (!pte_val(pte)) #define pte_present(pte) (pte_val(pte) & (_PAGE_P | _PAGE_PROTNONE)) #define pte_clear(mm,addr,pte) (pte_val(*(pte)) = 0UL) @@ -273,6 +435,16 @@ * Note: we convert AR_RWX to AR_RX and AR_RW to AR_R by clearing the 2nd bit in the * access rights: */ +#ifdef CONFIG_SUPER_PAGE +#define pte_wrprotect(pte) (__pte(pte_val(pte) & ~_PAGE_AR_RW, pte_itir(pte))) +#define pte_mkwrite(pte) (__pte(pte_val(pte) | _PAGE_AR_RW, pte_itir(pte))) +#define pte_mkexec(pte) (__pte(pte_val(pte) | _PAGE_AR_RX, pte_itir(pte))) +#define pte_mkold(pte) (__pte(pte_val(pte) & ~_PAGE_A, pte_itir(pte))) +#define pte_mkyoung(pte) (__pte(pte_val(pte) | _PAGE_A, pte_itir(pte))) +#define pte_mkclean(pte) (__pte(pte_val(pte) & ~_PAGE_D, pte_itir(pte))) +#define pte_mkdirty(pte) (__pte(pte_val(pte) | _PAGE_D, pte_itir(pte))) +#define pte_mkhuge(pte) (__pte(pte_val(pte) | _PAGE_P, pte_itir(pte))) +#else #define pte_wrprotect(pte) (__pte(pte_val(pte) & ~_PAGE_AR_RW)) #define pte_mkwrite(pte) (__pte(pte_val(pte) | _PAGE_AR_RW)) 
#define pte_mkexec(pte) (__pte(pte_val(pte) | _PAGE_AR_RX)) @@ -281,7 +453,7 @@ #define pte_mkclean(pte) (__pte(pte_val(pte) & ~_PAGE_D)) #define pte_mkdirty(pte) (__pte(pte_val(pte) | _PAGE_D)) #define pte_mkhuge(pte) (__pte(pte_val(pte) | _PAGE_P)) - +#endif /* * Macro to a page protection value as "uncacheable". Note that "protection" is really a * misnomer here as the protection value contains the memory attribute bits, dirty bits, @@ -379,7 +551,8 @@ ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { #ifdef CONFIG_SMP - return __pte(xchg((long *) ptep, 0)); +/* XXX : in lvhpt? */ + return __pte(xchg((long *) ptep, 0), pte_itir(*ptep)); #else pte_t pte = *ptep; pte_clear(mm, addr, ptep); @@ -392,10 +565,10 @@ { #ifdef CONFIG_SMP unsigned long new, old; - +/* XXX: again lvhpt? */ do { old = pte_val(*ptep); - new = pte_val(pte_wrprotect(__pte (old))); + new = pte_val(pte_wrprotect(__pte(old, pte_itir(*ptep)))); } while (cmpxchg((unsigned long *) ptep, old, new) != old); #else pte_t old_pte = *ptep; @@ -437,7 +610,7 @@ #define __swp_offset(entry) (((entry).val << 1) >> 10) #define __swp_entry(type,offset) ((swp_entry_t) { ((type) << 2) | ((long) (offset) << 9) }) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) -#define __swp_entry_to_pte(x) ((pte_t) { (x).val }) +#define __swp_entry_to_pte(x) __pte((x).val, PAGE_SHIFT << 2) #define PTE_FILE_MAX_BITS 61 #define pte_to_pgoff(pte) ((pte_val(pte) << 1) >> 3) Index: linux-2.6/include/linux/kernel.h =================================================================== --- linux-2.6.orig/include/linux/kernel.h 2005-09-19 09:53:04.000000000 +1000 +++ linux-2.6/include/linux/kernel.h 2005-09-23 14:29:02.000000000 +1000 @@ -137,6 +137,14 @@ static inline int printk(const char *s, ...) { return 0; } #endif +/* IANW */ +//#define SP_PRINTK_DEBUG 0 +#ifdef SP_PRINTK_DEBUG +# define sp_printk(args...) printk(args) +#else +# define sp_printk(args...) 
+#endif
+
 unsigned long int_sqrt(unsigned long);

 static inline int __attribute_pure__ long_log2(unsigned long x)
Index: linux-2.6/mm/memory.c
===================================================================
--- linux-2.6.orig/mm/memory.c	2005-09-19 09:53:05.000000000 +1000
+++ linux-2.6/mm/memory.c	2005-09-23 14:44:40.000000000 +1000
@@ -283,11 +283,14 @@

 pte_t fastcall *pte_alloc_map(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
 {
+	pte_t *p;
+	sp_printk("[pte_alloc_map] *mm->%p, *pmd->%p, addr->%lx\n", mm, pmd, address);
 	if (!pmd_present(*pmd)) {
 		struct page *new;

 		spin_unlock(&mm->page_table_lock);
 		new = pte_alloc_one(mm, address);
+		sp_printk("[pte_alloc_map] new %p\n", new); /* new may be NULL here; print the pointer only */
 		spin_lock(&mm->page_table_lock);
 		if (!new)
 			return NULL;
@@ -296,24 +299,41 @@
 		 * entry, as somebody else could have populated it..
 		 */
 		if (pmd_present(*pmd)) {
+			sp_printk("[pte_alloc_map] pmd_present(*pmd)!\n");
 			pte_free(new);
 			goto out;
 		}
 		mm->nr_ptes++;
 		inc_page_state(nr_page_table_pages);
 		pmd_populate(mm, pmd, new);
+		sp_printk("[pte_alloc_map] %s\n", (pmd_present(*pmd))?"yes":"no");
 	}
 out:
-	return pte_offset_map(pmd, address);
+	p = pte_offset_map(pmd, address);
+#ifdef CONFIG_SUPER_PAGE
+	/* itir_ps_modify() only exists with CONFIG_SUPER_PAGE: set the default base page size */
+	if (pte_itir(*p) == 0)
+	{
+		sp_printk("[pte_alloc_map] set default page size (%p)\n", p);
+		itir_ps_modify(p, 0);
+	}
+#endif
+	return p;
 }

 pte_t fastcall * pte_alloc_kernel(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
 {
+	sp_printk("[pte_alloc_map_kernel] address %lx\n", address);
 	if (!pmd_present(*pmd)) {
 		pte_t *new;

 		spin_unlock(&mm->page_table_lock);
 		new = pte_alloc_one_kernel(mm, address);
+#if 0
+		itir_ps_modify(new, 0);
+#endif
+		sp_printk("[pte_alloc_map_kernel] page %p, itir %lx\n", new, new ? pte_itir(*new) : 0UL); /* allocation is only NULL-checked below */
+
 		spin_lock(&mm->page_table_lock);
 		if (!new)
 			return NULL;
@@ -393,8 +413,10 @@
 			 * If it's a shared mapping, mark it clean in
 			 * the child
 			 */
-			if (vm_flags & VM_SHARED)
+			if (vm_flags & VM_SHARED) {
+				sp_printk("[copy_one_pte] shared\n");
 				pte =
pte_mkclean(pte); + } pte = pte_mkold(pte); get_page(page); inc_mm_counter(dst_mm, rss); @@ -512,6 +534,14 @@ if (is_vm_hugetlb_page(vma)) return copy_hugetlb_page_range(dst_mm, src_mm, vma); +#ifdef CONFIG_SUPER_PAGE + if ((vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE) { + spin_lock(&src_mm->page_table_lock); + adj_sp_range(src_mm, 1, addr, end); + spin_unlock(&src_mm->page_table_lock); + } +#endif + dst_pgd = pgd_offset(dst_mm, addr); src_pgd = pgd_offset(src_mm, addr); do { @@ -645,6 +675,9 @@ next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(pgd)) continue; +#ifdef CONFIG_SUPER_PAGE + adj_sp_range(vma->vm_mm, 1, addr, next); +#endif zap_pud_range(tlb, pgd, addr, next, details); } while (pgd++, addr = next, addr != end); tlb_end_vma(tlb, vma); @@ -1029,8 +1062,12 @@ if (!pte) return -ENOMEM; do { +#ifdef CONFIG_SUPER_PAGE + pte_t zero_pte = pte_wrprotect(mk_pte(ZERO_PAGE(addr), prot, 0)); +#else pte_t zero_pte = pte_wrprotect(mk_pte(ZERO_PAGE(addr), prot)); - BUG_ON(!pte_none(*pte)); +#endif + BUG_ON(!pte_none(*pte)); set_pte_at(mm, addr, pte, zero_pte); } while (pte++, addr += PAGE_SIZE, addr != end); pte_unmap(pte - 1); @@ -1084,6 +1121,9 @@ pgd = pgd_offset(mm, addr); flush_cache_range(vma, addr, end); spin_lock(&mm->page_table_lock); +#ifdef CONFIG_SUPER_PAGE + adj_sp_range(mm, 1, addr, end); +#endif do { next = pgd_addr_end(addr, end); err = zeromap_pud_range(mm, pgd, addr, next, prot); @@ -1111,7 +1151,11 @@ do { BUG_ON(!pte_none(*pte)); if (!pfn_valid(pfn) || PageReserved(pfn_to_page(pfn))) +#ifdef CONFIG_SUPER_PAGE + set_pte_at(mm, addr, pte, pfn_pte(pfn, prot, 0)); +#else set_pte_at(mm, addr, pte, pfn_pte(pfn, prot)); +#endif pfn++; } while (pte++, addr += PAGE_SIZE, addr != end); pte_unmap(pte - 1); @@ -1183,10 +1227,13 @@ pgd = pgd_offset(mm, addr); flush_cache_range(vma, addr, end); spin_lock(&mm->page_table_lock); +#ifdef CONFIG_SUPER_PAGE + adj_sp_range(mm, 1, addr, end); +#endif do { next = pgd_addr_end(addr, end); err = 
remap_pud_range(mm, pgd, addr, next, - pfn + (addr >> PAGE_SHIFT), prot); + pfn + (addr >> PAGE_SHIFT), prot); if (err) break; } while (pgd++, addr = next, addr != end); @@ -1215,9 +1262,13 @@ pte_t *page_table) { pte_t entry; - +#ifdef CONFIG_SUPER_PAGE + entry = maybe_mkwrite(pte_mkdirty(mk_pte(new_page, vma->vm_page_prot, 0)), + vma); +#else entry = maybe_mkwrite(pte_mkdirty(mk_pte(new_page, vma->vm_page_prot)), vma); +#endif ptep_establish(vma, address, page_table, entry); update_mmu_cache(vma, address, entry); lazy_mmu_prot_update(entry); @@ -1258,7 +1309,7 @@ * data, but for the moment just pretend this is OOM. */ pte_unmap(page_table); - printk(KERN_ERR "do_wp_page: bogus page at address %08lx\n", + sp_printk(KERN_ERR "do_wp_page: bogus page at address %08lx\n", address); spin_unlock(&mm->page_table_lock); return VM_FAULT_OOM; @@ -1301,6 +1352,7 @@ goto no_new_page; copy_user_highpage(new_page, old_page, address); } + /* * Re-check the pte - we dropped the lock */ @@ -1716,7 +1768,11 @@ /* The page isn't present yet, go ahead with the fault. 
*/ inc_mm_counter(mm, rss); +#ifdef CONFIG_SUPER_PAGE + pte = mk_pte(page, vma->vm_page_prot, 0); +#else pte = mk_pte(page, vma->vm_page_prot); +#endif if (write_access && can_share_swap_page(page)) { pte = maybe_mkwrite(pte_mkdirty(pte), vma); write_access = 0; @@ -1753,6 +1809,25 @@ goto out; } +/* check that consecutive pages in a superpage are clear */ +static int superpage_check_clear(pte_t *page_table, int order) +{ + int i; + pte_t *sp_start; + if (order == 0) + return 0; + /* otherwise walk pages and make sure that they are clear */ + sp_start = (pte_t *)((unsigned long)page_table & ~((1UL << (order + SIZEOF_PTE_LOG2)) -1)); + for(i = 0; i < (1 << order); i++) + { + if (!pte_none(*(sp_start + i))) { + BUG_ON(1); + return -1; + } + } + return 0; +} + /* * We are called with the MM semaphore and page_table_lock * spinlock held to protect against concurrent faults in @@ -1766,20 +1841,57 @@ pte_t entry; struct page * page = ZERO_PAGE(addr); + int i; + /* order of the superpage */ + int order = itir_to_sp(pte_itir(*page_table)); + /* user address that the sp starts at */ + unsigned long sp_addr = addr & ~((PAGE_SIZE << order) - 1); + /* pte that the superpage starts at */ + pte_t *sp_page_table = (pte_t *)((unsigned long)page_table & ~((1UL << (order + SIZEOF_PTE_LOG2)) -1)); + /* Read-only mapping of ZERO_PAGE. 
*/ +#ifdef CONFIG_SUPER_PAGE + entry = pte_wrprotect(mk_pte(ZERO_PAGE(addr), vma->vm_page_prot, 0)); +#else entry = pte_wrprotect(mk_pte(ZERO_PAGE(addr), vma->vm_page_prot)); +#endif + + if (!write_access) + { + sp_printk("[do_anonymous_page] !write_access\n"); + set_pte_at(mm, addr, page_table, entry); + pte_unmap(page_table); + /* No need to invalidate - it was non-present before */ + update_mmu_cache(vma, addr, entry); + lazy_mmu_prot_update(entry); + spin_unlock(&mm->page_table_lock); + + goto out; + } /* ..except if it's a write access */ - if (write_access) { + else if (write_access) { + /* check all pte's in the superpage are not used and ready to go */ + superpage_check_clear(sp_page_table, order); + /* Allocate our own private page. */ pte_unmap(page_table); spin_unlock(&mm->page_table_lock); - if (unlikely(anon_vma_prepare(vma))) + if (unlikely(anon_vma_prepare(vma))) { + sp_printk("[do_anonymous_page] anon_vma_prepare(vma)\n"); goto no_mem; - page = alloc_zeroed_user_highpage(vma, addr); + } + page = alloc_pages(GFP_HIGHUSER, order); if (!page) + { + sp_printk("[do_anonymous_page] goto no_mem (order %d)\n", order); goto no_mem; + } + + /* clear pages in superpage */ + for (i=0; i < 1 << order; i++) + clear_user_highpage(page+i, sp_addr + (i * PAGE_SIZE)); spin_lock(&mm->page_table_lock); page_table = pte_offset_map(pmd, addr); @@ -1788,22 +1900,35 @@ pte_unmap(page_table); page_cache_release(page); spin_unlock(&mm->page_table_lock); + sp_printk("[do_anonymous_page] !pte_none(*page_table)\n"); goto out; } + + /* for each page */ + for (i=0; i < (1 << order); i++) + { + //entry = pte_mkwrite(pte_mkdirty(mk_pte(page+i, __pgprot(pgprot_val(vma->vm_page_prot)), order))); + entry = mk_pte(page+i,PAGE_SHARED, order); + //mm->rss++; + //flush_page_to_ram(page+i); + //lru_cache_add_active(page+i); + SetPageReferenced(page+i); + get_page(page+i); + //set_pte_raw(sp_page_table+i, entry); + set_pte_at(mm, addr, sp_page_table+i, entry); + page_add_anon_rmap(page+i, 
vma, sp_addr + (i * PAGE_SIZE)); + sp_printk("[do_anonymous_page] sp_page_table %p\n", sp_page_table + i); + sp_printk("[do_anonymous_page] page + i = %lx\n", page + i); + sp_printk("[do_anonymous_page] itir = %lx\n", pte_itir(*(sp_page_table + i))); + /* No need to invalidate - it was non-present before */ + update_mmu_cache(vma, addr, entry); + //pte_unmap(wktable+i); + } + inc_mm_counter(mm, rss); - entry = maybe_mkwrite(pte_mkdirty(mk_pte(page, - vma->vm_page_prot)), - vma); - lru_cache_add_active(page); - SetPageReferenced(page); - page_add_anon_rmap(page, vma, addr); + pte_unmap(page_table); } - set_pte_at(mm, addr, page_table, entry); - pte_unmap(page_table); - - /* No need to invalidate - it was non-present before */ - update_mmu_cache(vma, addr, entry); lazy_mmu_prot_update(entry); spin_unlock(&mm->page_table_lock); out: @@ -1835,13 +1960,18 @@ int ret = VM_FAULT_MINOR; int anon = 0; + sp_printk("[do_no_page] start (address %lx)\n", address); + if (!vma->vm_ops || !vma->vm_ops->nopage) return do_anonymous_page(mm, vma, page_table, pmd, write_access, address); pte_unmap(page_table); spin_unlock(&mm->page_table_lock); + sp_printk("[do_no_page] didn't call do_anonymous_page\n"); + if (vma->vm_file) { + sp_printk("[do_no_page] vma->vm_file true [%s]\n", vma->vm_file->f_dentry->d_name.name); mapping = vma->vm_file->f_mapping; sequence = mapping->truncate_count; smp_rmb(); /* serializes i_size against truncate_count */ @@ -1869,6 +1999,8 @@ if (write_access && !(vma->vm_flags & VM_SHARED)) { struct page *page; + sp_printk("[do_no_page] write_access && !(vma->vm_flags & VM_SHARED)\n"); + if (unlikely(anon_vma_prepare(vma))) goto oom; page = alloc_page_vma(GFP_HIGHUSER, vma, address); @@ -1910,7 +2042,11 @@ inc_mm_counter(mm, rss); flush_icache_page(vma, new_page); +#ifdef CONFIG_SUPER_PAGE + entry = mk_pte(new_page, vma->vm_page_prot, 0); +#else entry = mk_pte(new_page, vma->vm_page_prot); +#endif /* CONFIG_SUPER_PAGE */ if (write_access) entry = 
maybe_mkwrite(pte_mkdirty(entry), vma); set_pte_at(mm, address, page_table, entry); @@ -2002,6 +2138,8 @@ { pte_t entry; + sp_printk("[handle_pte_fault] handle fault for %lx (pte %p, itir %lx)\n", address, pte, pte_itir(*pte)); + sp_printk("[handle_pte_fault] (pte %p, itir %lx, %s)\n", pte, pte_itir(*pte), pte_present(*pte) ? "present" : "not present"); entry = *pte; if (!pte_present(entry)) { /* @@ -2010,13 +2148,21 @@ * drop the lock. */ if (pte_none(entry)) + { + sp_printk("[handle_pte_fault] call do_no_page\n"); return do_no_page(mm, vma, address, write_access, pte, pmd); + } if (pte_file(entry)) + { + sp_printk("[handle_pte_fault] call do_file_page\n"); return do_file_page(mm, vma, address, write_access, pte, pmd); + } + sp_printk("[handle_pte_fault] swap page!\n"); return do_swap_page(mm, vma, address, pte, pmd, entry, write_access); } if (write_access) { + sp_printk("[handle_pte_fault] write_access\n"); if (!pte_write(entry)) return do_wp_page(mm, vma, address, pte, pmd, entry); entry = pte_mkdirty(entry); @@ -2027,6 +2173,7 @@ lazy_mmu_prot_update(entry); pte_unmap(pte); spin_unlock(&mm->page_table_lock); + sp_printk("[handle_pte_fault] return minor fault\n"); return VM_FAULT_MINOR; } @@ -2066,7 +2213,9 @@ pte = pte_alloc_map(mm, pmd, address); if (!pte) goto oom; - + + sp_printk(KERN_INFO "[handle_mm_fault] pte=%p, *pte=%lx, itir=%lx\n", pte, *(unsigned long *)pte, (unsigned long)pte_itir(*pte)); + return handle_pte_fault(mm, vma, address, write_access, pte, pmd); oom: Index: linux-2.6/mm/mmap.c =================================================================== --- linux-2.6.orig/mm/mmap.c 2005-09-19 09:53:05.000000000 +1000 +++ linux-2.6/mm/mmap.c 2005-09-19 10:06:14.000000000 +1000 @@ -915,6 +915,7 @@ /* Obtain the address to map to. we verify (or select) it and ensure * that it represents a valid section of the address space. 
*/ + sp_printk(KERN_ERR "[do_mmap_pgoff] calling get_unmapped_area\n"); addr = get_unmapped_area(file, addr, len, pgoff, flags); if (addr & ~PAGE_MASK) return addr; @@ -1121,6 +1122,13 @@ pgoff, flags & MAP_NONBLOCK); down_write(&mm->mmap_sem); } +#ifdef CONFIG_SUPER_PAGE + /* if superpages on and this length is greater than a order one superpage */ + if (super_page_on && len >= SUPER_PAGE_SIZE(1) && !file) { + sp_printk(KERN_ERR "[do_mmap_pgoff] calling make ptes present\n"); + make_ptes_present(addr, addr + len); + } +#endif return addr; unmap_and_free_vma: @@ -1154,6 +1162,9 @@ * This function "knows" that -ENOMEM has the bits set. */ #ifndef HAVE_ARCH_UNMAPPED_AREA +#if CONFIG_SUPER_PAGE +extern int super_page_vm_align; +#endif unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) @@ -1161,6 +1172,10 @@ struct mm_struct *mm = current->mm; struct vm_area_struct *vma; unsigned long start_addr; +#if CONFIG_SUPER_PAGE + unsigned long super_page_mask=0; + sp_printk(KERN_ERR "[arch_get_unmapped_area] addr=%lx, len=%lx\n", addr, len); +#endif if (len > TASK_SIZE) return -ENOMEM; @@ -1180,6 +1195,18 @@ } full_search: +#if CONFIG_SUPER_PAGE + if (super_page_vm_align) { + int i; + for (i = super_page_nr-1; i > 0; i--) { + if (len > SUPER_PAGE_SIZE(i)) { + super_page_mask = (SUPER_PAGE_SIZE(i) - 1); + break; + } + } + } +#endif + for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { /* At this point: (!vma || addr < vma->vm_end). 
*/ if (TASK_SIZE - len < addr) { @@ -1204,7 +1231,13 @@ } if (addr + mm->cached_hole_size < vma->vm_start) mm->cached_hole_size = vma->vm_start - addr; +#if CONFIG_SUPER_PAGE + sp_printk(KERN_ERR "[arch_get_unmapped_area] addr before %lx\n", addr); + addr = ((vma->vm_end + super_page_mask) & ~super_page_mask); + sp_printk(KERN_ERR "[arch_get_unmapped_area] addr after %lx\n", addr); +#else addr = vma->vm_end; +#endif } } #endif Index: linux-2.6/arch/ia64/Kconfig =================================================================== --- linux-2.6.orig/arch/ia64/Kconfig 2005-09-19 10:05:51.000000000 +1000 +++ linux-2.6/arch/ia64/Kconfig 2005-09-19 10:06:14.000000000 +1000 @@ -164,6 +164,12 @@ source kernel/Kconfig.hz +config SUPER_PAGE + bool "Superpage Support" + depends on IA64_LONG_FORMAT_VHPT + help + Superpages for IA64 + config IA64_BRL_EMU bool depends on ITANIUM Index: linux-2.6/mm/vmalloc.c =================================================================== --- linux-2.6.orig/mm/vmalloc.c 2005-09-19 09:53:06.000000000 +1000 +++ linux-2.6/mm/vmalloc.c 2005-09-19 10:06:14.000000000 +1000 @@ -96,7 +96,11 @@ WARN_ON(!pte_none(*pte)); if (!page) return -ENOMEM; +#ifdef CONFIG_SUPER_PAGE + set_pte_at(&init_mm, addr, pte, mk_pte(page, prot, 0)); +#else set_pte_at(&init_mm, addr, pte, mk_pte(page, prot)); +#endif (*pages)++; } while (pte++, addr += PAGE_SIZE, addr != end); return 0; Index: linux-2.6/mm/Makefile =================================================================== --- linux-2.6.orig/mm/Makefile 2005-09-19 09:53:05.000000000 +1000 +++ linux-2.6/mm/Makefile 2005-09-19 10:06:14.000000000 +1000 @@ -17,6 +17,7 @@ obj-$(CONFIG_NUMA) += mempolicy.o obj-$(CONFIG_SPARSEMEM) += sparse.o obj-$(CONFIG_SHMEM) += shmem.o +obj-$(CONFIG_SUPER_PAGE) += super_page.o obj-$(CONFIG_TINY_SHMEM) += tiny-shmem.o obj-$(CONFIG_FS_XIP) += filemap_xip.o Index: linux-2.6/mm/highmem.c =================================================================== --- linux-2.6.orig/mm/highmem.c 
2005-09-19 09:53:05.000000000 +1000 +++ linux-2.6/mm/highmem.c 2005-09-19 10:06:14.000000000 +1000 @@ -139,8 +139,13 @@ } } vaddr = PKMAP_ADDR(last_pkmap_nr); +#ifdef CONFIG_SUPER_PAGE + set_pte_at(&init_mm, vaddr, + &(pkmap_page_table[last_pkmap_nr]), mk_pte(page, kmap_prot, 0)); +#else set_pte_at(&init_mm, vaddr, &(pkmap_page_table[last_pkmap_nr]), mk_pte(page, kmap_prot)); +#endif pkmap_count[last_pkmap_nr] = 1; set_page_address(page, (void *)vaddr); Index: linux-2.6/mm/fremap.c =================================================================== --- linux-2.6.orig/mm/fremap.c 2005-09-19 09:53:05.000000000 +1000 +++ linux-2.6/mm/fremap.c 2005-09-19 10:06:14.000000000 +1000 @@ -94,7 +94,11 @@ inc_mm_counter(mm,rss); flush_icache_page(vma, page); +#ifdef CONFIG_SUPER_PAGE + set_pte_at(mm, addr, pte, mk_pte(page, prot, 0)); +#else set_pte_at(mm, addr, pte, mk_pte(page, prot)); +#endif page_add_file_rmap(page); pte_val = *pte; pte_unmap(pte); Index: linux-2.6/mm/swapfile.c =================================================================== --- linux-2.6.orig/mm/swapfile.c 2005-09-19 09:53:06.000000000 +1000 +++ linux-2.6/mm/swapfile.c 2005-09-19 10:06:14.000000000 +1000 @@ -410,8 +410,13 @@ { inc_mm_counter(vma->vm_mm, rss); get_page(page); +#ifdef CONFIG_SUPER_PAGE + set_pte_at(vma->vm_mm, addr, pte, + pte_mkold(mk_pte(page, vma->vm_page_prot, 0))); +#else set_pte_at(vma->vm_mm, addr, pte, pte_mkold(mk_pte(page, vma->vm_page_prot))); +#endif page_add_anon_rmap(page, vma, addr); swap_free(entry); /* Index: linux-2.6/include/linux/mm.h =================================================================== --- linux-2.6.orig/include/linux/mm.h 2005-09-19 09:53:04.000000000 +1000 +++ linux-2.6/include/linux/mm.h 2005-09-19 10:06:14.000000000 +1000 @@ -970,5 +970,24 @@ /* /proc//oom_adj set to -17 protects from the oom-killer */ #define OOM_DISABLE -17 +#if CONFIG_SUPER_PAGE +extern int super_page_on; +extern int super_page_nr; +extern unsigned long super_page_reserve[]; 
+extern unsigned long super_page_allocate[]; +extern unsigned long super_page_downgrade[]; +void super_page_init(void); +int make_ptes_present(unsigned long addr, unsigned long end); +void __break_area (struct page *page, int order); +void adj_sp_range(struct mm_struct *mm, + int zap,unsigned long address, unsigned long end); +#define break_area(page, order) __break_area(page, order) + +/* Like PAGE_SIZE but for a given super page order */ +#define SUPER_PAGE_SIZE(order) ((PAGE_SIZE << super_page_order[order])) +#define SUPER_PAGE_BASE_SIZE 0 + +#endif /* CONFIG_SUPER_PAGE */ + #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ Index: linux-2.6/arch/ia64/kernel/efi.c =================================================================== --- linux-2.6.orig/arch/ia64/kernel/efi.c 2005-09-19 09:52:37.000000000 +1000 +++ linux-2.6/arch/ia64/kernel/efi.c 2005-09-19 10:06:14.000000000 +1000 @@ -347,7 +347,6 @@ last_granule_addr = GRANULEROUNDDOWN(first_non_wb_addr); if (last_granule_addr < md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) trim_top(md, last_granule_addr); - if (is_available_memory(md)) { if (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) >= max_addr) { if (md->phys_addr >= max_addr) @@ -526,9 +525,16 @@ * Cannot write to CRx with PSR.ic=1 */ psr = ia64_clear_ic(); +#ifdef CONFIG_SUPER_PAGE + /* TODO is PAGE_SHIFT the correct value? 
*/ + ia64_itr(0x1, IA64_TR_PALCODE, GRANULEROUNDDOWN((unsigned long) pal_vaddr), + pte_val(pfn_pte(__pa(pal_vaddr) >> PAGE_SHIFT, PAGE_KERNEL, 0)), + IA64_GRANULE_SHIFT); +#else ia64_itr(0x1, IA64_TR_PALCODE, GRANULEROUNDDOWN((unsigned long) pal_vaddr), - pte_val(pfn_pte(__pa(pal_vaddr) >> PAGE_SHIFT, PAGE_KERNEL)), - IA64_GRANULE_SHIFT); + pte_val(pfn_pte(__pa(pal_vaddr) >> PAGE_SHIFT, PAGE_KERNEL)), + IA64_GRANULE_SHIFT); +#endif ia64_set_psr(psr); /* restore psr */ ia64_srlz_i(); } Index: linux-2.6/kernel/sysctl.c =================================================================== --- linux-2.6.orig/kernel/sysctl.c 2005-09-19 09:53:05.000000000 +1000 +++ linux-2.6/kernel/sysctl.c 2005-09-19 10:06:14.000000000 +1000 @@ -173,6 +173,11 @@ static void unregister_proc_table(ctl_table *, struct proc_dir_entry *); #endif +#if CONFIG_SUPER_PAGE +void super_page_init(void); +#endif + + /* The default sysctl tables: */ static ctl_table root_table[] = { @@ -998,6 +1003,10 @@ register_proc_table(root_table, proc_sys_root); init_irq_proc(); #endif +#ifdef CONFIG_SUPER_PAGE + printk(KERN_INFO "calling super_page_init()\n"); + super_page_init(); +#endif } int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, Index: linux-2.6/include/asm-ia64/ivt.h =================================================================== --- linux-2.6.orig/include/asm-ia64/ivt.h 2005-09-19 10:05:51.000000000 +1000 +++ linux-2.6/include/asm-ia64/ivt.h 2005-09-22 13:09:15.000000000 +1000 @@ -7,9 +7,9 @@ */ #define temp r24 -#define rgn r25 /* region register */ -#define tir r26 /* translation register */ -#define mbz r27 /* "must be zero" else signifies non valid region */ +//#define rgn r25 /* region register */ +//#define tir r26 /* translation register */ +//#define mbz r27 /* "must be zero" else signifies non valid region */ #define scratch r28 @@ -60,7 +60,7 @@ shr.u r19=r22,PAGE_SHIFT; /* shift L3 index into position */ \ ;; \ (ok) cmp.eq.or.andcm fail,ok=ppte,r0; 
/* was L2 entry NULL? */ \ - dep ppte=r19,ppte,3,(PAGE_SHIFT-3); /* compute address of L3 page table entry */ + dep ppte=r19,ppte,PTE_ENTRY_BITS,(PAGE_SHIFT-PTE_ENTRY_BITS); /* compute address of L3 page table entry */ #ifdef CONFIG_IA64_LONG_FORMAT_VHPT /* @@ -71,13 +71,16 @@ * @hpte, hash page table entry * * Given a va get the ppte and load its value into pte +(p7) ld8 tir=[ppte],-8; load the itir field */ -#define LOAD_PTE_MISS(va, ppte, pte, hpte, failfn) \ +#define LOAD_PTE_MISS(va, ppte, pte, tir, hpte, failfn) \ ;; \ FIND_PTE(va, ppte, p6, p7) \ ;; \ -(p7) ld8 pte=[ppte]; \ +(p7) ld8 pte=[ppte],8; \ ;; \ +(p7) ld8 tir=[ppte],-8; \ + ;; /* then ,set ppte back */ \ (p7) tbit.z p6,p0=pte,_PAGE_P_BIT; /* page present bit cleared? */ \ (p6) br.cond.spnt failfn; @@ -96,7 +99,7 @@ */ #define tag r25 #define htag r26 -#define LOAD_PTE_FAULT(va, ppte, pte, hpte, failfn) \ +#define LOAD_PTE_FAULT(va, ppte, pte, tir, hpte, failfn) \ thash scratch=va; \ rsm psr.dt; \ ;; \ @@ -112,7 +115,9 @@ ;; \ cmp.ne p6,p7=htag, tag; /* verify tag */ \ ;; \ -(p7) ld8 pte=[ppte]; \ + (p7) ld8 pte=[ppte],8; \ + ;; \ + (p7) ld8 tir=[ppte],-8; \ ;; \ (p6) mov cr.iha=scratch; /* set cr.iha only if we are going to take */ \ (p6) br.cond.spnt failfn; /* the failfn fault, it depends on it */ @@ -127,10 +132,8 @@ * Insert the va into the VHPT and tlb, the tlb insert * happens in ivt.S for the appropriate fault instruction or data. 
*/
-#define tir		r26
-#define VHPT_INSERT(va, ppte, pte, hpte)				\
+#define VHPT_INSERT(va, ppte, pte, tir, hpte)				\
 	mov hpte=cr.iha;						\
-	mov tir=cr.itir;						\
 	;;								\
 	tpa hpte=hpte;	/* make hash address physical */		\
 	ttag tag=va;							\
@@ -146,8 +149,10 @@
 /*
  * Update the VHPT with pte value obtained from LOAD_PTE_FAULT
  */
-#define VHPT_UPDATE(cond, pte, hpte)					\
-(cond)	st8 [hpte]=pte,16;
+#define VHPT_UPDATE(cond, pte, tir, hpte)				\
+(cond)	st8 [hpte]=pte,8;	/* pte word, then step to the next 8-byte slot */	\
+;;									\
+(cond)	st8 [hpte]=tir,8;	/* tir word; hpte ends 16 bytes on, as before */

 /*
  * Invalidate the tlb for the VHPT pointing to hpte, this is achieved by
@@ -173,7 +178,7 @@
  * Given a va get the ppte and load its value into pte
  * Loads r30 with a continuation address.
  */
-#define LOAD_PTE_MISS(va, ppte, pte, hpte, failfn)			\
+#define LOAD_PTE_MISS(va, ppte, pte, tir, hpte, failfn)		\
 	mov ppte=cr.iha;	/* get virtual address of L3 PTE */	\
 	movl r30=1f;		/* load continuation point */		\
 	;;								\
@@ -188,15 +193,15 @@
  * gets a PTE using the faulting address.
  * Broken on Merced.
  */
-#define LOAD_PTE_FAULT(va, ppte, pte, hpte, failfn)			\
+#define LOAD_PTE_FAULT(va, ppte, pte, tir, hpte, failfn)		\
 	thash ppte=va;		/* get virtual address of L3 PTE */	\
 	movl r30=1f;		/* load continuation point */		\
 	;;								\
 1:	ld8 pte=[ppte];							\
 	mov b0=r29;		/* restore possibly destroyed b0 */

-#define VHPT_INSERT(va, ppte, pte, hpte)	/* nothing */
-#define VHPT_UPDATE(cond, pte, hpte)		/* nothing */
+#define VHPT_INSERT(va, ppte, pte, tir, hpte)	/* nothing; same parameter order as the long-format variant */
+#define VHPT_UPDATE(cond, pte, tir, hpte)	/* nothing */
 #define VHPT_PURGE(cond, hpte)			/* nothing */

 #endif /* !CONFIG_IA64_LONG_FORMAT_VHPT */
Index: linux-2.6/mm/page_alloc.c
===================================================================
--- linux-2.6.orig/mm/page_alloc.c	2005-09-19 09:53:06.000000000 +1000
+++ linux-2.6/mm/page_alloc.c	2005-09-19 10:06:14.000000000 +1000
@@ -879,12 +879,17 @@
 					goto got_pg;
 			}
 		}
+		printk(KERN_INFO "[__alloc_pages] memalloc = %lu\n", p->flags & PF_MEMALLOC);
+		printk(KERN_INFO "[__alloc_pages] memdie = %d\n",
test_thread_flag(TIF_MEMDIE));
 		goto nopage;
 	}

 	/* Atomic allocations - we can't balance anything */
 	if (!wait)
+	{
+		printk(KERN_INFO "[__alloc_pages] !(wait)\n");
 		goto nopage;
+	}

 rebalance:
 	cond_resched();
Index: linux-2.6/arch/ia64/kernel/mca.c
===================================================================
--- linux-2.6.orig/arch/ia64/kernel/mca.c	2005-09-19 09:52:37.000000000 +1000
+++ linux-2.6/arch/ia64/kernel/mca.c	2005-09-19 10:06:14.000000000 +1000
@@ -1414,7 +1414,11 @@
 	 * We may need it during MCA recovery.
 	 */
 	__get_cpu_var(ia64_mca_per_cpu_pte) =
+#ifdef CONFIG_SUPER_PAGE
+		pte_val(mk_pte_phys(__pa(cpu_data), PAGE_KERNEL, 0));
+#else
 		pte_val(mk_pte_phys(__pa(cpu_data), PAGE_KERNEL));
+#endif

 	/*
 	 * Also, stash away a copy of the PAL address and the PTE
@@ -1425,8 +1429,13 @@
 		return;
 	__get_cpu_var(ia64_mca_pal_base) =
 		GRANULEROUNDDOWN((unsigned long) pal_vaddr);
-	__get_cpu_var(ia64_mca_pal_pte) = pte_val(mk_pte_phys(__pa(pal_vaddr),
-							      PAGE_KERNEL));
+#ifdef CONFIG_SUPER_PAGE	/* whole statements per branch: no directives inside pte_val()'s arguments */
+	__get_cpu_var(ia64_mca_pal_pte) =
+		pte_val(mk_pte_phys(__pa(pal_vaddr), PAGE_KERNEL, 0));
+#else
+	__get_cpu_var(ia64_mca_pal_pte) =
+		pte_val(mk_pte_phys(__pa(pal_vaddr), PAGE_KERNEL));
+#endif
 }

 /*
Index: linux-2.6/arch/ia64/mm/fault.c
===================================================================
--- linux-2.6.orig/arch/ia64/mm/fault.c	2005-09-19 09:52:37.000000000 +1000
+++ linux-2.6/arch/ia64/mm/fault.c	2005-09-23 14:40:26.000000000 +1000
@@ -86,6 +86,10 @@
 	struct siginfo si;
 	unsigned long mask;

+
+	sp_printk("[ia64_do_page_fault] fault @ 0x%lx (vhpt 0x%lx : iip %lx)\n", address,
+		ia64_thash(address), regs->cr_iip);
+
 	/*
 	 * If we're in an interrupt or have no user context, we must not take the fault..
*/
@@ -169,6 +173,7 @@
 		BUG();
 	}
 	up_read(&mm->mmap_sem);
+	sp_printk("[ia64_do_page_fault] survived!\n\n");
 	return;

   check_expansion:
@@ -214,6 +219,7 @@
 	si.si_isr = isr;
 	si.si_flags = __ISR_VALID;
 	force_sig_info(signal, &si, current);
+	sp_printk("[ia64_do_page_fault] leaving with signal %d\n\n\n", signal);
 	return;
 }

@@ -271,3 +277,38 @@
 		do_exit(SIGKILL);
 	goto no_context;
 }
+
+void __kprobes
+ia64_do_debug_fault (unsigned long address, unsigned long isr, struct pt_regs *regs, unsigned long code)
+{
+	/* Debug hook: logs the fault named by 'code'. NOTE(review): the return below parks it, so the switch is unreachable. */
+	return;
+	switch (code) {
+	case 0:
+		printk(KERN_ERR "[ia64_debug_fault] VHPT MISS???? (%lx)\n", address);
+		break;
+	case 1:
+		printk(KERN_ERR "[ia64_debug_fault] ITLB MISS addr=%lx itir=%lx \n", address, isr);
+		break;
+	case 2:
+		printk(KERN_ERR "[ia64_debug_fault] DTLB MISS addr=%lx itir=%lx\n", address, isr);
+		break;
+	case 3:
+		printk(KERN_ERR "[ia64_debug_fault] ALT ITLB MISS %lx\n", address);
+		break;
+	case 5:
+		printk(KERN_ERR "[ia64_debug_fault] !!!!! NESTED TLB MISS addr=%lx !!!!!\n", address);
+		break;
+	case 8:
+		printk(KERN_ERR "[ia64_debug_fault] DIRTY MISS %lx\n", address);
+		break;
+	case 9:
+		printk(KERN_ERR "[ia64_debug_fault] IACCESS MISS %lx\n", address);
+		break;
+	case 10:
+		printk(KERN_ERR "[ia64_debug_fault] DACCESS MISS %lx\n", address);
+		break;
+	default:
+		printk(KERN_ERR "[ia64_debug_fault] UNKNOWN MISS (%lx, %lx)\n", code, address);
+	}
+}
Index: linux-2.6/mm/mremap.c
===================================================================
--- linux-2.6.orig/mm/mremap.c	2005-09-19 09:53:05.000000000 +1000
+++ linux-2.6/mm/mremap.c	2005-09-19 10:06:14.000000000 +1000
@@ -144,7 +144,11 @@
 			/* ZERO_PAGE can be dependant on virtual addr */
 			if (pfn_valid(pte_pfn(pte)) &&
 				pte_page(pte) == ZERO_PAGE(old_addr))
+#ifdef CONFIG_SUPER_PAGE
+				pte = pte_wrprotect(mk_pte(ZERO_PAGE(new_addr), new_vma->vm_page_prot, 0));
+#else
 				pte = pte_wrprotect(mk_pte(ZERO_PAGE(new_addr), new_vma->vm_page_prot));
+#endif
 			set_pte_at(mm, new_addr, dst, pte);
 		} else
 			error
= -ENOMEM;