Re: hugetlb demand paging patch part [2/3]

From: David Gibson <david@gibson.dropbear.id.au>
Date: 2004-04-16 12:34:42
On Thu, Apr 15, 2004 at 10:27:58AM -0700, Chen, Kenneth W wrote:
> >>>> David Gibson wrote on Thursday, April 15, 2004 12:17 AM
> > > diff -Nurp linux-2.6.5/mm/memory.c linux-2.6.5.htlb/mm/memory.c
> > > +++ linux-2.6.5.htlb/mm/memory.c	2004-04-13 12:02:31.000000000 -0700
> > > @@ -769,11 +769,6 @@ int get_user_pages(struct task_struct *t
> > >  		if ((pages && vm_io) || !(flags & vma->vm_flags))
> > >  			return i ? : -EFAULT;
> > >
> > > -		if (is_vm_hugetlb_page(vma)) {
> > > -			i = follow_hugetlb_page(mm, vma, pages, vmas,
> > > -						&start, &len, i);
> > > -			continue;
> > > -		}
> > >  		spin_lock(&mm->page_table_lock);
> > >  		do {
> > >  			struct page *map = NULL;
> >
> > Ok, I notice that you've removed the follow_hugetlb_page() function
> > (and from the arch-specific stuff, as well).  As far as I can tell,
> > this isn't actually related to demand-paging; in fact, as far as I
> > can tell this function is unnecessary
> 
> That was the reason I removed the function: it is no longer used
> with demand paging.

Yes, but what I'm saying is that, as far as I can tell, it shouldn't
be necessary even *without* demand paging.  Again, I'm trying to
separate cleanups from new features in your patches.

> > should already work for huge pages.  In particular, the path in
> > get_user_pages() which can call handle_mm_fault() (which won't work
> > on hugepages without your patch) should never be triggered, since
> > hugepages are all prefaulted.
> 
> > Does that sound right?  In other words, do you think the patch below,
> > which just kills off follow_hugetlb_page() is safe, or have I missed
> > something?
> >
> > Index: working-2.6/mm/memory.c
> > ===================================================================
> > +++ working-2.6/mm/memory.c	2004-04-15 17:03:01.421905400 +1000
> > @@ -766,16 +766,13 @@
> > [snip]
> >  		spin_lock(&mm->page_table_lock);
> >  		do {
> >  			struct page *map;
> >  			int lookup_write = write;
> >  			while (!(map = follow_page(mm, start, lookup_write))) {
> > +				/* hugepages should always be prefaulted */
> > +				BUG_ON(is_vm_hugetlb_page(vma));
> >  				/*
> >  				 * Shortcut for anonymous pages. We don't want
> >  				 * to force the creation of pages tables for
> 
> This portion is incorrect, because it will trigger the BUG_ON every
> time a hugetlb page faults.

Ah, yes, I did that because I was (and am) thinking of the case
without demand paging.  But I've just realised that of course we can
get a fault even in that case, if there's a mapping truncation at the
wrong time.  Removing the BUG_ON does mean that it's significant that
the never-called hugetlb nopage function is non-NULL, so that
untouched_anonymous_page() returns 0 before looking at the page
tables, which is a bit more subtle than I'd like.  Nonetheless,
revised patch below.
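
(For reference, the shortcut in question is untouched_anonymous_page();
roughly -- quoting 2.6.5's mm/memory.c from memory, so details may be
off -- it looks like this:

	static inline int untouched_anonymous_page(struct mm_struct *mm,
						   struct vm_area_struct *vma,
						   unsigned long address)
	{
		pgd_t *pgd;
		pmd_t *pmd;

		/* A vma with a ->nopage op is not anonymous, so the
		 * shortcut must never be taken for it */
		if (vma->vm_ops && vma->vm_ops->nopage)
			return 0;

		/* Otherwise the page is "untouched" iff no page table
		 * entry has been instantiated for the address yet */
		pgd = pgd_offset(mm, address);
		if (pgd_none(*pgd))
			return 1;
		pmd = pmd_offset(pgd, address);
		if (pmd_none(*pmd))
			return 1;

		return 0;
	}

which is why the non-NULL nopage hook on hugetlb vmas matters: it is
what stops a not-yet-instantiated hugepage from being misreported as an
untouched anonymous page.)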

> Yes, killing follow_hugetlb_page() is safe, because follow_page()
> takes care of hugetlb pages.  See the 2nd patch posted earlier,
> hugetlb_demanding_generic.patch

Yes, I looked at it already.  But what I'm asking about is applying
this patch *without* (or before) going to demand paging.
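
(To be concrete about why that's safe: follow_page() dispatches hugetlb
addresses before it ever walks the normal page tables.  Roughly --
again quoting 2.6.5's mm/memory.c from memory, abbreviated:

	struct page *
	follow_page(struct mm_struct *mm, unsigned long address, int write)
	{
		struct vm_area_struct *vma;

		/* arch hook: an address inside a hugepage region is
		 * handled entirely by follow_huge_addr() */
		vma = hugepage_vma(mm, address);
		if (vma)
			return follow_huge_addr(mm, vma, address, write);

		/* ... normal pgd/pmd/pte walk, including a pmd_huge()
		 * check which hands huge pmds to follow_huge_pmd() ... */
	}

so get_user_pages() gets hugepage coverage for free once the special
case is gone.)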

Index: working-2.6/mm/memory.c
===================================================================
--- working-2.6.orig/mm/memory.c	2004-04-13 11:42:42.000000000 +1000
+++ working-2.6/mm/memory.c	2004-04-16 11:46:31.935870496 +1000
@@ -766,16 +766,13 @@
 				|| !(flags & vma->vm_flags))
 			return i ? : -EFAULT;
 
-		if (is_vm_hugetlb_page(vma)) {
-			i = follow_hugetlb_page(mm, vma, pages, vmas,
-						&start, &len, i);
-			continue;
-		}
 		spin_lock(&mm->page_table_lock);
 		do {
 			struct page *map;
 			int lookup_write = write;
 			while (!(map = follow_page(mm, start, lookup_write))) {
+				/* hugepages should always be prefaulted */
+				BUG_ON(is_vm_hugetlb_page(vma));
 				/*
 				 * Shortcut for anonymous pages. We don't want
 				 * to force the creation of pages tables for
Index: working-2.6/include/linux/hugetlb.h
===================================================================
--- working-2.6.orig/include/linux/hugetlb.h	2004-04-13 11:42:41.000000000 +1000
+++ working-2.6/include/linux/hugetlb.h	2004-04-16 11:46:31.947868672 +1000
@@ -12,7 +12,6 @@
 
 int hugetlb_sysctl_handler(struct ctl_table *, int, struct file *, void *, size_t *);
 int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *);
-int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, struct page **, struct vm_area_struct **, unsigned long *, int *, int);
 void zap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long);
 void unmap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long);
 int hugetlb_prefault(struct address_space *, struct vm_area_struct *);
@@ -64,7 +63,6 @@
 	return 0;
 }
 
-#define follow_hugetlb_page(m,v,p,vs,a,b,i)	({ BUG(); 0; })
 #define follow_huge_addr(mm, vma, addr, write)	0
 #define copy_hugetlb_page_range(src, dst, vma)	({ BUG(); 0; })
 #define hugetlb_prefault(mapping, vma)		({ BUG(); 0; })
Index: working-2.6/arch/ppc64/mm/hugetlbpage.c
===================================================================
--- working-2.6.orig/arch/ppc64/mm/hugetlbpage.c	2004-04-13 11:42:35.000000000 +1000
+++ working-2.6/arch/ppc64/mm/hugetlbpage.c	2004-04-16 11:46:31.950868216 +1000
@@ -288,52 +288,6 @@
 	return 0;
 }
 
-int
-follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
-		    struct page **pages, struct vm_area_struct **vmas,
-		    unsigned long *position, int *length, int i)
-{
-	unsigned long vpfn, vaddr = *position;
-	int remainder = *length;
-
-	WARN_ON(!is_vm_hugetlb_page(vma));
-
-	vpfn = vaddr/PAGE_SIZE;
-	while (vaddr < vma->vm_end && remainder) {
-		BUG_ON(!in_hugepage_area(mm->context, vaddr));
-
-		if (pages) {
-			hugepte_t *pte;
-			struct page *page;
-
-			pte = hugepte_offset(mm, vaddr);
-
-			/* hugetlb should be locked, and hence, prefaulted */
-			WARN_ON(!pte || hugepte_none(*pte));
-
-			page = &hugepte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)];
-
-			WARN_ON(!PageCompound(page));
-
-			get_page(page);
-			pages[i] = page;
-		}
-
-		if (vmas)
-			vmas[i] = vma;
-
-		vaddr += PAGE_SIZE;
-		++vpfn;
-		--remainder;
-		++i;
-	}
-
-	*length = remainder;
-	*position = vaddr;
-
-	return i;
-}
-
 struct page *
 follow_huge_addr(struct mm_struct *mm,
 	struct vm_area_struct *vma, unsigned long address, int write)
Index: working-2.6/arch/i386/mm/hugetlbpage.c
===================================================================
--- working-2.6.orig/arch/i386/mm/hugetlbpage.c	2004-04-13 11:42:35.000000000 +1000
+++ working-2.6/arch/i386/mm/hugetlbpage.c	2004-04-16 11:49:11.914912248 +1000
@@ -93,65 +93,27 @@
 	return -ENOMEM;
 }
 
-int
-follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
-		    struct page **pages, struct vm_area_struct **vmas,
-		    unsigned long *position, int *length, int i)
-{
-	unsigned long vpfn, vaddr = *position;
-	int remainder = *length;
-
-	WARN_ON(!is_vm_hugetlb_page(vma));
-
-	vpfn = vaddr/PAGE_SIZE;
-	while (vaddr < vma->vm_end && remainder) {
-
-		if (pages) {
-			pte_t *pte;
-			struct page *page;
-
-			pte = huge_pte_offset(mm, vaddr);
-
-			/* hugetlb should be locked, and hence, prefaulted */
-			WARN_ON(!pte || pte_none(*pte));
-
-			page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)];
-
-			WARN_ON(!PageCompound(page));
-
-			get_page(page);
-			pages[i] = page;
-		}
-
-		if (vmas)
-			vmas[i] = vma;
-
-		vaddr += PAGE_SIZE;
-		++vpfn;
-		--remainder;
-		++i;
-	}
-
-	*length = remainder;
-	*position = vaddr;
-
-	return i;
-}
-
 #if 0	/* This is just for testing */
 struct page *
 follow_huge_addr(struct mm_struct *mm,
 	struct vm_area_struct *vma, unsigned long address, int write)
 {
-	unsigned long start = address;
-	int length = 1;
-	int nr;
+	int vpfn = address / PAGE_SIZE;
 	struct page *page;
+	pte_t *pte;
 
-	nr = follow_hugetlb_page(mm, vma, &page, NULL, &start, &length, 0);
-	if (nr == 1)
-		return page;
-	return NULL;
+	pte = huge_pte_offset(mm, address);
+
+	/* PTE could be absent if there's been a mapping truncation */
+	if (! pte || pte_none(*pte))
+		return NULL;
+
+	page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)];
+
+	WARN_ON(!PageCompound(page));
+
+	get_page(page);
+	return page;
 }
 
 /*
Index: working-2.6/arch/sparc64/mm/hugetlbpage.c
===================================================================
--- working-2.6.orig/arch/sparc64/mm/hugetlbpage.c	2004-04-13 11:42:35.000000000 +1000
+++ working-2.6/arch/sparc64/mm/hugetlbpage.c	2004-04-16 11:46:31.961866544 +1000
@@ -121,47 +121,6 @@
 	return -ENOMEM;
 }
 
-int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
-			struct page **pages, struct vm_area_struct **vmas,
-			unsigned long *position, int *length, int i)
-{
-	unsigned long vaddr = *position;
-	int remainder = *length;
-
-	WARN_ON(!is_vm_hugetlb_page(vma));
-
-	while (vaddr < vma->vm_end && remainder) {
-		if (pages) {
-			pte_t *pte;
-			struct page *page;
-
-			pte = huge_pte_offset(mm, vaddr);
-
-			/* hugetlb should be locked, and hence, prefaulted */
-			BUG_ON(!pte || pte_none(*pte));
-
-			page = pte_page(*pte);
-
-			WARN_ON(!PageCompound(page));
-
-			get_page(page);
-			pages[i] = page;
-		}
-
-		if (vmas)
-			vmas[i] = vma;
-
-		vaddr += PAGE_SIZE;
-		--remainder;
-		++i;
-	}
-
-	*length = remainder;
-	*position = vaddr;
-
-	return i;
-}
-
 struct page *follow_huge_addr(struct mm_struct *mm,
 			      struct vm_area_struct *vma,
 			      unsigned long address, int write)
Index: working-2.6/arch/ia64/mm/hugetlbpage.c
===================================================================
--- working-2.6.orig/arch/ia64/mm/hugetlbpage.c	2004-04-14 12:22:48.000000000 +1000
+++ working-2.6/arch/ia64/mm/hugetlbpage.c	2004-04-16 11:46:31.963866240 +1000
@@ -113,43 +113,6 @@
 	return -ENOMEM;
 }
 
-int
-follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
-		    struct page **pages, struct vm_area_struct **vmas,
-		    unsigned long *st, int *length, int i)
-{
-	pte_t *ptep, pte;
-	unsigned long start = *st;
-	unsigned long pstart;
-	int len = *length;
-	struct page *page;
-
-	do {
-		pstart = start & HPAGE_MASK;
-		ptep = huge_pte_offset(mm, start);
-		pte = *ptep;
-
-back1:
-		page = pte_page(pte);
-		if (pages) {
-			page += ((start & ~HPAGE_MASK) >> PAGE_SHIFT);
-			get_page(page);
-			pages[i] = page;
-		}
-		if (vmas)
-			vmas[i] = vma;
-		i++;
-		len--;
-		start += PAGE_SIZE;
-		if (((start & HPAGE_MASK) == pstart) && len &&
-				(start < vma->vm_end))
-			goto back1;
-	} while (len && start < vma->vm_end);
-	*length = len;
-	*st = start;
-	return i;
-}
-
 struct vm_area_struct *hugepage_vma(struct mm_struct *mm, unsigned long addr)
 {
 	if (mm->used_hugetlb) {
Index: working-2.6/arch/sh/mm/hugetlbpage.c
===================================================================
--- working-2.6.orig/arch/sh/mm/hugetlbpage.c	2004-04-13 11:42:35.000000000 +1000
+++ working-2.6/arch/sh/mm/hugetlbpage.c	2004-04-16 11:46:31.971865024 +1000
@@ -124,47 +124,6 @@
 	return -ENOMEM;
 }
 
-int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
-			struct page **pages, struct vm_area_struct **vmas,
-			unsigned long *position, int *length, int i)
-{
-	unsigned long vaddr = *position;
-	int remainder = *length;
-
-	WARN_ON(!is_vm_hugetlb_page(vma));
-
-	while (vaddr < vma->vm_end && remainder) {
-		if (pages) {
-			pte_t *pte;
-			struct page *page;
-
-			pte = huge_pte_offset(mm, vaddr);
-
-			/* hugetlb should be locked, and hence, prefaulted */
-			BUG_ON(!pte || pte_none(*pte));
-
-			page = pte_page(*pte);
-
-			WARN_ON(!PageCompound(page));
-
-			get_page(page);
-			pages[i] = page;
-		}
-
-		if (vmas)
-			vmas[i] = vma;
-
-		vaddr += PAGE_SIZE;
-		--remainder;
-		++i;
-	}
-
-	*length = remainder;
-	*position = vaddr;
-
-	return i;
-}
-
 struct page *follow_huge_addr(struct mm_struct *mm,
 			      struct vm_area_struct *vma,
 			      unsigned long address, int write)

-- 
David Gibson			| For every complex problem there is a
david AT gibson.dropbear.id.au	| solution which is simple, neat and
				| wrong.
http://www.ozlabs.org/people/dgibson